bplmodels 0.0.91

Files changed (93)
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +0 -0
  4. data/Rakefile +40 -0
  5. data/app/assets/javascripts/bplmodels/application.js +15 -0
  6. data/app/assets/stylesheets/bplmodels/application.css +13 -0
  7. data/app/controllers/bplmodels/application_controller.rb +4 -0
  8. data/app/helpers/bplmodels/application_helper.rb +4 -0
  9. data/app/models/bplmodels/audio_file.rb +14 -0
  10. data/app/models/bplmodels/book.rb +35 -0
  11. data/app/models/bplmodels/card.rb +35 -0
  12. data/app/models/bplmodels/characterization.rb +92 -0
  13. data/app/models/bplmodels/collection.rb +118 -0
  14. data/app/models/bplmodels/complex_object_base.rb +24 -0
  15. data/app/models/bplmodels/correspondence.rb +35 -0
  16. data/app/models/bplmodels/document.rb +35 -0
  17. data/app/models/bplmodels/document_file.rb +8 -0
  18. data/app/models/bplmodels/ephemera.rb +35 -0
  19. data/app/models/bplmodels/file.rb +151 -0
  20. data/app/models/bplmodels/file_content_datastream.rb +10 -0
  21. data/app/models/bplmodels/fits_datastream.rb +190 -0
  22. data/app/models/bplmodels/image.rb +14 -0
  23. data/app/models/bplmodels/image_file.rb +18 -0
  24. data/app/models/bplmodels/institution.rb +159 -0
  25. data/app/models/bplmodels/manuscript.rb +34 -0
  26. data/app/models/bplmodels/map.rb +34 -0
  27. data/app/models/bplmodels/mods_desc_metadata.rb +1826 -0
  28. data/app/models/bplmodels/musical_notation.rb +34 -0
  29. data/app/models/bplmodels/newspaper.rb +15 -0
  30. data/app/models/bplmodels/nom_terminology.rb +1242 -0
  31. data/app/models/bplmodels/non_photographic_print.rb +34 -0
  32. data/app/models/bplmodels/oai_collection.rb +19 -0
  33. data/app/models/bplmodels/oai_metadata.rb +75 -0
  34. data/app/models/bplmodels/oai_object.rb +45 -0
  35. data/app/models/bplmodels/object.rb +36 -0
  36. data/app/models/bplmodels/object_base.rb +1241 -0
  37. data/app/models/bplmodels/objects/collection.rb~ +28 -0
  38. data/app/models/bplmodels/objects/image.rb~ +59 -0
  39. data/app/models/bplmodels/objects/postcard.rb~ +56 -0
  40. data/app/models/bplmodels/organizational_set.rb +25 -0
  41. data/app/models/bplmodels/periodical.rb +37 -0
  42. data/app/models/bplmodels/photographic_print.rb +34 -0
  43. data/app/models/bplmodels/relation_base.rb +99 -0
  44. data/app/models/bplmodels/scrapbook.rb +35 -0
  45. data/app/models/bplmodels/simple_object_base.rb +27 -0
  46. data/app/models/bplmodels/sound_recording.rb +15 -0
  47. data/app/models/bplmodels/system_collection.rb +8 -0
  48. data/app/models/bplmodels/uploads_set.rb +3 -0
  49. data/app/models/bplmodels/workflow_metadata.rb +99 -0
  50. data/app/views/layouts/bplmodels/application.html.erb +14 -0
  51. data/config/application.rb +6 -0
  52. data/config/predicate_mappings.yml +61 -0
  53. data/config/routes.rb +2 -0
  54. data/lib/bplmodels.rb +21 -0
  55. data/lib/bplmodels/constants.rb +119 -0
  56. data/lib/bplmodels/datastream_input_funcs.rb +949 -0
  57. data/lib/bplmodels/engine.rb +5 -0
  58. data/lib/bplmodels/engine.rb~ +5 -0
  59. data/lib/bplmodels/finder.rb +192 -0
  60. data/lib/bplmodels/object_funcs.rb +10 -0
  61. data/lib/bplmodels/version.rb +3 -0
  62. data/lib/tasks/bplmodels_tasks.rake +4 -0
  63. data/test/bplmodels_test.rb +7 -0
  64. data/test/dummy/README.rdoc +261 -0
  65. data/test/dummy/Rakefile +7 -0
  66. data/test/dummy/app/assets/javascripts/application.js +15 -0
  67. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  68. data/test/dummy/app/controllers/application_controller.rb +3 -0
  69. data/test/dummy/app/helpers/application_helper.rb +2 -0
  70. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  71. data/test/dummy/config.ru +4 -0
  72. data/test/dummy/config/application.rb +59 -0
  73. data/test/dummy/config/boot.rb +10 -0
  74. data/test/dummy/config/environment.rb +5 -0
  75. data/test/dummy/config/environments/development.rb +37 -0
  76. data/test/dummy/config/environments/production.rb +67 -0
  77. data/test/dummy/config/environments/test.rb +37 -0
  78. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  79. data/test/dummy/config/initializers/inflections.rb +15 -0
  80. data/test/dummy/config/initializers/mime_types.rb +5 -0
  81. data/test/dummy/config/initializers/secret_token.rb +7 -0
  82. data/test/dummy/config/initializers/session_store.rb +8 -0
  83. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  84. data/test/dummy/config/locales/en.yml +5 -0
  85. data/test/dummy/config/routes.rb +4 -0
  86. data/test/dummy/public/404.html +26 -0
  87. data/test/dummy/public/422.html +26 -0
  88. data/test/dummy/public/500.html +25 -0
  89. data/test/dummy/public/favicon.ico +0 -0
  90. data/test/dummy/script/rails +6 -0
  91. data/test/integration/navigation_test.rb +10 -0
  92. data/test/test_helper.rb +15 -0
  93. metadata +234 -0
data/app/models/bplmodels/workflow_metadata.rb ADDED
@@ -0,0 +1,99 @@
+ module Bplmodels
+   class WorkflowMetadata < ActiveFedora::OmDatastream
+     include OM::XML::Document
+
+     WORKFLOW_NS = 'http://www.bpl.org/repository/xml/ns/workflow'
+     WORKFLOW_SCHEMA = 'http://www.bpl.org/repository/xml/xsd/workflow.xsd'
+     WORKFLOW_PARAMS = {
+       "version" => "0.0.1",
+       "xmlns:xlink" => "http://www.w3.org/1999/xlink",
+       "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
+       "xmlns" => WORKFLOW_NS,
+       "xsi:schemaLocation" => "#{WORKFLOW_NS} #{WORKFLOW_SCHEMA}",
+     }
+
+     set_terminology do |t|
+       t.root :path => 'workflowMetadata', :xmlns => WORKFLOW_NS
+
+       t.item_status(:path => "itemStatus") {
+         t.state(:path => "state")
+         t.state_comment(:path => "stateComment")
+         t.processing(:path => "processing")
+         t.processing_comment(:path => "processingComment")
+       }
+
+       t.item_source(:path => "itemSource") {
+         t.ingest_origin(:path => "ingestOrigin")
+         t.ingest_filepath(:path => "ingestFilepath") # Only supported later for file objects.
+         t.ingest_filename(:path => "ingestFilename") # Only recently added
+       }
+
+       t.item_ark_info(:path => "arkInformation") {
+         t.ark_id(:path => "arkID")
+         t.ark_type(:path => "arkType")
+         t.ark_parent_pid(:path => "arkParentPID")
+       }
+
+       t.source(:path => "source") {
+         t.ingest_origin(:path => "ingestOrigin")
+         t.ingest_filepath(:path => "ingestFilepath") # Only supported later for file objects.
+         t.ingest_filename(:path => "ingestFilename") # Only recently added
+         t.ingest_datastream(:path => "ingestDatastream")
+       }
+
+       t.item_designations(:path => 'itemDesignations') {
+         t.flagged_for_content(:path => "flaggedForContent")
+       }
+
+       t.marked_for_deletion(:path => 'markedForDelation') {
+         t.reason(:path => 'reason')
+       }
+     end
+
+     def self.xml_template
+       Nokogiri::XML::Builder.new do |xml|
+         xml.workflowMetadata(WORKFLOW_PARAMS) {
+         }
+       end.doc
+     end
+
+     # Required for ActiveFedora 9
+     def prefix(path = nil)
+       return ''
+     end
+
+     def insert_file_path(value = nil)
+       ingest_filepath_index = self.item_source.ingest_filepath.count
+
+       self.item_source.ingest_filepath(ingest_filepath_index, value) unless value.blank? || self.item_source.ingest_filepath.include?(value)
+     end
+
+     def insert_file_name(value = nil)
+       ingest_filename_index = self.item_source.ingest_filename.count
+
+       self.item_source.ingest_filename(ingest_filename_index, value) unless value.blank? || self.item_source.ingest_filename.include?(value)
+     end
+
+     def insert_file_source(filepath, filename, datastream)
+       source_count = self.source.count
+
+       self.source(source_count).ingest_filepath(0, filepath) unless filepath.blank?
+       self.source(source_count).ingest_filename(0, filename) unless filename.blank?
+       self.source(source_count).ingest_datastream(0, datastream) unless datastream.blank?
+     end
+
+     def insert_flagged(value = nil)
+       self.item_designations(0).flagged_for_content(0, value) unless value.blank?
+     end
+
+     def insert_oai_defaults
+       self.item_status(0).state = "published"
+       self.item_status(0).state_comment = "OAI Harvested Record"
+       self.item_status(0).processing = "complete"
+       self.item_status(0).processing_comment = "Object Processing Complete"
+     end
+   end
+ end
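A minimal usage sketch for the datastream above, assuming an ActiveFedora model that declares a `workflowMetadata` datastream of this type and a running Fedora stack; the model and PID below are hypothetical:

  # Hypothetical object; any bplmodels model that declares the datastream would do.
  obj = Bplmodels::SimpleObjectBase.new(pid: 'bpl-dev:example123')
  wf  = obj.workflowMetadata

  wf.insert_oai_defaults                            # published / complete status block
  wf.insert_file_path('/ingest/batch42/0001.tif')   # skipped when blank or already recorded
  wf.insert_flagged('explicit content')             # <itemDesignations><flaggedForContent>

  puts wf.to_xml
  obj.save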
data/app/views/layouts/bplmodels/application.html.erb ADDED
@@ -0,0 +1,14 @@
+ <!DOCTYPE html>
+ <html>
+ <head>
+   <title>Bplmodels</title>
+   <%= stylesheet_link_tag "bplmodels/application", :media => "all" %>
+   <%= javascript_include_tag "bplmodels/application" %>
+   <%= csrf_meta_tags %>
+ </head>
+ <body>
+
+ <%= yield %>
+
+ </body>
+ </html>
data/config/application.rb ADDED
@@ -0,0 +1,6 @@
+ module Bplmodels
+   class Application < Rails::Application
+
+     config.autoload_paths += Dir[Rails.root.join('app', 'models', '{**}')]
+   end
+ end
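For reference, the glob above adds the directories matched under the engine's `app/models` (such as `app/models/bplmodels`) to the autoload paths; a quick way to see what it picked up from a console (output is illustrative):

  Dir[Rails.root.join('app', 'models', '{**}')]
  # => ["/path/to/bplmodels/app/models/bplmodels"]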
data/config/predicate_mappings.yml ADDED
@@ -0,0 +1,61 @@
+ # The default namespace maps to the default namespace for generating rels_ext from solr
+ :default_namespace: info:fedora/fedora-system:def/relations-external#
+
+ :predicate_namespaces:
+   fedora-model: info:fedora/fedora-system:def/model#
+   fedora-relations-model: info:fedora/fedora-system:def/relations-external#
+   bpllib-rel: http://projecthydra.org/ns/relations#
+
+ # namespace mappings ---
+ # you can add specific mappings for your institution by providing the following:
+ #   namespace_uri:
+ #     :relationship_symbol: relationship_identifier
+ #
+ # For example, if you have the following element in your rels_ext:
+ #
+ #   <oai:itemID>oai:example.edu:changeme:500</oai:itemID>
+ #
+ # With the last two lines of this file uncommented, the relationships hash of your object will include:
+ #   :oai_item_id => ["info:fedora/oai:example.edu:changeme:500"]
+ #
+ :predicate_mapping:
+   info:fedora/fedora-system:def/relations-external#:
+     :conforms_to: conformsTo
+     :has_annotation: hasAnnotation
+     :has_collection_member: hasCollectionMember
+     :has_constituent: hasConstituent
+     :has_dependent: hasDependent
+     :has_derivation: hasDerivation
+     :has_description: hasDescription
+     :has_equivalent: hasEquivalent
+     :has_metadata: hasMetadata
+     :has_member: hasMember
+     :has_model: hasModel
+     :has_part: hasPart
+     :has_subset: hasSubset
+     :is_annotation_of: isAnnotationOf
+     :is_constituent_of: isConstituentOf
+     :is_dependent_of: isDependentOf
+     :is_derivation_of: isDerivationOf
+     :is_description_of: isDescriptionOf
+     :is_member_of: isMemberOf
+     :is_member_of_collection: isMemberOfCollection
+     :is_metadata_for: isMetadataFor
+     :is_part_of: isPartOf
+     :is_subset_of: isSubsetOf
+     :is_topic_of: isTopicOf
+   info:fedora/fedora-system:def/model#:
+     :is_contractor_of: isContractorOf
+     :is_deployment_of: isDeploymentOf
+     :has_service: hasService
+     :has_model: hasModel
+   http://www.openarchives.org/OAI/2.0/:
+     :oai_item_id: itemID
+   http://projecthydra.org/ns/relations#:
+     :is_governed_by: isGovernedBy
+     :is_image_of: isImageOf
+     :has_image: hasImage
+     :has_subcollection: hasSubcollection
+     :has_crop: hasCrop
+     :is_crop_of: isCropOf
+     :is_exemplary_image_of: isExemplaryImageOf
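These symbols are what ActiveFedora exposes as relationship names for the listed RELS-EXT predicates. A plain-Ruby sketch of reading the mapping file itself (the path is illustrative; depending on the YAML engine, the leading-colon keys load as Symbols or as Strings such as ":predicate_mapping"):

  require 'yaml'

  mappings = YAML.load_file('config/predicate_mappings.yml')
  predicate_mapping = mappings[:predicate_mapping] || mappings[':predicate_mapping']
  rels_ext = predicate_mapping['info:fedora/fedora-system:def/relations-external#']

  rels_ext[:is_member_of_collection] || rels_ext[':is_member_of_collection']
  # => "isMemberOfCollection"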
data/config/routes.rb ADDED
@@ -0,0 +1,2 @@
+ Bplmodels::Engine.routes.draw do
+ end
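The engine defines no routes of its own, so a host application only needs to mount it (a sketch; the mount path is arbitrary):

  # config/routes.rb in the host application
  Rails.application.routes.draw do
    mount Bplmodels::Engine => '/bplmodels'
  end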
data/lib/bplmodels.rb ADDED
@@ -0,0 +1,21 @@
+ require "bplmodels/engine"
+ require "bplmodels/datastream_input_funcs"
+ require "bplmodels/finder"
+ require "bplmodels/constants"
+ require "timeliness"
+
+ module Bplmodels
+   def self.environment
+     if defined?(DERIVATIVE_CONFIG_GLOBAL) && DERIVATIVE_CONFIG_GLOBAL.present? && DERIVATIVE_CONFIG_GLOBAL['environment'].present?
+       return DERIVATIVE_CONFIG_GLOBAL['environment']
+     elsif defined?(Rails.env) and !Rails.env.nil?
+       return Rails.env.to_s
+     elsif defined?(ENV['environment']) and !(ENV['environment'].nil?)
+       return ENV['environment']
+     elsif defined?(ENV['RAILS_ENV']) and !(ENV['RAILS_ENV'].nil?)
+       raise RuntimeError, "You're depending on RAILS_ENV for setting your environment. Please use ENV['environment'] for non-rails environment setting: 'rake foo:bar environment=test'"
+     else
+       ENV['environment'] = 'development'
+     end
+   end
+ end
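`Bplmodels.environment` prefers an explicit `DERIVATIVE_CONFIG_GLOBAL['environment']`, then `Rails.env`, then `ENV['environment']`. A quick sketch of the non-Rails path (values are illustrative):

  # rake foo:bar environment=staging  (outside Rails, so Rails.env is not defined)
  ENV['environment'] = 'staging'
  Bplmodels.environment   # => "staging"

Inside a Rails process the method simply returns `Rails.env.to_s`.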
data/lib/bplmodels/constants.rb ADDED
@@ -0,0 +1,119 @@
+ module Bplmodels
+   class Constants
+     GENRE_LOOKUP = {}
+     GENRE_LOOKUP['Cards'] = {:id=>'tgm001686', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Correspondence'] = {:id=>'tgm002590', :authority=>'lctgm'}
+     GENRE_LOOKUP['Documents'] = {:id=>'tgm003185', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Drawings'] = {:id=>'tgm003279', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Ephemera'] = {:id=>'tgm003634', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Manuscripts'] = {:id=>'tgm012286', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Maps'] = {:id=>'tgm006261', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Objects'] = {:id=>'tgm007159', :authority=>'lctgm'}
+     GENRE_LOOKUP['Paintings'] = {:id=>'tgm007393', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Photographs'] = {:id=>'tgm007721', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Posters'] = {:id=>'tgm008104', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Prints'] = {:id=>'tgm008237', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Newspapers'] = {:id=>'tgm007068', :authority=>'lctgm'}
+     GENRE_LOOKUP['Sound recordings'] = {:id=>'tgm009874', :authority=>'lctgm'}
+     GENRE_LOOKUP['Motion pictures'] = {:id=>'tgm006804', :authority=>'lctgm'}
+     GENRE_LOOKUP['Periodicals'] = {:id=>'tgm007641', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Books'] = {:id=>'tgm001221', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Albums'] = {:id=>'tgm000229', :authority=>'gmgpc'}
+     GENRE_LOOKUP['Musical notation'] = {:id=>'tgm006926', :authority=>'lctgm'}
+
+     COUNTRY_TGN_LOOKUP = {}
+     COUNTRY_TGN_LOOKUP['United States'] = {:tgn_id=>7012149, :tgn_country_name=>'United States'}
+     COUNTRY_TGN_LOOKUP['Canada'] = {:tgn_id=>7005685, :tgn_country_name=>'Canada'}
+     COUNTRY_TGN_LOOKUP['France'] = {:tgn_id=>1000070, :tgn_country_name=>'France'}
+     COUNTRY_TGN_LOOKUP['Vietnam'] = {:tgn_id=>1000145, :tgn_country_name=>'Viet Nam'}
+     COUNTRY_TGN_LOOKUP['South Africa'] = {:tgn_id=>1000193, :tgn_country_name=>'South Africa'}
+     COUNTRY_TGN_LOOKUP['Philippines'] = {:tgn_id=>1000135, :tgn_country_name=>'Pilipinas'}
+     COUNTRY_TGN_LOOKUP['China'] = {:tgn_id=>1000111, :tgn_country_name=>'Zhongguo'}
+     COUNTRY_TGN_LOOKUP['Japan'] = {:tgn_id=>1000120, :tgn_country_name=>'Nihon'}
+
+ =begin
+     COUNTRY_TGN_LOOKUP = {
+       'US' => 7012149,
+       'CA' => 7005685,
+       'FR' => 1000070,
+       'VN' => 1000145,
+       'ZA' => 1000193,
+       'PH' => 1000135,
+       'United States' => 7012149,
+       'Canada' => 7005685,
+       'France' => 1000070,
+       'Vietnam' => 1000145,
+       'Viet Nam' => 1000145,
+       'South Africa' => 1000193,
+       'Philippines' => 1000135
+     }
+ =end
+
+     STATE_ABBR = {
+       'AL' => 'Alabama',
+       'AK' => 'Alaska',
+       'AS' => 'American Samoa',
+       'AZ' => 'Arizona',
+       'AR' => 'Arkansas',
+       'CA' => 'California',
+       'CO' => 'Colorado',
+       'CT' => 'Connecticut',
+       'DE' => 'Delaware',
+       'DC' => 'District of Columbia',
+       'FM' => 'Federated States of Micronesia',
+       'FL' => 'Florida',
+       'GA' => 'Georgia',
+       'GU' => 'Guam',
+       'HI' => 'Hawaii',
+       'ID' => 'Idaho',
+       'IL' => 'Illinois',
+       'IN' => 'Indiana',
+       'IA' => 'Iowa',
+       'KS' => 'Kansas',
+       'KY' => 'Kentucky',
+       'LA' => 'Louisiana',
+       'ME' => 'Maine',
+       'MH' => 'Marshall Islands',
+       'MD' => 'Maryland',
+       'MA' => 'Massachusetts',
+       'MI' => 'Michigan',
+       'MN' => 'Minnesota',
+       'MS' => 'Mississippi',
+       'MO' => 'Missouri',
+       'MT' => 'Montana',
+       'NE' => 'Nebraska',
+       'NV' => 'Nevada',
+       'NH' => 'New Hampshire',
+       'NJ' => 'New Jersey',
+       'NM' => 'New Mexico',
+       'NY' => 'New York',
+       'NC' => 'North Carolina',
+       'ND' => 'North Dakota',
+       'OH' => 'Ohio',
+       'OK' => 'Oklahoma',
+       'OR' => 'Oregon',
+       'PW' => 'Palau',
+       'PA' => 'Pennsylvania',
+       'PR' => 'Puerto Rico',
+       'RI' => 'Rhode Island',
+       'SC' => 'South Carolina',
+       'SD' => 'South Dakota',
+       'TN' => 'Tennessee',
+       'TX' => 'Texas',
+       'UT' => 'Utah',
+       'VT' => 'Vermont',
+       'VI' => 'Virgin Islands',
+       'VA' => 'Virginia',
+       'WA' => 'Washington',
+       'WV' => 'West Virginia',
+       'WI' => 'Wisconsin',
+       'WY' => 'Wyoming'
+     }
+   end
+ end
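The lookup tables are plain hashes keyed on display strings; typical lookups, with values taken from the tables above:

  Bplmodels::Constants::GENRE_LOOKUP['Maps']
  # => {:id=>"tgm006261", :authority=>"gmgpc"}

  Bplmodels::Constants::COUNTRY_TGN_LOOKUP['Japan'][:tgn_id]
  # => 1000120

  Bplmodels::Constants::STATE_ABBR['MA']
  # => "Massachusetts"

  Bplmodels::Constants::STATE_ABBR.key('Massachusetts')   # reverse lookup used by render_display_placename
  # => "MA"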
data/lib/bplmodels/datastream_input_funcs.rb ADDED
@@ -0,0 +1,949 @@
+ #!/bin/env ruby
+ # encoding: utf-8
+
+ module Bplmodels
+   class DatastreamInputFuncs
+
+     # These functions can be used to split names into subparts for <mods:name> and <mods:subject><mods:name>.
+
+     # Use for personal name headings, e.g. <mods:name type="personal">.
+     # Returns personal name data as a hash which can be used to populate <mods:namePart> and <mods:namePart type="date">.
+     def self.persNamePartSplitter(inputstring)
+       splitNamePartsHash = Hash.new
+       unless inputstring =~ /\d{4}/
+         splitNamePartsHash[:namePart] = inputstring
+       else
+         if inputstring =~ /\(.*\d{4}.*\)/
+           splitNamePartsHash[:namePart] = inputstring
+         else
+           splitNamePartsHash[:namePart] = inputstring.gsub(/,[\d\- \.\w?]*$/,"")
+           splitArray = inputstring.split(/.*,/)
+           splitNamePartsHash[:datePart] = splitArray[1].strip
+         end
+       end
+       return splitNamePartsHash
+     end
+
+     # Use for corporate name headings, e.g. <mods:name type="corporate">.
+     # Returns corporate name data as an array which can be used to populate <mods:namePart> subparts
+     # (corporate name subparts are not differentiated by any attributes in the xml;
+     # see http://id.loc.gov/authorities/names/n82139319.madsxml.xml for an example).
+     # Note: the (?!\)) part checks for examples like: 'Boston (Mass.) Police Dept.'
+     def self.corpNamePartSplitter(inputstring)
+       splitNamePartsArray = Array.new
+       unless inputstring =~ /[\S]{5}\.(?!\))/
+         splitNamePartsArray << inputstring
+       else
+         while inputstring =~ /[\S]{5}\.(?!\))/
+           snip = /[\S]{5}\.(?!\))/.match(inputstring).post_match
+           subpart = inputstring.gsub(snip,"")
+           splitNamePartsArray << subpart.gsub(/\.\z/,"").strip
+           inputstring = snip
+         end
+         splitNamePartsArray << inputstring.gsub(/\.\z/,"").strip
+       end
+       return splitNamePartsArray
+     end
+
+     # A function to convert date data from OAI feeds into MODS-usable date data.
+     # Assumes date values containing ";" have already been split.
+     # Returns a hash with :single_date, :date_range, :date_qualifier, and/or :date_note values.
+     def self.convert_to_mods_date(value)
+
+       date_data = {} # create the hash to hold all the data
+       source_date_string = value.strip # variable to hold the original value
+
+       # weed out obvious bad dates before processing
+       if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
+           (value.match(/\d\d\d\d-\z/)) || # 1975-
+           (value.match(/\d\d-\d\d\/\d\d/)) || # 1975-09-09/10
+           (value.match(/\d*\(\d*\)/)) || # 1975(1976)
+           (value.scan(/\d\d\d\d/).length > 2) || # 1861/1869/1915
+           (value.scan(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).length > 1) ||
+           # or if the data does not match any of these
+           (!value.match(/(\d\dth [Cc]entury|\d\d\d-\?*|\d\d\d\?|\d\d\?\?|\d\d\d\d)/))
+         date_data[:date_note] = source_date_string
+       else
+         # find date qualifier
+         if value.include? '?'
+           date_data[:date_qualifier] = 'questionable'
+         elsif value.match(/\A[Cc]/)
+           date_data[:date_qualifier] = 'approximate'
+         elsif (value.match(/[\[\]]+/)) || (value.match(/[(][A-Za-z, \d]*[\d]+[A-Za-z, \d]*[)]+/)) # if [] or ()
+           date_data[:date_qualifier] = 'inferred'
+         end
+
+         # remove unnecessary chars and words
+         value = value.gsub(/[\[\]\(\)\.,']/,'')
+         value = value.gsub(/(\b[Bb]etween\b|\bcirca\b|\bca\b|\Aca|\Ac)/,'').strip
+
+         # differentiate between ranges and single dates
+         if (value.scan(/\d\d\d\d/).length == 2) ||
+             (value.include? '0s') || # 1970s
+             (value.include? 'entury') || # 20th century
+             (value.match(/(\A\d\d\d\?|\A\d\d\?\?|\A\d\d\d-\?*|\d\d\d\d-\d\z|\d\d\d\d\/[\d]{1,2}\z)/)) ||
+             (value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/)) ||
+             ((value.match(/\d\d\d\d-\d\d\z/)) && (value[-2..-1].to_i > 12)) # 1975-76 but NOT 1910-11
+
+           # RANGES
+           date_data[:date_range] = {}
+
+           # deal with date strings with two 4-digit year values separately
+           if value.scan(/\d\d\d\d/).length == 2
+
+             # convert weird span indicators ('or','and','||'), remove extraneous text
+             value = value.gsub(/(or|and|\|\|)/,'-').gsub(/[A-Za-z\?\s]/,'')
+
+             if value.match(/\A[12][\d]{3}-[01][\d]-[12][\d]{3}-[01][\d]\z/) # 1895-05-1898-01
+               date_data_range_start = value[0..6]
+               date_data_range_end = value[-7..-1]
+             elsif value.match(/\A[12][\d]{3}\/[12][\d]{3}\z/) # 1987/1988
+               date_data_range_start = value[0..3]
+               date_data_range_end = value[-4..-1]
+             else
+               range_dates = value.split('-') # split the dates into an array
+               range_dates.each_with_index do |range_date,index|
+                 # format the data properly
+                 if range_date.include? '/' # 11/05/1965
+                   range_date_pieces = range_date.split('/')
+                   range_date_piece_year = range_date_pieces.last
+                   range_date_piece_month = range_date_pieces.first.length == 2 ? range_date_pieces.first : '0' + range_date_pieces.first
+                   if range_date_pieces.length == 3
+                     range_date_piece_day = range_date_pieces[1].length == 2 ? range_date_pieces[1] : '0' + range_date_pieces[1]
+                   end
+                   value_to_insert = range_date_piece_year + '-' + range_date_piece_month
+                   value_to_insert << '-' + range_date_piece_day if range_date_piece_day
+                 elsif range_date.match(/\A[12][\d]{3}\z/)
+                   value_to_insert = range_date
+                 end
+                 # add the data to the proper variable
+                 if value_to_insert
+                   if index == 0
+                     date_data_range_start = value_to_insert
+                   else
+                     date_data_range_end = value_to_insert
+                   end
+                 end
+               end
+             end
+           else
+             # if there are 'natural language' range values, find, assign to var, then remove
+             text_range = value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).to_s
+             if text_range.length > 0
+               date_data[:date_qualifier] ||= 'approximate' # TODO - remove this??
+               value = value.gsub(/#{text_range}/,'').strip
+             end
+
+             # deal with ranges for which 'natural language' range values are ignored
+             if value.match(/\A1\d\?\?\z/) # 19??
+               date_data_range_start = value[0..1] + '00'
+               date_data_range_end = value[0..1] + '99'
+             elsif value.match(/\A[12]\d\d-*\?*\z/) # 195? || 195-? || 195-
+               date_data_range_start = value[0..2] + '0'
+               date_data_range_end = value[0..2] + '9'
+             elsif value.match(/\A[12]\d\d\d[-\/][\d]{1,2}\z/) # 1956-57 || 1956/57 || 1956-7
+               if value.length == 7 && (value[5..6].to_i > value[2..3].to_i)
+                 date_data_range_start = value[0..3]
+                 date_data_range_end = value[0..1] + value[5..6]
+               elsif value.length == 6 && (value[5].to_i > value[3].to_i)
+                 date_data_range_start = value[0..3]
+                 date_data_range_end = value[0..2] + value[5]
+               end
+               date_data[:date_note] = source_date_string if text_range.length > 0
+             end
+             # deal with ranges where text range values are evaluated
+             value = value.gsub(/\?/,'').strip # remove question marks
+
+             # centuries
+             if value.match(/([12][\d]{1}th [Cc]entury|[12][\d]{1}00s)/) # 19th century || 1800s
+               if value.match(/[12][\d]{1}00s/)
+                 century_prefix_date = value.match(/[12][\d]{1}/).to_s
+               else
+                 century_prefix_date = (value.match(/[12][\d]{1}/).to_s.to_i - 1).to_s
+               end
+               if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
+                 if text_range.match(/[Ee]arly/)
+                   century_suffix_dates = %w[00 39]
+                 elsif text_range.match(/[Mm]id/)
+                   century_suffix_dates = %w[30 69]
+                 else
+                   century_suffix_dates = %w[60 99]
+                 end
+               end
+               date_data_range_start = century_suffix_dates ? century_prefix_date + century_suffix_dates[0] : century_prefix_date + '00'
+               date_data_range_end = century_suffix_dates ? century_prefix_date + century_suffix_dates[1] : century_prefix_date + '99'
+             else
+               # remove any remaining non-date text
+               value.match(/[12][1-9][1-9]0s/) ? is_decade = true : is_decade = false # but preserve decade-ness
+               remaining_text = value.match(/\D+/).to_s
+               value = value.gsub(/#{remaining_text}/,'').strip if remaining_text.length > 0
+
+               # decades
+               if is_decade
+                 decade_prefix_date = value.match(/\A[12][1-9][1-9]/).to_s
+                 if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
+                   if text_range.match(/[Ee]arly/)
+                     decade_suffix_dates = %w[0 3]
+                   elsif text_range.match(/[Mm]id/)
+                     decade_suffix_dates = %w[4 6]
+                   else
+                     decade_suffix_dates = %w[7 9]
+                   end
+                 end
+                 date_data_range_start = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[0] : decade_prefix_date + '0'
+                 date_data_range_end = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[1] : decade_prefix_date + '9'
+               else
+                 # single year ranges
+                 single_year_prefix = value.match(/[12][0-9]{3}/).to_s
+                 if text_range.length > 0
+                   if text_range.match(/[Ee]arly/)
+                     single_year_suffixes = %w[01 04]
+                   elsif text_range.match(/[Mm]id/)
+                     single_year_suffixes = %w[05 08]
+                   elsif text_range.match(/[Ll]ate/)
+                     single_year_suffixes = %w[09 12]
+                   elsif text_range.match(/[Ww]inter/)
+                     single_year_suffixes = %w[01 03]
+                   elsif text_range.match(/[Ss]pring/)
+                     single_year_suffixes = %w[03 05]
+                   elsif text_range.match(/[Ss]ummer/)
+                     single_year_suffixes = %w[06 08]
+                   else # fall
+                     single_year_suffixes = %w[09 11]
+                   end
+                   date_data_range_start = single_year_prefix + '-' + single_year_suffixes[0]
+                   date_data_range_end = single_year_prefix + '-' + single_year_suffixes[1]
+                 end
+               end
+               # if possibly significant info was removed, include it as a note
+               date_data[:date_note] = source_date_string if remaining_text.length > 1
+             end
+           end
+
+           # insert the values into the date_data hash
+           if date_data_range_start && date_data_range_end
+             date_data[:date_range][:start] = date_data_range_start
+             date_data[:date_range][:end] = date_data_range_end
+           else
+             date_data[:date_note] ||= source_date_string
+             date_data.delete :date_range
+           end
+
+         else
+           # SINGLE DATES
+           value = value.gsub(/\?/,'') # remove question marks
+           # fix bad spacing (e.g. December 13,1985 || December 3,1985)
+           value = value.insert(-5, ' ') if value.match(/[A-Za-z]* \d{6}/) || value.match(/[A-Za-z]* \d{5}/)
+
+           # try to automatically parse single dates with YYYY && MM && DD values
+           if Timeliness.parse(value).nil?
+             # start further processing
+             if value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
+               date_data[:single_date] = value
+             elsif value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
+               value = '0' + value if value.match(/\A[1-9][-\/][12]\d\d\d\z/) # m-yyyy || m/yyyy
+               date_data[:single_date] = value[3..6] + '-' + value[0..1]
+             elsif value.match(/\A[A-Za-z]{3,9}[\.]? [12]\d\d\d\z/) # April 1987 || Apr. 1987
+               value = value.split(' ')
+               if value[0].match(/\A[A-Za-z]{3}[\.]?\z/)
+                 value_month = '%02d' % Date::ABBR_MONTHNAMES.index(value[0]) if Date::ABBR_MONTHNAMES.index(value[0])
+               else
+                 value_month = '%02d' % Date::MONTHNAMES.index(value[0]) if Date::MONTHNAMES.index(value[0])
+               end
+               date_data[:single_date] = value_month ? value[1] + '-' + value_month : value[1]
+             elsif value.match(/\A[12]\d\d\d\z/) # 1999
+               date_data[:single_date] = value
+             else
+               date_data[:date_note] = source_date_string
+             end
+           else
+             date_data[:single_date] = Timeliness.parse(value).strftime("%Y-%m-%d")
+           end
+
+         end
+
+       end
+
+       # some final validation, just in case
+       date_validation_array = []
+       date_validation_array << date_data[:single_date] if date_data[:single_date]
+       date_validation_array << date_data[:date_range][:start] if date_data[:date_range]
+       date_validation_array << date_data[:date_range][:end] if date_data[:date_range]
+       date_validation_array.each do |date_to_val|
+         if date_to_val.length == 7
+           bad_date = true unless date_to_val[-2..-1].to_i.between?(1,12) && !date_to_val.nil?
+         elsif date_to_val.length == 10
+           bad_date = true unless Timeliness.parse(value) && !date_to_val.nil?
+         end
+         if bad_date
+           date_data[:date_note] ||= source_date_string
+           date_data.delete :single_date if date_data[:single_date]
+           date_data.delete :date_range if date_data[:date_range]
+         end
+       end
+
+       # if the date slipped by all the processing somehow!
+       if date_data[:single_date].nil? && date_data[:date_range].nil? && date_data[:date_note].nil?
+         date_data[:date_note] = source_date_string
+       end
+
+       date_data
+
+     end
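For orientation, a few representative inputs and the hashes the method above produces for them (traced from the logic; not an exhaustive specification):

  Bplmodels::DatastreamInputFuncs.convert_to_mods_date('1975-76')
  # => {:date_range=>{:start=>"1975", :end=>"1976"}}

  Bplmodels::DatastreamInputFuncs.convert_to_mods_date('Early 20th century')
  # => {:date_range=>{:start=>"1900", :end=>"1939"}, :date_qualifier=>"approximate"}

  Bplmodels::DatastreamInputFuncs.convert_to_mods_date('n.d.')
  # => {:date_note=>"n.d."}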
+
+     # retrieve data from Getty TGN to populate <mods:subject authority="tgn">
+     def self.get_tgn_data(tgn_id)
+       tgn_response = Typhoeus::Request.get('http://vocabsservices.getty.edu/TGNService.asmx/TGNGetSubject?subjectID=' + tgn_id, userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])
+       unless tgn_response.code == 500
+         tgnrec = Nokogiri::XML(tgn_response.body)
+         # puts tgnrec.to_s
+
+         # coordinates
+         if tgnrec.at_xpath("//Coordinates")
+           coords = {}
+           coords[:latitude] = tgnrec.at_xpath("//Latitude/Decimal").children.to_s
+           coords[:longitude] = tgnrec.at_xpath("//Longitude/Decimal").children.to_s
+         else
+           coords = nil
+         end
+
+         hier_geo = {}
+
+         # main term
+         if tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text")
+           tgn_term_type = tgnrec.at_xpath("//Preferred_Place_Type/Place_Type_ID").children.to_s
+           pref_term_langs = tgnrec.xpath("//Terms/Preferred_Term/Term_Languages/Term_Language/Language")
+           # if the preferred term is the preferred English form, use that
+           if pref_term_langs.children.to_s.include? "English"
+             tgn_term = tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
+           else # use the non-preferred term which is the preferred English form
+             if tgnrec.xpath("//Terms/Non-Preferred_Term")
+               non_pref_terms = tgnrec.xpath("//Terms/Non-Preferred_Term")
+               non_pref_terms.each do |non_pref_term|
+                 non_pref_term_langs = non_pref_term.children.css("Term_Language")
+                 # have to loop through these, as sometimes languages share a form
+                 non_pref_term_langs.each do |non_pref_term_lang|
+                   if non_pref_term_lang.children.css("Preferred").children.to_s == "Preferred" && non_pref_term_lang.children.css("Language").children.to_s == "English"
+                     tgn_term = non_pref_term.children.css("Term_Text").children.to_s
+                   end
+                 end
+               end
+             end
+           end
+           # if no term is the preferred English form, just use the preferred term
+           tgn_term ||= tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
+         end
+         if tgn_term && tgn_term_type
+           case tgn_term_type
+             when '29000/continent'
+               hier_geo[:continent] = tgn_term
+             when '81010/nation'
+               hier_geo[:country] = tgn_term
+             when '81161/province'
+               hier_geo[:province] = tgn_term
+             when '81165/region', '82193/union', '80005/semi-independent political entity'
+               hier_geo[:region] = tgn_term
+             when '81175/state', '81117/department', '82133/governorate'
+               hier_geo[:state] = tgn_term
+             when '81181/territory', '81021/dependent state', '81186/union territory', '81125/national district'
+               hier_geo[:territory] = tgn_term
+             when '81115/county'
+               hier_geo[:county] = tgn_term
+             when '83002/inhabited place'
+               hier_geo[:city] = tgn_term
+             when '84251/neighborhood'
+               hier_geo[:city_section] = tgn_term
+             when '21471/island'
+               hier_geo[:island] = tgn_term
+             when '81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
+               hier_geo[:area] = tgn_term
+             else
+               non_hier_geo = tgn_term
+           end
+         end
+
+         # parent data for <mods:hierarchicalGeographic>
+         if tgnrec.at_xpath("//Parent_String")
+           parents = tgnrec.at_xpath("//Parent_String").children.to_s.split('], ')
+           parents.each do |parent|
+             if parent.include? '(continent)'
+               hier_geo[:continent] = parent
+             elsif parent.include? '(nation)'
+               hier_geo[:country] = parent
+             elsif parent.include? '(province)'
+               hier_geo[:province] = parent
+             elsif (parent.include? '(region)') || (parent.include? '(union)') || (parent.include? '(semi-independent political entity)')
+               hier_geo[:region] = parent
+             elsif (parent.include? '(state)') || (parent.include? '(department)') || (parent.include? '(governorate)')
+               hier_geo[:state] = parent
+             elsif (parent.include? '(territory)') || (parent.include? '(dependent state)') || (parent.include? '(union territory)') || (parent.include? '(national district)')
+               hier_geo[:territory] = parent
+             elsif parent.include? '(county)'
+               hier_geo[:county] = parent
+             elsif parent.include? '(inhabited place)'
+               hier_geo[:city] = parent
+             elsif parent.include? '(neighborhood)'
+               hier_geo[:city_section] = parent
+             elsif parent.include? '(island)'
+               hier_geo[:island] = parent
+             elsif (parent.include? '(area)') || (parent.include? '(general region)') || (parent.include? '(deserted settlement)') || (parent.include? '(historical region)') || (parent.include? '(national division)')
+               hier_geo[:area] = parent
+             end
+           end
+           hier_geo.each do |k,v|
+             hier_geo[k] = v.gsub(/ \(.*/,'')
+           end
+         end
+
+         tgn_data = {}
+         tgn_data[:coords] = coords
+         tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil
+         tgn_data[:non_hier_geo] = non_hier_geo ? non_hier_geo : nil
+
+       else
+
+         tgn_data = nil
+
+       end
+
+       return tgn_data
+
+     end
+
+     # Note: limited to only looking at United States places...
+     def self.parse_bing_api(term)
+       return_hash = {}
+
+       # The Bing API does badly with parentheses...
+       if term.match(/[\(\)]+/)
+         return return_hash
+       end
+
+       # Sometimes with building, city, state, Bing will only return the state. Example: Boston Harbor, Boston, Mass.
+       # So if not a street address, pass it along and have Google handle it for better results...
+       # Example of another bad record: South Street bridge, West Bridgewater, Mass. would give a place in Holyoke
+       if term.split(' ').length >= 3 && term.match(/\d/).blank? && term.downcase.match(/ave\.*,/).blank? && term.downcase.match(/avenue\.*,/).blank? && term.downcase.match(/street\.*,/).blank? && term.downcase.match(/st\.*,/).blank? && term.downcase.match(/road\.*,/).blank? && term.downcase.match(/rd\.*,/).blank?
+         return return_hash
+       end
+
+       Geocoder.configure(:lookup => :bing, :api_key => 'Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn', :timeout => 7)
+       bing_api_result = Geocoder.search(term)
+
+       # Use Bing first, and only for United States results...
+       if bing_api_result.present? && bing_api_result.first.data["address"]["countryRegion"] == 'United States'
+         if bing_api_result.first.data["address"]["addressLine"].present?
+           return_hash[:keep_original_string] = true
+           return_hash[:coordinates] = bing_api_result.first.data["geocodePoints"].first["coordinates"].first.to_s + ',' + bing_api_result.first.data["geocodePoints"].first["coordinates"].last.to_s
+         end
+
+         return_hash[:country_part] = bing_api_result.first.data["address"]["countryRegion"]
+
+         if return_hash[:country_part] == 'United States'
+           return_hash[:state_part] = Bplmodels::Constants::STATE_ABBR[bing_api_result.first.data["address"]["adminDistrict"]]
+         else
+           return_hash[:state_part] = bing_api_result.first.data["address"]["adminDistrict"]
+         end
+
+         return_hash[:city_part] = bing_api_result.first.data["address"]["locality"]
+       end
+
+       return return_hash
+     end
+
+     # Mapquest allows unlimited requests - start here?
+     def self.parse_mapquest_api(term)
+       return_hash = {}
+
+       # Mapquest returns bad data for: Manchester, Mass.
+       if term.include?('Manchester')
+         return return_hash
+       end
+
+       # Messed up with bare neighborhoods. Example: Hyde Park (Boston, Mass.)
+       # So if not a street address, pass it along and have Google handle it for better results...
+       if term.split(' ').length >= 3 && term.match(/\d/).blank? && term.downcase.match(/ave\.*,/).blank? && term.downcase.match(/avenue\.*,/).blank? && term.downcase.match(/street\.*,/).blank? && term.downcase.match(/st\.*,/).blank? && term.downcase.match(/road\.*,/).blank? && term.downcase.match(/rd\.*,/).blank?
+         return return_hash
+       end
+
+       Geocoder.configure(:lookup => :mapquest, :api_key => 'Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a', :timeout => 7)
+
+       mapquest_api_result = Geocoder.search(term)
+
+       # If this call returned a result...
+       if mapquest_api_result.present?
+
+         if mapquest_api_result.first.data["street"].present?
+           return_hash[:keep_original_string] = true
+           return_hash[:coordinates] = mapquest_api_result.first.data['latLng']['lat'].to_s + ',' + mapquest_api_result.first.data['latLng']['lng'].to_s
+         end
+
+         return_hash[:country_part] = Country.new(mapquest_api_result.first.data["adminArea1"]).name
+
+         if return_hash[:country_part] == 'United States'
+           return_hash[:state_part] = Bplmodels::Constants::STATE_ABBR[mapquest_api_result.first.data["adminArea3"]]
+           if mapquest_api_result.first.data["adminArea4"] == 'District of Columbia'
+             return_hash[:state_part] = mapquest_api_result.first.data["adminArea4"]
+           end
+         else
+           return_hash[:state_part] = mapquest_api_result.first.data["adminArea3"].gsub(' province', '')
+         end
+
+         return_hash[:city_part] = mapquest_api_result.first.data["adminArea5"]
+       end
+
+       return return_hash
+     end
+
+     # The final fallback is the Google API. The best, but limited to 2,500 requests per day unless we pay for the premium account...
+     # Note: if Google cannot find the street, it will return just city/state, as for "Salem Street and Paradise Road, Swampscott, MA, 01907";
+     # it seems to set partial_match => true in the data section in that case...
+     def self.parse_google_api(term)
+       return_hash = {}
+
+       Geocoder.configure(:lookup => :google, :api_key => nil, :timeout => 7)
+       google_api_result = Geocoder.search(term)
+
+       # Check if only a partial match. To avoid errors, strip out the first part and try again...
+       # Need a better way to check for street endings. See: http://pe.usps.gov/text/pub28/28apc_002.htm
+       if google_api_result.present?
+         if google_api_result.first.data['partial_match'] && term.split(',').length > 1 && !term.downcase.include?('street') && !term.downcase.include?('st.') && !term.downcase.include?('avenue') && !term.downcase.include?('ave.') && !term.downcase.include?('court')
+           term = term.split(',')[1..term.split(',').length-1].join(',').strip
+           google_api_result = Geocoder.search(term)
+         end
+       end
+
+       if google_api_result.present?
+         # Types: street number, route, neighborhood, establishment, transit_station, bus_station
+         google_api_result.first.data["address_components"].each do |result|
+           if (result['types'] & ['street number', 'route', 'neighborhood', 'establishment', 'transit_station', 'bus_station']).present?
+             return_hash[:keep_original_string] = true
+             return_hash[:coordinates] = google_api_result.first.data['geometry']['location']['lat'].to_s + ',' + google_api_result.first.data['geometry']['location']['lng'].to_s
+           elsif (result['types'] & ['country']).present?
+             return_hash[:country_part] = result['long_name']
+           elsif (result['types'] & ['administrative_area_level_1']).present?
+             return_hash[:state_part] = result['long_name'].to_ascii
+           elsif (result['types'] & ['locality']).present?
+             return_hash[:city_part] = result['long_name']
+           elsif (result['types'] & ['sublocality', 'political']).present?
+             return_hash[:neighborhood_part] = result['long_name']
+           end
+         end
+
+         return_hash[:keep_original_string] ||= google_api_result.first.data['partial_match'] unless google_api_result.first.data['partial_match'].blank?
+       end
+
+       return return_hash
+     end
+
+     def self.parse_geographic_term(term)
+       geo_term = nil
+
+       # weird incorrect dash separator
+       term = term.gsub('–', '--')
+
+       # likely too long to be an address... some fields have junk along with an address string...
+       if term.length > 125
+         return nil
+       end
+
+       # TODO: use the Countries gem, https://github.com/hexorx/countries
+       # test = Country.new('US')
+       # test.states
+
+       # parsing a subject geographic term
+       if term.include?('--')
+         term.split('--').each_with_index do |split_term, index|
+           if split_term.include?('Massachusetts') || split_term.include?('New Jersey') || split_term.include?('Wisconsin') || split_term.include?('New Hampshire') || split_term.include?('New York') || split_term.include?('Maine')
+             geo_term = term.split('--')[index..term.split('--').length-1].reverse!.join(',')
+           elsif split_term.include?('Mass') || split_term.include?(' MA')
+             geo_term = split_term
+           end
+         end
+       # other than a '--' field
+       # experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
+       elsif term.include?(' - ')
+         term.split(' - ').each do |split_term|
+           if split_term.include?('Mass') || split_term.include?(' MA') || split_term.include?('Massachusetts') || split_term.include?('New Jersey') || split_term.include?('Wisconsin') || split_term.include?('New Hampshire') || split_term.include?('New York') || split_term.include?('Maine')
+             geo_term = split_term
+           end
+         end
+       else
+         if term.include?('Mass') || term.include?(' MA') || term.include?('Massachusetts') || term.include?('New Jersey') || term.include?('Wisconsin') || term.include?('New Hampshire') || term.include?('New York') || term.include?('Maine')
+           geo_term = term
+         end
+       end
+
+       # if geo_term.blank?
+       #   return nil
+       # end
+
+       return geo_term
+     end
+
+     def self.standardize_geographic_term(geo_term)
+       # remove common junk terms
+       geo_term = geo_term.gsub('Cranberries', '').gsub('History', '').gsub('Maps', '').gsub('State Police', '').gsub('Pictorial works.', '').gsub(/[nN]ation/, '').gsub('Asia', '').gsub('(Republic)', '').strip
+
+       # strip any leading periods or commas left by the junk terms
+       geo_term = geo_term.gsub(/^[\.,]+/, '').strip
+
+       # replace any semicolons with commas... possibly strip them?
+       geo_term = geo_term.gsub(';', ',')
+
+       # Note: the following return junk from Bing as if they were in WI, California, etc.:
+       #   East Monponsett Lake (Halifax, Mass.)
+       #   Silver Lake (Halifax, Mass.)
+       # Scarier note: Washington Park (Reading, Mass.) will always return Boston, MA in Google
+       if geo_term.match(/[\(\)]+/)
+         # attempt to fix the address if it looks like (word)
+         if geo_term.match(/ \(+.*\)+/)
+           # make this replacement better?
+           geo_term = geo_term.gsub(' (', ', ').gsub(')', '')
+         # else skip this, as the data returned will likely be unreliable for now... FIXME when a use case occurs.
+         else
+           return nil
+         end
+       end
+
+       return geo_term
+     end
+
+     def self.tgn_id_from_term(term, parse_term=false)
+       return_hash = {}
+       max_retry = 3
+       sleep_time = 60 # in seconds
+       retry_count = 0
+
+       # if not a good address source, parsing is done here...
+       term = parse_geographic_term(term) unless !parse_term
+
+       term = standardize_geographic_term(term) unless term.blank?
+
+       if term.blank?
+         return return_hash
+       end
+
+       return_hash = parse_mapquest_api(term)
+
+       if return_hash.blank?
+         return_hash = parse_bing_api(term)
+       end
+
+       if return_hash.blank?
+         return_hash = parse_google_api(term)
+       end
+
+       if return_hash.blank?
+         return nil
+       end
+
+       state_part = return_hash[:state_part]
+
+       country_code = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_id] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
+       country_code ||= ''
+
+       country_part = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_country_name] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
+       country_part ||= return_hash[:country_part]
+       country_part ||= ''
+
+       city_part = return_hash[:city_part]
+
+       # keep the original string if there are at least three parts or if there is a number in the term
+       if term.split(',').length >= 3 || term.match(/\d/).present?
+         return_hash[:keep_original_string] = true
+       end
+
+       top_match_term = ''
+       match_term = nil
+
+       if city_part.blank? && state_part.blank?
+         # limit to nations
+         place_type = 81010
+         top_match_term = ''
+         match_term = country_part.to_ascii.downcase || term.to_ascii.downcase
+       elsif state_part.present? && city_part.blank? && country_code == 7012149
+         # limit to states
+         place_type = 81175
+         top_match_term = country_part.to_ascii.downcase
+         match_term = state_part.to_ascii.downcase
+       elsif state_part.present? && city_part.blank?
+         # limit to regions
+         place_type = 81165
+         top_match_term = country_part.to_ascii.downcase
+         match_term = state_part.to_ascii.downcase
+       elsif state_part.present? && city_part.present?
+         # limited to only inhabited places at the moment...
+         place_type = 83002
+         top_match_term = state_part.to_ascii.downcase
+         match_term = city_part.to_ascii.downcase
+       else
+         return nil
+       end
+
+       begin
+         if retry_count > 0
+           sleep(sleep_time)
+         end
+         retry_count = retry_count + 1
+
+         tgn_response = Typhoeus::Request.get("http://vocabsservices.getty.edu/TGNService.asmx/TGNGetTermMatch?placetypeid=#{place_type}&nationid=#{country_code}&name=" + CGI.escape(match_term), userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])
+
+       end until (tgn_response.code != 500 || retry_count == max_retry)
+
+       unless tgn_response.code == 500
+         puts 'match found!'
+         parsed_xml = Nokogiri::Slop(tgn_response.body)
+
+         if parsed_xml.Vocabulary.Count.text == '0'
+           return nil
+         end
+
+         # if there is only one result, it is not an array; otherwise it is...
+         if parsed_xml.Vocabulary.Subject.first.blank?
+           subject = parsed_xml.Vocabulary.Subject
+
+           current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
+
+           if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
+             return_hash[:tgn_id] = subject.Subject_ID.text
+           end
+         else
+           parsed_xml.Vocabulary.Subject.each do |subject|
+             current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
+
+             if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
+               return_hash[:tgn_id] = subject.Subject_ID.text
+             end
+           end
+         end
+
+       end
+
+       if tgn_response.code == 500
+         raise 'TGN Server appears to not be responding for Geographic query: ' + term
+       end
+
+       return return_hash
+     end
+
+     def self.LCSHize(value)
+
+       if value.blank?
+         return ''
+       end
+
+       # remove quoted text (quotation marks on the first and last words)
+       value = value.gsub(/^['"]/, '').gsub(/['"]$/, '').strip
+
+       # remove ending periods... except when an initial or etc.
+       if value.last == '.' && value[-2].match(/[^A-Z]/) && !value[-4..-1].match('etc.')
+         value = value.slice(0..-2)
+       end
+
+       # fix when '- -' occurs
+       value = value.gsub(/-\s-/,'--')
+
+       # fix for em dashes
+       value = value.gsub('—','--')
+
+       # fix for en dashes
+       value = value.gsub('–','--')
+
+       # fix for ' - ' combinations
+       value = value.gsub(' - ','--')
+
+       # remove white space after and before '--'
+       value = value.gsub(/\s+--/,'--')
+       value = value.gsub(/--\s+/,'--')
+
+       # ensure the first word is capitalized
+       value[0] = value.first.capitalize[0]
+
+       # strip any white space
+       value = Bplmodels::DatastreamInputFuncs.strip_value(value)
+
+       return value
+     end
+
+     def self.strip_value(value)
+       if value.blank?
+         return nil
+       else
+         if value.class == Float || value.class == Fixnum
+           value = value.to_i.to_s
+         end
+
+         # make sure it is all UTF-8 and not character encodings or HTML tags, and remove any carriage returns
+         return utf8Encode(value)
+       end
+     end
+
+     def self.utf8Encode(value)
+       return HTMLEntities.new.decode(ActionView::Base.full_sanitizer.sanitize(value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' '))).strip
+     end
+
+     def self.split_with_nils(value)
+       if value == nil
+         return ""
+       else
+         split_value = value.split("||")
+         0.upto split_value.length-1 do |pos|
+           split_value[pos] = strip_value(split_value[pos])
+         end
+
+         return split_value
+       end
+     end
+
+     # Problem cases: A . Some Name and A & R
+     def self.getProperTitle(title)
+       nonSort = nil
+       title = title
+
+       if title[0..1].downcase == "a " && (title[0..2].downcase != "a ." && title[0..2].downcase != "a &")
+         nonSort = title[0..1]
+         title = title[2..title.length]
+       elsif title[0..3].downcase == "the "
+         nonSort = title[0..3]
+         title = title[4..title.length]
+       elsif title[0..2].downcase == "an "
+         nonSort = title[0..2]
+         title = title[3..title.length]
+       # elsif title[0..6].downcase == "in the "
+       #   return [title[0..5], title[7..title.length]]
+       end
+
+       return [nonSort, title]
+     end
+
+     def self.parse_language(language_value)
+       return_hash = {}
+       authority_check = Qa::Authorities::Loc.new
+       authority_result = authority_check.search(URI.escape(language_value), 'iso639-2')
+
+       if authority_result.present?
+         authority_result = authority_result.select{|hash| hash['label'].downcase == language_value.downcase || hash['id'].split('/').last.downcase == language_value.downcase }
+         if authority_result.present?
+           return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+           return_hash[:label] = authority_result.first["label"]
+         end
+       end
+
+       return return_hash
+     end
+
+     def self.parse_role(role_value)
+       return_hash = {}
+       authority_check = Qa::Authorities::Loc.new
+       authority_result = authority_check.search(URI.escape(role_value), 'relators')
+       if authority_result.present?
+         authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
+         if authority_result.present?
+           return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+           return_hash[:label] = authority_result.first["label"]
+         end
+       end
+
+       return return_hash
+     end
+
+     def self.parse_name_roles(name)
+       return_hash = {}
+
+       # Make sure we have at least three distinct parts of 2-letter+ words. Avoid something like: Steven C. Painter or Painter, Steven C.
+       # Possible issue: a full name like Steven Carlos Painter?
+       potential_role_check = name.match(/[\(\"\',]*\w\w+[\),\"\']* [\w\.,\d\-\"]*[\w\d][\w\d][\w\.,\d\-\"]* [\(\"\',]*\w\w+[\),\"\']*$/) || name.split(/[ ]+/).length >= 4
+
+       if potential_role_check.present?
+         authority_check = Qa::Authorities::Loc.new
+
+         # check the last word of the name string...
+         role_value = name.match(/(?<=[\(\"\', ])\w+(?=[\),\"\']*$)/).to_s
+         authority_result = authority_check.search(URI.escape(role_value), 'relators')
+         if authority_result.present?
+
+           authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
+           if authority_result.present?
+             # remove the word and any other characters around it ($ means the end of the line)
+             return_hash[:name] = name.sub(/[\(\"\', ]*\w+[\),\"\']*$/, '').gsub(/^[ ]*:/, '').strip
+             return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+             return_hash[:label] = authority_result.first["label"]
+           end
+         end
+
+         # check the first word of the name string...
+         role_value = name.match(/\w+(?=[\),\"\']*)/).to_s
+         authority_result = authority_check.search(URI.escape(role_value), 'relators')
+         if authority_result.present? && return_hash.blank?
+
+           authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
+           if authority_result.present?
+             # remove the word and any other characters around it
+             return_hash[:name] = name.sub(/[\(\"\', ]*\w+[ \),\"\']*/, '').gsub(/^[ ]*:/, '').strip
+             return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+             return_hash[:label] = authority_result.first["label"]
+           end
+         end
+       end
+
+       return return_hash
+     end
+
+     def self.is_numeric?(string)
+       true if Float(string) rescue false
+     end
+
+     # returns a well-formatted placename for display on a map
+     # hiergeo_hash = hash of <mods:hierarchicalGeographic> elements
+     def self.render_display_placename(hiergeo_hash)
+       placename = []
+       case hiergeo_hash[:country]
+         when 'United States','Canada'
+           if hiergeo_hash[:state] || hiergeo_hash[:province]
+             placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence
+             if placename[0].nil? && hiergeo_hash[:county]
+               placename[0] = hiergeo_hash[:county] + ' (county)'
+             end
+             if placename[0]
+               placename[1] = Constants::STATE_ABBR.key(hiergeo_hash[:state]) || hiergeo_hash[:province].presence
+             else
+               placename[1] = hiergeo_hash[:state].presence || hiergeo_hash[:province].presence
+             end
+           else
+             placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence || hiergeo_hash[:country].presence
+           end
+         else
+           placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:state].presence || hiergeo_hash[:province].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence
+           if placename[0].nil? && hiergeo_hash[:county]
+             placename[0] = hiergeo_hash[:county] + ' (county)'
+           end
+           placename[1] = hiergeo_hash[:country]
+       end
+
+       if !placename.blank?
+         placename.join(', ').gsub(/(\A,\s)|(,\s\z)/,'')
+       else
+         nil
+       end
+     end
+
+   end
+ end
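A few quick calls against the string helpers above, with the values they return for typical inputs (illustrative):

  Bplmodels::DatastreamInputFuncs.persNamePartSplitter('Smith, John, 1850-1920')
  # => {:namePart=>"Smith, John", :datePart=>"1850-1920"}

  Bplmodels::DatastreamInputFuncs.getProperTitle('The Boston Almanac')
  # => ["The ", "Boston Almanac"]

  Bplmodels::DatastreamInputFuncs.LCSHize('travel -- history.')
  # => "Travel--history"

  Bplmodels::DatastreamInputFuncs.parse_geographic_term('Boston (Mass.)--History')
  # => "Boston (Mass.)"

The geocoding and authority helpers (parse_bing_api, parse_mapquest_api, parse_google_api, tgn_id_from_term, parse_language, parse_role) additionally need network access, the Geocoder, Typhoeus, and questioning_authority gems, and the BPL_CONFIG_GLOBAL Getty credentials, so they are not sketched here.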