bplmodels 0.0.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +0 -0
  4. data/Rakefile +40 -0
  5. data/app/assets/javascripts/bplmodels/application.js +15 -0
  6. data/app/assets/stylesheets/bplmodels/application.css +13 -0
  7. data/app/controllers/bplmodels/application_controller.rb +4 -0
  8. data/app/helpers/bplmodels/application_helper.rb +4 -0
  9. data/app/models/bplmodels/audio_file.rb +14 -0
  10. data/app/models/bplmodels/book.rb +35 -0
  11. data/app/models/bplmodels/card.rb +35 -0
  12. data/app/models/bplmodels/characterization.rb +92 -0
  13. data/app/models/bplmodels/collection.rb +118 -0
  14. data/app/models/bplmodels/complex_object_base.rb +24 -0
  15. data/app/models/bplmodels/correspondence.rb +35 -0
  16. data/app/models/bplmodels/document.rb +35 -0
  17. data/app/models/bplmodels/document_file.rb +8 -0
  18. data/app/models/bplmodels/ephemera.rb +35 -0
  19. data/app/models/bplmodels/file.rb +151 -0
  20. data/app/models/bplmodels/file_content_datastream.rb +10 -0
  21. data/app/models/bplmodels/fits_datastream.rb +190 -0
  22. data/app/models/bplmodels/image.rb +14 -0
  23. data/app/models/bplmodels/image_file.rb +18 -0
  24. data/app/models/bplmodels/institution.rb +159 -0
  25. data/app/models/bplmodels/manuscript.rb +34 -0
  26. data/app/models/bplmodels/map.rb +34 -0
  27. data/app/models/bplmodels/mods_desc_metadata.rb +1826 -0
  28. data/app/models/bplmodels/musical_notation.rb +34 -0
  29. data/app/models/bplmodels/newspaper.rb +15 -0
  30. data/app/models/bplmodels/nom_terminology.rb +1242 -0
  31. data/app/models/bplmodels/non_photographic_print.rb +34 -0
  32. data/app/models/bplmodels/oai_collection.rb +19 -0
  33. data/app/models/bplmodels/oai_metadata.rb +75 -0
  34. data/app/models/bplmodels/oai_object.rb +45 -0
  35. data/app/models/bplmodels/object.rb +36 -0
  36. data/app/models/bplmodels/object_base.rb +1241 -0
  37. data/app/models/bplmodels/objects/collection.rb~ +28 -0
  38. data/app/models/bplmodels/objects/image.rb~ +59 -0
  39. data/app/models/bplmodels/objects/postcard.rb~ +56 -0
  40. data/app/models/bplmodels/organizational_set.rb +25 -0
  41. data/app/models/bplmodels/periodical.rb +37 -0
  42. data/app/models/bplmodels/photographic_print.rb +34 -0
  43. data/app/models/bplmodels/relation_base.rb +99 -0
  44. data/app/models/bplmodels/scrapbook.rb +35 -0
  45. data/app/models/bplmodels/simple_object_base.rb +27 -0
  46. data/app/models/bplmodels/sound_recording.rb +15 -0
  47. data/app/models/bplmodels/system_collection.rb +8 -0
  48. data/app/models/bplmodels/uploads_set.rb +3 -0
  49. data/app/models/bplmodels/workflow_metadata.rb +99 -0
  50. data/app/views/layouts/bplmodels/application.html.erb +14 -0
  51. data/config/application.rb +6 -0
  52. data/config/predicate_mappings.yml +61 -0
  53. data/config/routes.rb +2 -0
  54. data/lib/bplmodels.rb +21 -0
  55. data/lib/bplmodels/constants.rb +119 -0
  56. data/lib/bplmodels/datastream_input_funcs.rb +949 -0
  57. data/lib/bplmodels/engine.rb +5 -0
  58. data/lib/bplmodels/engine.rb~ +5 -0
  59. data/lib/bplmodels/finder.rb +192 -0
  60. data/lib/bplmodels/object_funcs.rb +10 -0
  61. data/lib/bplmodels/version.rb +3 -0
  62. data/lib/tasks/bplmodels_tasks.rake +4 -0
  63. data/test/bplmodels_test.rb +7 -0
  64. data/test/dummy/README.rdoc +261 -0
  65. data/test/dummy/Rakefile +7 -0
  66. data/test/dummy/app/assets/javascripts/application.js +15 -0
  67. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  68. data/test/dummy/app/controllers/application_controller.rb +3 -0
  69. data/test/dummy/app/helpers/application_helper.rb +2 -0
  70. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  71. data/test/dummy/config.ru +4 -0
  72. data/test/dummy/config/application.rb +59 -0
  73. data/test/dummy/config/boot.rb +10 -0
  74. data/test/dummy/config/environment.rb +5 -0
  75. data/test/dummy/config/environments/development.rb +37 -0
  76. data/test/dummy/config/environments/production.rb +67 -0
  77. data/test/dummy/config/environments/test.rb +37 -0
  78. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  79. data/test/dummy/config/initializers/inflections.rb +15 -0
  80. data/test/dummy/config/initializers/mime_types.rb +5 -0
  81. data/test/dummy/config/initializers/secret_token.rb +7 -0
  82. data/test/dummy/config/initializers/session_store.rb +8 -0
  83. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  84. data/test/dummy/config/locales/en.yml +5 -0
  85. data/test/dummy/config/routes.rb +4 -0
  86. data/test/dummy/public/404.html +26 -0
  87. data/test/dummy/public/422.html +26 -0
  88. data/test/dummy/public/500.html +25 -0
  89. data/test/dummy/public/favicon.ico +0 -0
  90. data/test/dummy/script/rails +6 -0
  91. data/test/integration/navigation_test.rb +10 -0
  92. data/test/test_helper.rb +15 -0
  93. metadata +234 -0
@@ -0,0 +1,99 @@
1
module Bplmodels
  # OM datastream recording repository workflow state for an object:
  # publication status, ingest provenance (file paths / names / datastreams),
  # ARK identifier information, and content flags.
  class WorkflowMetadata < ActiveFedora::OmDatastream
    include OM::XML::Document

    WORKFLOW_NS = 'http://www.bpl.org/repository/xml/ns/workflow'
    WORKFLOW_SCHEMA = 'http://www.bpl.org/repository/xml/xsd/workflow.xsd'
    # Root-element attributes emitted by xml_template.
    WORKFLOW_PARAMS = {
        "version" => "0.0.1",
        "xmlns:xlink" => "http://www.w3.org/1999/xlink",
        "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
        "xmlns" => WORKFLOW_NS,
        "xsi:schemaLocation" => "#{WORKFLOW_NS} #{WORKFLOW_SCHEMA}",
    }

    set_terminology do |t|
      t.root :path => 'workflowMetadata', :xmlns => WORKFLOW_NS

      t.item_status(:path=>"itemStatus") {
        t.state(:path=>"state")
        t.state_comment(:path=>"stateComment")
        t.processing(:path=>"processing")
        t.processing_comment(:path=>"processingComment")
      }

      t.item_source(:path=>"itemSource") {
        t.ingest_origin(:path=>"ingestOrigin")
        t.ingest_filepath(:path=>"ingestFilepath") # Only supported later for file objects.
        t.ingest_filename(:path=>"ingestFilename") # Only recently added
      }

      t.item_ark_info(:path=>"arkInformation") {
        t.ark_id(:path=>"arkID")
        t.ark_type(:path=>"arkType")
        t.ark_parent_pid(:path=>"arkParentPID")
      }

      t.source(:path=>"source") {
        t.ingest_origin(:path=>"ingestOrigin")
        t.ingest_filepath(:path=>"ingestFilepath") # Only supported later for file objects.
        t.ingest_filename(:path=>"ingestFilename") # Only recently added
        t.ingest_datastream(:path=>"ingestDatastream")
      }

      t.item_designations(:path=>'itemDesignations') {
        t.flagged_for_content(:path=>"flaggedForContent")
      }

      # NOTE(review): "markedForDelation" is a typo for "markedForDeletion",
      # but it is the element name persisted in existing datastreams — do not
      # rename it without a data migration.
      t.marked_for_deletion(:path=>'markedForDelation') {
        t.reason(:path=>'reason')
      }

    end

    # Minimal skeleton document: an empty <workflowMetadata> root carrying the
    # namespace/schema attributes.
    def self.xml_template
      Nokogiri::XML::Builder.new do |xml|
        xml.workflowMetadata(WORKFLOW_PARAMS) {

        }
      end.doc
    end

    # Required for Active Fedora 9: terms in this vocabulary carry no prefix.
    def prefix(path=nil)
      return ''
    end

    # Appends +value+ as a new <ingestFilepath> under <itemSource>, skipping
    # blanks and values already present.
    def insert_file_path(value=nil)
      ingest_filepath_index = self.item_source.ingest_filepath.count

      self.item_source.ingest_filepath(ingest_filepath_index, value) unless value.blank? || self.item_source.ingest_filepath.include?(value)
    end

    # Appends +value+ as a new <ingestFilename> under <itemSource>, skipping
    # blanks and values already present.
    # FIX: previously both the index and the duplicate check used
    # ingest_filepath (copy-paste from insert_file_path), so filenames were
    # mis-indexed and never de-duplicated against existing filenames.
    def insert_file_name(value=nil)
      ingest_filename_index = self.item_source.ingest_filename.count

      self.item_source.ingest_filename(ingest_filename_index, value) unless value.blank? || self.item_source.ingest_filename.include?(value)
    end

    # Appends a complete <source> entry describing one ingested file; blank
    # components are omitted.
    def insert_file_source(filepath, filename, datastream)
      source_count = self.source.count

      self.source(source_count).ingest_filepath(0, filepath) unless filepath.blank?
      self.source(source_count).ingest_filename(0, filename) unless filename.blank?
      self.source(source_count).ingest_datastream(0, datastream) unless datastream.blank?
    end

    # Sets the content flag on the first <itemDesignations> node.
    def insert_flagged(value=nil)
      self.item_designations(0).flagged_for_content(0, value) unless value.blank?
    end

    # Marks an OAI-harvested record as fully published/processed.
    def insert_oai_defaults
      self.item_status(0).state = "published"
      self.item_status(0).state_comment = "OAI Harvested Record"
      self.item_status(0).processing = "complete"
      self.item_status(0).processing_comment = "Object Processing Complete"
    end
  end
end
@@ -0,0 +1,14 @@
1
<%# Default application layout for the Bplmodels engine: loads the engine's
    stylesheet and javascript bundles plus CSRF meta tags, then renders the
    current view through yield. %>
<!DOCTYPE html>
<html>
<head>
  <title>Bplmodels</title>
  <%= stylesheet_link_tag "bplmodels/application", :media => "all" %>
  <%= javascript_include_tag "bplmodels/application" %>
  <%= csrf_meta_tags %>
</head>
<body>

<%= yield %>

</body>
</html>
@@ -0,0 +1,6 @@
1
module Bplmodels
  class Application < Rails::Application
    # Autoload every subdirectory under app/models so that the namespaced
    # model classes are found without explicit requires.
    config.autoload_paths += Dir[Rails.root.join('app', 'models', '{**}')]
  end
end
@@ -0,0 +1,61 @@
1
# The default namespace maps to the default namespace for generating rels_ext from solr
:default_namespace: info:fedora/fedora-system:def/relations-external#

# Short prefixes used when serializing relationships from each namespace.
:predicate_namespaces:
  fedora-model: info:fedora/fedora-system:def/model#
  fedora-relations-model: info:fedora/fedora-system:def/relations-external#
  bpllib-rel: http://projecthydra.org/ns/relations#

# namespace mappings---
# you can add specific mappings for your institution by providing the following:
#   namespace_uri:
#     :relationship_symbol: relationship_identifier
#
# For example, if you have the following element in your rels_ext:
#
#   <oai:itemID>oai:example.edu:changeme:500</oai:itemID>
#
# With the last two lines of this file uncommented, the relationships hash of your object will include:
#   :oai_item_id => ["info:fedora/oai:example.edu:changeme:500"]
#
# Maps each namespace URI to the Ruby symbol => RDF predicate pairs it defines.
:predicate_mapping:
  info:fedora/fedora-system:def/relations-external#:
    :conforms_to: conformsTo
    :has_annotation: hasAnnotation
    :has_collection_member: hasCollectionMember
    :has_constituent: hasConstituent
    :has_dependent: hasDependent
    :has_derivation: hasDerivation
    :has_description: hasDescription
    :has_equivalent: hasEquivalent
    :has_metadata: hasMetadata
    :has_member: hasMember
    :has_model: hasModel
    :has_part: hasPart
    :has_subset: hasSubset
    :is_annotation_of: isAnnotationOf
    :is_constituent_of: isConstituentOf
    :is_dependent_of: isDependentOf
    :is_derivation_of: isDerivationOf
    :is_description_of: isDescriptionOf
    :is_member_of: isMemberOf
    :is_member_of_collection: isMemberOfCollection
    :is_metadata_for: isMetadataFor
    :is_part_of: isPartOf
    :is_subset_of: isSubsetOf
    :is_topic_of: isTopicOf
  info:fedora/fedora-system:def/model#:
    :is_contractor_of: isContractorOf
    :is_deployment_of: isDeploymentOf
    :has_service: hasService
    :has_model: hasModel
  http://www.openarchives.org/OAI/2.0/:
    :oai_item_id: itemID
  # BPL/Hydra-specific relationships (images, crops, subcollections, admin policies).
  http://projecthydra.org/ns/relations#:
    :is_governed_by: isGovernedBy
    :is_image_of: isImageOf
    :has_image: hasImage
    :has_subcollection: hasSubcollection
    :has_crop: hasCrop
    :is_crop_of: isCropOf
    :is_exemplary_image_of: isExemplaryImageOf
data/config/routes.rb ADDED
@@ -0,0 +1,2 @@
1
# Route definitions for the Bplmodels engine; no routes are currently defined.
Bplmodels::Engine.routes.draw do
end
data/lib/bplmodels.rb ADDED
@@ -0,0 +1,21 @@
1
+ require "bplmodels/engine"
2
+ require "bplmodels/datastream_input_funcs"
3
+ require "bplmodels/finder"
4
+ require "bplmodels/constants"
5
+ require "timeliness"
6
+
7
module Bplmodels
  # Resolves the current runtime environment name, in priority order:
  #   1. the 'environment' key of DERIVATIVE_CONFIG_GLOBAL, when that config is loaded;
  #   2. Rails.env, when running inside a Rails app;
  #   3. ENV['environment'] (the supported non-Rails override);
  #   4. otherwise defaults to — and persists — 'development' in ENV['environment'].
  # Raises RuntimeError when only RAILS_ENV is set outside of Rails, since that
  # variable is deliberately not honored here.
  def self.environment
    if defined?(DERIVATIVE_CONFIG_GLOBAL) && DERIVATIVE_CONFIG_GLOBAL.present? && DERIVATIVE_CONFIG_GLOBAL['environment'].present?
      DERIVATIVE_CONFIG_GLOBAL['environment']
    elsif defined?(Rails.env) && !Rails.env.nil?
      Rails.env.to_s
    elsif !ENV['environment'].nil?
      # FIX: the original guarded with defined?(ENV['environment']), which is
      # always truthy (defined? on a method call returns "method"); the nil
      # check is the only meaningful test. Same for RAILS_ENV below.
      ENV['environment']
    elsif !ENV['RAILS_ENV'].nil?
      raise RuntimeError, "You're depending on RAILS_ENV for setting your environment. Please use ENV['environment'] for non-rails environment setting: 'rake foo:bar environment=test'"
    else
      ENV['environment'] = 'development'
    end
  end
end
@@ -0,0 +1,119 @@
1
module Bplmodels
  # Static lookup tables shared across the bplmodels models:
  #  - GENRE_LOOKUP: genre heading => TGM/LCTGM authority id and authority code
  #  - COUNTRY_TGN_LOOKUP: common country name => Getty TGN id and TGN-preferred name
  #  - STATE_ABBR: USPS two-letter code => US state/territory name
  class Constants
    GENRE_LOOKUP = {}
    GENRE_LOOKUP['Cards'] = {:id=>'tgm001686', :authority=>'gmgpc'}
    GENRE_LOOKUP['Correspondence'] = {:id=>'tgm002590', :authority=>'lctgm'}
    GENRE_LOOKUP['Documents'] = {:id=>'tgm003185', :authority=>'gmgpc'}
    GENRE_LOOKUP['Drawings'] = {:id=>'tgm003279', :authority=>'gmgpc'}
    GENRE_LOOKUP['Ephemera'] = {:id=>'tgm003634', :authority=>'gmgpc'}
    GENRE_LOOKUP['Manuscripts'] = {:id=>'tgm012286', :authority=>'gmgpc'}
    GENRE_LOOKUP['Maps'] = {:id=>'tgm006261', :authority=>'gmgpc'}
    GENRE_LOOKUP['Objects'] = {:id=>'tgm007159', :authority=>'lctgm'}
    GENRE_LOOKUP['Paintings'] = {:id=>'tgm007393', :authority=>'gmgpc'}
    GENRE_LOOKUP['Photographs'] = {:id=>'tgm007721', :authority=>'gmgpc'}
    GENRE_LOOKUP['Posters'] = {:id=>'tgm008104', :authority=>'gmgpc'}
    GENRE_LOOKUP['Prints'] = {:id=>'tgm008237', :authority=>'gmgpc'}
    GENRE_LOOKUP['Newspapers'] = {:id=>'tgm007068', :authority=>'lctgm'}
    GENRE_LOOKUP['Sound recordings'] = {:id=>'tgm009874', :authority=>'lctgm'}
    GENRE_LOOKUP['Motion pictures'] = {:id=>'tgm006804', :authority=>'lctgm'}
    GENRE_LOOKUP['Periodicals'] = {:id=>'tgm007641', :authority=>'gmgpc'}
    GENRE_LOOKUP['Books'] = {:id=>'tgm001221', :authority=>'gmgpc'}
    GENRE_LOOKUP['Albums'] = {:id=>'tgm000229', :authority=>'gmgpc'}
    GENRE_LOOKUP['Musical notation'] = {:id=>'tgm006926', :authority=>'lctgm'}

    COUNTRY_TGN_LOOKUP = {}
    COUNTRY_TGN_LOOKUP['United States'] = {:tgn_id=>7012149, :tgn_country_name=>'United States'}
    COUNTRY_TGN_LOOKUP['Canada'] = {:tgn_id=>7005685, :tgn_country_name=>'Canada'}
    COUNTRY_TGN_LOOKUP['France'] = {:tgn_id=>1000070, :tgn_country_name=>'France'}
    COUNTRY_TGN_LOOKUP['Vietnam'] = {:tgn_id=>1000145, :tgn_country_name=>'Viet Nam'}
    COUNTRY_TGN_LOOKUP['South Africa'] = {:tgn_id=>1000193, :tgn_country_name=>'South Africa'}
    COUNTRY_TGN_LOOKUP['Philippines'] = {:tgn_id=>1000135, :tgn_country_name=>'Pilipinas'}
    COUNTRY_TGN_LOOKUP['China'] = {:tgn_id=>1000111, :tgn_country_name=>'Zhongguo'}
    COUNTRY_TGN_LOOKUP['Japan'] = {:tgn_id=>1000120, :tgn_country_name=>'Nihon'}

    # Earlier, superseded form of COUNTRY_TGN_LOOKUP (keyed by ISO code and
    # name, value is the bare TGN id); retained for reference only.
    # COUNTRY_TGN_LOOKUP = {
    #     'US' => 7012149,
    #     'CA' => 7005685,
    #     'FR' => 1000070,
    #     'VN' => 1000145,
    #     'ZA' => 1000193,
    #     'PH' => 1000135,
    #     'United States' => 7012149,
    #     'Canada' => 7005685,
    #     'France' => 1000070,
    #     'Vietnam' => 1000145,
    #     'Viet Nam' => 1000145,
    #     'South Africa' => 1000193,
    #     'Philippines' => 1000135
    # }

    # FIX: several entries carried artifacts of the table this list was copied
    # from ('America Samoa', footnote-marker names 'Micronesia1' / 'Islands1',
    # and 'Virgin Island'); corrected to the official names.
    STATE_ABBR = {
        'AL' => 'Alabama',
        'AK' => 'Alaska',
        'AS' => 'American Samoa',
        'AZ' => 'Arizona',
        'AR' => 'Arkansas',
        'CA' => 'California',
        'CO' => 'Colorado',
        'CT' => 'Connecticut',
        'DE' => 'Delaware',
        'DC' => 'District of Columbia',
        'FM' => 'Federated States of Micronesia',
        'FL' => 'Florida',
        'GA' => 'Georgia',
        'GU' => 'Guam',
        'HI' => 'Hawaii',
        'ID' => 'Idaho',
        'IL' => 'Illinois',
        'IN' => 'Indiana',
        'IA' => 'Iowa',
        'KS' => 'Kansas',
        'KY' => 'Kentucky',
        'LA' => 'Louisiana',
        'ME' => 'Maine',
        'MH' => 'Marshall Islands',
        'MD' => 'Maryland',
        'MA' => 'Massachusetts',
        'MI' => 'Michigan',
        'MN' => 'Minnesota',
        'MS' => 'Mississippi',
        'MO' => 'Missouri',
        'MT' => 'Montana',
        'NE' => 'Nebraska',
        'NV' => 'Nevada',
        'NH' => 'New Hampshire',
        'NJ' => 'New Jersey',
        'NM' => 'New Mexico',
        'NY' => 'New York',
        'NC' => 'North Carolina',
        'ND' => 'North Dakota',
        'OH' => 'Ohio',
        'OK' => 'Oklahoma',
        'OR' => 'Oregon',
        'PW' => 'Palau',
        'PA' => 'Pennsylvania',
        'PR' => 'Puerto Rico',
        'RI' => 'Rhode Island',
        'SC' => 'South Carolina',
        'SD' => 'South Dakota',
        'TN' => 'Tennessee',
        'TX' => 'Texas',
        'UT' => 'Utah',
        'VT' => 'Vermont',
        'VI' => 'Virgin Islands',
        'VA' => 'Virginia',
        'WA' => 'Washington',
        'WV' => 'West Virginia',
        'WI' => 'Wisconsin',
        'WY' => 'Wyoming'
    }

  end
end
@@ -0,0 +1,949 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ module Bplmodels
5
+ class DatastreamInputFuncs
6
+
7
+ # these functions can be used to split names into subparts for <mods:name> and <mods:subject><mods:name>
8
+
9
+ # use for personal name headings e.g., <mods:name type="personal">
10
+ # returns personal name data as a hash which can be used to populate <mods:namePart> and <mads:namePart type="date">
11
+
12
# Splits a personal-name heading (e.g. "Smith, John, 1900-1980") into a hash
# with :namePart and, when a trailing date segment is present, :datePart —
# suitable for <mods:namePart> and <mads:namePart type="date">.
# Headings with no 4-digit year, or with the year inside parentheses, are
# returned whole under :namePart.
def self.persNamePartSplitter(inputstring)
  parts = Hash.new
  if inputstring =~ /\d{4}/ && !(inputstring =~ /\(.*\d{4}.*\)/)
    # Trailing date segment: strip it from the name and keep it separately.
    parts[:namePart] = inputstring.gsub(/,[\d\- \.\w?]*$/,"")
    parts[:datePart] = inputstring.split(/.*,/)[1].strip
  else
    # No splittable date data; the whole heading is the name.
    parts[:namePart] = inputstring
  end
  parts
end
27
+
28
+ # use for corporate name headings e.g., <mods:name type="corporate">
29
+ # returns corporate name data as an array which can be used to populate <mods:namePart> subparts
30
+ # (corporate name subparts are not differentiated by any attributes in the xml)
31
+ # (see http://id.loc.gov/authorities/names/n82139319.madsxml.xml for example)
32
+ # Note: (?!\)) part is to check for examples like: 'Boston (Mass.) Police Dept.'
33
+
34
# Splits a corporate-name heading into an array of subparts for
# <mods:namePart> (subparts carry no distinguishing attributes in the XML;
# see http://id.loc.gov/authorities/names/n82139319.madsxml.xml).
# A subpart boundary is a period preceded by at least five non-space
# characters and not followed by ')' — the lookahead keeps headings like
# 'Boston (Mass.) Police Dept.' intact.
def self.corpNamePartSplitter(inputstring)
  subparts = Array.new
  boundary = /[\S]{5}\.(?!\))/
  if inputstring =~ boundary
    while inputstring =~ boundary
      # Everything after the first boundary period is the remainder; the
      # leading piece (minus its trailing period) becomes the next subpart.
      remainder = boundary.match(inputstring).post_match
      head = inputstring.gsub(remainder,"")
      subparts << head.gsub(/\.\z/,"").strip
      inputstring = remainder
    end
    subparts << inputstring.gsub(/\.\z/,"").strip
  else
    subparts << inputstring
  end
  subparts
end
49
+
50
+ # a function to convert date data from OAI feeds into MODS-usable date data
51
+ # assumes date values containing ";" have already been split
52
+ # returns hash with :single_date, :date_range, :date_qualifier, and/or :date_note values
53
# Converts free-text date data (typically from OAI feeds) into MODS-usable
# values. Assumes values containing ";" have already been split.
# Returns a hash with any of :single_date ("YYYY[-MM[-DD]]"),
# :date_range ({:start, :end}), :date_qualifier
# ('questionable'/'approximate'/'inferred'), and/or :date_note (the original
# string, when it could not be fully parsed).
def self.convert_to_mods_date(value)

  date_data = {} # create the hash to hold all the data
  source_date_string = value.strip # variable to hold original value

  # weed out obvious bad dates before processing
  if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
      (value.match(/\d\d\d\d-\z/)) || # 1975-
      (value.match(/\d\d-\d\d\/\d\d/)) || # 1975-09-09/10
      (value.match(/\d*\(\d*\)/)) || # 1975(1976)
      (value.scan(/\d\d\d\d/).length > 2) || # 1861/1869/1915
      (value.scan(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).length > 1) ||
      # or if data does not match any of these
      (!value.match(/(\d\dth [Cc]entury|\d\d\d-\?*|\d\d\d\?|\d\d\?\?|\d\d\d\d)/))
    date_data[:date_note] = source_date_string
  else
    # find date qualifier
    if value.include? '?'
      date_data[:date_qualifier] = 'questionable'
    elsif value.match(/\A[Cc]/)
      date_data[:date_qualifier] = 'approximate'
    elsif (value.match(/[\[\]]+/)) || (value.match(/[(][A-Za-z, \d]*[\d]+[A-Za-z, \d]*[)]+/)) # if [] or ()
      date_data[:date_qualifier] = 'inferred'
    end

    # remove unnecessary chars and words
    value = value.gsub(/[\[\]\(\)\.,']/,'')
    value = value.gsub(/(\b[Bb]etween\b|\bcirca\b|\bca\b|\Aca|\Ac)/,'').strip

    # differentiate between ranges and single dates
    if (value.scan(/\d\d\d\d/).length == 2) ||
        (value.include? '0s') || # 1970s
        (value.include? 'entury') || # 20th century
        (value.match(/(\A\d\d\d\?|\A\d\d\?\?|\A\d\d\d-\?*|\d\d\d\d-\d\z|\d\d\d\d\/[\d]{1,2}\z)/)) ||
        (value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/)) ||
        ((value.match(/\d\d\d\d-\d\d\z/)) && (value[-2..-1].to_i > 12)) # 1975-76 but NOT 1910-11

      # RANGES
      date_data[:date_range] = {}

      # deal with date strings with 2 4-digit year values separately
      if value.scan(/\d\d\d\d/).length == 2

        # convert weird span indicators ('or','and','||'), remove extraneous text
        value = value.gsub(/(or|and|\|\|)/,'-').gsub(/[A-Za-z\?\s]/,'')

        if value.match(/\A[12][\d]{3}-[01][\d]-[12][\d]{3}-[01][\d]\z/) # 1895-05-1898-01
          date_data_range_start = value[0..6]
          date_data_range_end = value[-7..-1]
        elsif value.match(/\A[12][\d]{3}\/[12][\d]{3}\z/) # 1987/1988
          date_data_range_start = value[0..3]
          date_data_range_end = value[-4..-1]
        else
          range_dates = value.split('-') # split the dates into an array
          range_dates.each_with_index do |range_date,index|
            # format the data properly
            if range_date.include? '/' # 11/05/1965
              range_date_pieces = range_date.split('/')
              range_date_piece_year = range_date_pieces.last
              range_date_piece_month = range_date_pieces.first.length == 2 ? range_date_pieces.first : '0' + range_date_pieces.first
              if range_date_pieces.length == 3
                range_date_piece_day = range_date_pieces[1].length == 2 ? range_date_pieces[1] : '0' + range_date_pieces[1]
              end
              value_to_insert = range_date_piece_year + '-' + range_date_piece_month
              value_to_insert << '-' + range_date_piece_day if range_date_piece_day
            elsif range_date.match(/\A[12][\d]{3}\z/)
              value_to_insert = range_date
            end
            # add the data to the proper variable
            if value_to_insert
              if index == 0
                date_data_range_start = value_to_insert
              else
                date_data_range_end = value_to_insert
              end
            end
          end
        end
      else
        # if there are 'natural language' range values, find, assign to var, then remove
        text_range = value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).to_s
        if text_range.length > 0
          date_data[:date_qualifier] ||= 'approximate' # TODO - remove this??
          value = value.gsub(/#{text_range}/,'').strip
        end

        # deal with ranges for which 'natural language' range values are ignored
        if value.match(/\A1\d\?\?\z/) # 19??
          date_data_range_start = value[0..1] + '00'
          date_data_range_end = value[0..1] + '99'
        elsif value.match(/\A[12]\d\d-*\?*\z/) # 195? || 195-? || 195-
          date_data_range_start = value[0..2] + '0'
          date_data_range_end = value[0..2] + '9'
        elsif value.match(/\A[12]\d\d\d[-\/][\d]{1,2}\z/) # 1956-57 || 1956/57 || 1956-7
          if value.length == 7 && (value[5..6].to_i > value[2..3].to_i)
            date_data_range_start = value[0..3]
            date_data_range_end = value[0..1] + value[5..6]
          elsif value.length == 6 && (value[5].to_i > value[3].to_i)
            date_data_range_start = value[0..3]
            date_data_range_end = value[0..2] + value[5]
          end
          date_data[:date_note] = source_date_string if text_range.length > 0
        end
        # deal with ranges where text range values are evaluated
        value = value.gsub(/\?/,'').strip # remove question marks

        # centuries
        if value.match(/([12][\d]{1}th [Cc]entury|[12][\d]{1}00s)/) # 19th century || 1800s
          if value.match(/[12][\d]{1}00s/)
            century_prefix_date = value.match(/[12][\d]{1}/).to_s
          else
            century_prefix_date = (value.match(/[12][\d]{1}/).to_s.to_i-1).to_s
          end
          if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
            if text_range.match(/[Ee]arly/)
              century_suffix_dates = %w[00 39]
            elsif text_range.match(/[Mm]id/)
              century_suffix_dates = %w[30 69]
            else
              century_suffix_dates = %w[60 99]
            end
          end
          date_data_range_start = century_suffix_dates ? century_prefix_date + century_suffix_dates[0] : century_prefix_date + '00'
          date_data_range_end = century_suffix_dates ? century_prefix_date + century_suffix_dates[1] : century_prefix_date + '99'
        else
          # remove any remaining non-date text
          value.match(/[12][1-9][1-9]0s/) ? is_decade = true : is_decade = false # but preserve decade-ness
          remaining_text = value.match(/\D+/).to_s
          value = value.gsub(/#{remaining_text}/,'').strip if remaining_text.length > 0

          # decades
          if is_decade
            decade_prefix_date = value.match(/\A[12][1-9][1-9]/).to_s
            if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
              if text_range.match(/[Ee]arly/)
                decade_suffix_dates = %w[0 3]
              elsif text_range.match(/[Mm]id/)
                decade_suffix_dates = %w[4 6]
              else
                decade_suffix_dates = %w[7 9]
              end
            end
            date_data_range_start = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[0] : decade_prefix_date + '0'
            date_data_range_end = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[1] : decade_prefix_date + '9'
          else
            # single year ranges
            single_year_prefix = value.match(/[12][0-9]{3}/).to_s
            if text_range.length > 0
              if text_range.match(/[Ee]arly/)
                single_year_suffixes = %w[01 04]
              elsif text_range.match(/[Mm]id/)
                single_year_suffixes = %w[05 08]
              elsif text_range.match(/[Ll]ate/)
                single_year_suffixes = %w[09 12]
              elsif text_range.match(/[Ww]inter/)
                single_year_suffixes = %w[01 03]
              elsif text_range.match(/[Ss]pring/)
                single_year_suffixes = %w[03 05]
              elsif text_range.match(/[Ss]ummer/)
                single_year_suffixes = %w[06 08]
              else text_range.match(/[F]all/)
                single_year_suffixes = %w[09 11]
              end
              date_data_range_start = single_year_prefix + '-' + single_year_suffixes[0]
              date_data_range_end = single_year_prefix + '-' + single_year_suffixes[1]
            end
          end
          # if possibly significant info removed, include as note
          date_data[:date_note] = source_date_string if remaining_text.length > 1
        end
      end

      # insert the values into the date_data hash
      if date_data_range_start && date_data_range_end
        date_data[:date_range][:start] = date_data_range_start
        date_data[:date_range][:end] = date_data_range_end
      else
        date_data[:date_note] ||= source_date_string
        date_data.delete :date_range
      end

    else
      # SINGLE DATES
      value = value.gsub(/\?/,'') # remove question marks
      # fix bad spacing (e.g. December 13,1985 || December 3,1985)
      value = value.insert(-5, ' ') if value.match(/[A-Za-z]* \d{6}/) || value.match(/[A-Za-z]* \d{5}/)

      # try to automatically parse single dates with YYYY && MM && DD values
      if Timeliness.parse(value).nil?
        # start further processing
        if value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
          date_data[:single_date] = value
        elsif value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
          value = '0' + value if value.match(/\A[1-9][-\/][12]\d\d\d\z/) # m-yyyy || m/yyyy
          date_data[:single_date] = value[3..6] + '-' + value[0..1]
        elsif value.match(/\A[A-Za-z]{3,9}[\.]? [12]\d\d\d\z/) # April 1987 || Apr. 1987
          value = value.split(' ')
          if value[0].match(/\A[A-Za-z]{3}[\.]?\z/)
            value_month = '%02d' % Date::ABBR_MONTHNAMES.index(value[0]) if Date::ABBR_MONTHNAMES.index(value[0])
          else
            value_month = '%02d' % Date::MONTHNAMES.index(value[0]) if Date::MONTHNAMES.index(value[0])
          end
          date_data[:single_date] = value_month ? value[1] + '-' + value_month : value[1]
        elsif value.match(/\A[12]\d\d\d\z/) # 1999
          date_data[:single_date] = value
        else
          date_data[:date_note] = source_date_string
        end
      else
        date_data[:single_date] = Timeliness.parse(value).strftime("%Y-%m-%d")
      end

    end

  end

  # some final validation, just in case
  date_validation_array = []
  date_validation_array << date_data[:single_date] if date_data[:single_date]
  date_validation_array << date_data[:date_range][:start] if date_data[:date_range]
  date_validation_array << date_data[:date_range][:end] if date_data[:date_range]
  date_validation_array.each do |date_to_val|
    # FIX: the original compared the Integer length against the Strings '7'
    # and '10' (always false), so this validation never executed; it also
    # re-parsed the already-mutated `value` instead of the date under test.
    if date_to_val.length == 7 # YYYY-MM: month must be 01..12
      bad_date = true unless date_to_val[-2..-1].to_i.between?(1,12) && !date_to_val.nil?
    elsif date_to_val.length == 10 # YYYY-MM-DD: must be a parseable date
      bad_date = true unless Timeliness.parse(date_to_val) && !date_to_val.nil?
    end
    if bad_date
      date_data[:date_note] ||= source_date_string
      date_data.delete :single_date if date_data[:single_date]
      date_data.delete :date_range if date_data[:date_range]
    end
  end

  # if the date slipped by all the processing somehow!
  if date_data[:single_date].nil? && date_data[:date_range].nil? && date_data[:date_note].nil?
    date_data[:date_note] = source_date_string
  end

  date_data

end
296
+
297
+ # retrieve data from Getty TGN to populate <mods:subject auth="tgn">
298
+ def self.get_tgn_data(tgn_id)
299
+ tgn_response = Typhoeus::Request.get('http://vocabsservices.getty.edu/TGNService.asmx/TGNGetSubject?subjectID=' + tgn_id, userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])
300
+ unless tgn_response.code == 500
301
+ tgnrec = Nokogiri::XML(tgn_response.body)
302
+ #puts tgnrec.to_s
303
+
304
+ # coordinates
305
+ if tgnrec.at_xpath("//Coordinates")
306
+ coords = {}
307
+ coords[:latitude] = tgnrec.at_xpath("//Latitude/Decimal").children.to_s
308
+ coords[:longitude] = tgnrec.at_xpath("//Longitude/Decimal").children.to_s
309
+ else
310
+ coords = nil
311
+ end
312
+
313
+ hier_geo = {}
314
+
315
+ #main term
316
+ if tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text")
317
+ tgn_term_type = tgnrec.at_xpath("//Preferred_Place_Type/Place_Type_ID").children.to_s
318
+ pref_term_langs = tgnrec.xpath("//Terms/Preferred_Term/Term_Languages/Term_Language/Language")
319
+ # if the preferred term is the preferred English form, use that
320
+ if pref_term_langs.children.to_s.include? "English"
321
+ tgn_term = tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
322
+ else # use the non-preferred term which is the preferred English form
323
+ if tgnrec.xpath("//Terms/Non-Preferred_Term")
324
+ non_pref_terms = tgnrec.xpath("//Terms/Non-Preferred_Term")
325
+ non_pref_terms.each do |non_pref_term|
326
+ non_pref_term_langs = non_pref_term.children.css("Term_Language")
327
+ # have to loop through these, as sometimes languages share form
328
+ non_pref_term_langs.each do |non_pref_term_lang|
329
+ if non_pref_term_lang.children.css("Preferred").children.to_s == "Preferred" && non_pref_term_lang.children.css("Language").children.to_s == "English"
330
+ tgn_term = non_pref_term.children.css("Term_Text").children.to_s
331
+ end
332
+ end
333
+ end
334
+ end
335
+ end
336
+ # if no term is the preferred English form, just use the preferred term
337
+ tgn_term ||= tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
338
+ end
339
+ if tgn_term && tgn_term_type
340
+ case tgn_term_type
341
+ when '29000/continent'
342
+ hier_geo[:continent] = tgn_term
343
+ when '81010/nation'
344
+ hier_geo[:country] = tgn_term
345
+ when '81161/province'
346
+ hier_geo[:province] = tgn_term
347
+ when '81165/region', '82193/union', '80005/semi-independent political entity'
348
+ hier_geo[:region] = tgn_term
349
+ when '81175/state', '81117/department', '82133/governorate'
350
+ hier_geo[:state] = tgn_term
351
+ when '81181/territory', '81021/dependent state', '81186/union territory', '81125/national district'
352
+ hier_geo[:territory] = tgn_term
353
+ when '81115/county'
354
+ hier_geo[:county] = tgn_term
355
+ when '83002/inhabited place'
356
+ hier_geo[:city] = tgn_term
357
+ when '84251/neighborhood'
358
+ hier_geo[:city_section] = tgn_term
359
+ when '21471/island'
360
+ hier_geo[:island] = tgn_term
361
+ when '81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
362
+ hier_geo[:area] = tgn_term
363
+ else
364
+ non_hier_geo = tgn_term
365
+ end
366
+ end
367
+
368
+ # parent data for <mods:hierarchicalGeographic>
369
+ if tgnrec.at_xpath("//Parent_String")
370
+ parents = tgnrec.at_xpath("//Parent_String").children.to_s.split('], ')
371
+ parents.each do |parent|
372
+ if parent.include? '(continent)'
373
+ hier_geo[:continent] = parent
374
+ elsif parent.include? '(nation)'
375
+ hier_geo[:country] = parent
376
+ elsif parent.include? '(province)'
377
+ hier_geo[:province] = parent
378
+ elsif (parent.include? '(region)') || (parent.include? '(union)') || (parent.include? '(semi-independent political entity)')
379
+ hier_geo[:region] = parent
380
+ elsif (parent.include? '(state)') || (parent.include? '(department)') || (parent.include? '(governorate)')
381
+ hier_geo[:state] = parent
382
+ elsif (parent.include? '(territory)') || (parent.include? '(dependent state)') || (parent.include? '(union territory)') || (parent.include? '(national district)')
383
+ hier_geo[:territory] = parent
384
+ elsif parent.include? '(county)'
385
+ hier_geo[:county] = parent
386
+ elsif parent.include? '(inhabited place)'
387
+ hier_geo[:city] = parent
388
+ elsif parent.include? '(neighborhood)'
389
+ hier_geo[:city_section] = parent
390
+ elsif parent.include? '(island)'
391
+ hier_geo[:island] = parent
392
+ elsif (parent.include? '(area)') || (parent.include? '(general region)') || (parent.include? '(deserted settlement)') || (parent.include? '(historical region)') || (parent.include? '(national division)')
393
+ hier_geo[:area] = parent
394
+ end
395
+ end
396
+ hier_geo.each do |k,v|
397
+ hier_geo[k] = v.gsub(/ \(.*/,'')
398
+ end
399
+ end
400
+
401
+ tgn_data = {}
402
+ tgn_data[:coords] = coords
403
+ tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil
404
+ tgn_data[:non_hier_geo] = non_hier_geo ? non_hier_geo : nil
405
+
406
+ else
407
+
408
+ tgn_data = nil
409
+
410
+ end
411
+
412
+ return tgn_data
413
+
414
+ end
415
+
416
+ #Note: Limited to only looking at United States places...
417
# Geocode a term via the Bing Maps API. Only results located in the
# United States are used. Returns a hash that may contain
# :keep_original_string, :coordinates, :country_part, :state_part and
# :city_part; returns an empty hash when the term is skipped or no
# usable match comes back.
# NOTE(review): the API key is hard-coded in source -- should live in config.
def self.parse_bing_api(term)
  return_hash = {}

  # Bing copes badly with parentheses, so bail out early.
  return return_hash if term.match(/[\(\)]+/)

  # Bing is unreliable for "<building>, <city>, <state>" strings that are not
  # street addresses (e.g. "Boston Harbor, Boston, Mass." only returns the
  # state; "South Street bridge, West Bridgewater, Mass." returns Holyoke).
  # Leave those for Google to handle.
  not_street_like = term.split(' ').length >= 3 &&
                    term.match(/\d/).blank? &&
                    term.downcase.match(/ave\.*,/).blank? &&
                    term.downcase.match(/avenue\.*,/).blank? &&
                    term.downcase.match(/street\.*,/).blank? &&
                    term.downcase.match(/st\.*,/).blank? &&
                    term.downcase.match(/road\.*,/).blank? &&
                    term.downcase.match(/rd\.*,/).blank?
  return return_hash if not_street_like

  Geocoder.configure(:lookup => :bing,:api_key => 'Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn',:timeout => 7)
  results = Geocoder.search(term)

  # Use Bing first, and only for United States results...
  if results.present? && results.first.data["address"]["countryRegion"] == 'United States'
    address = results.first.data["address"]

    if address["addressLine"].present?
      return_hash[:keep_original_string] = true
      points = results.first.data["geocodePoints"].first["coordinates"]
      return_hash[:coordinates] = points.first.to_s + ',' + points.last.to_s
    end

    return_hash[:country_part] = address["countryRegion"]

    return_hash[:state_part] = if return_hash[:country_part] == 'United States'
                                 Bplmodels::Constants::STATE_ABBR[address["adminDistrict"]]
                               else
                                 address["adminDistrict"]
                               end

    return_hash[:city_part] = address["locality"]
  end

  return return_hash
end
457
+
458
+ #Mapquest allows unlimited requests - start here?
459
# Geocode a term via the MapQuest API (unlimited requests, so tried first
# in the geocoder chain). Returns a hash of parsed place parts (see
# parse_bing_api for the keys), or an empty hash when the term is skipped
# or no result comes back.
# NOTE(review): the API key is hard-coded in source -- should live in config.
def self.parse_mapquest_api(term)
  return_hash = {}

  # MapQuest returns bad data for "Manchester, Mass." -- skip it outright.
  return return_hash if term.include?('Manchester')

  # MapQuest mishandles bare neighborhoods (e.g. "Hyde Park (Boston, Mass.)"),
  # so anything that is not street-address-like is left for Google.
  not_street_like = term.split(' ').length >= 3 &&
                    term.match(/\d/).blank? &&
                    term.downcase.match(/ave\.*,/).blank? &&
                    term.downcase.match(/avenue\.*,/).blank? &&
                    term.downcase.match(/street\.*,/).blank? &&
                    term.downcase.match(/st\.*,/).blank? &&
                    term.downcase.match(/road\.*,/).blank? &&
                    term.downcase.match(/rd\.*,/).blank?
  return return_hash if not_street_like

  Geocoder.configure(:lookup => :mapquest,:api_key => 'Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a',:timeout => 7)
  results = Geocoder.search(term)

  if results.present?
    data = results.first.data

    # A street-level hit means the original string carries information the
    # parsed parts do not, so flag it for retention and keep the point.
    if data["street"].present?
      return_hash[:keep_original_string] = true
      return_hash[:coordinates] = data['latLng']['lat'].to_s + ',' + data['latLng']['lng'].to_s
    end

    return_hash[:country_part] = Country.new(data["adminArea1"]).name

    if return_hash[:country_part] == 'United States'
      return_hash[:state_part] = Bplmodels::Constants::STATE_ABBR[data["adminArea3"]]
      # MapQuest reports DC as a lower admin area; prefer that label.
      return_hash[:state_part] = data["adminArea4"] if data["adminArea4"] == 'District of Columbia'
    else
      return_hash[:state_part] = data["adminArea3"].gsub(' province', '')
    end

    return_hash[:city_part] = data["adminArea5"]
  end

  return return_hash
end
502
+
503
+ #Final fallback is google API. The best but we are limited to 2500 requests per day unless we pay the $10k a year premium account...
504
+ #Note: If google cannot find street, it will return just city/state, like for "Salem Street and Paradise Road, Swampscott, MA, 01907"
505
+ #Seems like it sets a partial_match=>true in the data section...
506
# Final-fallback geocoder using the Google API (limited to 2500
# requests/day on the free tier). Walks the returned address_components
# and fills :keep_original_string, :coordinates, :country_part,
# :state_part, :city_part and :neighborhood_part as applicable.
# Note: when Google cannot find the street it silently returns just
# city/state with partial_match=true in the data section.
def self.parse_google_api(term)
  return_hash = {}

  Geocoder.configure(:lookup => :google,:api_key => nil,:timeout => 7)
  results = Geocoder.search(term)

  # On a partial match for a non-street term, drop the leading segment
  # (e.g. a building name) and retry once for a cleaner hit.
  # TODO: better street-ending detection -- see http://pe.usps.gov/text/pub28/28apc_002.htm
  if results.present? &&
     results.first.data['partial_match'] &&
     term.split(',').length > 1 &&
     !term.downcase.include?('street') && !term.downcase.include?('st.') &&
     !term.downcase.include?('avenue') && !term.downcase.include?('ave.') &&
     !term.downcase.include?('court')
    term = term.split(',')[1..term.split(',').length-1].join(',').strip
    results = Geocoder.search(term)
  end

  if results.present?
    # Component types seen here: street number, route, neighborhood,
    # establishment, transit_station, bus_station, country, etc.
    results.first.data["address_components"].each do |component|
      kinds = component['types']
      if (kinds & ['street number', 'route', 'neighborhood', 'establishment', 'transit_station', 'bus_station']).present?
        return_hash[:keep_original_string] = true
        location = results.first.data['geometry']['location']
        return_hash[:coordinates] = location['lat'].to_s + ',' + location['lng'].to_s
      elsif (kinds & ['country']).present?
        return_hash[:country_part] = component['long_name']
      elsif (kinds & ['administrative_area_level_1']).present?
        return_hash[:state_part] = component['long_name'].to_ascii
      elsif (kinds & ['locality']).present?
        return_hash[:city_part] = component['long_name']
      elsif (kinds & ['sublocality', 'political']).present?
        return_hash[:neighborhood_part] = component['long_name']
      end
    end

    # A partial match also signals the original string should be kept.
    return_hash[:keep_original_string] ||= results.first.data['partial_match'] unless results.first.data['partial_match'].blank?
  end

  return return_hash
end
544
+
545
# Extract the geographic portion of a subject term.
# Handles "A--B--C" LCSH-style strings (reversing "State--City" into
# "City,State"), " - " separated strings, and plain strings. Only terms
# mentioning one of a fixed set of state names ("Mass", " MA",
# "Massachusetts", "New Jersey", "Wisconsin", "New Hampshire",
# "New York", "Maine") are returned; otherwise nil.
def self.parse_geographic_term(term)
  geo_term = nil

  # Normalize the odd en-dash separator to the standard '--'.
  term = term.gsub('–', '--')

  # Anything this long is junk text wrapped around an address string.
  return nil if term.length > 125

  #TODO: Use Countries gem of https://github.com/hexorx/countries
  #test = Country.new('US')
  #test.states

  full_states = ['Massachusetts', 'New Jersey', 'Wisconsin', 'New Hampshire', 'New York', 'Maine']
  short_states = ['Mass', ' MA']

  if term.include?('--')
    pieces = term.split('--')
    pieces.each_with_index do |piece, index|
      if full_states.any? { |state| piece.include?(state) }
        # Reverse "State--City--..." ordering into "...,City,State".
        geo_term = pieces[index..pieces.length-1].reverse!.join(',')
      elsif short_states.any? { |state| piece.include?(state) }
        geo_term = piece
      end
    end
  elsif term.include?(' - ')
    # Experimental... examples: "Palmer (Mass) - history" or
    # "Stores (retail trade) - Palmer, Mass"
    term.split(' - ').each do |piece|
      geo_term = piece if (short_states + full_states).any? { |state| piece.include?(state) }
    end
  else
    geo_term = term if (short_states + full_states).any? { |state| term.include?(state) }
  end

  return geo_term
end
590
+
591
+ def self.standardize_geographic_term(geo_term)
592
+ #Remove common junk terms
593
+ geo_term = geo_term.gsub('Cranberries', '').gsub('History', '').gsub('Maps', '').gsub('State Police', '').gsub('Pictorial works.', '').gsub(/[nN]ation/, '').gsub('Asia', '').gsub('(Republic)', '').strip
594
+
595
+ #Strip any leading periods or commas from junk terms
596
+ geo_term = geo_term.gsub(/^[\.,]+/, '').strip
597
+
598
+ #Replace any semicolons with commas... possible strip them?
599
+ geo_term = geo_term.gsub(';', ',')
600
+
601
+ #Note: the following returns junk from Bing as if these are in WI, California, Etc:
602
+ #East Monponsett Lake (Halifax, Mass.)
603
+ #Silver Lake (Halifax, Mass.)
604
+ #Scarier note: Washington Park (Reading, Mass.) will always return Boston, MA in google
605
+ if geo_term.match(/[\(\)]+/)
606
+ #Attempt to fix address if something like (word)
607
+ if geo_term.match(/ \(+.*\)+/)
608
+ #Make this replacement better?
609
+ geo_term = geo_term.gsub(' (', ', ').gsub(')', '')
610
+ #Else skip this as data returned likely will be unreliable for now... FIXME when use case occurs.
611
+ else
612
+ return nil
613
+ end
614
+ end
615
+
616
+ return geo_term
617
+ end
618
+
619
+
620
+
621
# Resolve a free-text place term to a Getty TGN subject ID.
#
# Pipeline: (optionally) parse/standardize the term, geocode it through
# MapQuest -> Bing -> Google to get structured place parts, then query the
# Getty TGN term-match web service restricted to a place type inferred from
# which parts are present.
#
# @param term [String] the raw or pre-parsed place term
# @param parse_term [Boolean] when true, run parse_geographic_term first
# @return [Hash, nil] hash possibly containing :tgn_id plus the geocoder
#   keys (:state_part, :city_part, ...); nil when the term yields nothing
# @raise [RuntimeError] when the TGN service keeps returning HTTP 500
def self.tgn_id_from_term(term,parse_term=false)
  return_hash = {}
  max_retry = 3
  sleep_time = 60 # In seconds
  retry_count = 0

  #If not a good address source, parsing is done here...
  # (double negative: runs parse_geographic_term only when parse_term is true)
  term = parse_geographic_term(term) unless !parse_term

  term = standardize_geographic_term(term) unless term.blank?

  if term.blank?
    return return_hash
  end

  # Geocoder fallback chain: MapQuest (unlimited) -> Bing (US only) -> Google.
  return_hash = parse_mapquest_api(term)

  if return_hash.blank?
    return_hash = parse_bing_api(term)
  end

  if return_hash.blank?
    return_hash = parse_google_api(term)
  end

  if return_hash.blank?
    return nil
  end

  state_part = return_hash[:state_part]


  # Map the geocoded country name to the TGN nation id / preferred name.
  country_code = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_id] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
  country_code ||= ''


  country_part = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_country_name] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
  country_part ||= return_hash[:country_part]
  country_part ||= ''

  city_part = return_hash[:city_part]

  #Keep original string if three parts at least or if there is a number in the term.
  if term.split(',').length >= 3 || term.match(/\d/).present?
    return_hash[:keep_original_string] = true
  end

  top_match_term = ''
  match_term = nil

  # Choose the TGN place-type restriction from which parts we have:
  # nothing below country -> nation; state only -> state (US) or region;
  # state + city -> inhabited place. top_match_term must appear in the
  # candidate's parent string for a match to count.
  if city_part.blank? && state_part.blank?
    # Limit to nations
    place_type = 81010
    top_match_term = ''
    # NOTE(review): `a.downcase || b` never falls through to b, since
    # downcase never returns nil -- the right-hand side looks intended as
    # a fallback for blank country_part; verify.
    match_term = country_part.to_ascii.downcase || term.to_ascii.downcase
  elsif state_part.present? && city_part.blank? && country_code == 7012149
    #Limit to states (7012149 is the TGN id for the United States)
    place_type = 81175
    top_match_term = country_part.to_ascii.downcase
    match_term = state_part.to_ascii.downcase
  elsif state_part.present? && city_part.blank?
    #Limit to regions
    place_type = 81165
    top_match_term = country_part.to_ascii.downcase
    match_term = state_part.to_ascii.downcase
  elsif state_part.present? && city_part.present?
    #Limited to only inhabited places at the moment...
    place_type = 83002
    top_match_term = state_part.to_ascii.downcase
    match_term = city_part.to_ascii.downcase
  else
    return nil
  end

  # Query the Getty TGN term-match service, sleeping and retrying (up to
  # max_retry attempts) while the server answers 500.
  begin
    if retry_count > 0
      sleep(sleep_time)
    end
    retry_count = retry_count + 1

    tgn_response = Typhoeus::Request.get("http://vocabsservices.getty.edu/TGNService.asmx/TGNGetTermMatch?placetypeid=#{place_type}&nationid=#{country_code}&name=" + CGI.escape(match_term), userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])


  end until (tgn_response.code != 500 || retry_count == max_retry)

  unless tgn_response.code == 500
    # NOTE(review): debug output left in -- consider removing/logging.
    puts 'match found!'
    parsed_xml = Nokogiri::Slop(tgn_response.body)

    if parsed_xml.Vocabulary.Count.text == '0'
      return nil
    end

    #If only one result, then not array. Otherwise array....
    if parsed_xml.Vocabulary.Subject.first.blank?
      subject = parsed_xml.Vocabulary.Subject

      # Compare the preferred term (sans any parenthetical qualifier)
      # case-insensitively, and require the expected parent in its lineage.
      current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip

      if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
        return_hash[:tgn_id] = subject.Subject_ID.text
      end
    else
      # Multiple candidates: the last one matching wins.
      parsed_xml.Vocabulary.Subject.each do |subject|
        current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip

        if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
          return_hash[:tgn_id] = subject.Subject_ID.text
        end
      end
    end

  end

  # All retries exhausted with 500s: surface the outage to the caller.
  if tgn_response.code == 500
    raise 'TGN Server appears to not be responding for Geographic query: ' + term
  end


  return return_hash
end
742
+
743
+
744
# Normalize a heading string toward LCSH conventions: strips wrapping
# quotes, removes a trailing period (except after initials or "etc."),
# and collapses every dash variant and spacing into the canonical '--'
# subdivision separator.
#
# @param value [String, nil] the raw heading
# @return [String] the normalized heading ('' for blank input)
def self.LCSHize(value)

  if value.blank?
    return ''
  end

  # Remove stuff that is quoted (quotation for first and last words).
  value = value.gsub(/^['"]/, '').gsub(/['"]$/, '').strip

  # Remove a trailing period, except after an initial (uppercase letter)
  # or "etc.". Length guards fix a crash on short strings, where
  # value[-4..-1] was nil and nil.match raised NoMethodError.
  # ('etc.' as a string pattern means the final '.' matches any char --
  # preserved from the original behavior.)
  if value.length >= 2 && value[-1] == '.' && value[-2].match(/[^A-Z]/) && !(value.length >= 4 && value[-4..-1].match('etc.'))
    value = value.slice(0..-2)
  end

  # Fix when '- -' occurs.
  value = value.gsub(/-\s-/,'--')

  # Normalize em and en dashes to '--'.
  value = value.gsub('—','--')
  value = value.gsub('–','--')

  # Fix for ' - ' combinations.
  value = value.gsub(' - ','--')

  # Remove whitespace after and before '--'.
  value = value.gsub(/\s+--/,'--')
  value = value.gsub(/--\s+/,'--')

  # Ensure the first character is capitalized. (Pure-Ruby replacement for
  # the ActiveSupport String#first chain; guarded against the value having
  # been reduced to an empty string by the cleanups above.)
  value[0] = value[0].capitalize unless value.empty?

  # Strip whitespace / entities / markup.
  value = Bplmodels::DatastreamInputFuncs.strip_value(value)

  return value
end
782
+
783
+
784
# Normalize a raw field value: nil for blank input, numerics coerced to
# an integer string, and all text cleaned of entities/markup/carriage
# returns via utf8Encode.
#
# @param value [Object] raw cell/field value
# @return [String, nil]
def self.strip_value(value)
  if(value.blank?)
    return nil
  else
    # Numeric values (e.g. from spreadsheet parsing) become integer
    # strings. is_a?(Integer) replaces the `value.class == Fixnum` check:
    # Fixnum was removed in Ruby 3.2 and raises NameError there, and
    # is_a? also behaves correctly for subclasses.
    if value.is_a?(Float) || value.is_a?(Integer)
      value = value.to_i.to_s
    end

    # Make sure it is all UTF-8 and not character encodings or HTML tags and remove any cariage returns
    return utf8Encode(value)
  end
end
796
+
797
# Flatten CR/LF/tab noise and <br/> tags into spaces, strip remaining
# HTML tags, decode HTML entities, and trim -- yielding clean UTF-8 text.
def self.utf8Encode(value)
  flattened = value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' ')
  sanitized = ActionView::Base.full_sanitizer.sanitize(flattened)
  HTMLEntities.new.decode(sanitized).strip
end
800
+
801
# Split a '||'-delimited multi-value string, running strip_value over
# each piece. Quirk preserved from the original: nil input returns the
# empty STRING "" rather than an empty array -- callers rely on this.
def self.split_with_nils(value)
  return "" if value == nil

  value.split("||").map { |piece| strip_value(piece) }
end
813
+
814
+
815
+ #Problems: A . Some Name and A & R
816
# Split a leading English article off a title (for MODS nonSort/title
# handling). Returns [non_sort, remaining_title]; non_sort is nil when
# the title does not begin with "A ", "An " or "The ".
# Known problem inputs handled by exclusion: "A . Some Name" and "A & R"
# are left unsplit. (Removed a no-op `title = title` self-assignment.)
def self.getProperTitle(title)
  nonSort = nil

  if title[0..1].downcase == "a " && (title[0..2].downcase != "a ." && title[0..2].downcase != "a &")
    nonSort = title[0..1]
    title = title[2..title.length]
  elsif title[0..3].downcase == "the "
    nonSort = title[0..3]
    title = title[4..title.length]
  elsif title[0..2].downcase == "an "
    nonSort = title[0..2]
    title = title[3..title.length]
  end

  return [nonSort, title]
end
835
+
836
# Look up a language value (label like "French" or ISO639-2 code like
# "fre") against the Library of Congress iso639-2 authority.
#
# @param language_value [String]
# @return [Hash] {:uri, :label} on an exact case-insensitive match, else {}
def self.parse_language(language_value)
  return_hash = {}
  authority_check = Qa::Authorities::Loc.new
  # URI::DEFAULT_PARSER.escape is the drop-in replacement for URI.escape,
  # which was deprecated and then removed in Ruby 3.0.
  authority_result = authority_check.search(URI::DEFAULT_PARSER.escape(language_value), 'iso639-2')

  if authority_result.present?
    # Exact match on the label, or on the code at the end of the id URI.
    authority_result = authority_result.select{|hash| hash['label'].downcase == language_value.downcase || hash['id'].split('/').last.downcase == language_value.downcase }
    if authority_result.present?
      return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
      return_hash[:label] = authority_result.first["label"]
    end
  end

  return return_hash
end
851
+
852
# Look up a role/relator value (e.g. "Photographer") against the Library
# of Congress relators authority.
#
# @param role_value [String]
# @return [Hash] {:uri, :label} on an exact case-insensitive label match, else {}
def self.parse_role(role_value)
  return_hash = {}
  authority_check = Qa::Authorities::Loc.new
  # URI::DEFAULT_PARSER.escape is the drop-in replacement for URI.escape,
  # which was deprecated and then removed in Ruby 3.0.
  authority_result = authority_check.search(URI::DEFAULT_PARSER.escape(role_value), 'relators')
  if authority_result.present?
    authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
    if authority_result.present?
      return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
      return_hash[:label] = authority_result.first["label"]
    end
  end

  return return_hash
end
866
+
867
# Detect a relator role embedded in a name string (e.g.
# 'Smith, John, "photographer"') by testing a candidate word against the
# LoC relators authority. On a match, returns the name with the role word
# removed plus the authority :uri and :label; otherwise returns {}.
#
# @param name [String]
# @return [Hash] {:name, :uri, :label} or {}
def self.parse_name_roles(name)
  return_hash = {}

  # Make sure we have at least three distinct parts of 2-letter+ words, to
  # avoid treating a surname as a role. Example to avoid: "Steven C. Painter"
  # or "Painter, Steven C." -- possible issue: "Steven Carlos Painter"?
  potential_role_check = name.match(/[\(\"\',]*\w\w+[\),\"\']* [\w\.,\d\-\"]*[\w\d][\w\d][\w\.,\d\-\"]* [\(\"\',]*\w\w+[\),\"\']*$/) || name.split(/[ ]+/).length >= 4

  if potential_role_check.present?
    authority_check = Qa::Authorities::Loc.new

    # First pass: test the LAST word of the name string as a role.
    # (URI::DEFAULT_PARSER.escape replaces URI.escape, removed in Ruby 3.0.)
    role_value = name.match(/(?<=[\(\"\', ])\w+(?=[\),\"\']*$)/).to_s
    authority_result = authority_check.search(URI::DEFAULT_PARSER.escape(role_value), 'relators')
    if authority_result.present?

      authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
      if authority_result.present?
        # Remove the role word and surrounding punctuation from the end of
        # the name ($ anchors at end of line).
        return_hash[:name] = name.sub(/[\(\"\', ]*\w+[\),\"\']*$/, '').gsub(/^[ ]*:/, '').strip
        return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
        return_hash[:label] = authority_result.first["label"]
      end
    end

    # Second pass (only when the first found nothing): this regex is
    # unanchored, so it tests the FIRST word of the name string -- e.g.
    # "Engraver John Smith". NOTE(review): comment in the original claimed
    # "last value"; verify the first-word behavior is intended.
    role_value = name.match(/\w+(?=[\),\"\']*)/).to_s
    authority_result = authority_check.search(URI::DEFAULT_PARSER.escape(role_value), 'relators')
    if authority_result.present? && return_hash.blank?

      authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
      if authority_result.present?
        # Remove the role word and any punctuation around it.
        return_hash[:name] = name.sub(/[\(\"\', ]*\w+[ \),\"\']*/, '').gsub(/^[ ]*:/, '').strip
        return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
        return_hash[:label] = authority_result.first["label"]
      end
    end
  end

  return return_hash
end
909
+
910
# True when the string parses as a Float (Kernel#Float semantics:
# whole integers qualify, malformed strings and nil do not).
def self.is_numeric?(string)
  Float(string)
  true
rescue
  false
end
913
+
914
+ # returns a well-formatted placename for display on a map
915
+ # hiergeo_hash = hash of <mods:hierarchicahlGeographic> elements
916
# returns a well-formatted placename for display on a map
# hiergeo_hash = hash of <mods:hierarchicalGeographic> elements
# (keys like :country, :state, :province, :city, :city_section, :county,
#  :island, :area, :region, :territory, :other)
#
# Builds up to two parts -- [local place, state/country] -- picking the
# most specific non-blank value by precedence, then joins with ', '.
# Returns nil when nothing was assembled.
def self.render_display_placename(hiergeo_hash)
  placename = []
  case hiergeo_hash[:country]
  when 'United States','Canada'
    if hiergeo_hash[:state] || hiergeo_hash[:province]
      # Most specific local feature first (other > city_section > city > island > area).
      placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence
      if placename[0].nil? && hiergeo_hash[:county]
        placename[0] = hiergeo_hash[:county] + ' (county)'
      end
      if placename[0]
        # With a local part, abbreviate the state (reverse STATE_ABBR lookup).
        placename[1] = Constants::STATE_ABBR.key(hiergeo_hash[:state]) || hiergeo_hash[:province].presence
      else
        # No local part: show the full state/province name alone.
        placename[1] = hiergeo_hash[:state].presence || hiergeo_hash[:province].presence
      end
    else
      # US/Canada without a state: fall back through broader divisions.
      placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence || hiergeo_hash[:country].presence
    end
  else
    # Non-US/Canada (or no country): local part by precedence, country second.
    placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:state].presence || hiergeo_hash[:province].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence
    if placename[0].nil? && hiergeo_hash[:county]
      placename[0] = hiergeo_hash[:county] + ' (county)'
    end
    placename[1] = hiergeo_hash[:country]
  end

  # Join the parts, trimming a stray leading/trailing ", " when one part is nil.
  # NOTE(review): placename is never blank here ([nil] is not blank?), so a
  # no-match input returns "" rather than the nil branch -- verify intended.
  if !placename.blank?
    placename.join(', ').gsub(/(\A,\s)|(,\s\z)/,'')
  else
    nil
  end
end
947
+
948
+ end
949
+ end