bplmodels 0.0.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +0 -0
- data/Rakefile +40 -0
- data/app/assets/javascripts/bplmodels/application.js +15 -0
- data/app/assets/stylesheets/bplmodels/application.css +13 -0
- data/app/controllers/bplmodels/application_controller.rb +4 -0
- data/app/helpers/bplmodels/application_helper.rb +4 -0
- data/app/models/bplmodels/audio_file.rb +14 -0
- data/app/models/bplmodels/book.rb +35 -0
- data/app/models/bplmodels/card.rb +35 -0
- data/app/models/bplmodels/characterization.rb +92 -0
- data/app/models/bplmodels/collection.rb +118 -0
- data/app/models/bplmodels/complex_object_base.rb +24 -0
- data/app/models/bplmodels/correspondence.rb +35 -0
- data/app/models/bplmodels/document.rb +35 -0
- data/app/models/bplmodels/document_file.rb +8 -0
- data/app/models/bplmodels/ephemera.rb +35 -0
- data/app/models/bplmodels/file.rb +151 -0
- data/app/models/bplmodels/file_content_datastream.rb +10 -0
- data/app/models/bplmodels/fits_datastream.rb +190 -0
- data/app/models/bplmodels/image.rb +14 -0
- data/app/models/bplmodels/image_file.rb +18 -0
- data/app/models/bplmodels/institution.rb +159 -0
- data/app/models/bplmodels/manuscript.rb +34 -0
- data/app/models/bplmodels/map.rb +34 -0
- data/app/models/bplmodels/mods_desc_metadata.rb +1826 -0
- data/app/models/bplmodels/musical_notation.rb +34 -0
- data/app/models/bplmodels/newspaper.rb +15 -0
- data/app/models/bplmodels/nom_terminology.rb +1242 -0
- data/app/models/bplmodels/non_photographic_print.rb +34 -0
- data/app/models/bplmodels/oai_collection.rb +19 -0
- data/app/models/bplmodels/oai_metadata.rb +75 -0
- data/app/models/bplmodels/oai_object.rb +45 -0
- data/app/models/bplmodels/object.rb +36 -0
- data/app/models/bplmodels/object_base.rb +1241 -0
- data/app/models/bplmodels/objects/collection.rb~ +28 -0
- data/app/models/bplmodels/objects/image.rb~ +59 -0
- data/app/models/bplmodels/objects/postcard.rb~ +56 -0
- data/app/models/bplmodels/organizational_set.rb +25 -0
- data/app/models/bplmodels/periodical.rb +37 -0
- data/app/models/bplmodels/photographic_print.rb +34 -0
- data/app/models/bplmodels/relation_base.rb +99 -0
- data/app/models/bplmodels/scrapbook.rb +35 -0
- data/app/models/bplmodels/simple_object_base.rb +27 -0
- data/app/models/bplmodels/sound_recording.rb +15 -0
- data/app/models/bplmodels/system_collection.rb +8 -0
- data/app/models/bplmodels/uploads_set.rb +3 -0
- data/app/models/bplmodels/workflow_metadata.rb +99 -0
- data/app/views/layouts/bplmodels/application.html.erb +14 -0
- data/config/application.rb +6 -0
- data/config/predicate_mappings.yml +61 -0
- data/config/routes.rb +2 -0
- data/lib/bplmodels.rb +21 -0
- data/lib/bplmodels/constants.rb +119 -0
- data/lib/bplmodels/datastream_input_funcs.rb +949 -0
- data/lib/bplmodels/engine.rb +5 -0
- data/lib/bplmodels/engine.rb~ +5 -0
- data/lib/bplmodels/finder.rb +192 -0
- data/lib/bplmodels/object_funcs.rb +10 -0
- data/lib/bplmodels/version.rb +3 -0
- data/lib/tasks/bplmodels_tasks.rake +4 -0
- data/test/bplmodels_test.rb +7 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +59 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +4 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/test_helper.rb +15 -0
- metadata +234 -0
data/app/models/bplmodels/workflow_metadata.rb ADDED
@@ -0,0 +1,99 @@
+module Bplmodels
+  class WorkflowMetadata < ActiveFedora::OmDatastream
+    include OM::XML::Document
+
+    WORKFLOW_NS = 'http://www.bpl.org/repository/xml/ns/workflow'
+    WORKFLOW_SCHEMA = 'http://www.bpl.org/repository/xml/xsd/workflow.xsd'
+    WORKFLOW_PARAMS = {
+      "version" => "0.0.1",
+      "xmlns:xlink" => "http://www.w3.org/1999/xlink",
+      "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
+      "xmlns" => WORKFLOW_NS,
+      "xsi:schemaLocation" => "#{WORKFLOW_NS} #{WORKFLOW_SCHEMA}",
+    }
+
+    set_terminology do |t|
+      t.root :path => 'workflowMetadata', :xmlns => WORKFLOW_NS
+
+      t.item_status(:path=>"itemStatus") {
+        t.state(:path=>"state")
+        t.state_comment(:path=>"stateComment")
+        t.processing(:path=>"processing")
+        t.processing_comment(:path=>"processingComment")
+      }
+
+      t.item_source(:path=>"itemSource") {
+        t.ingest_origin(:path=>"ingestOrigin")
+        t.ingest_filepath(:path=>"ingestFilepath") #Only supported later for file objects.
+        t.ingest_filename(:path=>"ingestFilename") #Only recently added
+      }
+
+      t.item_ark_info(:path=>"arkInformation") {
+        t.ark_id(:path=>"arkID")
+        t.ark_type(:path=>"arkType")
+        t.ark_parent_pid(:path=>"arkParentPID")
+      }
+
+      t.source(:path=>"source") {
+        t.ingest_origin(:path=>"ingestOrigin")
+        t.ingest_filepath(:path=>"ingestFilepath") #Only supported later for file objects.
+        t.ingest_filename(:path=>"ingestFilename") #Only recently added
+        t.ingest_datastream(:path=>"ingestDatastream")
+      }
+
+      t.item_designations(:path=>'itemDesignations') {
+        t.flagged_for_content(:path=>"flaggedForContent")
+      }
+
+      t.marked_for_deletion(:path=>'markedForDelation') {
+        t.reason(:path=>'reason')
+      }
+
+    end
+
+    def self.xml_template
+      Nokogiri::XML::Builder.new do |xml|
+        xml.workflowMetadata(WORKFLOW_PARAMS) {
+
+        }
+      end.doc
+    end
+
+    #Required for Active Fedora 9
+    def prefix(path=nil)
+      return ''
+    end
+
+
+    def insert_file_path(value=nil)
+      ingest_filepath_index = self.item_source.ingest_filepath.count
+
+      self.item_source.ingest_filepath(ingest_filepath_index, value) unless value.blank? || self.item_source.ingest_filepath.include?(value)
+    end
+
+    def insert_file_name(value=nil)
+      ingest_filename_index = self.item_source.ingest_filepath.count
+
+      self.item_source.ingest_filename(ingest_filename_index, value) unless value.blank? || self.item_source.ingest_filepath.include?(value)
+    end
+
+    def insert_file_source(filepath, filename, datastream)
+      source_count = self.source.count
+
+      self.source(source_count).ingest_filepath(0, filepath) unless filepath.blank?
+      self.source(source_count).ingest_filename(0, filename) unless filename.blank?
+      self.source(source_count).ingest_datastream(0, datastream) unless datastream.blank?
+    end
+
+    def insert_flagged(value=nil)
+      self.item_designations(0).flagged_for_content(0, value) unless value.blank?
+    end
+
+    def insert_oai_defaults
+      self.item_status(0).state = "published"
+      self.item_status(0).state_comment = "OAI Harvested Record"
+      self.item_status(0).processing = "complete"
+      self.item_status(0).processing_comment = "Object Processing Complete"
+    end
+  end
+end
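Reviewer's note: WorkflowMetadata is an OM datastream, so each set_terminology node above becomes a reader/writer proxy on instances. A minimal sketch of how the helpers might be exercised — this is not code from the gem, and it assumes active-fedora/om are loaded and the datastream can be instantiated standalone:

  require 'bplmodels'

  ds = Bplmodels::WorkflowMetadata.new
  ds.insert_oai_defaults                  # sets the published/complete defaults shown above
  # the path/filename/datastream arguments here are hypothetical
  ds.insert_file_source('/ingest/img001.tif', 'img001.tif', 'productionMaster')
  ds.item_status.state.first              # => "published" (OM term proxies return arrays)
  ds.to_xml                               # serializes under the workflowMetadata namespace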
data/app/views/layouts/bplmodels/application.html.erb ADDED
@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <title>Bplmodels</title>
+  <%= stylesheet_link_tag "bplmodels/application", :media => "all" %>
+  <%= javascript_include_tag "bplmodels/application" %>
+  <%= csrf_meta_tags %>
+</head>
+<body>
+
+<%= yield %>
+
+</body>
+</html>
data/config/predicate_mappings.yml ADDED
@@ -0,0 +1,61 @@
+# The default namespace maps to the default namespace for generating rels_ext from solr
+:default_namespace: info:fedora/fedora-system:def/relations-external#
+
+:predicate_namespaces:
+  fedora-model: info:fedora/fedora-system:def/model#
+  fedora-relations-model: info:fedora/fedora-system:def/relations-external#
+  bpllib-rel: http://projecthydra.org/ns/relations#
+
+# namespace mappings---
+# you can add specific mappings for your institution by providing the following:
+#   namespace_uri:
+#     :relationship_symbol: relationship_identifier
+#
+# For example, if you have the following element in your rels_ext:
+#
+#   <oai:itemID>oai:example.edu:changeme:500</oai:itemID>
+#
+# With the last two lines of this file uncommented, the relationships hash of your object will include:
+#   :oai_item_id => ["info:fedora/oai:example.edu:changeme:500"]
+#
+:predicate_mapping:
+  info:fedora/fedora-system:def/relations-external#:
+    :conforms_to: conformsTo
+    :has_annotation: hasAnnotation
+    :has_collection_member: hasCollectionMember
+    :has_constituent: hasConstituent
+    :has_dependent: hasDependent
+    :has_derivation: hasDerivation
+    :has_description: hasDescription
+    :has_equivalent: hasEquivalent
+    :has_metadata: hasMetadata
+    :has_member: hasMember
+    :has_model: hasModel
+    :has_part: hasPart
+    :has_subset: hasSubset
+    :is_annotation_of: isAnnotationOf
+    :is_constituent_of: isConstituentOf
+    :is_dependent_of: isDependentOf
+    :is_derivation_of: isDerivationOf
+    :is_description_of: isDescriptionOf
+    :is_member_of: isMemberOf
+    :is_member_of_collection: isMemberOfCollection
+    :is_metadata_for: isMetadataFor
+    :is_part_of: isPartOf
+    :is_subset_of: isSubsetOf
+    :is_topic_of: isTopicOf
+  info:fedora/fedora-system:def/model#:
+    :is_contractor_of: isContractorOf
+    :is_deployment_of: isDeploymentOf
+    :has_service: hasService
+    :has_model: hasModel
+  http://www.openarchives.org/OAI/2.0/:
+    :oai_item_id: itemID
+  http://projecthydra.org/ns/relations#:
+    :is_governed_by: isGovernedBy
+    :is_image_of: isImageOf
+    :has_image: hasImage
+    :has_subcollection: hasSubcollection
+    :has_crop: hasCrop
+    :is_crop_of: isCropOf
+    :is_exemplary_image_of: isExemplaryImageOf
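This file extends ActiveFedora's stock predicate map with the Hydra relations namespace used throughout these models (isGovernedBy, hasImage, hasSubcollection, and so on). As a hypothetical illustration of what a mapping buys you, using the classic ActiveFedora relationship API of this era (the PID below is made up, not from the gem):

  obj.add_relationship(:is_member_of_collection, 'info:fedora/bpl-dev:abcd1234')
  obj.relationships(:is_member_of_collection)
  # => ["info:fedora/bpl-dev:abcd1234"]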
data/config/routes.rb ADDED
data/lib/bplmodels.rb ADDED
@@ -0,0 +1,21 @@
+require "bplmodels/engine"
+require "bplmodels/datastream_input_funcs"
+require "bplmodels/finder"
+require "bplmodels/constants"
+require "timeliness"
+
+module Bplmodels
+  def self.environment
+    if defined?(DERIVATIVE_CONFIG_GLOBAL) && DERIVATIVE_CONFIG_GLOBAL.present? && DERIVATIVE_CONFIG_GLOBAL['environment'].present?
+      return DERIVATIVE_CONFIG_GLOBAL['environment']
+    elsif defined?(Rails.env) and !Rails.env.nil?
+      return Rails.env.to_s
+    elsif defined?(ENV['environment']) and !(ENV['environment'].nil?)
+      return ENV['environment']
+    elsif defined?(ENV['RAILS_ENV']) and !(ENV['RAILS_ENV'].nil?)
+      raise RuntimeError, "You're depending on RAILS_ENV for setting your environment. Please use ENV['environment'] for non-rails environment setting: 'rake foo:bar environment=test'"
+    else
+      ENV['environment'] = 'development'
+    end
+  end
+end
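Bplmodels.environment resolves, in order: an application-supplied DERIVATIVE_CONFIG_GLOBAL hash, then Rails.env, then a plain ENV['environment'] variable, raising if only RAILS_ENV is set and falling back to 'development' otherwise. (Note that defined?(ENV['environment']) is always truthy, so the nil checks carry those branches.) A small sketch of the non-Rails path, with hypothetical values:

  # outside Rails, with no DERIVATIVE_CONFIG_GLOBAL defined:
  ENV['environment'] = 'test'
  Bplmodels.environment  # => "test"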
data/lib/bplmodels/constants.rb ADDED
@@ -0,0 +1,119 @@
+module Bplmodels
+  class Constants
+    GENRE_LOOKUP = {}
+    GENRE_LOOKUP['Cards'] = {:id=>'tgm001686', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Correspondence'] = {:id=>'tgm002590', :authority=>'lctgm'}
+    GENRE_LOOKUP['Documents'] = {:id=>'tgm003185', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Drawings'] = {:id=>'tgm003279', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Ephemera'] = {:id=>'tgm003634', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Manuscripts'] = {:id=>'tgm012286', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Maps'] = {:id=>'tgm006261', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Objects'] = {:id=>'tgm007159', :authority=>'lctgm'}
+    GENRE_LOOKUP['Paintings'] = {:id=>'tgm007393', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Photographs'] = {:id=>'tgm007721', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Posters'] = {:id=>'tgm008104', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Prints'] = {:id=>'tgm008237', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Newspapers'] = {:id=>'tgm007068', :authority=>'lctgm'}
+    GENRE_LOOKUP['Sound recordings'] = {:id=>'tgm009874', :authority=>'lctgm'}
+    GENRE_LOOKUP['Motion pictures'] = {:id=>'tgm006804', :authority=>'lctgm'}
+    GENRE_LOOKUP['Periodicals'] = {:id=>'tgm007641', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Books'] = {:id=>'tgm001221', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Albums'] = {:id=>'tgm000229', :authority=>'gmgpc'}
+    GENRE_LOOKUP['Musical notation'] = {:id=>'tgm006926', :authority=>'lctgm'}
+
+    COUNTRY_TGN_LOOKUP = {}
+    COUNTRY_TGN_LOOKUP['United States'] = {:tgn_id=>7012149, :tgn_country_name=>'United States'}
+    COUNTRY_TGN_LOOKUP['Canada'] = {:tgn_id=>7005685, :tgn_country_name=>'Canada'}
+    COUNTRY_TGN_LOOKUP['France'] = {:tgn_id=>1000070, :tgn_country_name=>'France'}
+    COUNTRY_TGN_LOOKUP['Vietnam'] = {:tgn_id=>1000145, :tgn_country_name=>'Viet Nam'}
+    COUNTRY_TGN_LOOKUP['South Africa'] = {:tgn_id=>1000193, :tgn_country_name=>'South Africa'}
+    COUNTRY_TGN_LOOKUP['Philippines'] = {:tgn_id=>1000135, :tgn_country_name=>'Pilipinas'}
+    COUNTRY_TGN_LOOKUP['China'] = {:tgn_id=>1000111, :tgn_country_name=>'Zhongguo'}
+    COUNTRY_TGN_LOOKUP['Japan'] = {:tgn_id=>1000120, :tgn_country_name=>'Nihon'}
+
+
+
+
+
+=begin
+    COUNTRY_TGN_LOOKUP = {
+        'US' => 7012149,
+        'CA' => 7005685,
+        'FR' => 1000070,
+        'VN' => 1000145,
+        'ZA' => 1000193,
+        'PH' => 1000135,
+        'United States' => 7012149,
+        'Canada' => 7005685,
+        'France' => 1000070,
+        'Vietnam' => 1000145,
+        'Viet Nam' => 1000145,
+        'South Africa' => 1000193,
+        'Philippines' => 1000135
+    }
+=end
+
+    STATE_ABBR = {
+      'AL' => 'Alabama',
+      'AK' => 'Alaska',
+      'AS' => 'America Samoa',
+      'AZ' => 'Arizona',
+      'AR' => 'Arkansas',
+      'CA' => 'California',
+      'CO' => 'Colorado',
+      'CT' => 'Connecticut',
+      'DE' => 'Delaware',
+      'DC' => 'District of Columbia',
+      'FM' => 'Micronesia1',
+      'FL' => 'Florida',
+      'GA' => 'Georgia',
+      'GU' => 'Guam',
+      'HI' => 'Hawaii',
+      'ID' => 'Idaho',
+      'IL' => 'Illinois',
+      'IN' => 'Indiana',
+      'IA' => 'Iowa',
+      'KS' => 'Kansas',
+      'KY' => 'Kentucky',
+      'LA' => 'Louisiana',
+      'ME' => 'Maine',
+      'MH' => 'Islands1',
+      'MD' => 'Maryland',
+      'MA' => 'Massachusetts',
+      'MI' => 'Michigan',
+      'MN' => 'Minnesota',
+      'MS' => 'Mississippi',
+      'MO' => 'Missouri',
+      'MT' => 'Montana',
+      'NE' => 'Nebraska',
+      'NV' => 'Nevada',
+      'NH' => 'New Hampshire',
+      'NJ' => 'New Jersey',
+      'NM' => 'New Mexico',
+      'NY' => 'New York',
+      'NC' => 'North Carolina',
+      'ND' => 'North Dakota',
+      'OH' => 'Ohio',
+      'OK' => 'Oklahoma',
+      'OR' => 'Oregon',
+      'PW' => 'Palau',
+      'PA' => 'Pennsylvania',
+      'PR' => 'Puerto Rico',
+      'RI' => 'Rhode Island',
+      'SC' => 'South Carolina',
+      'SD' => 'South Dakota',
+      'TN' => 'Tennessee',
+      'TX' => 'Texas',
+      'UT' => 'Utah',
+      'VT' => 'Vermont',
+      'VI' => 'Virgin Island',
+      'VA' => 'Virginia',
+      'WA' => 'Washington',
+      'WV' => 'West Virginia',
+      'WI' => 'Wisconsin',
+      'WY' => 'Wyoming'
+    }
+
+
+  end
+end
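These are plain hash constants, so consumers index them directly. Example lookups, with return values read straight off the tables above:

  Bplmodels::Constants::GENRE_LOOKUP['Photographs']           # => {:id=>'tgm007721', :authority=>'gmgpc'}
  Bplmodels::Constants::COUNTRY_TGN_LOOKUP['Japan'][:tgn_id]  # => 1000120
  Bplmodels::Constants::STATE_ABBR['MA']                      # => 'Massachusetts'
  Bplmodels::Constants::STATE_ABBR.key('Massachusetts')       # => 'MA' (reverse lookup, as render_display_placename does below)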
data/lib/bplmodels/datastream_input_funcs.rb ADDED
@@ -0,0 +1,949 @@
+#!/bin/env ruby
+# encoding: utf-8
+
+module Bplmodels
+  class DatastreamInputFuncs
+
+    # these functions can be used to split names into subparts for <mods:name> and <mods:subject><mods:name>
+
+    # use for personal name headings e.g., <mods:name type="personal">
+    # returns personal name data as a hash which can be used to populate <mods:namePart> and <mads:namePart type="date">
+
+    def self.persNamePartSplitter(inputstring)
+      splitNamePartsHash = Hash.new
+      unless inputstring =~ /\d{4}/
+        splitNamePartsHash[:namePart] = inputstring
+      else
+        if inputstring =~ /\(.*\d{4}.*\)/
+          splitNamePartsHash[:namePart] = inputstring
+        else
+          splitNamePartsHash[:namePart] = inputstring.gsub(/,[\d\- \.\w?]*$/,"")
+          splitArray = inputstring.split(/.*,/)
+          splitNamePartsHash[:datePart] = splitArray[1].strip
+        end
+      end
+      return splitNamePartsHash
+    end
+
+    # use for corporate name headings e.g., <mods:name type="corporate">
+    # returns corporate name data as an array which can be used to populate <mods:namePart> subparts
+    # (corporate name subparts are not differentiated by any attributes in the xml)
+    # (see http://id.loc.gov/authorities/names/n82139319.madsxml.xml for example)
+    # Note: (?!\)) part is to check for examples like: 'Boston (Mass.) Police Dept.'
+
+    def self.corpNamePartSplitter(inputstring)
+      splitNamePartsArray = Array.new
+      unless inputstring =~ /[\S]{5}\.(?!\))/
+        splitNamePartsArray << inputstring
+      else
+        while inputstring =~ /[\S]{5}\.(?!\))/
+          snip = /[\S]{5}\.(?!\))/.match(inputstring).post_match
+          subpart = inputstring.gsub(snip,"")
+          splitNamePartsArray << subpart.gsub(/\.\z/,"").strip
+          inputstring = snip
+        end
+        splitNamePartsArray << inputstring.gsub(/\.\z/,"").strip
+      end
+      return splitNamePartsArray
+    end
+
+    # a function to convert date data from OAI feeds into MODS-usable date data
+    # assumes date values containing ";" have already been split
+    # returns hash with :single_date, :date_range, :date_qualifier, and/or :date_note values
+    def self.convert_to_mods_date(value)
+
+      date_data = {} # create the hash to hold all the data
+      source_date_string = value.strip # variable to hold original value
+
+      # weed out obvious bad dates before processing
+      if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
+          (value.match(/\d\d\d\d-\z/)) || # 1975-
+          (value.match(/\d\d-\d\d\/\d\d/)) || # 1975-09-09/10
+          (value.match(/\d*\(\d*\)/)) || # 1975(1976)
+          (value.scan(/\d\d\d\d/).length > 2) || # 1861/1869/1915
+          (value.scan(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).length > 1) ||
+          # or if data does not match any of these
+          (!value.match(/(\d\dth [Cc]entury|\d\d\d-\?*|\d\d\d\?|\d\d\?\?|\d\d\d\d)/))
+        date_data[:date_note] = source_date_string
+      else
+        # find date qualifier
+        if value.include? '?'
+          date_data[:date_qualifier] = 'questionable'
+        elsif value.match(/\A[Cc]/)
+          date_data[:date_qualifier] = 'approximate'
+        elsif (value.match(/[\[\]]+/)) || (value.match(/[(][A-Za-z, \d]*[\d]+[A-Za-z, \d]*[)]+/)) # if [] or ()
+          date_data[:date_qualifier] = 'inferred'
+        end
+
+        # remove unnecessary chars and words
+        value = value.gsub(/[\[\]\(\)\.,']/,'')
+        value = value.gsub(/(\b[Bb]etween\b|\bcirca\b|\bca\b|\Aca|\Ac)/,'').strip
+
+        # differentiate between ranges and single dates
+        if (value.scan(/\d\d\d\d/).length == 2) ||
+            (value.include? '0s') || # 1970s
+            (value.include? 'entury') || # 20th century
+            (value.match(/(\A\d\d\d\?|\A\d\d\?\?|\A\d\d\d-\?*|\d\d\d\d-\d\z|\d\d\d\d\/[\d]{1,2}\z)/)) ||
+            (value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/)) ||
+            ((value.match(/\d\d\d\d-\d\d\z/)) && (value[-2..-1].to_i > 12)) # 1975-76 but NOT 1910-11
+
+          # RANGES
+          date_data[:date_range] = {}
+
+          # deal with date strings with 2 4-digit year values separately
+          if value.scan(/\d\d\d\d/).length == 2
+
+            # convert weird span indicators ('or','and','||'), remove extraneous text
+            value = value.gsub(/(or|and|\|\|)/,'-').gsub(/[A-Za-z\?\s]/,'')
+
+            if value.match(/\A[12][\d]{3}-[01][\d]-[12][\d]{3}-[01][\d]\z/) # 1895-05-1898-01
+              date_data_range_start = value[0..6]
+              date_data_range_end = value[-7..-1]
+            elsif value.match(/\A[12][\d]{3}\/[12][\d]{3}\z/) # 1987/1988
+              date_data_range_start = value[0..3]
+              date_data_range_end = value[-4..-1]
+            else
+              range_dates = value.split('-') # split the dates into an array
+              range_dates.each_with_index do |range_date,index|
+                # format the data properly
+                if range_date.include? '/' # 11/05/1965
+                  range_date_pieces = range_date.split('/')
+                  range_date_piece_year = range_date_pieces.last
+                  range_date_piece_month = range_date_pieces.first.length == 2 ? range_date_pieces.first : '0' + range_date_pieces.first
+                  if range_date_pieces.length == 3
+                    range_date_piece_day = range_date_pieces[1].length == 2 ? range_date_pieces[1] : '0' + range_date_pieces[1]
+                  end
+                  value_to_insert = range_date_piece_year + '-' + range_date_piece_month
+                  value_to_insert << '-' + range_date_piece_day if range_date_piece_day
+                elsif range_date.match(/\A[12][\d]{3}\z/)
+                  value_to_insert = range_date
+                end
+                # add the data to the proper variable
+                if value_to_insert
+                  if index == 0
+                    date_data_range_start = value_to_insert
+                  else
+                    date_data_range_end = value_to_insert
+                  end
+                end
+              end
+            end
+          else
+            # if there are 'natural language' range values, find, assign to var, then remove
+            text_range = value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).to_s
+            if text_range.length > 0
+              date_data[:date_qualifier] ||= 'approximate' # TODO - remove this??
+              value = value.gsub(/#{text_range}/,'').strip
+            end
+
+            # deal with ranges for which 'natural language' range values are ignored
+            if value.match(/\A1\d\?\?\z/) # 19??
+              date_data_range_start = value[0..1] + '00'
+              date_data_range_end = value[0..1] + '99'
+            elsif value.match(/\A[12]\d\d-*\?*\z/) # 195? || 195-? || 195-
+              date_data_range_start = value[0..2] + '0'
+              date_data_range_end = value[0..2] + '9'
+            elsif value.match(/\A[12]\d\d\d[-\/][\d]{1,2}\z/) # 1956-57 || 1956/57 || 1956-7
+              if value.length == 7 && (value[5..6].to_i > value[2..3].to_i)
+                date_data_range_start = value[0..3]
+                date_data_range_end = value[0..1] + value[5..6]
+              elsif value.length == 6 && (value[5].to_i > value[3].to_i)
+                date_data_range_start = value[0..3]
+                date_data_range_end = value[0..2] + value[5]
+              end
+              date_data[:date_note] = source_date_string if text_range.length > 0
+            end
+            # deal with ranges where text range values are evaluated
+            value = value.gsub(/\?/,'').strip # remove question marks
+
+            # centuries
+            if value.match(/([12][\d]{1}th [Cc]entury|[12][\d]{1}00s)/) # 19th century || 1800s
+              if value.match(/[12][\d]{1}00s/)
+                century_prefix_date = value.match(/[12][\d]{1}/).to_s
+              else
+                century_prefix_date = (value.match(/[12][\d]{1}/).to_s.to_i-1).to_s
+              end
+              if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
+                if text_range.match(/[Ee]arly/)
+                  century_suffix_dates = %w[00 39]
+                elsif text_range.match(/[Mm]id/)
+                  century_suffix_dates = %w[30 69]
+                else
+                  century_suffix_dates = %w[60 99]
+                end
+              end
+              date_data_range_start = century_suffix_dates ? century_prefix_date + century_suffix_dates[0] : century_prefix_date + '00'
+              date_data_range_end = century_suffix_dates ? century_prefix_date + century_suffix_dates[1] : century_prefix_date + '99'
+            else
+              # remove any remaining non-date text
+              value.match(/[12][1-9][1-9]0s/) ? is_decade = true : is_decade = false # but preserve decade-ness
+              remaining_text = value.match(/\D+/).to_s
+              value = value.gsub(/#{remaining_text}/,'').strip if remaining_text.length > 0
+
+              # decades
+              if is_decade
+                decade_prefix_date = value.match(/\A[12][1-9][1-9]/).to_s
+                if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
+                  if text_range.match(/[Ee]arly/)
+                    decade_suffix_dates = %w[0 3]
+                  elsif text_range.match(/[Mm]id/)
+                    decade_suffix_dates = %w[4 6]
+                  else
+                    decade_suffix_dates = %w[7 9]
+                  end
+                end
+                date_data_range_start = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[0] : decade_prefix_date + '0'
+                date_data_range_end = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[1] : decade_prefix_date + '9'
+              else
+                # single year ranges
+                single_year_prefix = value.match(/[12][0-9]{3}/).to_s
+                if text_range.length > 0
+                  if text_range.match(/[Ee]arly/)
+                    single_year_suffixes = %w[01 04]
+                  elsif text_range.match(/[Mm]id/)
+                    single_year_suffixes = %w[05 08]
+                  elsif text_range.match(/[Ll]ate/)
+                    single_year_suffixes = %w[09 12]
+                  elsif text_range.match(/[Ww]inter/)
+                    single_year_suffixes = %w[01 03]
+                  elsif text_range.match(/[Ss]pring/)
+                    single_year_suffixes = %w[03 05]
+                  elsif text_range.match(/[Ss]ummer/)
+                    single_year_suffixes = %w[06 08]
+                  else text_range.match(/[F]all/)
+                    single_year_suffixes = %w[09 11]
+                  end
+                  date_data_range_start = single_year_prefix + '-' + single_year_suffixes[0]
+                  date_data_range_end = single_year_prefix + '-' + single_year_suffixes[1]
+                end
+              end
+              # if possibly significant info removed, include as note
+              date_data[:date_note] = source_date_string if remaining_text.length > 1
+            end
+          end
+
+          # insert the values into the date_data hash
+          if date_data_range_start && date_data_range_end
+            date_data[:date_range][:start] = date_data_range_start
+            date_data[:date_range][:end] = date_data_range_end
+          else
+            date_data[:date_note] ||= source_date_string
+            date_data.delete :date_range
+          end
+
+        else
+          # SINGLE DATES
+          value = value.gsub(/\?/,'') # remove question marks
+          # fix bad spacing (e.g. December 13,1985 || December 3,1985)
+          value = value.insert(-5, ' ') if value.match(/[A-Za-z]* \d{6}/) || value.match(/[A-Za-z]* \d{5}/)
+
+          # try to automatically parse single dates with YYYY && MM && DD values
+          if Timeliness.parse(value).nil?
+            # start further processing
+            if value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
+              date_data[:single_date] = value
+            elsif value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
+              value = '0' + value if value.match(/\A[1-9][-\/][12]\d\d\d\z/) # m-yyyy || m/yyyy
+              date_data[:single_date] = value[3..6] + '-' + value[0..1]
+            elsif value.match(/\A[A-Za-z]{3,9}[\.]? [12]\d\d\d\z/) # April 1987 || Apr. 1987
+              value = value.split(' ')
+              if value[0].match(/\A[A-Za-z]{3}[\.]?\z/)
+                value_month = '%02d' % Date::ABBR_MONTHNAMES.index(value[0]) if Date::ABBR_MONTHNAMES.index(value[0])
+              else
+                value_month = '%02d' % Date::MONTHNAMES.index(value[0]) if Date::MONTHNAMES.index(value[0])
+              end
+              date_data[:single_date] = value_month ? value[1] + '-' + value_month : value[1]
+            elsif value.match(/\A[12]\d\d\d\z/) # 1999
+              date_data[:single_date] = value
+            else
+              date_data[:date_note] = source_date_string
+            end
+          else
+            date_data[:single_date] = Timeliness.parse(value).strftime("%Y-%m-%d")
+          end
+
+        end
+
+      end
+
+      # some final validation, just in case
+      date_validation_array = []
+      date_validation_array << date_data[:single_date] if date_data[:single_date]
+      date_validation_array << date_data[:date_range][:start] if date_data[:date_range]
+      date_validation_array << date_data[:date_range][:end] if date_data[:date_range]
+      date_validation_array.each do |date_to_val|
+        if date_to_val.length == '7'
+          bad_date = true unless date_to_val[-2..-1].to_i.between?(1,12) && !date_to_val.nil?
+        elsif
+          date_to_val.length == '10'
+          bad_date = true unless Timeliness.parse(value) && !date_to_val.nil?
+        end
+        if bad_date
+          date_data[:date_note] ||= source_date_string
+          date_data.delete :single_date if date_data[:single_date]
+          date_data.delete :date_range if date_data[:date_range]
+        end
+      end
+
+      # if the date slipped by all the processing somehow!
+      if date_data[:single_date].nil? && date_data[:date_range].nil? && date_data[:date_note].nil?
+        date_data[:date_note] = source_date_string
+      end
+
+      date_data
+
+    end
+
+    # retrieve data from Getty TGN to populate <mods:subject auth="tgn">
+    def self.get_tgn_data(tgn_id)
+      tgn_response = Typhoeus::Request.get('http://vocabsservices.getty.edu/TGNService.asmx/TGNGetSubject?subjectID=' + tgn_id, userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])
+      unless tgn_response.code == 500
+        tgnrec = Nokogiri::XML(tgn_response.body)
+        #puts tgnrec.to_s
+
+        # coordinates
+        if tgnrec.at_xpath("//Coordinates")
+          coords = {}
+          coords[:latitude] = tgnrec.at_xpath("//Latitude/Decimal").children.to_s
+          coords[:longitude] = tgnrec.at_xpath("//Longitude/Decimal").children.to_s
+        else
+          coords = nil
+        end
+
+        hier_geo = {}
+
+        #main term
+        if tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text")
+          tgn_term_type = tgnrec.at_xpath("//Preferred_Place_Type/Place_Type_ID").children.to_s
+          pref_term_langs = tgnrec.xpath("//Terms/Preferred_Term/Term_Languages/Term_Language/Language")
+          # if the preferred term is the preferred English form, use that
+          if pref_term_langs.children.to_s.include? "English"
+            tgn_term = tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
+          else # use the non-preferred term which is the preferred English form
+            if tgnrec.xpath("//Terms/Non-Preferred_Term")
+              non_pref_terms = tgnrec.xpath("//Terms/Non-Preferred_Term")
+              non_pref_terms.each do |non_pref_term|
+                non_pref_term_langs = non_pref_term.children.css("Term_Language")
+                # have to loop through these, as sometimes languages share form
+                non_pref_term_langs.each do |non_pref_term_lang|
+                  if non_pref_term_lang.children.css("Preferred").children.to_s == "Preferred" && non_pref_term_lang.children.css("Language").children.to_s == "English"
+                    tgn_term = non_pref_term.children.css("Term_Text").children.to_s
+                  end
+                end
+              end
+            end
+          end
+          # if no term is the preferred English form, just use the preferred term
+          tgn_term ||= tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
+        end
+        if tgn_term && tgn_term_type
+          case tgn_term_type
+            when '29000/continent'
+              hier_geo[:continent] = tgn_term
+            when '81010/nation'
+              hier_geo[:country] = tgn_term
+            when '81161/province'
+              hier_geo[:province] = tgn_term
+            when '81165/region', '82193/union', '80005/semi-independent political entity'
+              hier_geo[:region] = tgn_term
+            when '81175/state', '81117/department', '82133/governorate'
+              hier_geo[:state] = tgn_term
+            when '81181/territory', '81021/dependent state', '81186/union territory', '81125/national district'
+              hier_geo[:territory] = tgn_term
+            when '81115/county'
+              hier_geo[:county] = tgn_term
+            when '83002/inhabited place'
+              hier_geo[:city] = tgn_term
+            when '84251/neighborhood'
+              hier_geo[:city_section] = tgn_term
+            when '21471/island'
+              hier_geo[:island] = tgn_term
+            when '81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
+              hier_geo[:area] = tgn_term
+            else
+              non_hier_geo = tgn_term
+          end
+        end
+
+        # parent data for <mods:hierarchicalGeographic>
+        if tgnrec.at_xpath("//Parent_String")
+          parents = tgnrec.at_xpath("//Parent_String").children.to_s.split('], ')
+          parents.each do |parent|
+            if parent.include? '(continent)'
+              hier_geo[:continent] = parent
+            elsif parent.include? '(nation)'
+              hier_geo[:country] = parent
+            elsif parent.include? '(province)'
+              hier_geo[:province] = parent
+            elsif (parent.include? '(region)') || (parent.include? '(union)') || (parent.include? '(semi-independent political entity)')
+              hier_geo[:region] = parent
+            elsif (parent.include? '(state)') || (parent.include? '(department)') || (parent.include? '(governorate)')
+              hier_geo[:state] = parent
+            elsif (parent.include? '(territory)') || (parent.include? '(dependent state)') || (parent.include? '(union territory)') || (parent.include? '(national district)')
+              hier_geo[:territory] = parent
+            elsif parent.include? '(county)'
+              hier_geo[:county] = parent
+            elsif parent.include? '(inhabited place)'
+              hier_geo[:city] = parent
+            elsif parent.include? '(neighborhood)'
+              hier_geo[:city_section] = parent
+            elsif parent.include? '(island)'
+              hier_geo[:island] = parent
+            elsif (parent.include? '(area)') || (parent.include? '(general region)') || (parent.include? '(deserted settlement)') || (parent.include? '(historical region)') || (parent.include? '(national division)')
+              hier_geo[:area] = parent
+            end
+          end
+          hier_geo.each do |k,v|
+            hier_geo[k] = v.gsub(/ \(.*/,'')
+          end
+        end
+
+        tgn_data = {}
+        tgn_data[:coords] = coords
+        tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil
+        tgn_data[:non_hier_geo] = non_hier_geo ? non_hier_geo : nil
+
+      else
+
+        tgn_data = nil
+
+      end
+
+      return tgn_data
+
+    end
+
+    #Note: Limited to only looking at United States places...
+    def self.parse_bing_api(term)
+      return_hash = {}
+
+      #Bing API does badly with paranthesis...
+      if term.match(/[\(\)]+/)
+        return return_hash
+      end
+
+      #Sometimes with building, city, state, bing is dumb and will only return state. Example: Boston Harbor, Boston, Mass.
+      #So if not a street address, pass to have google handle it for better results...
+      #Example of another bad record: South Street bridge, West Bridgewater, Mass. would give a place in Holyoke
+      if term.split(' ').length >= 3 && term.match(/\d/).blank? && term.downcase.match(/ave\.*,/).blank? && term.downcase.match(/avenue\.*,/).blank? && term.downcase.match(/street\.*,/).blank? && term.downcase.match(/st\.*,/).blank? && term.downcase.match(/road\.*,/).blank? && term.downcase.match(/rd\.*,/).blank?
+        return return_hash
+      end
+
+      Geocoder.configure(:lookup => :bing,:api_key => 'Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn',:timeout => 7)
+      bing_api_result = Geocoder.search(term)
+
+
+
+      #Use bing first and only for United States results...
+      if bing_api_result.present? && bing_api_result.first.data["address"]["countryRegion"] == 'United States'
+        if bing_api_result.first.data["address"]["addressLine"].present?
+          return_hash[:keep_original_string] = true
+          return_hash[:coordinates] = bing_api_result.first.data["geocodePoints"].first["coordinates"].first.to_s + ',' + bing_api_result.first.data["geocodePoints"].first["coordinates"].last.to_s
+        end
+
+        return_hash[:country_part] = bing_api_result.first.data["address"]["countryRegion"]
+
+        if return_hash[:country_part] == 'United States'
+          return_hash[:state_part] = Bplmodels::Constants::STATE_ABBR[bing_api_result.first.data["address"]["adminDistrict"]]
+        else
+          return_hash[:state_part] = bing_api_result.first.data["address"]["adminDistrict"]
+        end
+
+        return_hash[:city_part] = bing_api_result.first.data["address"]["locality"]
+      end
+
+      return return_hash
+    end
+
+    #Mapquest allows unlimited requests - start here?
+    def self.parse_mapquest_api(term)
+      return_hash = {}
+
+      #Mapquest returns bad data for: Manchester, Mass.
+      if term.include?('Manchester')
+        return return_hash
+      end
+
+      #Messed up with just neighborhoods. Example: Hyde Park (Boston, Mass.) or Hyde Park (Boston, Mass.)
+      #So if not a street address, pass to have google handle it for better results...
+      if term.split(' ').length >= 3 && term.match(/\d/).blank? && term.downcase.match(/ave\.*,/).blank? && term.downcase.match(/avenue\.*,/).blank? && term.downcase.match(/street\.*,/).blank? && term.downcase.match(/st\.*,/).blank? && term.downcase.match(/road\.*,/).blank? && term.downcase.match(/rd\.*,/).blank?
+        return return_hash
+      end
+
+      Geocoder.configure(:lookup => :mapquest,:api_key => 'Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a',:timeout => 7)
+
+      mapquest_api_result = Geocoder.search(term)
+
+
+      #If this call returned a result...
+      if mapquest_api_result.present?
+
+        if mapquest_api_result.first.data["street"].present?
+          return_hash[:keep_original_string] = true
+          return_hash[:coordinates] = mapquest_api_result.first.data['latLng']['lat'].to_s + ',' + mapquest_api_result.first.data['latLng']['lng'].to_s
+        end
+
+        return_hash[:country_part] = Country.new(mapquest_api_result.first.data["adminArea1"]).name
+
+        if return_hash[:country_part] == 'United States'
+          return_hash[:state_part] = Bplmodels::Constants::STATE_ABBR[mapquest_api_result.first.data["adminArea3"]]
+          if mapquest_api_result.first.data["adminArea4"] == 'District of Columbia'
+            return_hash[:state_part] = mapquest_api_result.first.data["adminArea4"]
+          end
+        else
+          return_hash[:state_part] = mapquest_api_result.first.data["adminArea3"].gsub(' province', '')
+        end
+
+        return_hash[:city_part] = mapquest_api_result.first.data["adminArea5"]
+      end
+
+      return return_hash
+    end
+
+    #Final fallback is google API. The best but we are limited to 2500 requests per day unless we pay the $10k a year premium account...
+    #Note: If google cannot find street, it will return just city/state, like for "Salem Street and Paradise Road, Swampscott, MA, 01907"
+    #Seems like it sets a partial_match=>true in the data section...
+    def self.parse_google_api(term)
+      return_hash = {}
+
+      Geocoder.configure(:lookup => :google,:api_key => nil,:timeout => 7)
+      google_api_result = Geocoder.search(term)
+
+      #Check if only a partial match. To avoid errors, strip out the first part and try again...
+      #Need better way to check for street endings. See: http://pe.usps.gov/text/pub28/28apc_002.htm
+      if google_api_result.present?
+        if google_api_result.first.data['partial_match'] && term.split(',').length > 1 && !term.downcase.include?('street') && !term.downcase.include?('st.') && !term.downcase.include?('avenue') && !term.downcase.include?('ave.') && !term.downcase.include?('court')
+          term = term.split(',')[1..term.split(',').length-1].join(',').strip
+          google_api_result = Geocoder.search(term)
+        end
+      end
+
+      if google_api_result.present?
+        #Types: street number, route, neighborhood, establishment, transit_station, bus_station
+        google_api_result.first.data["address_components"].each do |result|
+          if (result['types'] & ['street number', 'route', 'neighborhood', 'establishment', 'transit_station', 'bus_station']).present?
+            return_hash[:keep_original_string] = true
+            return_hash[:coordinates] = google_api_result.first.data['geometry']['location']['lat'].to_s + ',' + google_api_result.first.data['geometry']['location']['lng'].to_s
+          elsif (result['types'] & ['country']).present?
+            return_hash[:country_part] = result['long_name']
+          elsif (result['types'] & ['administrative_area_level_1']).present?
+            return_hash[:state_part] = result['long_name'].to_ascii
+          elsif (result['types'] & ['locality']).present?
+            return_hash[:city_part] = result['long_name']
+          elsif (result['types'] & ['sublocality', 'political']).present?
+            return_hash[:neighborhood_part] = result['long_name']
+          end
+        end
+
+        return_hash[:keep_original_string] ||= google_api_result.first.data['partial_match'] unless google_api_result.first.data['partial_match'].blank?
+      end
+
+
+      return return_hash
+    end
+
+    def self.parse_geographic_term(term)
+      geo_term = nil
+
+      #Weird incorrect dash seperator
+      term = term.gsub('–', '--')
+
+      #Likely too long to be an address... some fields have junk with an address string...
+      if term.length > 125
+        return nil
+      end
+
+      #TODO: Use Countries gem of https://github.com/hexorx/countries
+      #test = Country.new('US')
+      #test.states
+
+      #Parsing a subject geographic term.
+      if term.include?('--')
+        term.split('--').each_with_index do |split_term, index|
+          if split_term.include?('Massachusetts') || split_term.include?('New Jersey') || split_term.include?('Wisconsin') || split_term.include?('New Hampshire') || split_term.include?('New York') || split_term.include?('Maine')
+            geo_term = term.split('--')[index..term.split('--').length-1].reverse!.join(',')
+          elsif split_term.include?('Mass') || split_term.include?(' MA')
+            geo_term = split_term
+          end
+        end
+      #Other than a '--' field
+      #Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
+      elsif term.include?(' - ')
+        term.split(' - ').each do |split_term|
+          if split_term.include?('Mass') || split_term.include?(' MA') || split_term.include?('Massachusetts') || split_term.include?('New Jersey') || split_term.include?('Wisconsin') || split_term.include?('New Hampshire') || split_term.include?('New York') || split_term.include?('Maine')
+            geo_term = split_term
+          end
+
+        end
+      else
+        if term.include?('Mass') || term.include?(' MA') || term.include?('Massachusetts') || term.include?('New Jersey') || term.include?('Wisconsin') || term.include?('New Hampshire') || term.include?('New York') || term.include?('Maine')
+          geo_term = term
+        end
+      end
+
+      #if geo_term.blank?
+        #return nil
+      #end
+
+      return geo_term
+    end
+
+    def self.standardize_geographic_term(geo_term)
+      #Remove common junk terms
+      geo_term = geo_term.gsub('Cranberries', '').gsub('History', '').gsub('Maps', '').gsub('State Police', '').gsub('Pictorial works.', '').gsub(/[nN]ation/, '').gsub('Asia', '').gsub('(Republic)', '').strip
+
+      #Strip any leading periods or commas from junk terms
+      geo_term = geo_term.gsub(/^[\.,]+/, '').strip
+
+      #Replace any semicolons with commas... possible strip them?
+      geo_term = geo_term.gsub(';', ',')
+
+      #Note: the following returns junk from Bing as if these are in WI, California, Etc:
+      #East Monponsett Lake (Halifax, Mass.)
+      #Silver Lake (Halifax, Mass.)
+      #Scarier note: Washington Park (Reading, Mass.) will always return Boston, MA in google
+      if geo_term.match(/[\(\)]+/)
+        #Attempt to fix address if something like (word)
+        if geo_term.match(/ \(+.*\)+/)
+          #Make this replacement better?
+          geo_term = geo_term.gsub(' (', ', ').gsub(')', '')
+        #Else skip this as data returned likely will be unreliable for now... FIXME when use case occurs.
+        else
+          return nil
+        end
+      end
+
+      return geo_term
+    end
+
+
+
+    def self.tgn_id_from_term(term,parse_term=false)
+      return_hash = {}
+      max_retry = 3
+      sleep_time = 60 # In seconds
+      retry_count = 0
+
+      #If not a good address source, parsing is done here...
+      term = parse_geographic_term(term) unless !parse_term
+
+      term = standardize_geographic_term(term) unless term.blank?
+
+      if term.blank?
+        return return_hash
+      end
+
+      return_hash = parse_mapquest_api(term)
+
+      if return_hash.blank?
+        return_hash = parse_bing_api(term)
+      end
+
+      if return_hash.blank?
+        return_hash = parse_google_api(term)
+      end
+
+      if return_hash.blank?
+        return nil
+      end
+
+      state_part = return_hash[:state_part]
+
+
+      country_code = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_id] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
+      country_code ||= ''
+
+
+      country_part = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_country_name] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
+      country_part ||= return_hash[:country_part]
+      country_part ||= ''
+
+      city_part = return_hash[:city_part]
+
+      #Keep original string if three parts at least or if there is a number in the term.
+      if term.split(',').length >= 3 || term.match(/\d/).present?
+        return_hash[:keep_original_string] = true
+      end
+
+      top_match_term = ''
+      match_term = nil
+
+      if city_part.blank? && state_part.blank?
+        # Limit to nations
+        place_type = 81010
+        top_match_term = ''
+        match_term = country_part.to_ascii.downcase || term.to_ascii.downcase
+      elsif state_part.present? && city_part.blank? && country_code == 7012149
+        #Limit to states
+        place_type = 81175
+        top_match_term = country_part.to_ascii.downcase
+        match_term = state_part.to_ascii.downcase
+      elsif state_part.present? && city_part.blank?
+        #Limit to regions
+        place_type = 81165
+        top_match_term = country_part.to_ascii.downcase
+        match_term = state_part.to_ascii.downcase
+      elsif state_part.present? && city_part.present?
+        #Limited to only inhabited places at the moment...
+        place_type = 83002
+        top_match_term = state_part.to_ascii.downcase
+        match_term = city_part.to_ascii.downcase
+      else
+        return nil
+      end
+
+      begin
+        if retry_count > 0
+          sleep(sleep_time)
+        end
+        retry_count = retry_count + 1
+
+        tgn_response = Typhoeus::Request.get("http://vocabsservices.getty.edu/TGNService.asmx/TGNGetTermMatch?placetypeid=#{place_type}&nationid=#{country_code}&name=" + CGI.escape(match_term), userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])
+
+
+      end until (tgn_response.code != 500 || retry_count == max_retry)
+
+      unless tgn_response.code == 500
+        puts 'match found!'
+        parsed_xml = Nokogiri::Slop(tgn_response.body)
+
+        if parsed_xml.Vocabulary.Count.text == '0'
+          return nil
+        end
+
+        #If only one result, then not array. Otherwise array....
+        if parsed_xml.Vocabulary.Subject.first.blank?
+          subject = parsed_xml.Vocabulary.Subject
+
+          current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
+
+          if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
+            return_hash[:tgn_id] = subject.Subject_ID.text
+          end
+        else
+          parsed_xml.Vocabulary.Subject.each do |subject|
+            current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip
+
+            if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
+              return_hash[:tgn_id] = subject.Subject_ID.text
+            end
+          end
+        end
+
+      end
+
+      if tgn_response.code == 500
+        raise 'TGN Server appears to not be responding for Geographic query: ' + term
+      end
+
+
+      return return_hash
+    end
+
+
+    def self.LCSHize(value)
+
+      if value.blank?
+        return ''
+      end
+
+      #Remove stuff that is quoted (quotation for first and last words)..
+      value = value.gsub(/^['"]/, '').gsub(/['"]$/, '').strip
+
+      #Remove ending periods ... except when an initial or etc.
+      if value.last == '.' && value[-2].match(/[^A-Z]/) && !value[-4..-1].match('etc.')
+        value = value.slice(0..-2)
+      end
+
+      #Fix when '- -' occurs
+      value = value.gsub(/-\s-/,'--')
+
+      #Fix for "em" dashes - two types?
+      value = value.gsub('—','--')
+
+      #Fix for "em" dashes - two types?
+      value = value.gsub('–','--')
+
+      #Fix for ' - ' combinations
+      value = value.gsub(' - ','--')
+
+      #Remove white space after and before '--'
+      value = value.gsub(/\s+--/,'--')
+      value = value.gsub(/--\s+/,'--')
+
+      #Ensure first work is capitalized
+      value[0] = value.first.capitalize[0]
+
+      #Strip an white space
+      value = Bplmodels::DatastreamInputFuncs.strip_value(value)
+
+      return value
+    end
+
+
+    def self.strip_value(value)
+      if(value.blank?)
+        return nil
+      else
+        if value.class == Float || value.class == Fixnum
+          value = value.to_i.to_s
+        end
+
+        # Make sure it is all UTF-8 and not character encodings or HTML tags and remove any cariage returns
+        return utf8Encode(value)
+      end
+    end
+
+    def self.utf8Encode(value)
+      return HTMLEntities.new.decode(ActionView::Base.full_sanitizer.sanitize(value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' '))).strip
+    end
+
+    def self.split_with_nils(value)
+      if(value == nil)
+        return ""
+      else
+        split_value = value.split("||")
+        0.upto split_value.length-1 do |pos|
+          split_value[pos] = strip_value(split_value[pos])
+        end
+
+        return split_value
+      end
+    end
+
+
+    #Problems: A . Some Name and A & R
+    def self.getProperTitle(title)
+      nonSort = nil
+      title = title
+
+      if title[0..1].downcase == "a " && (title[0..2].downcase != "a ." && title[0..2].downcase != "a &")
+        nonSort = title[0..1]
+        title = title[2..title.length]
+      elsif title[0..3].downcase == "the "
+        nonSort = title[0..3]
+        title = title[4..title.length]
+      elsif title[0..2].downcase == "an "
+        nonSort = title[0..2]
+        title = title[3..title.length]
+      #elsif title[0..6].downcase == "in the "
+        #return [title[0..5], title[7..title.length]]
+      end
+
+      return [nonSort, title]
+    end
+
+    def self.parse_language(language_value)
+      return_hash = {}
+      authority_check = Qa::Authorities::Loc.new
+      authority_result = authority_check.search(URI.escape(language_value), 'iso639-2')
+
+      if authority_result.present?
+        authority_result = authority_result.select{|hash| hash['label'].downcase == language_value.downcase || hash['id'].split('/').last.downcase == language_value.downcase }
+        if authority_result.present?
+          return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+          return_hash[:label] = authority_result.first["label"]
+        end
+      end
+
+      return return_hash
+    end
+
+    def self.parse_role(role_value)
+      return_hash = {}
+      authority_check = Qa::Authorities::Loc.new
+      authority_result = authority_check.search(URI.escape(role_value), 'relators')
+      if authority_result.present?
+        authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
+        if authority_result.present?
+          return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+          return_hash[:label] = authority_result.first["label"]
+        end
+      end
+
+      return return_hash
+    end
+
+    def self.parse_name_roles(name)
+      return_hash = {}
+
+      #Make sure we have at least three distinct parts of 2-letter+ words. Avoid something like: Steven C. Painter or Painter, Steven C.
+      #Possible Issue: Full name of Steven Carlos Painter ?
+      potential_role_check = name.match(/[\(\"\',]*\w\w+[\),\"\']* [\w\.,\d\-\"]*[\w\d][\w\d][\w\.,\d\-\"]* [\(\"\',]*\w\w+[\),\"\']*$/) || name.split(/[ ]+/).length >= 4
+
+      if potential_role_check.present?
+        authority_check = Qa::Authorities::Loc.new
+
+        #Check the last value of the name string...
+        role_value = name.match(/(?<=[\(\"\', ])\w+(?=[\),\"\']*$)/).to_s
+        authority_result = authority_check.search(URI.escape(role_value), 'relators')
+        if authority_result.present?
+
+          authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
+          if authority_result.present?
+            #Remove the word and any other characters around it. $ means the end of the line.
+            #
+            return_hash[:name] = name.sub(/[\(\"\', ]*\w+[\),\"\']*$/, '').gsub(/^[ ]*:/, '').strip
+            return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+            return_hash[:label] = authority_result.first["label"]
+          end
+        end
+
+        #Check the last value of the name string...
+        role_value = name.match(/\w+(?=[\),\"\']*)/).to_s
+        authority_result = authority_check.search(URI.escape(role_value), 'relators')
+        if authority_result.present? && return_hash.blank?
+
+          authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
+          if authority_result.present?
+            #Remove the word and any other characters around it. $ means the end of the line.
+            return_hash[:name] = name.sub(/[\(\"\', ]*\w+[ \),\"\']*/, '').gsub(/^[ ]*:/, '').strip
+            return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
+            return_hash[:label] = authority_result.first["label"]
+          end
+        end
+      end
+
+      return return_hash
+    end
+
+    def self.is_numeric? (string)
+      true if Float(string) rescue false
+    end
+
+    # returns a well-formatted placename for display on a map
+    # hiergeo_hash = hash of <mods:hierarchicahlGeographic> elements
+    def self.render_display_placename(hiergeo_hash)
+      placename = []
+      case hiergeo_hash[:country]
+        when 'United States','Canada'
+          if hiergeo_hash[:state] || hiergeo_hash[:province]
+            placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence
+            if placename[0].nil? && hiergeo_hash[:county]
+              placename[0] = hiergeo_hash[:county] + ' (county)'
+            end
+            if placename[0]
+              placename[1] = Constants::STATE_ABBR.key(hiergeo_hash[:state]) || hiergeo_hash[:province].presence
+            else
+              placename[1] = hiergeo_hash[:state].presence || hiergeo_hash[:province].presence
+            end
+          else
+            placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence || hiergeo_hash[:country].presence
+          end
+        else
+          placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:state].presence || hiergeo_hash[:province].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence
+          if placename[0].nil? && hiergeo_hash[:county]
+            placename[0] = hiergeo_hash[:county] + ' (county)'
+          end
+          placename[1] = hiergeo_hash[:country]
+      end
+
+      if !placename.blank?
+        placename.join(', ').gsub(/(\A,\s)|(,\s\z)/,'')
+      else
+        nil
+      end
+    end
+
+  end
+end
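DatastreamInputFuncs is the grab-bag of MODS input normalizers in this release: name splitting, OAI date parsing, TGN/geocoder lookups, and LCSH cleanup. A few illustrative calls — the inputs are made up and the outputs are traced by hand from the parsing rules above, so treat them as expectations rather than documented guarantees:

  Bplmodels::DatastreamInputFuncs.persNamePartSplitter('Smith, John, 1850-1920')
  # => {:namePart=>"Smith, John", :datePart=>"1850-1920"}

  Bplmodels::DatastreamInputFuncs.convert_to_mods_date('ca. 1956-57')
  # => {:date_qualifier=>"approximate", :date_range=>{:start=>"1956", :end=>"1957"}}

  Bplmodels::DatastreamInputFuncs.getProperTitle('The North End')
  # => ["The ", "North End"]

The geocoding helpers (parse_bing_api, parse_mapquest_api, parse_google_api, tgn_id_from_term, get_tgn_data) call external services and expect BPL_CONFIG_GLOBAL credentials, so they are not runnable offline.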