bplmodels 0.0.91
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +0 -0
- data/Rakefile +40 -0
- data/app/assets/javascripts/bplmodels/application.js +15 -0
- data/app/assets/stylesheets/bplmodels/application.css +13 -0
- data/app/controllers/bplmodels/application_controller.rb +4 -0
- data/app/helpers/bplmodels/application_helper.rb +4 -0
- data/app/models/bplmodels/audio_file.rb +14 -0
- data/app/models/bplmodels/book.rb +35 -0
- data/app/models/bplmodels/card.rb +35 -0
- data/app/models/bplmodels/characterization.rb +92 -0
- data/app/models/bplmodels/collection.rb +118 -0
- data/app/models/bplmodels/complex_object_base.rb +24 -0
- data/app/models/bplmodels/correspondence.rb +35 -0
- data/app/models/bplmodels/document.rb +35 -0
- data/app/models/bplmodels/document_file.rb +8 -0
- data/app/models/bplmodels/ephemera.rb +35 -0
- data/app/models/bplmodels/file.rb +151 -0
- data/app/models/bplmodels/file_content_datastream.rb +10 -0
- data/app/models/bplmodels/fits_datastream.rb +190 -0
- data/app/models/bplmodels/image.rb +14 -0
- data/app/models/bplmodels/image_file.rb +18 -0
- data/app/models/bplmodels/institution.rb +159 -0
- data/app/models/bplmodels/manuscript.rb +34 -0
- data/app/models/bplmodels/map.rb +34 -0
- data/app/models/bplmodels/mods_desc_metadata.rb +1826 -0
- data/app/models/bplmodels/musical_notation.rb +34 -0
- data/app/models/bplmodels/newspaper.rb +15 -0
- data/app/models/bplmodels/nom_terminology.rb +1242 -0
- data/app/models/bplmodels/non_photographic_print.rb +34 -0
- data/app/models/bplmodels/oai_collection.rb +19 -0
- data/app/models/bplmodels/oai_metadata.rb +75 -0
- data/app/models/bplmodels/oai_object.rb +45 -0
- data/app/models/bplmodels/object.rb +36 -0
- data/app/models/bplmodels/object_base.rb +1241 -0
- data/app/models/bplmodels/objects/collection.rb~ +28 -0
- data/app/models/bplmodels/objects/image.rb~ +59 -0
- data/app/models/bplmodels/objects/postcard.rb~ +56 -0
- data/app/models/bplmodels/organizational_set.rb +25 -0
- data/app/models/bplmodels/periodical.rb +37 -0
- data/app/models/bplmodels/photographic_print.rb +34 -0
- data/app/models/bplmodels/relation_base.rb +99 -0
- data/app/models/bplmodels/scrapbook.rb +35 -0
- data/app/models/bplmodels/simple_object_base.rb +27 -0
- data/app/models/bplmodels/sound_recording.rb +15 -0
- data/app/models/bplmodels/system_collection.rb +8 -0
- data/app/models/bplmodels/uploads_set.rb +3 -0
- data/app/models/bplmodels/workflow_metadata.rb +99 -0
- data/app/views/layouts/bplmodels/application.html.erb +14 -0
- data/config/application.rb +6 -0
- data/config/predicate_mappings.yml +61 -0
- data/config/routes.rb +2 -0
- data/lib/bplmodels.rb +21 -0
- data/lib/bplmodels/constants.rb +119 -0
- data/lib/bplmodels/datastream_input_funcs.rb +949 -0
- data/lib/bplmodels/engine.rb +5 -0
- data/lib/bplmodels/engine.rb~ +5 -0
- data/lib/bplmodels/finder.rb +192 -0
- data/lib/bplmodels/object_funcs.rb +10 -0
- data/lib/bplmodels/version.rb +3 -0
- data/lib/tasks/bplmodels_tasks.rake +4 -0
- data/test/bplmodels_test.rb +7 -0
- data/test/dummy/README.rdoc +261 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/assets/javascripts/application.js +15 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +59 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +37 -0
- data/test/dummy/config/environments/production.rb +67 -0
- data/test/dummy/config/environments/test.rb +37 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +15 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +4 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +25 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/integration/navigation_test.rb +10 -0
- data/test/test_helper.rb +15 -0
- metadata +234 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
module Bplmodels
  # OM datastream holding repository workflow state for an object:
  # publication status, ingest provenance (item-level and per-source),
  # ARK identifier information, and content/deletion flags.
  class WorkflowMetadata < ActiveFedora::OmDatastream
    include OM::XML::Document

    WORKFLOW_NS = 'http://www.bpl.org/repository/xml/ns/workflow'
    WORKFLOW_SCHEMA = 'http://www.bpl.org/repository/xml/xsd/workflow.xsd'
    WORKFLOW_PARAMS = {
        "version" => "0.0.1",
        "xmlns:xlink" => "http://www.w3.org/1999/xlink",
        "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
        "xmlns" => WORKFLOW_NS,
        "xsi:schemaLocation" => "#{WORKFLOW_NS} #{WORKFLOW_SCHEMA}",
    }

    set_terminology do |t|
      t.root :path => 'workflowMetadata', :xmlns => WORKFLOW_NS

      t.item_status(:path=>"itemStatus") {
        t.state(:path=>"state")
        t.state_comment(:path=>"stateComment")
        t.processing(:path=>"processing")
        t.processing_comment(:path=>"processingComment")
      }

      t.item_source(:path=>"itemSource") {
        t.ingest_origin(:path=>"ingestOrigin")
        t.ingest_filepath(:path=>"ingestFilepath") #Only supported later for file objects.
        t.ingest_filename(:path=>"ingestFilename") #Only recently added
      }

      t.item_ark_info(:path=>"arkInformation") {
        t.ark_id(:path=>"arkID")
        t.ark_type(:path=>"arkType")
        t.ark_parent_pid(:path=>"arkParentPID")
      }

      t.source(:path=>"source") {
        t.ingest_origin(:path=>"ingestOrigin")
        t.ingest_filepath(:path=>"ingestFilepath") #Only supported later for file objects.
        t.ingest_filename(:path=>"ingestFilename") #Only recently added
        t.ingest_datastream(:path=>"ingestDatastream")
      }

      t.item_designations(:path=>'itemDesignations') {
        t.flagged_for_content(:path=>"flaggedForContent")
      }

      # NOTE(review): 'markedForDelation' [sic] is the element name already
      # persisted in stored XML documents; renaming it would break reads of
      # existing records, so the typo is deliberately kept.
      t.marked_for_deletion(:path=>'markedForDelation') {
        t.reason(:path=>'reason')
      }

    end

    # Minimal empty document: just the namespaced root element.
    def self.xml_template
      Nokogiri::XML::Builder.new do |xml|
        xml.workflowMetadata(WORKFLOW_PARAMS) {

        }
      end.doc
    end

    #Required for Active Fedora 9
    def prefix(path=nil)
      return ''
    end

    # Appends value to itemSource/ingestFilepath unless blank or already present.
    def insert_file_path(value=nil)
      ingest_filepath_index = self.item_source.ingest_filepath.count

      self.item_source.ingest_filepath(ingest_filepath_index, value) unless value.blank? || self.item_source.ingest_filepath.include?(value)
    end

    # Appends value to itemSource/ingestFilename unless blank or already present.
    # FIX: the index count and the duplicate check previously read
    # ingest_filepath (copy-paste error), so filenames were appended at the
    # wrong index and de-duplicated against the path list instead of the
    # filename list.
    def insert_file_name(value=nil)
      ingest_filename_index = self.item_source.ingest_filename.count

      self.item_source.ingest_filename(ingest_filename_index, value) unless value.blank? || self.item_source.ingest_filename.include?(value)
    end

    # Records one provenance entry (path/filename/datastream) as a new <source> node.
    def insert_file_source(filepath, filename, datastream)
      source_count = self.source.count

      self.source(source_count).ingest_filepath(0, filepath) unless filepath.blank?
      self.source(source_count).ingest_filename(0, filename) unless filename.blank?
      self.source(source_count).ingest_datastream(0, datastream) unless datastream.blank?
    end

    # Sets the flaggedForContent designation (no-op when value is blank).
    def insert_flagged(value=nil)
      self.item_designations(0).flagged_for_content(0, value) unless value.blank?
    end

    # Marks an OAI-harvested record as fully published and processed.
    def insert_oai_defaults
      self.item_status(0).state = "published"
      self.item_status(0).state_comment = "OAI Harvested Record"
      self.item_status(0).processing = "complete"
      self.item_status(0).processing_comment = "Object Processing Complete"
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<%# Application layout for the Bplmodels engine: loads the engine's
    stylesheet and javascript bundles plus CSRF meta tags, then renders
    the current view inside <body>. %>
<!DOCTYPE html>
<html>
<head>
<title>Bplmodels</title>
<%= stylesheet_link_tag "bplmodels/application", :media => "all" %>
<%= javascript_include_tag "bplmodels/application" %>
<%= csrf_meta_tags %>
</head>
<body>

<%= yield %>

</body>
</html>
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# The default namespace maps to the default namespace for generating rels_ext from solr
:default_namespace: info:fedora/fedora-system:def/relations-external#

# Short prefixes usable in relationship declarations, mapped to full namespace URIs.
:predicate_namespaces:
  fedora-model: info:fedora/fedora-system:def/model#
  fedora-relations-model: info:fedora/fedora-system:def/relations-external#
  bpllib-rel: http://projecthydra.org/ns/relations#

# namespace mappings---
# you can add specific mappings for your institution by providing the following:
#   namespace_uri:
#     :relationship_symbol: relationship_identifier
#
# For example, if you have the following element in your rels_ext:
#
#   <oai:itemID>oai:example.edu:changeme:500</oai:itemID>
#
# With the last two lines of this file uncommented, the relationships hash of your object will include:
#   :oai_item_id => ["info:fedora/oai:example.edu:changeme:500"]
#
# Maps each namespace URI to the Ruby relationship symbols it supports and the
# RDF predicate local-name each symbol serializes to in RELS-EXT.
:predicate_mapping:
  info:fedora/fedora-system:def/relations-external#:
    :conforms_to: conformsTo
    :has_annotation: hasAnnotation
    :has_collection_member: hasCollectionMember
    :has_constituent: hasConstituent
    :has_dependent: hasDependent
    :has_derivation: hasDerivation
    :has_description: hasDescription
    :has_equivalent: hasEquivalent
    :has_metadata: hasMetadata
    :has_member: hasMember
    :has_model: hasModel
    :has_part: hasPart
    :has_subset: hasSubset
    :is_annotation_of: isAnnotationOf
    :is_constituent_of: isConstituentOf
    :is_dependent_of: isDependentOf
    :is_derivation_of: isDerivationOf
    :is_description_of: isDescriptionOf
    :is_member_of: isMemberOf
    :is_member_of_collection: isMemberOfCollection
    :is_metadata_for: isMetadataFor
    :is_part_of: isPartOf
    :is_subset_of: isSubsetOf
    :is_topic_of: isTopicOf
  info:fedora/fedora-system:def/model#:
    :is_contractor_of: isContractorOf
    :is_deployment_of: isDeploymentOf
    :has_service: hasService
    :has_model: hasModel
  http://www.openarchives.org/OAI/2.0/:
    :oai_item_id: itemID
  # projecthydra relations: image/crop/subcollection links used by the BPL models.
  http://projecthydra.org/ns/relations#:
    :is_governed_by: isGovernedBy
    :is_image_of: isImageOf
    :has_image: hasImage
    :has_subcollection: hasSubcollection
    :has_crop: hasCrop
    :is_crop_of: isCropOf
    :is_exemplary_image_of: isExemplaryImageOf
|
data/config/routes.rb
ADDED
data/lib/bplmodels.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "bplmodels/engine"
|
2
|
+
require "bplmodels/datastream_input_funcs"
|
3
|
+
require "bplmodels/finder"
|
4
|
+
require "bplmodels/constants"
|
5
|
+
require "timeliness"
|
6
|
+
|
7
|
+
module Bplmodels
  # Resolves the active environment name, checking (in priority order):
  # 1. DERIVATIVE_CONFIG_GLOBAL['environment'] (app-level config constant),
  # 2. Rails.env when running inside Rails,
  # 3. ENV['environment'],
  # and otherwise defaulting ENV['environment'] to 'development'.
  # Raises RuntimeError if only the legacy RAILS_ENV variable is set.
  def self.environment
    return DERIVATIVE_CONFIG_GLOBAL['environment'] if defined?(DERIVATIVE_CONFIG_GLOBAL) && DERIVATIVE_CONFIG_GLOBAL.present? && DERIVATIVE_CONFIG_GLOBAL['environment'].present?
    return Rails.env.to_s if defined?(Rails.env) && !Rails.env.nil?
    # ENV lookups never raise, so plain nil checks suffice here
    # (defined?(ENV[...]) is always truthy and adds nothing).
    return ENV['environment'] unless ENV['environment'].nil?
    unless ENV['RAILS_ENV'].nil?
      raise RuntimeError, "You're depending on RAILS_ENV for setting your environment. Please use ENV['environment'] for non-rails environment setting: 'rake foo:bar environment=test'"
    end
    ENV['environment'] = 'development'
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Bplmodels
  # Shared controlled-vocabulary lookup tables used when building MODS metadata.
  class Constants
    # Display genre term => TGM authority record (:id, :authority).
    GENRE_LOOKUP = {
      'Cards'            => { id: 'tgm001686', authority: 'gmgpc' },
      'Correspondence'   => { id: 'tgm002590', authority: 'lctgm' },
      'Documents'        => { id: 'tgm003185', authority: 'gmgpc' },
      'Drawings'         => { id: 'tgm003279', authority: 'gmgpc' },
      'Ephemera'         => { id: 'tgm003634', authority: 'gmgpc' },
      'Manuscripts'      => { id: 'tgm012286', authority: 'gmgpc' },
      'Maps'             => { id: 'tgm006261', authority: 'gmgpc' },
      'Objects'          => { id: 'tgm007159', authority: 'lctgm' },
      'Paintings'        => { id: 'tgm007393', authority: 'gmgpc' },
      'Photographs'      => { id: 'tgm007721', authority: 'gmgpc' },
      'Posters'          => { id: 'tgm008104', authority: 'gmgpc' },
      'Prints'           => { id: 'tgm008237', authority: 'gmgpc' },
      'Newspapers'       => { id: 'tgm007068', authority: 'lctgm' },
      'Sound recordings' => { id: 'tgm009874', authority: 'lctgm' },
      'Motion pictures'  => { id: 'tgm006804', authority: 'lctgm' },
      'Periodicals'      => { id: 'tgm007641', authority: 'gmgpc' },
      'Books'            => { id: 'tgm001221', authority: 'gmgpc' },
      'Albums'           => { id: 'tgm000229', authority: 'gmgpc' },
      'Musical notation' => { id: 'tgm006926', authority: 'lctgm' }
    }

    # Common country name => Getty TGN record (:tgn_id, :tgn_country_name —
    # the latter being TGN's own preferred form, hence e.g. 'Nihon').
    COUNTRY_TGN_LOOKUP = {
      'United States' => { tgn_id: 7012149, tgn_country_name: 'United States' },
      'Canada'        => { tgn_id: 7005685, tgn_country_name: 'Canada' },
      'France'        => { tgn_id: 1000070, tgn_country_name: 'France' },
      'Vietnam'       => { tgn_id: 1000145, tgn_country_name: 'Viet Nam' },
      'South Africa'  => { tgn_id: 1000193, tgn_country_name: 'South Africa' },
      'Philippines'   => { tgn_id: 1000135, tgn_country_name: 'Pilipinas' },
      'China'         => { tgn_id: 1000111, tgn_country_name: 'Zhongguo' },
      'Japan'         => { tgn_id: 1000120, tgn_country_name: 'Nihon' }
    }

    # US state/territory postal abbreviation => full name.
    # NOTE(review): several values look like transcription errors from the
    # original table ('America Samoa', 'Micronesia1', 'Islands1',
    # 'Virgin Island') but are reproduced verbatim here because downstream
    # data may already contain them — confirm before correcting.
    STATE_ABBR = {
      'AL' => 'Alabama',
      'AK' => 'Alaska',
      'AS' => 'America Samoa',          # presumably 'American Samoa' — verify
      'AZ' => 'Arizona',
      'AR' => 'Arkansas',
      'CA' => 'California',
      'CO' => 'Colorado',
      'CT' => 'Connecticut',
      'DE' => 'Delaware',
      'DC' => 'District of Columbia',
      'FM' => 'Micronesia1',            # stray footnote marker? — verify
      'FL' => 'Florida',
      'GA' => 'Georgia',
      'GU' => 'Guam',
      'HI' => 'Hawaii',
      'ID' => 'Idaho',
      'IL' => 'Illinois',
      'IN' => 'Indiana',
      'IA' => 'Iowa',
      'KS' => 'Kansas',
      'KY' => 'Kentucky',
      'LA' => 'Louisiana',
      'ME' => 'Maine',
      'MH' => 'Islands1',               # presumably 'Marshall Islands' — verify
      'MD' => 'Maryland',
      'MA' => 'Massachusetts',
      'MI' => 'Michigan',
      'MN' => 'Minnesota',
      'MS' => 'Mississippi',
      'MO' => 'Missouri',
      'MT' => 'Montana',
      'NE' => 'Nebraska',
      'NV' => 'Nevada',
      'NH' => 'New Hampshire',
      'NJ' => 'New Jersey',
      'NM' => 'New Mexico',
      'NY' => 'New York',
      'NC' => 'North Carolina',
      'ND' => 'North Dakota',
      'OH' => 'Ohio',
      'OK' => 'Oklahoma',
      'OR' => 'Oregon',
      'PW' => 'Palau',
      'PA' => 'Pennsylvania',
      'PR' => 'Puerto Rico',
      'RI' => 'Rhode Island',
      'SC' => 'South Carolina',
      'SD' => 'South Dakota',
      'TN' => 'Tennessee',
      'TX' => 'Texas',
      'UT' => 'Utah',
      'VT' => 'Vermont',
      'VI' => 'Virgin Island',          # presumably 'Virgin Islands' — verify
      'VA' => 'Virginia',
      'WA' => 'Washington',
      'WV' => 'West Virginia',
      'WI' => 'Wisconsin',
      'WY' => 'Wyoming'
    }

  end
end
|
@@ -0,0 +1,949 @@
|
|
1
|
+
#!/bin/env ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
module Bplmodels
|
5
|
+
class DatastreamInputFuncs
|
6
|
+
|
7
|
+
# these functions can be used to split names into subparts for <mods:name> and <mods:subject><mods:name>
|
8
|
+
|
9
|
+
# use for personal name headings e.g., <mods:name type="personal">
|
10
|
+
# returns personal name data as a hash which can be used to populate <mods:namePart> and <mads:namePart type="date">
|
11
|
+
|
12
|
+
# Splits a personal name heading (e.g. "Smith, John, 1800-1870") into
# subparts for <mods:name type="personal">.
# Returns a hash with :namePart and, when a trailing date segment is
# detected, :datePart. Headings whose only 4-digit year sits inside
# parentheses are treated as having no separable date.
def self.persNamePartSplitter(inputstring)
  parts = {}
  has_year = inputstring =~ /\d{4}/
  year_in_parens = inputstring =~ /\(.*\d{4}.*\)/
  if !has_year || year_in_parens
    # no date segment to split off — keep the heading whole
    parts[:namePart] = inputstring
  else
    # drop the trailing ", 1800-1870"-style segment from the name...
    parts[:namePart] = inputstring.gsub(/,[\d\- \.\w?]*$/, "")
    # ...and keep everything after the last comma as the date part
    parts[:datePart] = inputstring.split(/.*,/)[1].strip
  end
  parts
end
|
27
|
+
|
28
|
+
# use for corporate name headings e.g., <mods:name type="corporate">
|
29
|
+
# returns corporate name data as an array which can be used to populate <mods:namePart> subparts
|
30
|
+
# (corporate name subparts are not differentiated by any attributes in the xml)
|
31
|
+
# (see http://id.loc.gov/authorities/names/n82139319.madsxml.xml for example)
|
32
|
+
# Note: (?!\)) part is to check for examples like: 'Boston (Mass.) Police Dept.'
|
33
|
+
|
34
|
+
# Splits a corporate name heading into an array of subparts for
# <mods:namePart> (corporate subparts carry no distinguishing attributes;
# see http://id.loc.gov/authorities/names/n82139319.madsxml.xml).
# A subpart boundary is a '.' preceded by at least five non-space
# characters; the (?!\)) lookahead keeps forms like 'Boston (Mass.)' whole.
def self.corpNamePartSplitter(inputstring)
  boundary = /[\S]{5}\.(?!\))/
  parts = []
  if inputstring !~ boundary
    parts << inputstring
  else
    # peel off one subpart per boundary match, then keep the remainder
    while inputstring =~ boundary
      remainder = boundary.match(inputstring).post_match
      head = inputstring.gsub(remainder, "")
      parts << head.gsub(/\.\z/, "").strip
      inputstring = remainder
    end
    parts << inputstring.gsub(/\.\z/, "").strip
  end
  parts
end
|
49
|
+
|
50
|
+
# a function to convert date data from OAI feeds into MODS-usable date data
|
51
|
+
# assumes date values containing ";" have already been split
|
52
|
+
# returns hash with :single_date, :date_range, :date_qualifier, and/or :date_note values
|
53
|
+
# Converts free-text date data (e.g. from OAI feeds) into MODS-usable
# date data. Assumes date values containing ";" have already been split.
#
# @param value [String] raw date string
# @return [Hash] any of :single_date, :date_range ({:start, :end}),
#   :date_qualifier ('questionable'/'approximate'/'inferred'), :date_note.
#
# FIX: the final validation loop compared Integer lengths to the Strings
# '7'/'10' (always false), so it was dead code and malformed values such as
# a month of '13' slipped through as ranges; it also parsed `value` (which
# may have been mutated — even into an Array — by earlier processing)
# instead of the date under validation. Both corrected below.
def self.convert_to_mods_date(value)

  date_data = {} # create the hash to hold all the data
  source_date_string = value.strip # variable to hold original value

  # weed out obvious bad dates before processing
  if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
      (value.match(/\d\d\d\d-\z/)) || # 1975-
      (value.match(/\d\d-\d\d\/\d\d/)) || # 1975-09-09/10
      (value.match(/\d*\(\d*\)/)) || # 1975(1976)
      (value.scan(/\d\d\d\d/).length > 2) || # 1861/1869/1915
      (value.scan(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).length > 1) ||
      # or if data does not match any of these
      (!value.match(/(\d\dth [Cc]entury|\d\d\d-\?*|\d\d\d\?|\d\d\?\?|\d\d\d\d)/))
    date_data[:date_note] = source_date_string
  else
    # find date qualifier
    if value.include? '?'
      date_data[:date_qualifier] = 'questionable'
    elsif value.match(/\A[Cc]/) # 'circa', 'ca.', 'c1950', ...
      date_data[:date_qualifier] = 'approximate'
    elsif (value.match(/[\[\]]+/)) || (value.match(/[(][A-Za-z, \d]*[\d]+[A-Za-z, \d]*[)]+/)) # if [] or ()
      date_data[:date_qualifier] = 'inferred'
    end

    # remove unnecessary chars and words
    value = value.gsub(/[\[\]\(\)\.,']/,'')
    value = value.gsub(/(\b[Bb]etween\b|\bcirca\b|\bca\b|\Aca|\Ac)/,'').strip

    # differentiate between ranges and single dates
    if (value.scan(/\d\d\d\d/).length == 2) ||
        (value.include? '0s') || # 1970s
        (value.include? 'entury') || # 20th century
        (value.match(/(\A\d\d\d\?|\A\d\d\?\?|\A\d\d\d-\?*|\d\d\d\d-\d\z|\d\d\d\d\/[\d]{1,2}\z)/)) ||
        (value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/)) ||
        ((value.match(/\d\d\d\d-\d\d\z/)) && (value[-2..-1].to_i > 12)) # 1975-76 but NOT 1910-11

      # RANGES
      date_data[:date_range] = {}

      # deal with date strings with 2 4-digit year values separately
      if value.scan(/\d\d\d\d/).length == 2

        # convert weird span indicators ('or','and','||'), remove extraneous text
        value = value.gsub(/(or|and|\|\|)/,'-').gsub(/[A-Za-z\?\s]/,'')

        if value.match(/\A[12][\d]{3}-[01][\d]-[12][\d]{3}-[01][\d]\z/) # 1895-05-1898-01
          date_data_range_start = value[0..6]
          date_data_range_end = value[-7..-1]
        elsif value.match(/\A[12][\d]{3}\/[12][\d]{3}\z/) # 1987/1988
          date_data_range_start = value[0..3]
          date_data_range_end = value[-4..-1]
        else
          range_dates = value.split('-') # split the dates into an array
          range_dates.each_with_index do |range_date,index|
            # format the data properly
            if range_date.include? '/' # 11/05/1965
              range_date_pieces = range_date.split('/')
              range_date_piece_year = range_date_pieces.last
              range_date_piece_month = range_date_pieces.first.length == 2 ? range_date_pieces.first : '0' + range_date_pieces.first
              if range_date_pieces.length == 3
                range_date_piece_day = range_date_pieces[1].length == 2 ? range_date_pieces[1] : '0' + range_date_pieces[1]
              end
              value_to_insert = range_date_piece_year + '-' + range_date_piece_month
              value_to_insert << '-' + range_date_piece_day if range_date_piece_day
            elsif range_date.match(/\A[12][\d]{3}\z/)
              value_to_insert = range_date
            end
            # add the data to the proper variable
            if value_to_insert
              if index == 0
                date_data_range_start = value_to_insert
              else
                date_data_range_end = value_to_insert
              end
            end
          end
        end
      else
        # if there are 'natural language' range values, find, assign to var, then remove
        text_range = value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).to_s
        if text_range.length > 0
          date_data[:date_qualifier] ||= 'approximate' # TODO - remove this??
          value = value.gsub(/#{text_range}/,'').strip
        end

        # deal with ranges for which 'natural language' range values are ignored
        if value.match(/\A1\d\?\?\z/) # 19??
          date_data_range_start = value[0..1] + '00'
          date_data_range_end = value[0..1] + '99'
        elsif value.match(/\A[12]\d\d-*\?*\z/) # 195? || 195-? || 195-
          date_data_range_start = value[0..2] + '0'
          date_data_range_end = value[0..2] + '9'
        elsif value.match(/\A[12]\d\d\d[-\/][\d]{1,2}\z/) # 1956-57 || 1956/57 || 1956-7
          if value.length == 7 && (value[5..6].to_i > value[2..3].to_i)
            date_data_range_start = value[0..3]
            date_data_range_end = value[0..1] + value[5..6]
          elsif value.length == 6 && (value[5].to_i > value[3].to_i)
            date_data_range_start = value[0..3]
            date_data_range_end = value[0..2] + value[5]
          end
          date_data[:date_note] = source_date_string if text_range.length > 0
        end
        # deal with ranges where text range values are evaluated
        value = value.gsub(/\?/,'').strip # remove question marks

        # centuries
        if value.match(/([12][\d]{1}th [Cc]entury|[12][\d]{1}00s)/) # 19th century || 1800s
          if value.match(/[12][\d]{1}00s/)
            century_prefix_date = value.match(/[12][\d]{1}/).to_s
          else
            # '19th century' => years starting '18'
            century_prefix_date = (value.match(/[12][\d]{1}/).to_s.to_i-1).to_s
          end
          if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
            if text_range.match(/[Ee]arly/)
              century_suffix_dates = %w[00 39]
            elsif text_range.match(/[Mm]id/)
              century_suffix_dates = %w[30 69]
            else
              century_suffix_dates = %w[60 99]
            end
          end
          date_data_range_start = century_suffix_dates ? century_prefix_date + century_suffix_dates[0] : century_prefix_date + '00'
          date_data_range_end = century_suffix_dates ? century_prefix_date + century_suffix_dates[1] : century_prefix_date + '99'
        else
          # remove any remaining non-date text
          value.match(/[12][1-9][1-9]0s/) ? is_decade = true : is_decade = false # but preserve decade-ness
          remaining_text = value.match(/\D+/).to_s
          value = value.gsub(/#{remaining_text}/,'').strip if remaining_text.length > 0

          # decades
          if is_decade
            decade_prefix_date = value.match(/\A[12][1-9][1-9]/).to_s
            if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
              if text_range.match(/[Ee]arly/)
                decade_suffix_dates = %w[0 3]
              elsif text_range.match(/[Mm]id/)
                decade_suffix_dates = %w[4 6]
              else
                decade_suffix_dates = %w[7 9]
              end
            end
            date_data_range_start = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[0] : decade_prefix_date + '0'
            date_data_range_end = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[1] : decade_prefix_date + '9'
          else
            # single year ranges
            single_year_prefix = value.match(/[12][0-9]{3}/).to_s
            if text_range.length > 0
              if text_range.match(/[Ee]arly/)
                single_year_suffixes = %w[01 04]
              elsif text_range.match(/[Mm]id/)
                single_year_suffixes = %w[05 08]
              elsif text_range.match(/[Ll]ate/)
                single_year_suffixes = %w[09 12]
              elsif text_range.match(/[Ww]inter/)
                single_year_suffixes = %w[01 03]
              elsif text_range.match(/[Ss]pring/)
                single_year_suffixes = %w[03 05]
              elsif text_range.match(/[Ss]ummer/)
                single_year_suffixes = %w[06 08]
              else # Fall (and any other remaining text range)
                single_year_suffixes = %w[09 11]
              end
              date_data_range_start = single_year_prefix + '-' + single_year_suffixes[0]
              date_data_range_end = single_year_prefix + '-' + single_year_suffixes[1]
            end
          end
          # if possibly significant info removed, include as note
          date_data[:date_note] = source_date_string if remaining_text.length > 1
        end
      end

      # insert the values into the date_data hash
      if date_data_range_start && date_data_range_end
        date_data[:date_range][:start] = date_data_range_start
        date_data[:date_range][:end] = date_data_range_end
      else
        date_data[:date_note] ||= source_date_string
        date_data.delete :date_range
      end

    else
      # SINGLE DATES
      value = value.gsub(/\?/,'') # remove question marks
      # fix bad spacing (e.g. December 13,1985 || December 3,1985)
      value = value.insert(-5, ' ') if value.match(/[A-Za-z]* \d{6}/) || value.match(/[A-Za-z]* \d{5}/)

      # try to automatically parse single dates with YYYY && MM && DD values
      if Timeliness.parse(value).nil?
        # start further processing
        if value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
          date_data[:single_date] = value
        elsif value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
          value = '0' + value if value.match(/\A[1-9][-\/][12]\d\d\d\z/) # m-yyyy || m/yyyy
          date_data[:single_date] = value[3..6] + '-' + value[0..1]
        elsif value.match(/\A[A-Za-z]{3,9}[\.]? [12]\d\d\d\z/) # April 1987 || Apr. 1987
          value = value.split(' ')
          if value[0].match(/\A[A-Za-z]{3}[\.]?\z/)
            value_month = '%02d' % Date::ABBR_MONTHNAMES.index(value[0]) if Date::ABBR_MONTHNAMES.index(value[0])
          else
            value_month = '%02d' % Date::MONTHNAMES.index(value[0]) if Date::MONTHNAMES.index(value[0])
          end
          date_data[:single_date] = value_month ? value[1] + '-' + value_month : value[1]
        elsif value.match(/\A[12]\d\d\d\z/) # 1999
          date_data[:single_date] = value
        else
          date_data[:date_note] = source_date_string
        end
      else
        date_data[:single_date] = Timeliness.parse(value).strftime("%Y-%m-%d")
      end

    end

  end

  # some final validation, just in case
  date_validation_array = []
  date_validation_array << date_data[:single_date] if date_data[:single_date]
  date_validation_array << date_data[:date_range][:start] if date_data[:date_range]
  date_validation_array << date_data[:date_range][:end] if date_data[:date_range]
  date_validation_array.each do |date_to_val|
    if date_to_val.length == 7 # yyyy-mm: month must be 01-12 (FIX: was == '7')
      bad_date = true unless date_to_val[-2..-1].to_i.between?(1,12)
    elsif date_to_val.length == 10 # yyyy-mm-dd: must be parseable (FIX: was == '10' and parsed `value`)
      bad_date = true unless Timeliness.parse(date_to_val)
    end
    if bad_date
      date_data[:date_note] ||= source_date_string
      date_data.delete :single_date if date_data[:single_date]
      date_data.delete :date_range if date_data[:date_range]
    end
  end

  # if the date slipped by all the processing somehow!
  if date_data[:single_date].nil? && date_data[:date_range].nil? && date_data[:date_note].nil?
    date_data[:date_note] = source_date_string
  end

  date_data

end
|
296
|
+
|
297
|
+
# retrieve data from Getty TGN to populate <mods:subject auth="tgn">
|
298
|
+
# retrieve data from Getty TGN to populate <mods:subject auth="tgn">
#
# Queries the Getty TGNGetSubject web service for a single place record and
# distills it into the pieces needed for MODS geographic subject elements.
#
# tgn_id - String TGN subject ID.
#
# Returns a hash with keys:
#   :coords       - {:latitude, :longitude} strings, or nil when absent
#   :hier_geo     - hash of hierarchicalGeographic parts, or nil when empty
#   :non_hier_geo - display term for unrecognized place types, or nil
# Returns nil when the service responds with HTTP 500.
def self.get_tgn_data(tgn_id)
  # Credentials come from the app-wide config; request is a simple GET.
  tgn_response = Typhoeus::Request.get('http://vocabsservices.getty.edu/TGNService.asmx/TGNGetSubject?subjectID=' + tgn_id, userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])
  unless tgn_response.code == 500
    tgnrec = Nokogiri::XML(tgn_response.body)
    #puts tgnrec.to_s

    # coordinates
    if tgnrec.at_xpath("//Coordinates")
      coords = {}
      coords[:latitude] = tgnrec.at_xpath("//Latitude/Decimal").children.to_s
      coords[:longitude] = tgnrec.at_xpath("//Longitude/Decimal").children.to_s
    else
      coords = nil
    end

    hier_geo = {}

    #main term
    if tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text")
      tgn_term_type = tgnrec.at_xpath("//Preferred_Place_Type/Place_Type_ID").children.to_s
      pref_term_langs = tgnrec.xpath("//Terms/Preferred_Term/Term_Languages/Term_Language/Language")
      # if the preferred term is the preferred English form, use that
      if pref_term_langs.children.to_s.include? "English"
        tgn_term = tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
      else # use the non-preferred term which is the preferred English form
        if tgnrec.xpath("//Terms/Non-Preferred_Term")
          non_pref_terms = tgnrec.xpath("//Terms/Non-Preferred_Term")
          non_pref_terms.each do |non_pref_term|
            non_pref_term_langs = non_pref_term.children.css("Term_Language")
            # have to loop through these, as sometimes languages share form
            non_pref_term_langs.each do |non_pref_term_lang|
              if non_pref_term_lang.children.css("Preferred").children.to_s == "Preferred" && non_pref_term_lang.children.css("Language").children.to_s == "English"
                tgn_term = non_pref_term.children.css("Term_Text").children.to_s
              end
            end
          end
        end
      end
      # if no term is the preferred English form, just use the preferred term
      tgn_term ||= tgnrec.at_xpath("//Terms/Preferred_Term/Term_Text").children.to_s
    end
    if tgn_term && tgn_term_type
      # Map the TGN place-type ID onto the matching MODS
      # hierarchicalGeographic slot; anything unrecognized falls through
      # to the non-hierarchical display name.
      case tgn_term_type
        when '29000/continent'
          hier_geo[:continent] = tgn_term
        when '81010/nation'
          hier_geo[:country] = tgn_term
        when '81161/province'
          hier_geo[:province] = tgn_term
        when '81165/region', '82193/union', '80005/semi-independent political entity'
          hier_geo[:region] = tgn_term
        when '81175/state', '81117/department', '82133/governorate'
          hier_geo[:state] = tgn_term
        when '81181/territory', '81021/dependent state', '81186/union territory', '81125/national district'
          hier_geo[:territory] = tgn_term
        when '81115/county'
          hier_geo[:county] = tgn_term
        when '83002/inhabited place'
          hier_geo[:city] = tgn_term
        when '84251/neighborhood'
          hier_geo[:city_section] = tgn_term
        when '21471/island'
          hier_geo[:island] = tgn_term
        when '81101/area', '22101/general region', '83210/deserted settlement', '81501/historical region', '81126/national division'
          hier_geo[:area] = tgn_term
        else
          non_hier_geo = tgn_term
      end
    end

    # parent data for <mods:hierarchicalGeographic>
    # Parent_String looks like "Boston (inhabited place), Suffolk (county), ..."
    # so each segment is classified by its parenthesized place type.
    if tgnrec.at_xpath("//Parent_String")
      parents = tgnrec.at_xpath("//Parent_String").children.to_s.split('], ')
      parents.each do |parent|
        if parent.include? '(continent)'
          hier_geo[:continent] = parent
        elsif parent.include? '(nation)'
          hier_geo[:country] = parent
        elsif parent.include? '(province)'
          hier_geo[:province] = parent
        elsif (parent.include? '(region)') || (parent.include? '(union)') || (parent.include? '(semi-independent political entity)')
          hier_geo[:region] = parent
        elsif (parent.include? '(state)') || (parent.include? '(department)') || (parent.include? '(governorate)')
          hier_geo[:state] = parent
        elsif (parent.include? '(territory)') || (parent.include? '(dependent state)') || (parent.include? '(union territory)') || (parent.include? '(national district)')
          hier_geo[:territory] = parent
        elsif parent.include? '(county)'
          hier_geo[:county] = parent
        elsif parent.include? '(inhabited place)'
          hier_geo[:city] = parent
        elsif parent.include? '(neighborhood)'
          hier_geo[:city_section] = parent
        elsif parent.include? '(island)'
          hier_geo[:island] = parent
        elsif (parent.include? '(area)') || (parent.include? '(general region)') || (parent.include? '(deserted settlement)') || (parent.include? '(historical region)') || (parent.include? '(national division)')
          hier_geo[:area] = parent
        end
      end
      # Strip the trailing " (place type)" qualifier from every kept part.
      hier_geo.each do |k,v|
        hier_geo[k] = v.gsub(/ \(.*/,'')
      end
    end

    tgn_data = {}
    tgn_data[:coords] = coords
    tgn_data[:hier_geo] = hier_geo.length > 0 ? hier_geo : nil
    tgn_data[:non_hier_geo] = non_hier_geo ? non_hier_geo : nil

  else

    tgn_data = nil

  end

  return tgn_data

end
|
415
|
+
|
416
|
+
#Note: Limited to only looking at United States places...
|
417
|
+
#Note: Limited to only looking at United States places...
#
# Geocodes a free-text place string via the Bing Maps API.
#
# Intentionally returns an empty hash early for inputs Bing handles poorly:
# anything containing parentheses, and multi-word strings that don't look
# like street addresses (those are left for the Google fallback).
#
# Returns a hash that may contain :keep_original_string, :coordinates
# ("lat,long"), :country_part, :state_part, :city_part. The detail fields
# are only populated for United States results; empty hash otherwise.
def self.parse_bing_api(term)
  return_hash = {}

  #Bing API does badly with parenthesis...
  if term.match(/[\(\)]+/)
    return return_hash
  end

  #Sometimes with building, city, state, bing is dumb and will only return state. Example: Boston Harbor, Boston, Mass.
  #So if not a street address, pass to have google handle it for better results...
  #Example of another bad record: South Street bridge, West Bridgewater, Mass. would give a place in Holyoke
  if term.split(' ').length >= 3 && term.match(/\d/).blank? && term.downcase.match(/ave\.*,/).blank? && term.downcase.match(/avenue\.*,/).blank? && term.downcase.match(/street\.*,/).blank? && term.downcase.match(/st\.*,/).blank? && term.downcase.match(/road\.*,/).blank? && term.downcase.match(/rd\.*,/).blank?
    return return_hash
  end

  # NOTE(review): API key is hardcoded and committed — it should live in
  # configuration and the exposed key ought to be rotated.
  Geocoder.configure(:lookup => :bing,:api_key => 'Avmp8UMpfYiAJOYa2D-6_cykJoprZsvvN5YLv6SDalvN-BZnW9KMlCzjIV7Zrtmn',:timeout => 7)
  bing_api_result = Geocoder.search(term)



  #Use bing first and only for United States results...
  if bing_api_result.present? && bing_api_result.first.data["address"]["countryRegion"] == 'United States'
    # A street-level hit: keep the original string and record the point.
    if bing_api_result.first.data["address"]["addressLine"].present?
      return_hash[:keep_original_string] = true
      return_hash[:coordinates] = bing_api_result.first.data["geocodePoints"].first["coordinates"].first.to_s + ',' + bing_api_result.first.data["geocodePoints"].first["coordinates"].last.to_s
    end

    return_hash[:country_part] = bing_api_result.first.data["address"]["countryRegion"]

    # Expand the two-letter state abbreviation for US results.
    if return_hash[:country_part] == 'United States'
      return_hash[:state_part] = Bplmodels::Constants::STATE_ABBR[bing_api_result.first.data["address"]["adminDistrict"]]
    else
      return_hash[:state_part] = bing_api_result.first.data["address"]["adminDistrict"]
    end

    return_hash[:city_part] = bing_api_result.first.data["address"]["locality"]
  end

  return return_hash
end
|
457
|
+
|
458
|
+
#Mapquest allows unlimited requests - start here?
|
459
|
+
#Mapquest allows unlimited requests - start here?
#
# Geocodes a place string via the MapQuest API (tried first in
# tgn_id_from_term because it allows unlimited requests).
#
# Intentionally returns an empty hash early for inputs MapQuest handles
# poorly: anything containing "Manchester", and multi-word strings that
# don't look like street addresses (left for Bing/Google).
#
# Returns a hash that may contain :keep_original_string, :coordinates
# ("lat,lng"), :country_part, :state_part, :city_part; empty when skipped
# or when no result came back.
def self.parse_mapquest_api(term)
  return_hash = {}

  #Mapquest returns bad data for: Manchester, Mass.
  if term.include?('Manchester')
    return return_hash
  end

  #Messed up with just neighborhoods. Example: Hyde Park (Boston, Mass.)
  #So if not a street address, pass to have google handle it for better results...
  if term.split(' ').length >= 3 && term.match(/\d/).blank? && term.downcase.match(/ave\.*,/).blank? && term.downcase.match(/avenue\.*,/).blank? && term.downcase.match(/street\.*,/).blank? && term.downcase.match(/st\.*,/).blank? && term.downcase.match(/road\.*,/).blank? && term.downcase.match(/rd\.*,/).blank?
    return return_hash
  end

  # NOTE(review): hardcoded, committed API key — move into configuration.
  Geocoder.configure(:lookup => :mapquest,:api_key => 'Fmjtd%7Cluubn1utn0%2Ca2%3Do5-90b00a',:timeout => 7)

  mapquest_api_result = Geocoder.search(term)


  #If this call returned a result...
  if mapquest_api_result.present?

    # Street-level hit: keep the original string and record the point.
    if mapquest_api_result.first.data["street"].present?
      return_hash[:keep_original_string] = true
      return_hash[:coordinates] = mapquest_api_result.first.data['latLng']['lat'].to_s + ',' + mapquest_api_result.first.data['latLng']['lng'].to_s
    end

    # adminArea1 holds a country code; Country expands it to a full name.
    return_hash[:country_part] = Country.new(mapquest_api_result.first.data["adminArea1"]).name

    if return_hash[:country_part] == 'United States'
      return_hash[:state_part] = Bplmodels::Constants::STATE_ABBR[mapquest_api_result.first.data["adminArea3"]]
      # DC comes back in adminArea4 rather than as a state abbreviation.
      if mapquest_api_result.first.data["adminArea4"] == 'District of Columbia'
        return_hash[:state_part] = mapquest_api_result.first.data["adminArea4"]
      end
    else
      return_hash[:state_part] = mapquest_api_result.first.data["adminArea3"].gsub(' province', '')
    end

    return_hash[:city_part] = mapquest_api_result.first.data["adminArea5"]
  end

  return return_hash
end
|
502
|
+
|
503
|
+
#Final fallback is google API. The best but we are limited to 2500 requests per day unless we pay the $10k a year premium account...
|
504
|
+
#Note: If google cannot find street, it will return just city/state, like for "Salem Street and Paradise Road, Swampscott, MA, 01907"
|
505
|
+
#Seems like it sets a partial_match=>true in the data section...
|
506
|
+
#Final fallback is google API. The best but we are limited to 2500 requests per day unless we pay the $10k a year premium account...
#Note: If google cannot find street, it will return just city/state, like for "Salem Street and Paradise Road, Swampscott, MA, 01907"
#Seems like it sets a partial_match=>true in the data section...
#
# Geocodes a place string via the Google Geocoding API (last-resort
# fallback). On a partial match of a non-street term, the leading segment
# is stripped and the search retried once.
#
# Returns a hash that may contain :keep_original_string, :coordinates
# ("lat,lng"), :country_part, :state_part, :city_part, :neighborhood_part;
# empty when no result.
def self.parse_google_api(term)
  return_hash = {}

  Geocoder.configure(:lookup => :google,:api_key => nil,:timeout => 7)
  google_api_result = Geocoder.search(term)

  #Check if only a partial match. To avoid errors, strip out the first part and try again...
  #Need better way to check for street endings. See: http://pe.usps.gov/text/pub28/28apc_002.htm
  if google_api_result.present?
    if google_api_result.first.data['partial_match'] && term.split(',').length > 1 && !term.downcase.include?('street') && !term.downcase.include?('st.') && !term.downcase.include?('avenue') && !term.downcase.include?('ave.') && !term.downcase.include?('court')
      term = term.split(',')[1..term.split(',').length-1].join(',').strip
      google_api_result = Geocoder.search(term)
    end
  end

  if google_api_result.present?
    #Types: street number, route, neighborhood, establishment, transit_station, bus_station
    # NOTE(review): Google's component type is 'street_number' (with an
    # underscore) — the 'street number' entry below may never match;
    # confirm against the Geocoding API docs.
    google_api_result.first.data["address_components"].each do |result|
      if (result['types'] & ['street number', 'route', 'neighborhood', 'establishment', 'transit_station', 'bus_station']).present?
        # Street/landmark-level hit: keep the original string and the point.
        return_hash[:keep_original_string] = true
        return_hash[:coordinates] = google_api_result.first.data['geometry']['location']['lat'].to_s + ',' + google_api_result.first.data['geometry']['location']['lng'].to_s
      elsif (result['types'] & ['country']).present?
        return_hash[:country_part] = result['long_name']
      elsif (result['types'] & ['administrative_area_level_1']).present?
        return_hash[:state_part] = result['long_name'].to_ascii
      elsif (result['types'] & ['locality']).present?
        return_hash[:city_part] = result['long_name']
      elsif (result['types'] & ['sublocality', 'political']).present?
        return_hash[:neighborhood_part] = result['long_name']
      end
    end

    # A surviving partial match also forces the original string to be kept.
    return_hash[:keep_original_string] ||= google_api_result.first.data['partial_match'] unless google_api_result.first.data['partial_match'].blank?
  end


  return return_hash
end
|
544
|
+
|
545
|
+
# Extracts the geographic portion of a subject heading string.
#
# Handles three separator shapes: standard LCSH '--' facets, the
# experimental ' - ' form (e.g. "Stores (retail trade) - Palmer, Mass"),
# and plain strings. Only places mentioning Massachusetts (or a handful
# of other north-eastern states) are recognized.
#
# Returns the best-guess place substring, or nil when nothing
# recognizably geographic is found (or the input is too long to be an
# address).
def self.parse_geographic_term(term)
  # Normalize the odd en-dash separator to the standard '--'.
  normalized = term.gsub('–', '--')

  #Likely too long to be an address... some fields have junk with an address string...
  return nil if normalized.length > 125

  #TODO: Use Countries gem of https://github.com/hexorx/countries
  full_state_names = ['Massachusetts', 'New Jersey', 'Wisconsin', 'New Hampshire', 'New York', 'Maine']
  short_state_hints = ['Mass', ' MA']

  result = nil

  if normalized.include?('--')
    segments = normalized.split('--')
    segments.each_with_index do |segment, idx|
      if full_state_names.any? { |state| segment.include?(state) }
        # Reverse the remaining facets so the place reads smallest-first.
        result = segments[idx..-1].reverse.join(',')
      elsif short_state_hints.any? { |hint| segment.include?(hint) }
        result = segment
      end
    end
  elsif normalized.include?(' - ')
    #Experimental... example: Palmer (Mass) - history or Stores (retail trade) - Palmer, Mass
    normalized.split(' - ').each do |segment|
      if (short_state_hints + full_state_names).any? { |hint| segment.include?(hint) }
        result = segment
      end
    end
  else
    if (short_state_hints + full_state_names).any? { |hint| normalized.include?(hint) }
      result = normalized
    end
  end

  result
end
|
590
|
+
|
591
|
+
# Cleans a geographic term before it is handed to the geocoding APIs:
# removes known junk phrases, orphaned leading punctuation, semicolons,
# and rewrites " (word)" parentheticals as comma phrases.
#
# Returns the cleaned string, or nil when the parenthesized content
# cannot be rewritten reliably.
def self.standardize_geographic_term(geo_term)
  # Boilerplate phrases that frequently pollute geographic subject strings
  # (kept in the original removal order).
  junk_patterns = ['Cranberries', 'History', 'Maps', 'State Police',
                   'Pictorial works.', /[nN]ation/, 'Asia', '(Republic)']

  cleaned = junk_patterns.reduce(geo_term) { |memo, junk| memo.gsub(junk, '') }.strip

  # Junk removal can leave orphaned leading periods/commas behind.
  cleaned = cleaned.gsub(/^[\.,]+/, '').strip

  # Semicolons act like commas in these strings.
  cleaned = cleaned.gsub(';', ',')

  #Note: the following returns junk from Bing as if these are in WI, California, Etc:
  #East Monponsett Lake (Halifax, Mass.)
  #Silver Lake (Halifax, Mass.)
  #Scarier note: Washington Park (Reading, Mass.) will always return Boston, MA in google
  if cleaned =~ /[\(\)]+/
    # Only the " (word)" shape can be rewritten safely; anything else is
    # skipped as the geocoded data would likely be unreliable.
    return nil unless cleaned =~ / \(+.*\)+/

    cleaned = cleaned.gsub(' (', ', ').gsub(')', '')
  end

  cleaned
end
|
618
|
+
|
619
|
+
|
620
|
+
|
621
|
+
# Resolves a free-text place term to a Getty TGN subject ID.
#
# Pipeline: optionally parse/standardize the term, geocode it
# (MapQuest -> Bing -> Google), classify the result as nation / state /
# region / inhabited place, then query TGNGetTermMatch (with retries)
# and keep the subject whose preferred term and parent chain match.
#
# term       - String place term.
# parse_term - when true, run the term through parse_geographic_term first.
#
# Returns the geocoder hash augmented with :tgn_id on success, an empty
# hash when the term is unusable, or nil when geocoding/matching fails.
# Raises when the TGN service keeps returning 500 after all retries.
def self.tgn_id_from_term(term,parse_term=false)
  return_hash = {}
  max_retry = 3
  sleep_time = 60 # In seconds
  retry_count = 0

  #If not a good address source, parsing is done here...
  term = parse_geographic_term(term) unless !parse_term

  term = standardize_geographic_term(term) unless term.blank?

  if term.blank?
    return return_hash
  end

  # Geocoder cascade: MapQuest first (unlimited), then Bing, then Google.
  return_hash = parse_mapquest_api(term)

  if return_hash.blank?
    return_hash = parse_bing_api(term)
  end

  if return_hash.blank?
    return_hash = parse_google_api(term)
  end

  if return_hash.blank?
    return nil
  end

  state_part = return_hash[:state_part]


  # NOTE(review): assumes COUNTRY_TGN_LOOKUP values carry numeric :tgn_id
  # entries (7012149 below is used as the United States) — confirm against
  # Bplmodels::Constants.
  country_code = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_id] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
  country_code ||= ''


  country_part = Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]][:tgn_country_name] unless Bplmodels::Constants::COUNTRY_TGN_LOOKUP[return_hash[:country_part]].blank?
  country_part ||= return_hash[:country_part]
  country_part ||= ''

  city_part = return_hash[:city_part]

  #Keep original string if three parts at least or if there is a number in the term.
  if term.split(',').length >= 3 || term.match(/\d/).present?
    return_hash[:keep_original_string] = true
  end

  top_match_term = ''
  match_term = nil

  # Pick the TGN place-type filter and the term/parent pair to match on,
  # based on which geocoded parts are available.
  if city_part.blank? && state_part.blank?
    # Limit to nations
    place_type = 81010
    top_match_term = ''
    match_term = country_part.to_ascii.downcase || term.to_ascii.downcase
  elsif state_part.present? && city_part.blank? && country_code == 7012149
    #Limit to states
    place_type = 81175
    top_match_term = country_part.to_ascii.downcase
    match_term = state_part.to_ascii.downcase
  elsif state_part.present? && city_part.blank?
    #Limit to regions
    place_type = 81165
    top_match_term = country_part.to_ascii.downcase
    match_term = state_part.to_ascii.downcase
  elsif state_part.present? && city_part.present?
    #Limited to only inhabited places at the moment...
    place_type = 83002
    top_match_term = state_part.to_ascii.downcase
    match_term = city_part.to_ascii.downcase
  else
    return nil
  end

  # Retry loop: re-query (after sleeping) while the service returns 500,
  # up to max_retry attempts.
  begin
    if retry_count > 0
      sleep(sleep_time)
    end
    retry_count = retry_count + 1

    tgn_response = Typhoeus::Request.get("http://vocabsservices.getty.edu/TGNService.asmx/TGNGetTermMatch?placetypeid=#{place_type}&nationid=#{country_code}&name=" + CGI.escape(match_term), userpwd: BPL_CONFIG_GLOBAL['getty_un'] + ':' + BPL_CONFIG_GLOBAL['getty_pw'])


  end until (tgn_response.code != 500 || retry_count == max_retry)

  unless tgn_response.code == 500
    puts 'match found!'
    # Slop mode lets the XML be walked via method calls below.
    parsed_xml = Nokogiri::Slop(tgn_response.body)

    if parsed_xml.Vocabulary.Count.text == '0'
      return nil
    end

    #If only one result, then not array. Otherwise array....
    if parsed_xml.Vocabulary.Subject.first.blank?
      subject = parsed_xml.Vocabulary.Subject

      # Compare with the "(...)" qualifier stripped, ASCII-folded and
      # downcased; the parent chain must also contain the broader term.
      current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip

      if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
        return_hash[:tgn_id] = subject.Subject_ID.text
      end
    else
      parsed_xml.Vocabulary.Subject.each do |subject|
        current_term = subject.Preferred_Term.text.gsub(/\(.*\)/, '').to_ascii.downcase.strip

        if current_term == match_term && subject.Preferred_Parent.text.to_ascii.downcase.include?("#{top_match_term}")
          return_hash[:tgn_id] = subject.Subject_ID.text
        end
      end
    end

  end

  if tgn_response.code == 500
    raise 'TGN Server appears to not be responding for Geographic query: ' + term
  end


  return return_hash
end
|
742
|
+
|
743
|
+
|
744
|
+
# Normalizes a subject string toward LCSH formatting: strips wrapping
# quotes, removes a trailing period (except after a single-capital
# initial or "etc."), converts dash variants to the LCSH '--' separator,
# tightens whitespace around separators, and capitalizes the first
# character.
#
# value - String heading (blank values allowed).
#
# Returns the normalized String ('' for blank input).
def self.LCSHize(value)

  if value.blank?
    return ''
  end

  #Remove stuff that is quoted (quotation for first and last words)..
  value = value.gsub(/^['"]/, '').gsub(/['"]$/, '').strip

  # Guard: quote stripping can consume the entire string (e.g. the input
  # was just '"'); the original then crashed assigning to value[0].
  return '' if value.empty?

  #Remove ending periods ... except when an initial or etc.
  # Bounds-checked: the original indexed value[-2] and value[-4..-1]
  # unconditionally, raising NoMethodError on nil for strings shorter
  # than 4 characters ending with a period (e.g. "ab.").
  if value[-1] == '.' && value.length > 1 && value[-2].match(/[^A-Z]/) && !(value[-4..-1] || value).match('etc.')
    value = value.slice(0..-2)
  end

  #Fix when '- -' occurs
  value = value.gsub(/-\s-/,'--')

  #Fix for "em" dashes
  value = value.gsub('—','--')

  #Fix for "en" dashes
  value = value.gsub('–','--')

  #Fix for ' - ' combinations
  value = value.gsub(' - ','--')

  #Remove white space after and before '--'
  value = value.gsub(/\s+--/,'--')
  value = value.gsub(/--\s+/,'--')

  #Ensure first character is capitalized (plain-String equivalent of the
  #previous ActiveSupport value.first.capitalize[0] expression)
  value[0] = value[0].capitalize

  #Strip whitespace and normalize encoding/HTML
  value = Bplmodels::DatastreamInputFuncs.strip_value(value)

  return value
end
|
782
|
+
|
783
|
+
|
784
|
+
# Sanitizes a single raw metadata value.
#
# value - String, Float, or Integer (blank values allowed).
#
# Returns nil for blank input, otherwise the value as a cleaned UTF-8
# string (HTML removed, entities decoded, line breaks collapsed).
def self.strip_value(value)
  if(value.blank?)
    return nil
  else
    # Numeric inputs are rendered as whole-number strings.
    # is_a?(Integer) also matches Fixnum/Bignum instances on pre-2.4
    # Rubies, whereas the original `value.class == Fixnum` raises
    # NameError once the Fixnum constant is removed (Ruby 3.2+).
    if value.is_a?(Float) || value.is_a?(Integer)
      value = value.to_i.to_s
    end

    # Make sure it is all UTF-8 and not character encodings or HTML tags and remove any cariage returns
    return utf8Encode(value)
  end
end
|
796
|
+
|
797
|
+
# Flattens a value to a clean single-line UTF-8 string: tabs, newlines
# and <br/> tags become spaces, remaining HTML is stripped, HTML
# entities are decoded, and surrounding whitespace is trimmed.
def self.utf8Encode(value)
  flattened = value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' ')
  sanitized = ActionView::Base.full_sanitizer.sanitize(flattened)
  HTMLEntities.new.decode(sanitized).strip
end
|
800
|
+
|
801
|
+
# Splits a '||'-delimited multi-value string and sanitizes each piece
# via strip_value (so blank pieces become nil).
#
# Returns an Array of sanitized pieces — or an empty string for nil
# input (historical contract kept for existing callers).
def self.split_with_nils(value)
  return "" if value.nil?

  value.split("||").map { |piece| strip_value(piece) }
end
|
813
|
+
|
814
|
+
|
815
|
+
#Problems: A . Some Name and A & R
|
816
|
+
#Problems: A . Some Name and A & R
#
# Splits a leading English article ("A ", "An ", "The ") off a title so
# it can be stored as a MODS nonSort value. The article keeps its
# trailing space; "A ." and "A &" openings are deliberately not treated
# as articles.
#
# Returns [non_sort, remainder]; non_sort is nil when the title does not
# begin with an article.
def self.getProperTitle(title)
  non_sort = nil
  remainder = title

  lead_two = title[0..1].to_s.downcase
  lead_three = title[0..2].to_s.downcase
  lead_four = title[0..3].to_s.downcase

  if lead_two == "a " && lead_three != "a ." && lead_three != "a &"
    non_sort = title[0..1]
    remainder = title[2..title.length]
  elsif lead_four == "the "
    non_sort = title[0..3]
    remainder = title[4..title.length]
  elsif lead_three == "an "
    non_sort = title[0..2]
    remainder = title[3..title.length]
  end

  [non_sort, remainder]
end
|
835
|
+
|
836
|
+
# Resolves a language name or code against the LoC iso639-2 authority.
#
# Returns {:uri, :label} when a candidate's label or trailing id segment
# exactly matches the input (case-insensitive); empty hash otherwise.
def self.parse_language(language_value)
  result = {}
  loc_authority = Qa::Authorities::Loc.new
  candidates = loc_authority.search(URI.escape(language_value), 'iso639-2')

  if candidates.present?
    wanted = language_value.downcase
    # Keep only exact matches on the label or on the id's last path segment.
    exact = candidates.select do |candidate|
      candidate['label'].downcase == wanted || candidate['id'].split('/').last.downcase == wanted
    end
    if exact.present?
      result[:uri] = exact.first['id'].gsub('info:lc', 'http://id.loc.gov')
      result[:label] = exact.first['label']
    end
  end

  result
end
|
851
|
+
|
852
|
+
# Resolves a free-text role (e.g. "photographer") against the LoC
# 'relators' authority.
#
# Returns {:uri, :label} for an exact (case-insensitive) label match;
# empty hash otherwise.
def self.parse_role(role_value)
  result = {}
  loc_authority = Qa::Authorities::Loc.new
  candidates = loc_authority.search(URI.escape(role_value), 'relators')

  if candidates.present?
    wanted = role_value.downcase
    exact = candidates.select { |candidate| candidate['label'].downcase == wanted }
    if exact.present?
      result[:uri] = exact.first['id'].gsub('info:lc', 'http://id.loc.gov')
      result[:label] = exact.first['label']
    end
  end

  result
end
|
866
|
+
|
867
|
+
# Attempts to split a role word (e.g. "Smith, John (photographer)") off a
# name string by checking candidate words against the LoC 'relators'
# authority. The last word is tried first; then the first word (only when
# the first pass found nothing).
#
# Returns a hash with :name (role removed), :uri and :label when a role
# is recognized; empty hash otherwise.
def self.parse_name_roles(name)
  return_hash = {}

  #Make sure we have at least three distinct parts of 2-letter+ words. Avoid something like: Steven C. Painter or Painter, Steven C.
  #Possible Issue: Full name of Steven Carlos Painter ?
  potential_role_check = name.match(/[\(\"\',]*\w\w+[\),\"\']* [\w\.,\d\-\"]*[\w\d][\w\d][\w\.,\d\-\"]* [\(\"\',]*\w\w+[\),\"\']*$/) || name.split(/[ ]+/).length >= 4

  if potential_role_check.present?
    authority_check = Qa::Authorities::Loc.new

    #Check the last value of the name string...
    role_value = name.match(/(?<=[\(\"\', ])\w+(?=[\),\"\']*$)/).to_s
    authority_result = authority_check.search(URI.escape(role_value), 'relators')
    if authority_result.present?

      # Only an exact (case-insensitive) label match counts as a role.
      authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
      if authority_result.present?
        #Remove the word and any other characters around it. $ means the end of the line.
        #
        return_hash[:name] = name.sub(/[\(\"\', ]*\w+[\),\"\']*$/, '').gsub(/^[ ]*:/, '').strip
        return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
        return_hash[:label] = authority_result.first["label"]
      end
    end

    #Check the first value of the name string...
    # NOTE(review): this unanchored pattern matches the FIRST word of the
    # string — presumably intended for leading roles like
    # "Photographer: John Smith"; confirm against callers.
    role_value = name.match(/\w+(?=[\),\"\']*)/).to_s
    authority_result = authority_check.search(URI.escape(role_value), 'relators')
    if authority_result.present? && return_hash.blank?

      authority_result = authority_result.select{|hash| hash['label'].downcase == role_value.downcase}
      if authority_result.present?
        #Remove the word and any other characters around it.
        return_hash[:name] = name.sub(/[\(\"\', ]*\w+[ \),\"\']*/, '').gsub(/^[ ]*:/, '').strip
        return_hash[:uri] = authority_result.first["id"].gsub('info:lc', 'http://id.loc.gov')
        return_hash[:label] = authority_result.first["label"]
      end
    end
  end

  return return_hash
end
|
909
|
+
|
910
|
+
# True when the supplied value can be interpreted as a number by
# Kernel#Float; false otherwise (including nil and non-numeric strings).
def self.is_numeric?(string)
  !!Float(string)
rescue StandardError
  false
end
|
913
|
+
|
914
|
+
# returns a well-formatted placename for display on a map
|
915
|
+
# hiergeo_hash = hash of <mods:hierarchicahlGeographic> elements
|
916
|
+
# returns a well-formatted placename for display on a map
# hiergeo_hash = hash of <mods:hierarchicalGeographic> elements
#
# Chooses the most specific available part (other > city_section > city >
# island > area, falling back to county) plus a qualifier:
# US/Canada records render as "<place>, <state abbr or province>",
# everything else as "<place>, <country>". Returns nil when no parts
# are present.
def self.render_display_placename(hiergeo_hash)
  placename = []
  case hiergeo_hash[:country]
    when 'United States','Canada'
      if hiergeo_hash[:state] || hiergeo_hash[:province]
        # Most specific part wins.
        placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence
        if placename[0].nil? && hiergeo_hash[:county]
          placename[0] = hiergeo_hash[:county] + ' (county)'
        end
        if placename[0]
          # Abbreviate the state when a specific place is shown.
          placename[1] = Constants::STATE_ABBR.key(hiergeo_hash[:state]) || hiergeo_hash[:province].presence
        else
          placename[1] = hiergeo_hash[:state].presence || hiergeo_hash[:province].presence
        end
      else
        # No state/province: fall through broader and broader parts.
        placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence || hiergeo_hash[:country].presence
      end
    else
      # Non-US/Canada: qualify the place with the country name.
      placename[0] = hiergeo_hash[:other].presence || hiergeo_hash[:city_section].presence || hiergeo_hash[:city].presence || hiergeo_hash[:island].presence || hiergeo_hash[:area].presence || hiergeo_hash[:state].presence || hiergeo_hash[:province].presence || hiergeo_hash[:region].presence || hiergeo_hash[:territory].presence
      if placename[0].nil? && hiergeo_hash[:county]
        placename[0] = hiergeo_hash[:county] + ' (county)'
      end
      placename[1] = hiergeo_hash[:country]
  end

  if !placename.blank?
    # Drop a dangling comma when one of the two slots is empty.
    placename.join(', ').gsub(/(\A,\s)|(,\s\z)/,'')
  else
    nil
  end
end
|
947
|
+
|
948
|
+
end
|
949
|
+
end
|