cul_scv_hydra 0.15.1 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/app/models/cul/scv/hydra/datastreams/mods_document.rb +27 -16
- data/config/locales/ldpd_hydra.en.yml +37 -0
- data/lib/cul_scv_hydra/indexer.rb +26 -11
- data/lib/cul_scv_hydra/om/standard_mods.rb +2 -2
- data/lib/cul_scv_hydra/solrizer/field_mapper.rb +36 -18
- data/lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb +58 -3
- data/lib/cul_scv_hydra/version.rb +1 -1
- data/lib/tasks/index.rake +8 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OWFhODEzOTJhOTliYjEzMTRhZWY1MGFkMzJjZWViMWQ2ODc5YjRiMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MzRhNGVlNjkxNjE4YjRhNzM5OTg0YTk0ZmYyOTczN2YwYWI5ZTIwMg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NjI4ODgyYmE4YmQzOGVhMmZmNjBmYjg2ZTVjNzE0NmQwMDRmNjJkZWM5NjRk
|
10
|
+
OTU0YzIwNDBmOTdiOGM4OTNhOTI0NDY2M2U2YTY3ZjEzZTZhNmI2OWQ0MWMz
|
11
|
+
ZGIzZTc0ZjFmMTc0ZmZmNTNlMmRmZmIzZWQ1Mjk3MGFmNjdmMmQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZGRiMjc0NTU2ZjI5MDIwMjJjODc5ZmE2OTU2NjIyOGFlMGVhMzA3NDcwNmI4
|
14
|
+
MTdlYjdhNzA3OWNhN2E5OTE1ZDAxZGNhNjA2YTJjZWY5M2FlZjJkYjA4NjYw
|
15
|
+
MTA2MmExZmEyMDAxOWUwY2M1MGRmYzYxNjQ0OGNlMDc3OWVmYjg=
|
@@ -24,14 +24,14 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
24
24
|
t.non_sort(:path=>"nonSort", :index_as=>[])
|
25
25
|
t.main_title(:path=>"title", :index_as=>[])
|
26
26
|
}
|
27
|
-
|
27
|
+
|
28
28
|
t.title(:proxy=>[:mods, :main_title_info, :main_title], :type=>:string,
|
29
|
-
:index_as=>[:searchable, :sortable
|
29
|
+
:index_as=>[:searchable, :sortable])
|
30
30
|
t.title_display(:proxy=>[:mods, :main_title_info], :type=>:string,
|
31
31
|
:index_as=>[:displayable])
|
32
32
|
|
33
33
|
t.search_title_info(:path=>'titleInfo', :index_as=>[]){
|
34
|
-
t.search_title(:path=>'title', :index_as=>[:
|
34
|
+
t.search_title(:path=>'title', :index_as=>[:textable])
|
35
35
|
}
|
36
36
|
t.project(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Project"}, :index_as=>[]){
|
37
37
|
t.project_title_info(:path=>'titleInfo', :index_as=>[]){
|
@@ -44,14 +44,15 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
44
44
|
}
|
45
45
|
}
|
46
46
|
t.lib_project(:proxy=>[:project,:project_title_info],
|
47
|
-
:index_as=>[:displayable, :searchable, :project_facetable, :
|
47
|
+
:index_as=>[:displayable, :searchable, :project_facetable, :project_textable])
|
48
48
|
t.lib_collection(:proxy=>[:collection,:collection_title_info])
|
49
49
|
# pattern matches
|
50
50
|
t.identifier(:path=>"identifier", :attributes=>{:type=>"local"}, :type=>:string, :index_as=>[:symbol, :textable])
|
51
51
|
t.clio(:path=>"identifier", :attributes=>{:type=>"CLIO"}, :data_type=>:symbol, :index_as=>[:symbol, :textable])
|
52
52
|
t.abstract
|
53
|
-
t.subject
|
54
|
-
t.topic
|
53
|
+
t.subject(:index_as=>[:textable]){
|
54
|
+
t.topic(:index_as=>[:facetable])
|
55
|
+
t.geographic(:index_as=>[:facetable])
|
55
56
|
}
|
56
57
|
t.type_of_resource(:path=>"typeOfResource", :index_as=>[:displayable])
|
57
58
|
t.physical_description(:path=>"physicalDescription", :index_as=>[]){
|
@@ -64,13 +65,14 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
64
65
|
t.internet_media_type(:path=>"internetMediaType", :index_as=>[:displayable])
|
65
66
|
t.digital_origin(:path=>"digitalOrigin", :index_as=>[:displayable])
|
66
67
|
}
|
67
|
-
t.lib_format(:proxy=>[:physical_description, :form_nomarc], :index_as=>[:displayable, :facetable])
|
68
|
+
t.lib_format(:proxy=>[:physical_description, :form_nomarc], :index_as=>[:displayable, :facetable, :textable])
|
68
69
|
t.location(:path=>"location", :index_as=>[]){
|
69
70
|
t.repo_text(:path=>"physicalLocation",:attributes=>{:authority=>:none}, :index_as=>[])
|
70
71
|
t.lib_repo(:path=>"physicalLocation",
|
71
72
|
:attributes=>{:authority=>"marcorg"},
|
72
|
-
:index_as=>[])
|
73
|
+
:index_as=>[:textable])
|
73
74
|
t.shelf_locator(:path=>"shelfLocator", :index_as=>[:textable])
|
75
|
+
t.sublocation(:path=>"sublocation", :index_as=>[:textable])
|
74
76
|
}
|
75
77
|
t.lib_repo(:proxy=>[:location, :lib_repo], :type=>:text,
|
76
78
|
:index_as=>[:marc_code_facetable, :marc_code_displayable, :marc_code_textable])
|
@@ -80,7 +82,7 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
80
82
|
t.name_part(:path=>'namePart', :index_as=>[])
|
81
83
|
}
|
82
84
|
t.name_corporate(
|
83
|
-
:path=>'name',:attributes=>{:type=>'corporate'},
|
85
|
+
:path=>'name',:attributes=>{:type=>'corporate'},
|
84
86
|
:index_as=>[:facetable, :displayable, :searchable],
|
85
87
|
:variant_of=>{:field_base=>:lib_name}){
|
86
88
|
t.name_part(
|
@@ -103,18 +105,27 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
103
105
|
t.language_term(:proxy=>[:record_info, :language_of_cataloging, :language_term])
|
104
106
|
t.language_code(:proxy=>[:record_info, :language_of_cataloging, :language_code])
|
105
107
|
|
108
|
+
t.language(:index_as=>[]){
|
109
|
+
t.language_term_text(:path=>"languageTerm", :attributes=>{:authority=>'iso639-2b',:type=>'text'}, :index_as=>[:facetable, :textable])
|
110
|
+
t.language_term_code(:path=>"languageTerm", :attributes=>{:authority=>'iso639-2b',:type=>'code'}, :index_as=>[:facetable, :textable])
|
111
|
+
}
|
112
|
+
|
106
113
|
t.origin_info(:path=>"originInfo", :index_as=>[]){
|
107
|
-
t.
|
108
|
-
t.
|
109
|
-
|
110
|
-
t.
|
111
|
-
t.
|
114
|
+
t.date_issued(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:displayable, :textable])
|
115
|
+
t.date_issued_start(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:displayable, :textable])
|
116
|
+
t.date_issued_end(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:displayable, :textable])
|
117
|
+
t.date_created(:path=>"dateCreated", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:displayable, :textable])
|
118
|
+
t.date_created_start(:path=>"dateCreated", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:displayable, :textable])
|
119
|
+
t.date_created_end(:path=>"dateCreated", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:displayable, :textable])
|
120
|
+
t.date_other(:path=>"dateOther", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:displayable, :textable])
|
121
|
+
t.date_other_start(:path=>"dateOther", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:displayable, :textable])
|
122
|
+
t.date_other_end(:path=>"dateOther", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:displayable, :textable])
|
112
123
|
}
|
113
124
|
end
|
114
125
|
|
115
126
|
def self.xml_template
|
116
127
|
builder = Nokogiri::XML::Builder.new do |xml|
|
117
|
-
xml.mods(:version=>"3.4",
|
128
|
+
xml.mods(:version=>"3.4",
|
118
129
|
"xmlns"=>"http://www.loc.gov/mods/v3",
|
119
130
|
"xmlns:xlink"=>"http://www.w3.org/1999/xlink",
|
120
131
|
"xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance"){
|
@@ -154,4 +165,4 @@ end
|
|
154
165
|
end
|
155
166
|
end
|
156
167
|
end
|
157
|
-
end
|
168
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
en:
|
2
|
+
ldpd:
|
3
|
+
short:
|
4
|
+
repo:
|
5
|
+
'NNC' : 'General Collections'
|
6
|
+
'NNC-A' : 'Avery'
|
7
|
+
'NNC-ART' : 'Office of Art Properties'
|
8
|
+
'NNBa' : 'Barnard College Library'
|
9
|
+
'NNC-EA' : 'East Asian'
|
10
|
+
'NNC-L' : 'Law Library'
|
11
|
+
'NNC-M' : 'Health Sciences Library'
|
12
|
+
'NNC-MUS' : 'Music Library'
|
13
|
+
'NNC-RB' : 'RBML'
|
14
|
+
'NyNyCBL' : 'Burke Library'
|
15
|
+
'NyNyCOH' : 'CCOH'
|
16
|
+
project:
|
17
|
+
'Customer Order Collection' : 'Pres Orders'
|
18
|
+
"Children's Drawings of the Spanish Civil War (online exhibition)" : 'Spanish Civil War'
|
19
|
+
"Jewels in her crown: treasures of Columbia University Libraries special collections" : 'Jewels in her Crown'
|
20
|
+
"Russian Imperial Corps of Pages" : 'Russian Corps of Pages'
|
21
|
+
"Preserving Historic Audio Content" : 'Audio Preservation'
|
22
|
+
'Papers of John Jay' : 'John Jay Papers'
|
23
|
+
'Project Facet Mapping Test' : 'Successful Project Mapping'
|
24
|
+
'G.E.E. Lindquist Native American Photographs' : 'Lindquist Photographs'
|
25
|
+
long:
|
26
|
+
repo:
|
27
|
+
'NNC' : 'General Collections'
|
28
|
+
'NNC-A' : 'Avery Architectural & Fine Arts Library'
|
29
|
+
'NNC-ART' : 'Office of Art Properties'
|
30
|
+
'NNBa' : 'Barnard College Library'
|
31
|
+
'NNC-EA' : 'Starr East Asian'
|
32
|
+
'NNC-L' : 'Law Library'
|
33
|
+
'NNC-M' : 'Health Sciences Library'
|
34
|
+
'NNC-MUS' : 'Music Library'
|
35
|
+
'NNC-RB' : 'Rare Book and Manuscript Library'
|
36
|
+
'NyNyCBL' : 'Burke Library at Union Theological Seminary'
|
37
|
+
'NyNyCOH' : 'Columbia Center for Oral History Collections'
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Cul::Scv::Hydra::Indexer
|
2
2
|
|
3
|
-
def self.recursively_index_fedora_objects(pid,
|
3
|
+
def self.recursively_index_fedora_objects(pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
|
4
4
|
|
5
5
|
if pid.blank?
|
6
6
|
raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
|
@@ -10,26 +10,37 @@ module Cul::Scv::Hydra::Indexer
|
|
10
10
|
raise 'Could not find Fedora object with pid: ' + pid
|
11
11
|
end
|
12
12
|
|
13
|
-
if
|
14
|
-
puts 'Skipping
|
13
|
+
if pids_to_omit.present? && pids_to_omit.include?(pid)
|
14
|
+
puts 'Skipping indexing of topmost object in this set (' + pid + ') because it has been intentionally omitted...' if verbose_output
|
15
15
|
else
|
16
16
|
puts 'Indexing topmost object in this set (' + pid + ')...' if verbose_output
|
17
|
-
puts 'If this is a BagAggregator with a lot of members, this
|
17
|
+
puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output
|
18
18
|
|
19
19
|
# We found an object with the desired PID. Let's reindex it
|
20
20
|
active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
|
21
|
-
active_fedora_object.update_index
|
22
21
|
|
23
|
-
|
22
|
+
if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
|
23
|
+
puts 'Top level object was skipped because GenericResources are being skipped and it is a GenericResource.'
|
24
|
+
else
|
25
|
+
active_fedora_object.update_index
|
26
|
+
puts 'Done indexing topmost object (' + pid + '). Took ' + (Time.now - START_TIME).to_s + ' seconds' if verbose_output
|
27
|
+
end
|
28
|
+
|
24
29
|
end
|
25
30
|
|
26
|
-
puts 'Recursively retreieving and indexing all members...'
|
31
|
+
puts 'Recursively retreieving and indexing all members of ' + pid + '...'
|
27
32
|
|
28
33
|
unique_pids = Cul::Scv::Hydra::RisearchMembers.get_recursive_member_pids(pid, true)
|
29
34
|
|
30
35
|
total_number_of_members = unique_pids.length
|
31
36
|
puts 'Recursive search found ' + total_number_of_members.to_s + ' members.' if verbose_output
|
32
37
|
|
38
|
+
if pids_to_omit.present?
|
39
|
+
unique_pids = unique_pids - pids_to_omit
|
40
|
+
total_number_of_members = unique_pids.length
|
41
|
+
puts 'After checking against the list of omitted pids, the total number of objects to index will be: ' + total_number_of_members.to_s if verbose_output
|
42
|
+
end
|
43
|
+
|
33
44
|
i = 1
|
34
45
|
if total_number_of_members > 0
|
35
46
|
unique_pids.each {|pid|
|
@@ -37,16 +48,20 @@ module Cul::Scv::Hydra::Indexer
|
|
37
48
|
print 'Indexing ' + i.to_s + ' of ' + total_number_of_members.to_s + ' members (' + pid + ')...' if verbose_output
|
38
49
|
|
39
50
|
active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
|
40
|
-
active_fedora_object.update_index
|
41
51
|
|
42
|
-
|
43
|
-
|
52
|
+
if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
|
53
|
+
puts "skipped (because we're skipping GenericResources." if verbose_output
|
54
|
+
else
|
55
|
+
active_fedora_object.update_index
|
56
|
+
# Display progress
|
57
|
+
puts 'done.' if verbose_output
|
58
|
+
end
|
44
59
|
|
45
60
|
i += 1
|
46
61
|
}
|
47
62
|
end
|
48
63
|
|
49
|
-
puts 'Indexing complete! Took ' + (Time.now - START_TIME).to_s + ' seconds'
|
64
|
+
puts 'Indexing complete! Took ' + (Time.now - START_TIME).to_s + ' seconds'
|
50
65
|
|
51
66
|
end
|
52
67
|
|
@@ -24,13 +24,13 @@ module Om
|
|
24
24
|
t.type_of_resource(:path=>"typeOfResource", :index_as=>[:not_searchable])
|
25
25
|
t.physical_description(:path=>"physicalDescription", :index_as=>[:not_searchable]){
|
26
26
|
t.form_marc(:path=>"form", :attributes=>{:authority=>"marcform"}, :index_as=>[:not_searchable])
|
27
|
-
t.form_nomarc(:path=>"form[@authority !='marcform']", :index_as=>[:not_searchable, :displayable, :facetable])
|
27
|
+
t.form_nomarc(:path=>"form[@authority !='marcform']", :index_as=>[:not_searchable, :displayable, :facetable, :textable])
|
28
28
|
t.extent(:path=>"extent", :index_as=>[:not_searchable])
|
29
29
|
t.reformatting_quality(:path=>"reformattingQuality", :index_as=>[:not_searchable])
|
30
30
|
t.internet_media_type(:path=>"internetMediaType", :index_as=>[:not_searchable])
|
31
31
|
t.digital_origin(:path=>"digitalOrigin", :index_as=>[:not_searchable])
|
32
32
|
}
|
33
|
-
t.lib_format(:
|
33
|
+
t.lib_format(proxy: [:physical_description, :form_nomarc] )
|
34
34
|
t.location(:path=>"location", :index_as=>[:not_searchable]){
|
35
35
|
t.repo_text(:path=>"physicalLocation",:attributes=>{:authority=>:none}, :index_as=>[:not_searchable])
|
36
36
|
t.repo_code(:path=>"physicalLocation",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
|
@@ -14,6 +14,10 @@ module Solrizer::DefaultDescriptors
|
|
14
14
|
@project_facet_type ||= ProjectFacetDescriptor.new(:string, :indexed, :multivalued)
|
15
15
|
end
|
16
16
|
|
17
|
+
def self.project_textable
|
18
|
+
@project_textable_type ||= ProjectTextableDescriptor.new(:text_en, :indexed, :multivalued)
|
19
|
+
end
|
20
|
+
|
17
21
|
# Produces _sim suffix and a value-mapping converter
|
18
22
|
def self.marc_code_facetable
|
19
23
|
@marc_code_facet_type ||= MarcCodeFacetDescriptor.new(:string, :indexed, :multivalued)
|
@@ -66,40 +70,61 @@ module Solrizer::DefaultDescriptors
|
|
66
70
|
end
|
67
71
|
|
68
72
|
module Normal
|
73
|
+
def normal(value)
|
74
|
+
normal!(value.clone)
|
75
|
+
end
|
69
76
|
def normal!(value)
|
70
77
|
value.gsub!(/\s+/,' ')
|
71
78
|
value.strip!
|
72
|
-
|
79
|
+
value
|
73
80
|
end
|
74
81
|
end
|
75
82
|
|
76
83
|
class TextableDescriptor < Solrizer::Descriptor
|
84
|
+
include Normal
|
77
85
|
def name_and_converter(field_name, args=nil)
|
78
86
|
super('all_text', args)
|
79
87
|
end
|
88
|
+
def converter(field_type)
|
89
|
+
lambda {|value| normal(value)}
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
class ProjectTextableDescriptor < Solrizer::Descriptor
|
94
|
+
include Normal
|
95
|
+
def name_and_converter(field_name, args=nil)
|
96
|
+
super('all_text', args)
|
97
|
+
end
|
98
|
+
def converter(field_type)
|
99
|
+
lambda do |value|
|
100
|
+
if value.is_a? String
|
101
|
+
I18n.t("ldpd.short.project.#{normal!(value)}")
|
102
|
+
else
|
103
|
+
raise "unexpected project_textable #{value.inspect}"
|
104
|
+
value
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
80
108
|
end
|
81
109
|
|
82
110
|
class ProjectFacetDescriptor < Solrizer::Descriptor
|
83
111
|
include Normal
|
84
112
|
def converter(field_type)
|
85
|
-
|
86
|
-
|
87
|
-
end
|
113
|
+
lambda {|value| I18n.t("ldpd.short.project.#{normal!(value)}")}
|
114
|
+
end
|
88
115
|
end
|
89
116
|
|
90
117
|
class MarcCodeFacetDescriptor < Solrizer::Descriptor
|
91
118
|
include Normal
|
92
119
|
def converter(field_type)
|
93
|
-
|
94
|
-
|
95
|
-
end
|
120
|
+
lambda {|value| I18n.t("ldpd.short.repo.#{normal!(value)}")}
|
121
|
+
end
|
96
122
|
end
|
97
123
|
|
98
124
|
class MarcCodeDisplayDescriptor < Solrizer::Descriptor
|
99
125
|
include Normal
|
100
126
|
def converter(field_type)
|
101
|
-
|
102
|
-
lambda {|value| (normal!(value) and map.has_key? value) ? map[value] : value}
|
127
|
+
lambda {|value| I18n.t("ldpd.long.repo.#{normal!(value)}")}
|
103
128
|
end
|
104
129
|
end
|
105
130
|
|
@@ -109,13 +134,11 @@ module Solrizer::DefaultDescriptors
|
|
109
134
|
super('all_text', args)
|
110
135
|
end
|
111
136
|
def converter(field_type)
|
112
|
-
fmap = Solrizer::DefaultDescriptors.value_maps[:marc_to_facet] || {}
|
113
|
-
dmap = Solrizer::DefaultDescriptors.value_maps[:marc_to_display] || {}
|
114
137
|
lambda do |value|
|
115
138
|
if value.is_a? String
|
116
139
|
normal!(value)
|
117
|
-
r = (
|
118
|
-
r <<
|
140
|
+
r = [I18n.t("ldpd.short.repo.#{normal!(value)}")]
|
141
|
+
r << I18n.t("ldpd.long.repo.#{normal!(value)}")
|
119
142
|
r.uniq!
|
120
143
|
r.join(' ')
|
121
144
|
else
|
@@ -124,9 +147,4 @@ module Solrizer::DefaultDescriptors
|
|
124
147
|
end
|
125
148
|
end
|
126
149
|
end
|
127
|
-
class MarcCodeDisplayTextableDescriptor < MarcCodeDisplayDescriptor
|
128
|
-
def name_and_converter(field_name, args=nil)
|
129
|
-
super('all_text', args)
|
130
|
-
end
|
131
|
-
end
|
132
150
|
end
|
@@ -71,9 +71,8 @@ module Cul::Scv::Hydra::Solrizer
|
|
71
71
|
|
72
72
|
def names(role_authority=nil, role=nil)
|
73
73
|
# get all the name nodes
|
74
|
-
# reject the ones that aren't type 'personal' or 'corporate'
|
75
74
|
# keep all child text except the role terms
|
76
|
-
xpath = "./mods:name
|
75
|
+
xpath = "./mods:name"
|
77
76
|
unless role_authority.nil?
|
78
77
|
xpath << "/mods:role/mods:roleTerm[@authority='#{role_authority.to_s}'"
|
79
78
|
unless role.nil?
|
@@ -130,6 +129,51 @@ module Cul::Scv::Hydra::Solrizer
|
|
130
129
|
solr_doc["lib_format_sim"] = formats
|
131
130
|
solr_doc["lib_repo_sim"] = repositories
|
132
131
|
solr_doc["lib_shelf_sim"] = shelf_locators
|
132
|
+
|
133
|
+
# Create convenient start and end date values based on one of the many possible originInfo/dateX elements.
|
134
|
+
possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
|
135
|
+
possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']
|
136
|
+
start_date = nil
|
137
|
+
end_date = nil
|
138
|
+
start_year = nil
|
139
|
+
end_year = nil
|
140
|
+
possible_start_date_fields.each{|key|
|
141
|
+
if solr_doc.has_key?(key)
|
142
|
+
start_date = solr_doc[key][0]
|
143
|
+
break
|
144
|
+
end
|
145
|
+
}
|
146
|
+
possible_end_date_fields.each{|key|
|
147
|
+
if solr_doc.has_key?(key)
|
148
|
+
end_date = solr_doc[key][0]
|
149
|
+
break
|
150
|
+
end
|
151
|
+
}
|
152
|
+
|
153
|
+
if start_date.present?
|
154
|
+
|
155
|
+
end_date = start_date if end_date.blank?
|
156
|
+
|
157
|
+
#solr_doc["lib_start_date_ss"] = start_date
|
158
|
+
#solr_doc["lib_end_date_ss"] = end_date
|
159
|
+
|
160
|
+
year_regex = /^(-?\d{1,4}).*/
|
161
|
+
|
162
|
+
start_year_match = start_date.match(year_regex)
|
163
|
+
start_year = start_year_match.captures[0] if start_year_match
|
164
|
+
start_year = zero_pad_year(start_year)
|
165
|
+
#solr_doc["lib_start_date_year_ssi"] = start_year if start_year
|
166
|
+
solr_doc["lib_start_date_year_itsi"] = start_year.to_i if start_year # TrieInt version for searches
|
167
|
+
|
168
|
+
end_year_match = end_date.match(year_regex)
|
169
|
+
end_year = end_year_match.captures[0] if end_year_match
|
170
|
+
end_year = zero_pad_year(end_year)
|
171
|
+
#solr_doc["lib_end_date_year_ssi"] = end_year if end_year
|
172
|
+
solr_doc["lib_end_date_year_itsi"] = end_year.to_i if end_year # TrieInt version for searches
|
173
|
+
|
174
|
+
solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year
|
175
|
+
end
|
176
|
+
|
133
177
|
solr_doc.each do |k, v|
|
134
178
|
if self.class.maps_field? k
|
135
179
|
solr_doc[k] = self.class.map_value(k, v)
|
@@ -138,6 +182,17 @@ module Cul::Scv::Hydra::Solrizer
|
|
138
182
|
solr_doc
|
139
183
|
end
|
140
184
|
|
185
|
+
def zero_pad_year(year)
|
186
|
+
year = year.to_s
|
187
|
+
is_negative = year.start_with?('-')
|
188
|
+
year_without_sign = (is_negative ? year[1, year.length]: year)
|
189
|
+
if year_without_sign.length < 4
|
190
|
+
year_without_sign = year_without_sign.rjust(4, '0')
|
191
|
+
end
|
192
|
+
|
193
|
+
return (is_negative ? '-' : '') + year_without_sign
|
194
|
+
end
|
195
|
+
|
141
196
|
def self.normalize(t, strip_punctuation=false)
|
142
197
|
# strip whitespace
|
143
198
|
n_t = t.dup.strip
|
@@ -159,4 +214,4 @@ module Cul::Scv::Hydra::Solrizer
|
|
159
214
|
n_t
|
160
215
|
end
|
161
216
|
end
|
162
|
-
end
|
217
|
+
end
|
data/lib/tasks/index.rake
CHANGED
@@ -11,14 +11,18 @@ namespace :cul_scv_hydra do
|
|
11
11
|
|
12
12
|
START_TIME = Time.now
|
13
13
|
|
14
|
-
#lindquist == burke_lindq == ldpd:130509
|
15
|
-
|
16
14
|
ENV["RAILS_ENV"] ||= Rails.env
|
17
15
|
pid = ENV['pid']
|
18
|
-
|
16
|
+
if ENV['omit']
|
17
|
+
pids_to_omit = ENV['omit'].split(',').map{|pid|pid.strip}
|
18
|
+
else
|
19
|
+
pids_to_omit = nil
|
20
|
+
end
|
21
|
+
|
22
|
+
skip_generic_resources = true if ENV['skip_generic_resources'] == 'true'
|
19
23
|
|
20
24
|
begin
|
21
|
-
Cul::Scv::Hydra::Indexer.recursively_index_fedora_objects(pid,
|
25
|
+
Cul::Scv::Hydra::Indexer.recursively_index_fedora_objects(pid, pids_to_omit, skip_generic_resources, true)
|
22
26
|
rescue => e
|
23
27
|
puts 'Error: ' + e.message
|
24
28
|
puts e.backtrace
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cul_scv_hydra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Armintor
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: blacklight
|
@@ -253,6 +253,7 @@ files:
|
|
253
253
|
- bin/rails
|
254
254
|
- config/fedora.yml
|
255
255
|
- config/jetty.yml
|
256
|
+
- config/locales/ldpd_hydra.en.yml
|
256
257
|
- config/predicate_mappings.yml
|
257
258
|
- config/solr_mappings.yml
|
258
259
|
- config/solr_value_maps.yml
|