cul_scv_hydra 0.15.1 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/app/models/cul/scv/hydra/datastreams/mods_document.rb +27 -16
- data/config/locales/ldpd_hydra.en.yml +37 -0
- data/lib/cul_scv_hydra/indexer.rb +26 -11
- data/lib/cul_scv_hydra/om/standard_mods.rb +2 -2
- data/lib/cul_scv_hydra/solrizer/field_mapper.rb +36 -18
- data/lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb +58 -3
- data/lib/cul_scv_hydra/version.rb +1 -1
- data/lib/tasks/index.rake +8 -4
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OWFhODEzOTJhOTliYjEzMTRhZWY1MGFkMzJjZWViMWQ2ODc5YjRiMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MzRhNGVlNjkxNjE4YjRhNzM5OTg0YTk0ZmYyOTczN2YwYWI5ZTIwMg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NjI4ODgyYmE4YmQzOGVhMmZmNjBmYjg2ZTVjNzE0NmQwMDRmNjJkZWM5NjRk
|
10
|
+
OTU0YzIwNDBmOTdiOGM4OTNhOTI0NDY2M2U2YTY3ZjEzZTZhNmI2OWQ0MWMz
|
11
|
+
ZGIzZTc0ZjFmMTc0ZmZmNTNlMmRmZmIzZWQ1Mjk3MGFmNjdmMmQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZGRiMjc0NTU2ZjI5MDIwMjJjODc5ZmE2OTU2NjIyOGFlMGVhMzA3NDcwNmI4
|
14
|
+
MTdlYjdhNzA3OWNhN2E5OTE1ZDAxZGNhNjA2YTJjZWY5M2FlZjJkYjA4NjYw
|
15
|
+
MTA2MmExZmEyMDAxOWUwY2M1MGRmYzYxNjQ0OGNlMDc3OWVmYjg=
|
@@ -24,14 +24,14 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
24
24
|
t.non_sort(:path=>"nonSort", :index_as=>[])
|
25
25
|
t.main_title(:path=>"title", :index_as=>[])
|
26
26
|
}
|
27
|
-
|
27
|
+
|
28
28
|
t.title(:proxy=>[:mods, :main_title_info, :main_title], :type=>:string,
|
29
|
-
:index_as=>[:searchable, :sortable
|
29
|
+
:index_as=>[:searchable, :sortable])
|
30
30
|
t.title_display(:proxy=>[:mods, :main_title_info], :type=>:string,
|
31
31
|
:index_as=>[:displayable])
|
32
32
|
|
33
33
|
t.search_title_info(:path=>'titleInfo', :index_as=>[]){
|
34
|
-
t.search_title(:path=>'title', :index_as=>[:
|
34
|
+
t.search_title(:path=>'title', :index_as=>[:textable])
|
35
35
|
}
|
36
36
|
t.project(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Project"}, :index_as=>[]){
|
37
37
|
t.project_title_info(:path=>'titleInfo', :index_as=>[]){
|
@@ -44,14 +44,15 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
44
44
|
}
|
45
45
|
}
|
46
46
|
t.lib_project(:proxy=>[:project,:project_title_info],
|
47
|
-
:index_as=>[:displayable, :searchable, :project_facetable, :
|
47
|
+
:index_as=>[:displayable, :searchable, :project_facetable, :project_textable])
|
48
48
|
t.lib_collection(:proxy=>[:collection,:collection_title_info])
|
49
49
|
# pattern matches
|
50
50
|
t.identifier(:path=>"identifier", :attributes=>{:type=>"local"}, :type=>:string, :index_as=>[:symbol, :textable])
|
51
51
|
t.clio(:path=>"identifier", :attributes=>{:type=>"CLIO"}, :data_type=>:symbol, :index_as=>[:symbol, :textable])
|
52
52
|
t.abstract
|
53
|
-
t.subject
|
54
|
-
t.topic
|
53
|
+
t.subject(:index_as=>[:textable]){
|
54
|
+
t.topic(:index_as=>[:facetable])
|
55
|
+
t.geographic(:index_as=>[:facetable])
|
55
56
|
}
|
56
57
|
t.type_of_resource(:path=>"typeOfResource", :index_as=>[:displayable])
|
57
58
|
t.physical_description(:path=>"physicalDescription", :index_as=>[]){
|
@@ -64,13 +65,14 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
64
65
|
t.internet_media_type(:path=>"internetMediaType", :index_as=>[:displayable])
|
65
66
|
t.digital_origin(:path=>"digitalOrigin", :index_as=>[:displayable])
|
66
67
|
}
|
67
|
-
t.lib_format(:proxy=>[:physical_description, :form_nomarc], :index_as=>[:displayable, :facetable])
|
68
|
+
t.lib_format(:proxy=>[:physical_description, :form_nomarc], :index_as=>[:displayable, :facetable, :textable])
|
68
69
|
t.location(:path=>"location", :index_as=>[]){
|
69
70
|
t.repo_text(:path=>"physicalLocation",:attributes=>{:authority=>:none}, :index_as=>[])
|
70
71
|
t.lib_repo(:path=>"physicalLocation",
|
71
72
|
:attributes=>{:authority=>"marcorg"},
|
72
|
-
:index_as=>[])
|
73
|
+
:index_as=>[:textable])
|
73
74
|
t.shelf_locator(:path=>"shelfLocator", :index_as=>[:textable])
|
75
|
+
t.sublocation(:path=>"sublocation", :index_as=>[:textable])
|
74
76
|
}
|
75
77
|
t.lib_repo(:proxy=>[:location, :lib_repo], :type=>:text,
|
76
78
|
:index_as=>[:marc_code_facetable, :marc_code_displayable, :marc_code_textable])
|
@@ -80,7 +82,7 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
80
82
|
t.name_part(:path=>'namePart', :index_as=>[])
|
81
83
|
}
|
82
84
|
t.name_corporate(
|
83
|
-
:path=>'name',:attributes=>{:type=>'corporate'},
|
85
|
+
:path=>'name',:attributes=>{:type=>'corporate'},
|
84
86
|
:index_as=>[:facetable, :displayable, :searchable],
|
85
87
|
:variant_of=>{:field_base=>:lib_name}){
|
86
88
|
t.name_part(
|
@@ -103,18 +105,27 @@ class ModsDocument < ::ActiveFedora::OmDatastream
|
|
103
105
|
t.language_term(:proxy=>[:record_info, :language_of_cataloging, :language_term])
|
104
106
|
t.language_code(:proxy=>[:record_info, :language_of_cataloging, :language_code])
|
105
107
|
|
108
|
+
t.language(:index_as=>[]){
|
109
|
+
t.language_term_text(:path=>"languageTerm", :attributes=>{:authority=>'iso639-2b',:type=>'text'}, :index_as=>[:facetable, :textable])
|
110
|
+
t.language_term_code(:path=>"languageTerm", :attributes=>{:authority=>'iso639-2b',:type=>'code'}, :index_as=>[:facetable, :textable])
|
111
|
+
}
|
112
|
+
|
106
113
|
t.origin_info(:path=>"originInfo", :index_as=>[]){
|
107
|
-
t.
|
108
|
-
t.
|
109
|
-
|
110
|
-
t.
|
111
|
-
t.
|
114
|
+
t.date_issued(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:displayable, :textable])
|
115
|
+
t.date_issued_start(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:displayable, :textable])
|
116
|
+
t.date_issued_end(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:displayable, :textable])
|
117
|
+
t.date_created(:path=>"dateCreated", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:displayable, :textable])
|
118
|
+
t.date_created_start(:path=>"dateCreated", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:displayable, :textable])
|
119
|
+
t.date_created_end(:path=>"dateCreated", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:displayable, :textable])
|
120
|
+
t.date_other(:path=>"dateOther", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:displayable, :textable])
|
121
|
+
t.date_other_start(:path=>"dateOther", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:displayable, :textable])
|
122
|
+
t.date_other_end(:path=>"dateOther", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:displayable, :textable])
|
112
123
|
}
|
113
124
|
end
|
114
125
|
|
115
126
|
def self.xml_template
|
116
127
|
builder = Nokogiri::XML::Builder.new do |xml|
|
117
|
-
xml.mods(:version=>"3.4",
|
128
|
+
xml.mods(:version=>"3.4",
|
118
129
|
"xmlns"=>"http://www.loc.gov/mods/v3",
|
119
130
|
"xmlns:xlink"=>"http://www.w3.org/1999/xlink",
|
120
131
|
"xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance"){
|
@@ -154,4 +165,4 @@ end
|
|
154
165
|
end
|
155
166
|
end
|
156
167
|
end
|
157
|
-
end
|
168
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
en:
|
2
|
+
ldpd:
|
3
|
+
short:
|
4
|
+
repo:
|
5
|
+
'NNC' : 'General Collections'
|
6
|
+
'NNC-A' : 'Avery'
|
7
|
+
'NNC-ART' : 'Office of Art Properties'
|
8
|
+
'NNBa' : 'Barnard College Library'
|
9
|
+
'NNC-EA' : 'East Asian'
|
10
|
+
'NNC-L' : 'Law Library'
|
11
|
+
'NNC-M' : 'Health Sciences Library'
|
12
|
+
'NNC-MUS' : 'Music Library'
|
13
|
+
'NNC-RB' : 'RBML'
|
14
|
+
'NyNyCBL' : 'Burke Library'
|
15
|
+
'NyNyCOH' : 'CCOH'
|
16
|
+
project:
|
17
|
+
'Customer Order Collection' : 'Pres Orders'
|
18
|
+
"Children's Drawings of the Spanish Civil War (online exhibition)" : 'Spanish Civil War'
|
19
|
+
"Jewels in her crown: treasures of Columbia University Libraries special collections" : 'Jewels in her Crown'
|
20
|
+
"Russian Imperial Corps of Pages" : 'Russian Corps of Pages'
|
21
|
+
"Preserving Historic Audio Content" : 'Audio Preservation'
|
22
|
+
'Papers of John Jay' : 'John Jay Papers'
|
23
|
+
'Project Facet Mapping Test' : 'Successful Project Mapping'
|
24
|
+
'G.E.E. Lindquist Native American Photographs' : 'Lindquist Photographs'
|
25
|
+
long:
|
26
|
+
repo:
|
27
|
+
'NNC' : 'General Collections'
|
28
|
+
'NNC-A' : 'Avery Architectural & Fine Arts Library'
|
29
|
+
'NNC-ART' : 'Office of Art Properties'
|
30
|
+
'NNBa' : 'Barnard College Library'
|
31
|
+
'NNC-EA' : 'Starr East Asian'
|
32
|
+
'NNC-L' : 'Law Library'
|
33
|
+
'NNC-M' : 'Health Sciences Library'
|
34
|
+
'NNC-MUS' : 'Music Library'
|
35
|
+
'NNC-RB' : 'Rare Book and Manuscript Library'
|
36
|
+
'NyNyCBL' : 'Burke Library at Union Theological Seminary'
|
37
|
+
'NyNyCOH' : 'Columbia Center for Oral History Collections'
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Cul::Scv::Hydra::Indexer
|
2
2
|
|
3
|
-
def self.recursively_index_fedora_objects(pid,
|
3
|
+
def self.recursively_index_fedora_objects(pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
|
4
4
|
|
5
5
|
if pid.blank?
|
6
6
|
raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
|
@@ -10,26 +10,37 @@ module Cul::Scv::Hydra::Indexer
|
|
10
10
|
raise 'Could not find Fedora object with pid: ' + pid
|
11
11
|
end
|
12
12
|
|
13
|
-
if
|
14
|
-
puts 'Skipping
|
13
|
+
if pids_to_omit.present? && pids_to_omit.include?(pid)
|
14
|
+
puts 'Skipping indexing of topmost object in this set (' + pid + ') because it has been intentionally omitted...' if verbose_output
|
15
15
|
else
|
16
16
|
puts 'Indexing topmost object in this set (' + pid + ')...' if verbose_output
|
17
|
-
puts 'If this is a BagAggregator with a lot of members, this
|
17
|
+
puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output
|
18
18
|
|
19
19
|
# We found an object with the desired PID. Let's reindex it
|
20
20
|
active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
|
21
|
-
active_fedora_object.update_index
|
22
21
|
|
23
|
-
|
22
|
+
if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
|
23
|
+
puts 'Top level object was skipped because GenericResources are being skipped and it is a GenericResource.'
|
24
|
+
else
|
25
|
+
active_fedora_object.update_index
|
26
|
+
puts 'Done indexing topmost object (' + pid + '). Took ' + (Time.now - START_TIME).to_s + ' seconds' if verbose_output
|
27
|
+
end
|
28
|
+
|
24
29
|
end
|
25
30
|
|
26
|
-
puts 'Recursively retreieving and indexing all members...'
|
31
|
+
puts 'Recursively retreieving and indexing all members of ' + pid + '...'
|
27
32
|
|
28
33
|
unique_pids = Cul::Scv::Hydra::RisearchMembers.get_recursive_member_pids(pid, true)
|
29
34
|
|
30
35
|
total_number_of_members = unique_pids.length
|
31
36
|
puts 'Recursive search found ' + total_number_of_members.to_s + ' members.' if verbose_output
|
32
37
|
|
38
|
+
if pids_to_omit.present?
|
39
|
+
unique_pids = unique_pids - pids_to_omit
|
40
|
+
total_number_of_members = unique_pids.length
|
41
|
+
puts 'After checking against the list of omitted pids, the total number of objects to index will be: ' + total_number_of_members.to_s if verbose_output
|
42
|
+
end
|
43
|
+
|
33
44
|
i = 1
|
34
45
|
if total_number_of_members > 0
|
35
46
|
unique_pids.each {|pid|
|
@@ -37,16 +48,20 @@ module Cul::Scv::Hydra::Indexer
|
|
37
48
|
print 'Indexing ' + i.to_s + ' of ' + total_number_of_members.to_s + ' members (' + pid + ')...' if verbose_output
|
38
49
|
|
39
50
|
active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
|
40
|
-
active_fedora_object.update_index
|
41
51
|
|
42
|
-
|
43
|
-
|
52
|
+
if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
|
53
|
+
puts "skipped (because we're skipping GenericResources." if verbose_output
|
54
|
+
else
|
55
|
+
active_fedora_object.update_index
|
56
|
+
# Display progress
|
57
|
+
puts 'done.' if verbose_output
|
58
|
+
end
|
44
59
|
|
45
60
|
i += 1
|
46
61
|
}
|
47
62
|
end
|
48
63
|
|
49
|
-
puts 'Indexing complete! Took ' + (Time.now - START_TIME).to_s + ' seconds'
|
64
|
+
puts 'Indexing complete! Took ' + (Time.now - START_TIME).to_s + ' seconds'
|
50
65
|
|
51
66
|
end
|
52
67
|
|
@@ -24,13 +24,13 @@ module Om
|
|
24
24
|
t.type_of_resource(:path=>"typeOfResource", :index_as=>[:not_searchable])
|
25
25
|
t.physical_description(:path=>"physicalDescription", :index_as=>[:not_searchable]){
|
26
26
|
t.form_marc(:path=>"form", :attributes=>{:authority=>"marcform"}, :index_as=>[:not_searchable])
|
27
|
-
t.form_nomarc(:path=>"form[@authority !='marcform']", :index_as=>[:not_searchable, :displayable, :facetable])
|
27
|
+
t.form_nomarc(:path=>"form[@authority !='marcform']", :index_as=>[:not_searchable, :displayable, :facetable, :textable])
|
28
28
|
t.extent(:path=>"extent", :index_as=>[:not_searchable])
|
29
29
|
t.reformatting_quality(:path=>"reformattingQuality", :index_as=>[:not_searchable])
|
30
30
|
t.internet_media_type(:path=>"internetMediaType", :index_as=>[:not_searchable])
|
31
31
|
t.digital_origin(:path=>"digitalOrigin", :index_as=>[:not_searchable])
|
32
32
|
}
|
33
|
-
t.lib_format(:
|
33
|
+
t.lib_format(proxy: [:physical_description, :form_nomarc] )
|
34
34
|
t.location(:path=>"location", :index_as=>[:not_searchable]){
|
35
35
|
t.repo_text(:path=>"physicalLocation",:attributes=>{:authority=>:none}, :index_as=>[:not_searchable])
|
36
36
|
t.repo_code(:path=>"physicalLocation",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
|
@@ -14,6 +14,10 @@ module Solrizer::DefaultDescriptors
|
|
14
14
|
@project_facet_type ||= ProjectFacetDescriptor.new(:string, :indexed, :multivalued)
|
15
15
|
end
|
16
16
|
|
17
|
+
def self.project_textable
|
18
|
+
@project_textable_type ||= ProjectTextableDescriptor.new(:text_en, :indexed, :multivalued)
|
19
|
+
end
|
20
|
+
|
17
21
|
# Produces _sim suffix and a value-mapping converter
|
18
22
|
def self.marc_code_facetable
|
19
23
|
@marc_code_facet_type ||= MarcCodeFacetDescriptor.new(:string, :indexed, :multivalued)
|
@@ -66,40 +70,61 @@ module Solrizer::DefaultDescriptors
|
|
66
70
|
end
|
67
71
|
|
68
72
|
module Normal
|
73
|
+
def normal(value)
|
74
|
+
normal!(value.clone)
|
75
|
+
end
|
69
76
|
def normal!(value)
|
70
77
|
value.gsub!(/\s+/,' ')
|
71
78
|
value.strip!
|
72
|
-
|
79
|
+
value
|
73
80
|
end
|
74
81
|
end
|
75
82
|
|
76
83
|
class TextableDescriptor < Solrizer::Descriptor
|
84
|
+
include Normal
|
77
85
|
def name_and_converter(field_name, args=nil)
|
78
86
|
super('all_text', args)
|
79
87
|
end
|
88
|
+
def converter(field_type)
|
89
|
+
lambda {|value| normal(value)}
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
class ProjectTextableDescriptor < Solrizer::Descriptor
|
94
|
+
include Normal
|
95
|
+
def name_and_converter(field_name, args=nil)
|
96
|
+
super('all_text', args)
|
97
|
+
end
|
98
|
+
def converter(field_type)
|
99
|
+
lambda do |value|
|
100
|
+
if value.is_a? String
|
101
|
+
I18n.t("ldpd.short.project.#{normal!(value)}")
|
102
|
+
else
|
103
|
+
raise "unexpected project_textable #{value.inspect}"
|
104
|
+
value
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
80
108
|
end
|
81
109
|
|
82
110
|
class ProjectFacetDescriptor < Solrizer::Descriptor
|
83
111
|
include Normal
|
84
112
|
def converter(field_type)
|
85
|
-
|
86
|
-
|
87
|
-
end
|
113
|
+
lambda {|value| I18n.t("ldpd.short.project.#{normal!(value)}")}
|
114
|
+
end
|
88
115
|
end
|
89
116
|
|
90
117
|
class MarcCodeFacetDescriptor < Solrizer::Descriptor
|
91
118
|
include Normal
|
92
119
|
def converter(field_type)
|
93
|
-
|
94
|
-
|
95
|
-
end
|
120
|
+
lambda {|value| I18n.t("ldpd.short.repo.#{normal!(value)}")}
|
121
|
+
end
|
96
122
|
end
|
97
123
|
|
98
124
|
class MarcCodeDisplayDescriptor < Solrizer::Descriptor
|
99
125
|
include Normal
|
100
126
|
def converter(field_type)
|
101
|
-
|
102
|
-
lambda {|value| (normal!(value) and map.has_key? value) ? map[value] : value}
|
127
|
+
lambda {|value| I18n.t("ldpd.long.repo.#{normal!(value)}")}
|
103
128
|
end
|
104
129
|
end
|
105
130
|
|
@@ -109,13 +134,11 @@ module Solrizer::DefaultDescriptors
|
|
109
134
|
super('all_text', args)
|
110
135
|
end
|
111
136
|
def converter(field_type)
|
112
|
-
fmap = Solrizer::DefaultDescriptors.value_maps[:marc_to_facet] || {}
|
113
|
-
dmap = Solrizer::DefaultDescriptors.value_maps[:marc_to_display] || {}
|
114
137
|
lambda do |value|
|
115
138
|
if value.is_a? String
|
116
139
|
normal!(value)
|
117
|
-
r = (
|
118
|
-
r <<
|
140
|
+
r = [I18n.t("ldpd.short.repo.#{normal!(value)}")]
|
141
|
+
r << I18n.t("ldpd.long.repo.#{normal!(value)}")
|
119
142
|
r.uniq!
|
120
143
|
r.join(' ')
|
121
144
|
else
|
@@ -124,9 +147,4 @@ module Solrizer::DefaultDescriptors
|
|
124
147
|
end
|
125
148
|
end
|
126
149
|
end
|
127
|
-
class MarcCodeDisplayTextableDescriptor < MarcCodeDisplayDescriptor
|
128
|
-
def name_and_converter(field_name, args=nil)
|
129
|
-
super('all_text', args)
|
130
|
-
end
|
131
|
-
end
|
132
150
|
end
|
@@ -71,9 +71,8 @@ module Cul::Scv::Hydra::Solrizer
|
|
71
71
|
|
72
72
|
def names(role_authority=nil, role=nil)
|
73
73
|
# get all the name nodes
|
74
|
-
# reject the ones that aren't type 'personal' or 'corporate'
|
75
74
|
# keep all child text except the role terms
|
76
|
-
xpath = "./mods:name
|
75
|
+
xpath = "./mods:name"
|
77
76
|
unless role_authority.nil?
|
78
77
|
xpath << "/mods:role/mods:roleTerm[@authority='#{role_authority.to_s}'"
|
79
78
|
unless role.nil?
|
@@ -130,6 +129,51 @@ module Cul::Scv::Hydra::Solrizer
|
|
130
129
|
solr_doc["lib_format_sim"] = formats
|
131
130
|
solr_doc["lib_repo_sim"] = repositories
|
132
131
|
solr_doc["lib_shelf_sim"] = shelf_locators
|
132
|
+
|
133
|
+
# Create convenient start and end date values based on one of the many possible originInfo/dateX elements.
|
134
|
+
possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
|
135
|
+
possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']
|
136
|
+
start_date = nil
|
137
|
+
end_date = nil
|
138
|
+
start_year = nil
|
139
|
+
end_year = nil
|
140
|
+
possible_start_date_fields.each{|key|
|
141
|
+
if solr_doc.has_key?(key)
|
142
|
+
start_date = solr_doc[key][0]
|
143
|
+
break
|
144
|
+
end
|
145
|
+
}
|
146
|
+
possible_end_date_fields.each{|key|
|
147
|
+
if solr_doc.has_key?(key)
|
148
|
+
end_date = solr_doc[key][0]
|
149
|
+
break
|
150
|
+
end
|
151
|
+
}
|
152
|
+
|
153
|
+
if start_date.present?
|
154
|
+
|
155
|
+
end_date = start_date if end_date.blank?
|
156
|
+
|
157
|
+
#solr_doc["lib_start_date_ss"] = start_date
|
158
|
+
#solr_doc["lib_end_date_ss"] = end_date
|
159
|
+
|
160
|
+
year_regex = /^(-?\d{1,4}).*/
|
161
|
+
|
162
|
+
start_year_match = start_date.match(year_regex)
|
163
|
+
start_year = start_year_match.captures[0] if start_year_match
|
164
|
+
start_year = zero_pad_year(start_year)
|
165
|
+
#solr_doc["lib_start_date_year_ssi"] = start_year if start_year
|
166
|
+
solr_doc["lib_start_date_year_itsi"] = start_year.to_i if start_year # TrieInt version for searches
|
167
|
+
|
168
|
+
end_year_match = end_date.match(year_regex)
|
169
|
+
end_year = end_year_match.captures[0] if end_year_match
|
170
|
+
end_year = zero_pad_year(end_year)
|
171
|
+
#solr_doc["lib_end_date_year_ssi"] = end_year if end_year
|
172
|
+
solr_doc["lib_end_date_year_itsi"] = end_year.to_i if end_year # TrieInt version for searches
|
173
|
+
|
174
|
+
solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year
|
175
|
+
end
|
176
|
+
|
133
177
|
solr_doc.each do |k, v|
|
134
178
|
if self.class.maps_field? k
|
135
179
|
solr_doc[k] = self.class.map_value(k, v)
|
@@ -138,6 +182,17 @@ module Cul::Scv::Hydra::Solrizer
|
|
138
182
|
solr_doc
|
139
183
|
end
|
140
184
|
|
185
|
+
def zero_pad_year(year)
|
186
|
+
year = year.to_s
|
187
|
+
is_negative = year.start_with?('-')
|
188
|
+
year_without_sign = (is_negative ? year[1, year.length]: year)
|
189
|
+
if year_without_sign.length < 4
|
190
|
+
year_without_sign = year_without_sign.rjust(4, '0')
|
191
|
+
end
|
192
|
+
|
193
|
+
return (is_negative ? '-' : '') + year_without_sign
|
194
|
+
end
|
195
|
+
|
141
196
|
def self.normalize(t, strip_punctuation=false)
|
142
197
|
# strip whitespace
|
143
198
|
n_t = t.dup.strip
|
@@ -159,4 +214,4 @@ module Cul::Scv::Hydra::Solrizer
|
|
159
214
|
n_t
|
160
215
|
end
|
161
216
|
end
|
162
|
-
end
|
217
|
+
end
|
data/lib/tasks/index.rake
CHANGED
@@ -11,14 +11,18 @@ namespace :cul_scv_hydra do
|
|
11
11
|
|
12
12
|
START_TIME = Time.now
|
13
13
|
|
14
|
-
#lindquist == burke_lindq == ldpd:130509
|
15
|
-
|
16
14
|
ENV["RAILS_ENV"] ||= Rails.env
|
17
15
|
pid = ENV['pid']
|
18
|
-
|
16
|
+
if ENV['omit']
|
17
|
+
pids_to_omit = ENV['omit'].split(',').map{|pid|pid.strip}
|
18
|
+
else
|
19
|
+
pids_to_omit = nil
|
20
|
+
end
|
21
|
+
|
22
|
+
skip_generic_resources = true if ENV['skip_generic_resources'] == 'true'
|
19
23
|
|
20
24
|
begin
|
21
|
-
Cul::Scv::Hydra::Indexer.recursively_index_fedora_objects(pid,
|
25
|
+
Cul::Scv::Hydra::Indexer.recursively_index_fedora_objects(pid, pids_to_omit, skip_generic_resources, true)
|
22
26
|
rescue => e
|
23
27
|
puts 'Error: ' + e.message
|
24
28
|
puts e.backtrace
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cul_scv_hydra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Armintor
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: blacklight
|
@@ -253,6 +253,7 @@ files:
|
|
253
253
|
- bin/rails
|
254
254
|
- config/fedora.yml
|
255
255
|
- config/jetty.yml
|
256
|
+
- config/locales/ldpd_hydra.en.yml
|
256
257
|
- config/predicate_mappings.yml
|
257
258
|
- config/solr_mappings.yml
|
258
259
|
- config/solr_value_maps.yml
|