cul_scv_hydra 0.22.6 → 0.22.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/concerns/cul/hydra/controller.rb +22 -0
- data/app/controllers/concerns/cul/hydra/resolver.rb +69 -0
- data/app/controllers/concerns/cul/hydra/thumbnails.rb +62 -0
- data/app/controllers/concerns/cul/scv/hydra/controller.rb +3 -19
- data/app/controllers/concerns/cul/scv/hydra/resolver.rb +2 -65
- data/app/controllers/concerns/cul/scv/hydra/thumbnails.rb +3 -59
- data/app/models/concept.rb +1 -1
- data/app/models/concerns/cul/hydra/models.rb +24 -0
- data/app/models/concerns/cul/hydra/models/aggregator.rb +121 -0
- data/app/models/concerns/cul/hydra/models/common.rb +220 -0
- data/app/models/concerns/cul/hydra/models/image_resource.rb +78 -0
- data/app/models/concerns/cul/hydra/models/linkable_resources.rb +108 -0
- data/app/models/concerns/cul/hydra/models/resource.rb +87 -0
- data/app/models/concerns/cul/scv/hydra/models.rb +1 -13
- data/app/models/concerns/cul/scv/hydra/models/aggregator.rb +1 -116
- data/app/models/concerns/cul/scv/hydra/models/common.rb +1 -213
- data/app/models/concerns/cul/scv/hydra/models/image_resource.rb +3 -75
- data/app/models/concerns/cul/scv/hydra/models/linkable_resources.rb +3 -105
- data/app/models/concerns/cul/scv/hydra/models/resource.rb +2 -83
- data/app/models/cul/hydra/datastreams/dc_metadata.rb +107 -0
- data/app/models/cul/hydra/datastreams/mods_document.rb +195 -0
- data/app/models/cul/hydra/datastreams/struct_metadata.rb +176 -0
- data/app/models/cul/scv/hydra/datastreams/dc_metadata.rb +5 -104
- data/app/models/cul/scv/hydra/datastreams/mods_document.rb +5 -178
- data/app/models/cul/scv/hydra/datastreams/struct_metadata.rb +5 -174
- data/app/models/dc_document.rb +1 -1
- data/app/models/generic_aggregator.rb +5 -5
- data/app/models/generic_object.rb +2 -2
- data/app/models/generic_resource.rb +4 -4
- data/app/models/mets_structured_aggregator.rb +2 -2
- data/app/models/resource.rb +3 -3
- data/app/models/resource_aggregator.rb +3 -3
- data/fixtures/spec/CUL_MODS/mods-subjects.xml +24 -0
- data/lib/cul_hydra.rb +18 -0
- data/lib/cul_hydra/access_controls_enforcement.rb +53 -0
- data/lib/cul_hydra/controllers.rb +13 -0
- data/lib/cul_hydra/controllers/aggregates.rb +93 -0
- data/lib/cul_hydra/controllers/aggregator_controller_helper.rb +27 -0
- data/lib/cul_hydra/controllers/catalog.rb +12 -0
- data/lib/cul_hydra/controllers/content_aggregators.rb +81 -0
- data/lib/cul_hydra/controllers/datastreams.rb +145 -0
- data/lib/cul_hydra/controllers/helpers.rb +10 -0
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/active_fedora_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/application_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/dc_metadata_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_assets_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_autocomplete_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_uploader_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/resources_helper_behavior.rb +1 -1
- data/lib/cul_hydra/controllers/resources.rb +161 -0
- data/lib/cul_hydra/controllers/static_image_aggregators.rb +105 -0
- data/lib/cul_hydra/controllers/suggestions.rb +126 -0
- data/lib/cul_hydra/controllers/terms.rb +205 -0
- data/lib/cul_hydra/engine.rb +31 -0
- data/lib/cul_hydra/fedora.rb +41 -0
- data/lib/cul_hydra/fedora/dummy_object.rb +37 -0
- data/lib/cul_hydra/fedora/rubydora_patch.rb +16 -0
- data/lib/cul_hydra/fedora/url_helper_behavior.rb +32 -0
- data/lib/cul_hydra/indexer.rb +84 -0
- data/lib/cul_hydra/om.rb +7 -0
- data/lib/cul_hydra/om/standard_mods.rb +115 -0
- data/lib/cul_hydra/risearch_members.rb +92 -0
- data/lib/cul_hydra/solrizer.rb +10 -0
- data/lib/cul_hydra/solrizer/extractor.rb +27 -0
- data/lib/cul_hydra/solrizer/mods_fieldable.rb +435 -0
- data/lib/cul_hydra/solrizer/terminology_based_solrizer.rb +35 -0
- data/lib/cul_hydra/solrizer/value_mapper.rb +46 -0
- data/lib/{cul_scv_hydra/solrizer/field_mapper.rb → cul_hydra/solrizer_patch.rb} +0 -0
- data/lib/cul_hydra/version.rb +8 -0
- data/lib/cul_hydra/version.rb~ +8 -0
- data/lib/cul_scv_fedora/dummy_object.rb +1 -30
- data/lib/cul_scv_fedora/rubydora_patch.rb +3 -7
- data/lib/cul_scv_fedora/url_helper_behavior.rb +3 -23
- data/lib/cul_scv_hydra.rb +5 -32
- data/lib/cul_scv_hydra/access_controls_enforcement.rb +3 -50
- data/lib/cul_scv_hydra/controllers.rb +10 -10
- data/lib/cul_scv_hydra/controllers/aggregates.rb +1 -86
- data/lib/cul_scv_hydra/controllers/aggregator_controller_helper.rb +4 -23
- data/lib/cul_scv_hydra/controllers/catalog.rb +5 -9
- data/lib/cul_scv_hydra/controllers/content_aggregators.rb +4 -77
- data/lib/cul_scv_hydra/controllers/datastreams.rb +3 -140
- data/lib/cul_scv_hydra/controllers/helpers.rb +44 -8
- data/lib/cul_scv_hydra/controllers/resources.rb +4 -157
- data/lib/cul_scv_hydra/controllers/static_image_aggregators.rb +4 -100
- data/lib/cul_scv_hydra/controllers/suggestions.rb +4 -122
- data/lib/cul_scv_hydra/controllers/terms.rb +4 -201
- data/lib/cul_scv_hydra/engine.rb +1 -1
- data/lib/cul_scv_hydra/indexer.rb +3 -82
- data/lib/cul_scv_hydra/om.rb +2 -2
- data/lib/cul_scv_hydra/om/standard_mods.rb +1 -108
- data/lib/cul_scv_hydra/risearch_members.rb +4 -89
- data/lib/cul_scv_hydra/solrizer.rb +5 -6
- data/lib/cul_scv_hydra/solrizer/extractor.rb +1 -25
- data/lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb +4 -429
- data/lib/cul_scv_hydra/solrizer/terminology_based_solrizer.rb +4 -32
- data/lib/cul_scv_hydra/solrizer/value_mapper.rb +1 -44
- data/lib/cul_scv_hydra/version.rb +5 -5
- data/lib/tasks/index.rake +2 -2
- data/lib/tasks/transform.rake +23 -0
- metadata +55 -12
@@ -0,0 +1,32 @@
|
|
1
|
+
module Cul
  module Hydra
    module Fedora
      # Mixin that builds URLs for the Fedora repository REST endpoints
      # (objects, datastreams, methods and the resource index search).
      module UrlHelperBehavior

        # Base URL of the Fedora repository.
        # Read from ActiveFedora.config.credentials[:url] on first use and memoized.
        def fedora_url
          @fedora_url ||= ActiveFedora.config.credentials[:url]
        end

        # Converts a pid or object URI into the bare pid used in REST paths.
        # Removes a slash at the very start of the string and every
        # "info:fedora/" occurrence.
        #
        # @param pid [String] e.g. "info:fedora/ldpd:123" or "/ldpd:123"
        # @return [String] e.g. "ldpd:123"
        def pid_for_url(pid)
          # \A anchors to the start of the string; ^ would also match after any
          # embedded newline.
          pid.gsub(/\A\//, '').gsub(/info:fedora\//, '')
        end

        # @param pid [String] pid or "info:fedora/..." URI of the object
        # @return [String] URL of the object
        def fedora_object_url(pid)
          fedora_url + '/objects/' + pid_for_url(pid)
        end

        # @param pid [String] pid or URI of the object
        # @param dsid [String, Symbol] datastream id
        # @return [String] URL of the datastream
        def fedora_ds_url(pid, dsid)
          # Interpolation (rather than String#+) lets callers pass a Symbol dsid.
          "#{fedora_object_url(pid)}/datastreams/#{dsid}"
        end

        # @param pid [String] pid or URI of the object
        # @param method [String, Symbol] name of the method
        # @return [String] URL of the object's method endpoint
        def fedora_method_url(pid, method)
          "#{fedora_object_url(pid)}/methods/#{method}"
        end

        # @return [String] URL of the repository's resource index search endpoint
        def fedora_risearch_url
          fedora_url + '/risearch'
        end
      end
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# Helpers for (re)indexing a Fedora object together with all of its recursive members.
module Cul::Hydra::Indexer

  # Yields +pid+ itself (unless omitted) and then the pid of every member
  # returned by Cul::Hydra::RisearchMembers.get_recursive_member_pids, minus
  # any pid listed in +pids_to_omit+.
  #
  # A RestClient::Unauthorized raised anywhere during the descent is reported
  # (stdout, and +logger+ when one is defined) and ends the descent early.
  #
  # @param pid [String] pid of the topmost object
  # @param pids_to_omit [Array<String>, nil] pids that must not be yielded
  # @param verbose_output [Boolean] print progress messages when true
  # @yield [String] each pid to process
  # @raise [RuntimeError] when +pid+ is blank or no Fedora object exists for it
  def self.descend_from(pid, pids_to_omit=nil, verbose_output=false)
    if pid.blank?
      raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
    end

    begin
      unless ActiveFedora::Base.exists?(pid)
        raise 'Could not find Fedora object with pid: ' + pid
      end

      if pids_to_omit.present? && pids_to_omit.include?(pid)
        puts 'Skipping topmost object in this set (' + pid + ') because it has been intentionally omitted...' if verbose_output
      else
        puts 'Indexing topmost object in this set (' + pid + ')...' if verbose_output
        puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output

        yield pid
      end

      puts 'Recursively retrieving and indexing all members of ' + pid + '...'

      # NOTE(review): verbose is hard-coded to true here, so the query is printed
      # even when verbose_output is false - confirm whether that is intended.
      unique_pids = Cul::Hydra::RisearchMembers.get_recursive_member_pids(pid, true)
      puts 'Recursive search found ' + unique_pids.length.to_s + ' members.' if verbose_output

      if pids_to_omit.present?
        unique_pids = unique_pids - pids_to_omit
        puts 'After checking against the list of omitted pids, the total number of objects to index will be: ' + unique_pids.length.to_s if verbose_output
      end

      total_number_of_members = unique_pids.length
      unique_pids.each_with_index do |member_pid, index|
        puts 'Recursing on ' + (index + 1).to_s + ' of ' + total_number_of_members.to_s + ' members (' + member_pid + ')...' if verbose_output

        yield member_pid
      end

    rescue RestClient::Unauthorized => e
      error_message = "Skipping #{pid} due to error: " + e.message + '. Problem with Fedora object?'
      puts error_message
      logger.error error_message if defined?(logger)
    end

    puts 'Recursion complete!'
  end

  # Reindexes +top_pid+ and all of its recursive members via .index_pid.
  #
  # @param top_pid [String] pid of the topmost object
  # @param pids_to_omit [Array<String>, nil] pids to leave untouched
  # @param skip_generic_resources [Boolean] do not reindex GenericResource objects
  # @param verbose_output [Boolean] print progress messages when true
  def self.recursively_index_fedora_objects(top_pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
    descend_from(top_pid, pids_to_omit, verbose_output) do |pid|
      self.index_pid(pid, skip_generic_resources, verbose_output)
    end
  end

  # Loads the object for +pid+ (cast to its model class) and updates its index entry.
  # Ordinary errors are reported on stdout and the record is skipped.
  #
  # @param pid [String] pid of the object to reindex
  # @param skip_generic_resources [Boolean] do not reindex GenericResource objects
  # @param verbose_output [Boolean] print 'done.' after a successful update
  def self.index_pid(pid, skip_generic_resources=false, verbose_output=false)
    active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)

    if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
      puts 'Object was skipped because GenericResources are being skipped and it is a GenericResource.'
    else
      active_fedora_object.update_index
      puts 'done.' if verbose_output
    end
  rescue StandardError => e
    # Only StandardError is rescued, so interrupts (ctrl+c), exit requests,
    # termination signals and out-of-memory errors still propagate.
    puts "Encountered problem with #{pid}. Skipping record. Exception: #{e.message}"
  end
end
|
data/lib/cul_hydra/om.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'active-fedora'
|
2
|
+
require 'solrizer'
|
3
|
+
require 'cul_hydra/solrizer_patch'
|
4
|
+
module Cul
  module Hydra
    module Om
      # OM datastream describing the MODS terms used for indexing and display.
      class StandardMods < ::ActiveFedora::OmDatastream

        set_terminology do |t|
          t.root(:path=>"mods",
                 :xmlns=>"http://www.loc.gov/mods/v3",
                 :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-4.xsd")
          t.identifier(:path=>"identifier", :attributes=>{:type=>"local"}, :data_type=>:symbol)
          t.clio(:path=>"identifier", :attributes=>{:type=>"CLIO"}, :data_type=>:symbol)
          t.title_info(:path=>"titleInfo", :index_as=>[:not_searchable]) {
            t.main_title(:path=>"title", :index_as=>[:not_searchable])
          }
          t.title(:path=>'mods/oxns:titleInfo/oxns:title', :index_as=>[:searchable,:displayable, :sortable])
          t.abstract
          t.subject {
            t.topic
          }
          t.type_of_resource(:path=>"typeOfResource", :index_as=>[:not_searchable])
          t.physical_description(:path=>"physicalDescription", :index_as=>[:not_searchable]){
            t.form_marc(:path=>"form", :attributes=>{:authority=>"marcform"}, :index_as=>[:not_searchable])
            t.form_nomarc(:path=>"form[@authority !='marcform']", :index_as=>[:not_searchable, :displayable, :facetable, :textable])
            t.extent(:path=>"extent", :index_as=>[:not_searchable])
            t.reformatting_quality(:path=>"reformattingQuality", :index_as=>[:not_searchable])
            t.internet_media_type(:path=>"internetMediaType", :index_as=>[:not_searchable])
            t.digital_origin(:path=>"digitalOrigin", :index_as=>[:not_searchable])
          }
          t.lib_format(proxy: [:physical_description, :form_nomarc] )
          t.location(:path=>"location", :index_as=>[:not_searchable]){
            t.repo_text(:path=>"physicalLocation",:attributes=>{:authority=>:none}, :index_as=>[:not_searchable])
            t.repo_code(:path=>"physicalLocation",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
          }
          t.lib_repo_text(:ref=>[:location, :repo_text], :label=>"lib_repo", :index_as=>[:searchable])
          t.lib_repo(:ref=>[:location, :repo_code], :index_as=>[:not_searchable,:facetable, :displayable])
          t.project_host(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Project"}, :index_as=>[:not_searchable]){
            t.p_title(:path=>'titleInfo',:index_as=>[:not_searchable])
          }
          # NOTE(review): lib_project is declared twice (here as a proxy and again
          # below with an explicit path) - confirm which definition is intended.
          t.lib_project(:proxy=>[:project_host, :p_title],:index_as=>[:facetable,:displayable, :not_searchable])
          t.collection_host(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Collection"}, :index_as=>[:not_searchable]){
            t.c_title(:path=>'titleInfo',:index_as=>[:not_searchable])
          }
          t.lib_project(:path=>"mods/oxns:relatedItem[@type='host'][@displayLabel='Project']/oxns:titleInfo/oxns:title",:index_as=>[:facetable,:displayable, :not_searchable])
          t.lib_collection(:path=>"mods/oxns:relatedItem[@type='host'][@displayLabel='Collection']/oxns:titleInfo/oxns:title",:index_as=>[:facetable,:displayable, :not_searchable])
          t.note(:path=>"note")
          t.access_condition(:path=>"accessCondition", :attributes=>{:type=>"useAndReproduction"}, :index_as => [:searchable], :data_type => :symbol)
          t.record_info(:path=>"recordInfo", :index_as=>[:not_searchable]) {
            t.record_creation_date(:path=>"recordCreationDate",:attributes=>{:encoding=>"w3cdtf"}, :index_as=>[:not_searchable])
            t.record_content_source(:path=>"recordContentSource",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
            t.language_of_cataloging(:path=>"languageOfCataloging", :index_as=>[:not_searchable]){
              t.language_term(:path=>"languageTerm", :index_as=>[:not_searchable], :attributes=>{:type=>:none})
              t.language_code(:path=>"languageTerm",:attributes=>{:type=>'code',:authority=>"iso639-2b"}, :index_as=>[:not_searchable])
            }
            t.record_origin(:path=>"recordOrigin", :index_as=>[:not_searchable])
          }

          t.origin_info(:path=>"originInfo", :index_as=>[:not_searchable]){
            t.date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf'}, :index_as=>[:not_searchable])
            t.key_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:not_searchable])
            t.start_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:not_searchable])
            t.end_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:not_searchable])
          }
        end

        # Builds the empty MODS 3.4 document used as the template for new datastreams.
        #
        # @return [Nokogiri::XML::Document] UTF-8 document whose root <mods>
        #   element carries the MODS namespace and schema location
        def self.xml_template
          builder = Nokogiri::XML::Builder.new do |xml|
            xml.mods(:version=>"3.4",
                     "xmlns"=>"http://www.loc.gov/mods/v3",
                     "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance"){
            }
          end
          builder.doc.encoding = 'UTF-8'
          builder.doc.root["xsi:schemaLocation"] = 'http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd'
          return builder.doc
        end

        # Always returns an empty string.
        def prefix
          #if ::ActiveFedora::VERSION >= '8'
          #  Rails.logger.warn("the prefix method of #{self.class.name} was overriden to maintain backwards compatibility")
          #end
          ''
        end

        # Dynamic term accessors:
        # * +term+  - result of find_by_terms(:term, *args) when the terminology
        #   has that term, otherwise nil
        # * +term?+ - true when that lookup returns a non-empty result
        # If anything in the lookup raises, the call falls through to the
        # superclass implementation.
        def method_missing(method, *args)
          name = method.id2name
          query = name.end_with?('?')
          name = name.chomp('?') if query
          term = name.to_sym
          begin
            found = self.class.terminology.has_term?(term) ? find_by_terms(term, *args) : nil
            return found unless query
            return !(found.nil? || found.size == 0)
          rescue
            super
          end
        end

        # Keeps respond_to? consistent with #method_missing for names (with or
        # without a trailing '?') that resolve to a term in the terminology.
        def respond_to_missing?(method, include_private = false)
          term = method.to_s.chomp('?').to_sym
          self.class.terminology.has_term?(term) || super
        end

        # Delegates to the superclass and then marks the datastream as dirty.
        def update_values(params)
          super
          self.dirty = true
        end
      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# iTQL queries against the Fedora resource index for membership
# (CUL memberOf) and project-constituent (isConstituentOf) relationships.
module Cul::Hydra::RisearchMembers
  module ClassMethods

    # Pids of every object that is a direct or indirect member of +pid+.
    #
    # @param pid [String] pid of the aggregating object
    # @param verbose_output [Boolean] print the query before running it
    # @param cmodel_type [String] 'all', or the name of an ldpd content model
    #   (appended to "info:fedora/ldpd:") that the members must have
    # @return [Array<String>] unique member pids without the "info:fedora/" prefix
    def get_recursive_member_pids(pid, verbose_output=false, cmodel_type='all')
      recursive_member_query = [
        'select $child $parent from <#ri>',
        'where',
        'walk($child <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '> and $child <http://purl.oclc.org/NET/CUL/memberOf> $parent)'
      ].join("\n")

      unless cmodel_type == 'all'
        recursive_member_query += ' and $child <fedora-model:hasModel> $cmodel'
        recursive_member_query += ' and $cmodel <mulgara:is> <info:fedora/ldpd:' + cmodel_type + '>'
      end

      results = run_tuple_query(recursive_member_query, 'json', verbose_output)
      results.map { |result| result['child'].gsub('info:fedora/', '') }.uniq
    end

    # Raw result rows for the direct members of +pid+.
    #
    # @param format [String] resource index result format ('json' or 'count/json')
    # @return [Array<Hash>] the 'results' entry of the parsed response
    def get_direct_member_results(pid, verbose_output=false, format='json')
      direct_member_query = [
        'select $pid from <#ri>',
        'where $pid <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '>'
      ].join("\n")

      run_tuple_query(direct_member_query, format, verbose_output)
    end

    # @return [Array<String>] unique pids of the direct members of +pid+
    def get_direct_member_pids(pid, verbose_output=false)
      rows = get_direct_member_results(pid, verbose_output, 'json')
      rows.map { |result| result['pid'].gsub('info:fedora/', '') }.uniq
    end

    # @return [Integer] number of direct members of +pid+ (0 when no count row is returned)
    def get_direct_member_count(pid, verbose_output=false)
      count = get_direct_member_results(pid, verbose_output, 'count/json')
      count.blank? ? 0 : count[0]['count'].to_i
    end

    # Project constituents

    # Raw result rows for the objects that are constituents of +pid+.
    #
    # @param format [String] resource index result format ('json' or 'count/json')
    # @return [Array<Hash>] the 'results' entry of the parsed response
    def get_project_constituent_results(pid, verbose_output=false, format='json')
      project_constituent_query = [
        'select $pid from <#ri>',
        'where $pid <info:fedora/fedora-system:def/relations-external#isConstituentOf> <fedora:' + pid + '>'
      ].join("\n")

      run_tuple_query(project_constituent_query, format, verbose_output)
    end

    # @return [Array<String>] unique pids of the constituents of +pid+
    def get_project_constituent_pids(pid, verbose_output=false)
      rows = get_project_constituent_results(pid, verbose_output, 'json')
      rows.map { |result| result['pid'].gsub('info:fedora/', '') }.uniq
    end

    # @return [Integer] number of constituents of +pid+ (0 when no count row is returned)
    def get_project_constituent_count(pid, verbose_output=false)
      count = get_project_constituent_results(pid, verbose_output, 'count/json')
      count.blank? ? 0 : count[0]['count'].to_i
    end

    private

    # Runs +query+ as an unlimited, streamed iTQL tuple query and returns the
    # 'results' entry of the JSON-parsed response.
    def run_tuple_query(query, format, verbose_output)
      if verbose_output
        puts 'Performing query:'
        puts query
      end

      search_response = JSON(Cul::Hydra::Fedora.repository.find_by_itql(query, {
        :type => 'tuples',
        :format => format,
        :limit => '',
        :stream => 'on'
      }))

      search_response['results']
    end
  end
  extend ClassMethods
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module Cul
  module Hydra
    # Namespace for the Solr indexing helpers. Each constant below is
    # registered for autoloading from the listed file.
    module Solrizer
      {
        :Extractor => "cul_hydra/solrizer/extractor",
        :TerminologyBasedSolrizer => "cul_hydra/solrizer/terminology_based_solrizer",
        :ValueMapper => "cul_hydra/solrizer/value_mapper",
        :ModsFieldable => "cul_hydra/solrizer/mods_fieldable"
      }.each do |constant_name, path|
        autoload constant_name, path
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Cul::Hydra::Solrizer
  class Extractor < ::Solrizer::Extractor
    # Insert +field_value+ for +field_name+ into +solr_doc+.
    # Existing values are never overwritten: the formatted value is appended
    # to the array stored under +field_name+, and the array is created when
    # the key is missing. A non-Array value already stored under the key is
    # wrapped in an array first (nil becomes an empty array), so the new value
    # is added as a separate entry rather than concatenated onto a String.
    # Values are run through format_node_value.
    #
    # @param [Hash] solr_doc
    # @param [String] field_name
    # @param [String] field_value
    # @param [boolean] unique when true, a value already present is not added again
    # @return [Hash] the same +solr_doc+, modified in place
    def self.insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
      formatted_value = self.format_node_value(field_value)
      if solr_doc.has_key?(field_name)
        values = solr_doc[field_name]
        unless values.is_a?(Array)
          values = values.nil? ? [] : [values]
          solr_doc[field_name] = values
        end
        values << formatted_value unless unique && values.include?(formatted_value)
      else
        solr_doc[field_name] = [formatted_value]
      end
      solr_doc
    end

    # Instance Methods

    # Instance-level convenience wrapper around Extractor.insert_solr_field_value.
    def insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
      Cul::Hydra::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value, unique)
    end
  end
end
|
@@ -0,0 +1,435 @@
|
|
1
|
+
module Cul::Hydra::Solrizer
|
2
|
+
module ModsFieldable
|
3
|
+
extend ActiveSupport::Concern
|
4
|
+
include Solrizer::DefaultDescriptors::Normal
|
5
|
+
|
6
|
+
MODS_NS = {'mods'=>'http://www.loc.gov/mods/v3'}
|
7
|
+
|
8
|
+
module ClassMethods
  # Patterns matching a value that is wrapped entirely in one pair of
  # delimiters; #normalize applies them in this order and keeps the captured
  # inner text.
  PAIRED_PUNCTUATION = [
    /^\((.*)\)$/,
    /^\{(.*)\}$/,
    /^\[(.*)\]$/,
    /^"(.*)"$/,
    /^'(.*)'$/,
    /^<(.*)>$/
  ].freeze

  # Memoized ValueMapper. +maps+ is only used by the call that first builds
  # the mapper; later calls return the existing instance.
  def value_mapper(maps=nil)
    @value_mapper ||= ValueMapper.new(maps)
  end

  # Delegates to ValueMapper#map_field.
  def map_field(field_key, map_key)
    value_mapper.map_field(field_key, map_key)
  end

  # Delegates to ValueMapper#map_value.
  def map_value(field_key, value_key)
    value_mapper.map_value(field_key, value_key)
  end

  # Delegates to ValueMapper#maps_field?.
  def maps_field?(field_key)
    value_mapper.maps_field? field_key
  end

  # Cleans up a text value for indexing.
  #
  # Always strips surrounding whitespace and collapses every run of internal
  # whitespace to one space. With +strip_punctuation+ it also unwraps one
  # level of each paired delimiter ((), {}, [], "", '', <>) when the pair
  # encloses the whole value, then removes any leading punctuation.
  #
  # @param t [String, nil] text to normalize
  # @param strip_punctuation [Boolean]
  # @return [String, nil] a new normalized string; nil when +t+ is nil
  def normalize(t, strip_punctuation=false)
    # Callers may pass the result of a lookup that found nothing.
    return nil if t.nil?

    n_t = t.strip.gsub(/\s+/, ' ')
    if strip_punctuation
      PAIRED_PUNCTUATION.each { |pattern| n_t = n_t.sub(pattern, "\\1") }
      # Removing delimiters or leading punctuation can expose new
      # leading/trailing space, so strip once more.
      n_t = n_t.sub(/^[[:punct:]]+/, '').strip
    end
    n_t
  end
end
|
45
|
+
|
46
|
+
extend ClassMethods
|
47
|
+
|
48
|
+
# First node matching /mods:mods in this datastream's XML (ng_xml).
def mods
  root_nodes = ng_xml.xpath('/mods:mods', MODS_NS)
  root_nodes.first
end
|
51
|
+
|
52
|
+
# Normalized titles of the host relatedItems labelled 'Project'.
# Items without an untyped titleInfo (main_title returns nil) are skipped.
#
# @return [Array<String>]
def projects
  project_nodes = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS)
  project_nodes.map { |p_node| main_title(p_node) }.compact.map do |title|
    ModsFieldable.normalize(title, true)
  end
end
|
57
|
+
|
58
|
+
# Normalized titles of the host relatedItems labelled 'Collection'.
# Items without an untyped titleInfo (main_title returns nil) are skipped.
#
# @return [Array<String>]
def collections
  collection_nodes = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS)
  collection_nodes.map { |c_node| main_title(c_node) }.compact.map do |title|
    ModsFieldable.normalize(title, true)
  end
end
|
63
|
+
|
64
|
+
# Sortable form of the main title: the text of every child of the first
# untyped titleInfo except nonSort elements, concatenated and normalized with
# punctuation stripping.
#
# @param node context node; defaults to the mods root
# @return [String, nil] nil when the resulting text is empty
def sort_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  sortable_parts =
    if title_info
      title_info.children.reject { |child| child.name == 'nonSort' }.map { |child| child.text }
    else
      []
    end
  sortable = ModsFieldable.normalize(sortable_parts.join, true)
  sortable.empty? ? nil : sortable
end
|
77
|
+
|
78
|
+
# Normalized text of the first untyped titleInfo under +node+.
#
# @param node context node; defaults to the mods root
# @return [String, nil] nil when there is no untyped titleInfo
def main_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  return nil unless title_info
  ModsFieldable.normalize(title_info.text)
end
|
87
|
+
|
88
|
+
# All titles of +node+ without descending into relatedItems.
# For now this is the main title followed by the selected alternative titles.
#
# @param node context node; defaults to the mods root
# @return [Array<String>] always an array, possibly empty
def titles(node=mods)
  all_titles = []
  # Look the titles up on the node that was passed in, once each.
  primary = main_title(node)
  all_titles << primary unless primary.nil?
  alternates = alternative_titles(node)
  all_titles += alternates unless alternates.nil?
  all_titles
end
|
95
|
+
|
96
|
+
# Normalized text of every titleInfo under +node+ whose type is alternative,
# abbreviated, translated or uniform.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def alternative_titles(node=mods)
  typed_titles = node.xpath('./mods:titleInfo[@type and (@type="alternative" or @type="abbreviated" or @type="translated" or @type="uniform")]', MODS_NS)
  typed_titles.map { |title_info| ModsFieldable.normalize(title_info.text) }
end
|
101
|
+
|
102
|
+
# Normalized names taken from the mods:name elements directly under the mods
# root. For each name the text of its namePart children is joined with
# spaces; role terms are not included.
#
# Note: subject names are intentionally not included in name extraction.
# See: https://issues.cul.columbia.edu/browse/DCV-231 and https://issues.cul.columbia.edu/browse/SCV-102
#
# @param role_authority [#to_s, nil] when given, only names having a roleTerm
#   with this authority are returned
# @param role [#to_s, nil] when given together with +role_authority+, the
#   roleTerm text must also equal this value (compared after whitespace
#   normalization); ignored when +role_authority+ is nil
# @return [Array<String>]
def names(role_authority=nil, role=nil)
  # Renders +value+ as an XPath string literal, choosing the quote style (or
  # concat()) so that quotes inside the value cannot break the expression.
  xpath_literal = lambda do |value|
    if !value.include?("'")
      "'#{value}'"
    elsif !value.include?('"')
      "\"#{value}\""
    else
      "concat('" + value.split("'", -1).join("', \"'\", '") + "')"
    end
  end

  xpath = "./mods:name"
  unless role_authority.nil?
    predicate = "@authority=#{xpath_literal.call(role_authority.to_s)}"
    predicate += " and normalize-space(text()) = #{xpath_literal.call(role.to_s.strip)}" unless role.nil?
    xpath = "#{xpath}/mods:role/mods:roleTerm[#{predicate}]/ancestor::mods:name"
  end

  mods.xpath(xpath, MODS_NS).map do |name_node|
    base_text = name_node.xpath('./mods:namePart', MODS_NS).map { |part| part.text }.join(' ')
    ModsFieldable.normalize(base_text, true)
  end
end
|
128
|
+
|
129
|
+
# Placeholder with an empty body: currently always returns nil.
#
# @param node context node; defaults to the mods root (unused)
def dates(node=mods)
  # Intended result: all the dateIssued with keyDate = 'yes', but not point = 'end'
end
|
132
|
+
|
133
|
+
# Normalized text of every physicalDescription/form whose authority
# attribute is present and not 'marcform'.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def formats(node=mods)
  form_nodes = node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS)
  form_nodes.map { |form| ModsFieldable.normalize(form.text) }
end
|
139
|
+
|
140
|
+
# Normalized text of the first location/physicalLocation with
# authority = 'marcorg'.
#
# @param node context node; defaults to the mods root
# @return [String, nil] nil when no such element exists
def repository_code(node=mods)
  code_node = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS).first
  return nil unless code_node
  ModsFieldable.normalize(code_node.text)
end
|
150
|
+
|
151
|
+
# Normalized text of the first location/physicalLocation that has no
# authority attribute.
#
# @param node context node; defaults to the mods root
# @return [String, nil] nil when no such element exists
def repository_text(node=mods)
  text_node = node.xpath("./mods:location/mods:physicalLocation[not(@authority)]", MODS_NS).first
  return nil unless text_node
  ModsFieldable.normalize(text_node.text)
end
|
161
|
+
|
162
|
+
# Translates a repository MARC organization code through the table selected
# by +type+, with 'Non-Columbia Location' as the default passed to
# translate_with_default.
#
# @param code [String] repository code (passed through as given, not normalized)
# @param type [String] 'short', 'long' or 'full'
# @return the result of translate_with_default, or nil for any other +type+
def translate_repo_marc_code(code, type)
  fallback = 'Non-Columbia Location'

  return translate_with_default(SHORT_REPO, code, fallback) if type == 'short'
  return translate_with_default(LONG_REPO, code, fallback) if type == 'long'
  return translate_with_default(FULL_REPO, code, fallback) if type == 'full'

  nil
end
|
175
|
+
|
176
|
+
# Translates a project title through the table selected by +type+; the
# normalized title is both the lookup key and the default passed to
# translate_with_default.
#
# @param project_title [String]
# @param type [String] 'short' or 'full'
# @return the result of translate_with_default, or nil for any other +type+
def translate_project_title(project_title, type)
  title_key = ModsFieldable.normalize(project_title)

  return translate_with_default(SHORT_PROJ, title_key, title_key) if type == 'short'
  return translate_with_default(FULL_PROJ, title_key, title_key) if type == 'full'

  nil
end
|
187
|
+
|
188
|
+
# Normalized (punctuation-stripped) text of every location/shelfLocator.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def shelf_locators(node=mods)
  locator_nodes = node.xpath("./mods:location/mods:shelfLocator", MODS_NS)
  locator_nodes.map { |locator| ModsFieldable.normalize(locator.text, true) }
end
|
193
|
+
|
194
|
+
# Normalized text of the originInfo dateCreated, dateIssued and dateOther
# elements (in that order) that carry no keyDate, point or w3cdtf attribute.
#
# NOTE(review): the filter tests an attribute literally named "w3cdtf";
# elsewhere W3CDTF is expressed as encoding="w3cdtf" - confirm whether
# not(@encoding='w3cdtf') was intended.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def textual_dates(node=mods)
  %w[dateCreated dateIssued dateOther].flat_map do |element|
    node.xpath("./mods:originInfo/mods:#{element}[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).map do |date_node|
      ModsFieldable.normalize(date_node.text, true)
    end
  end
end
|
207
|
+
|
208
|
+
# Renders a year range as display text.
#
# Both years go through to_i, which also removes zero padding. Equal years
# give the plain year; otherwise the text is "Between <start> and <end>",
# where non-positive years are shown without their sign plus ' BCE', and a
# positive end year gets ' CE' when the start year is not positive.
#
# NOTE(review): year 0 renders as ' BCE' with no number, and equal negative
# years keep their minus sign (e.g. ['-50']) - confirm whether intended.
#
# @param start_year [#to_i]
# @param end_year [#to_i]
# @return [Array<String>] single-element array
def date_range_to_textual_date(start_year, end_year)
  first_year = start_year.to_i
  last_year = end_year.to_i
  return [first_year.to_s] if first_year == last_year

  # Drops the first character (the minus sign of a negative year).
  without_sign = lambda { |year| year.to_s[1..-1] }

  start_label = first_year > 0 ? first_year.to_s : without_sign.call(first_year) + ' BCE'
  end_label =
    if last_year > 0
      first_year > 0 ? last_year.to_s : last_year.to_s + ' CE'
    else
      without_sign.call(last_year) + ' BCE'
    end

  ['Between ' + start_label + ' and ' + end_label]
end
|
222
|
+
|
223
|
+
# Normalized (punctuation-stripped) text of every note whose type is 'date'
# or 'date source'.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def date_notes(node=mods)
  node.xpath("./mods:note[@type = 'date' or @type = 'date source']", MODS_NS).map do |note|
    ModsFieldable.normalize(note.text, true)
  end
end
|
230
|
+
|
231
|
+
# Normalized (punctuation-stripped) text of every note that has no type, or
# a type other than 'date' and 'date source'.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def non_date_notes(node=mods)
  node.xpath("./mods:note[not(@type) or (@type != 'date' and @type != 'date source')]", MODS_NS).map do |note|
    ModsFieldable.normalize(note.text, true)
  end
end
|
238
|
+
|
239
|
+
# Normalized text of every location/url with access='object in context' and
# usage='primary display'.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def item_in_context_url(node=mods)
  node.xpath("./mods:location/mods:url[@access='object in context' and @usage='primary display']", MODS_NS).map do |url_node|
    ModsFieldable.normalize(url_node.text, true)
  end
end
|
246
|
+
|
247
|
+
# Normalized text of every location/url inside a host relatedItem labelled
# 'Project'.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def project_url(node=mods)
  node.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']/mods:location/mods:url", MODS_NS).map do |url_node|
    ModsFieldable.normalize(url_node.text, true)
  end
end
|
254
|
+
|
255
|
+
# Normalized (punctuation-stripped) text of the subject sub-elements, grouped
# by element in this order: topic, geographic, name, temporal, titleInfo, genre.
#
# @param node context node; defaults to the mods root
# @return [Array<String>]
def all_subjects(node=mods)
  %w[topic geographic name temporal titleInfo genre].flat_map do |element|
    node.xpath("./mods:subject/mods:#{element}", MODS_NS).map do |subject_node|
      ModsFieldable.normalize(subject_node.text, true)
    end
  end
end
|
279
|
+
|
280
|
+
# Collects every place term recorded under mods:originInfo/mods:place.
#
# @param node [#xpath] node to query; defaults to the value of +mods+
# @return [Array] the text of each mods:placeTerm, passed through
#   ModsFieldable.normalize (with +true+ as its second argument); empty when
#   nothing matches
def origin_info_place(node=mods)
  node.xpath("./mods:originInfo/mods:place/mods:placeTerm", MODS_NS).map do |place_term|
    ModsFieldable.normalize(place_term.text, true)
  end
end
|
287
|
+
|
288
|
+
# Picks the origin place terms that should be shown to users.
#
# Place terms WITHOUT a valueURI attribute win: if at least one exists, only
# those are returned. Place terms that carry a valueURI are used solely as a
# fallback when no URI-less term is present.
#
# @param node [#xpath] node to query; defaults to the value of +mods+
# @return [Array] the chosen place term texts, passed through
#   ModsFieldable.normalize (with +true+ as its second argument); empty when
#   there are no place terms at all
def origin_info_place_for_display(node=mods)
  places_without_uri = node.xpath("./mods:originInfo/mods:place/mods:placeTerm[not(@valueURI)]", MODS_NS).map do |place_term|
    ModsFieldable.normalize(place_term.text, true)
  end
  return places_without_uri unless places_without_uri.empty?

  # Fallback: nothing without a valueURI, so show the URI-bearing terms instead.
  node.xpath("./mods:originInfo/mods:place/mods:placeTerm[@valueURI]", MODS_NS).map do |place_term|
    ModsFieldable.normalize(place_term.text, true)
  end
end
|
301
|
+
|
302
|
+
# Collects the cartographic coordinates of the item that look like a
# "latitude,longitude" decimal pair.
#
# Each mods:subject/mods:cartographics/mods:coordinates text is passed through
# ModsFieldable.normalize (with +true+ as its second argument) and kept only
# if it matches the expected pattern.
#
# @param node [#xpath] node to query; defaults to the value of +mods+
# @return [Array] the normalized coordinate strings that matched
def coordinates(node=mods)
  normalized_values = node.xpath("./mods:subject/mods:cartographics/mods:coordinates", MODS_NS).map do |coordinate_node|
    ModsFieldable.normalize(coordinate_node.text, true)
  end

  # Expected coordinate format: 40.123456,-73.5678
  # NOTE(review): the pattern is unanchored and allows repeated '-' signs, so a
  # value that merely CONTAINS a decimal pair is also accepted. Left as-is to
  # preserve existing indexing behavior; confirm whether stricter matching is wanted.
  normalized_values.select do |value|
    value.match(/-*\d+\.\d+\s*,\s*-*\d+\.\d+\s*/)
  end
end
|
312
|
+
|
313
|
+
# Builds the Solr document for this object from its MODS metadata.
#
# Starts from whatever a parent implementation of to_solr returns (when one
# exists), then adds title, collection, name, subject, format, shelf locator,
# note, URL, place, repository, project, date and coordinate fields. As a last
# step, every field for which self.class.maps_field? is true has its value
# replaced by self.class.map_value(field, value).
#
# @param solr_doc [Hash] document to add fields to
# @return [Hash] the populated document; when +mods+ is nil the document is
#   returned as received from the parent implementation, with nothing added
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  # There is no mods. Return because there is nothing to process, otherwise
  # NoMethodError will be raised by subsequent lines.
  return solr_doc if mods.nil?

  solr_doc["all_text_teim"] ||= []

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["alternative_title_ssm"] = alternative_titles
  solr_doc["all_text_teim"] += solr_doc["alternative_title_ssm"]
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_teim"] = solr_doc["lib_name_sim"]
  solr_doc["all_text_teim"] += solr_doc["lib_name_teim"]
  solr_doc["lib_all_subjects_ssm"] = all_subjects
  solr_doc["lib_all_subjects_teim"] = solr_doc["lib_all_subjects_ssm"]
  solr_doc["all_text_teim"] += solr_doc["lib_all_subjects_teim"]
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_shelf_sim"] = shelf_locators
  solr_doc["lib_date_textual_ssm"] = textual_dates
  solr_doc["lib_date_notes_ssm"] = date_notes
  solr_doc["lib_non_date_notes_ssm"] = non_date_notes
  solr_doc["lib_item_in_context_url_ssm"] = item_in_context_url
  solr_doc["lib_project_url_ssm"] = project_url
  solr_doc["origin_info_place_ssm"] = origin_info_place
  solr_doc["origin_info_place_for_display_ssm"] = origin_info_place_for_display

  # Repository: the short/long/full fields are only written when a code is available.
  repo_marc_code = repository_code
  unless repo_marc_code.nil?
    solr_doc["lib_repo_short_ssim"] = [translate_repo_marc_code(repo_marc_code, 'short')]
    solr_doc["lib_repo_long_sim"] = [translate_repo_marc_code(repo_marc_code, 'long')]
    solr_doc["lib_repo_full_ssim"] = [translate_repo_marc_code(repo_marc_code, 'full')]
  end
  solr_doc["lib_repo_text_ssm"] = repository_text

  # Projects: translate each title to its short and full form, without duplicates.
  project_titles = projects
  unless project_titles.nil?
    solr_doc["lib_project_short_ssim"] = []
    solr_doc["lib_project_full_ssim"] = []
    project_titles.each do |project_title|
      solr_doc["lib_project_short_ssim"] << translate_project_title(project_title, 'short')
      solr_doc["lib_project_full_ssim"] << translate_project_title(project_title, 'full')
    end
    solr_doc["lib_project_short_ssim"].uniq!
    solr_doc["lib_project_full_ssim"].uniq!
  end

  # Create convenient start and end date values based on one of the many possible
  # originInfo/dateX elements. The first field (in the order listed) that is
  # already present in solr_doc supplies the date.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']
  start_date_field = possible_start_date_fields.find { |key| solr_doc.has_key?(key) }
  end_date_field = possible_end_date_fields.find { |key| solr_doc.has_key?(key) }
  start_date = start_date_field && solr_doc[start_date_field][0]
  end_date = end_date_field && solr_doc[end_date_field][0]

  if start_date.present?
    # A missing end date means a single point in time: end == start.
    end_date = start_date if end_date.blank?

    year_regex = /^(-?\d{1,4}).*/

    start_year_match = start_date.match(year_regex)
    start_year = zero_pad_year(start_year_match.captures[0]) if start_year_match

    # An end date with no leading year is treated like a missing end date and
    # falls back to the start year. Previously end_year stayed nil in that case
    # and the range concatenation below raised TypeError.
    end_year_match = end_date.match(year_regex)
    end_year = end_year_match ? zero_pad_year(end_year_match.captures[0]) : start_year

    solr_doc["lib_start_date_year_itsi"] = start_year.to_i if start_year # TrieInt version for searches
    solr_doc["lib_end_date_year_itsi"] = end_year.to_i if end_year # TrieInt version for searches

    # end_year is guaranteed non-nil whenever start_year is (see fallback above).
    solr_doc["lib_date_year_range_si"] = "#{start_year}-#{end_year}" if start_year

    # When no textual date is available, fall back to other date data (if available)
    if solr_doc["lib_date_textual_ssm"].blank?
      solr_doc["lib_date_textual_ssm"] = date_range_to_textual_date(start_year.to_i, end_year.to_i)
    end
  end

  # Geo data
  solr_doc["geo"] = coordinates

  # Only values of existing keys are replaced here, so the hash is safe to iterate.
  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end

  solr_doc
end
|
423
|
+
|
424
|
+
# Left-pads a year with zeros to at least four digits, keeping a leading
# minus sign in front of the padding (e.g. 5 => "0005", "-44" => "-0044").
# Years that already have four or more digits are returned unchanged.
#
# @param year [#to_s] the year, as a string or anything convertible with to_s
# @return [String] the zero-padded year
def zero_pad_year(year)
  year = year.to_s
  sign = year.start_with?('-') ? '-' : ''
  digits = year[sign.length, year.length]

  # rjust leaves strings of four or more characters untouched, so no length check is needed.
  sign + digits.rjust(4, '0')
end
|
434
|
+
end
|
435
|
+
end
|