cul_scv_hydra 0.22.6 → 0.22.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/controllers/concerns/cul/hydra/controller.rb +22 -0
- data/app/controllers/concerns/cul/hydra/resolver.rb +69 -0
- data/app/controllers/concerns/cul/hydra/thumbnails.rb +62 -0
- data/app/controllers/concerns/cul/scv/hydra/controller.rb +3 -19
- data/app/controllers/concerns/cul/scv/hydra/resolver.rb +2 -65
- data/app/controllers/concerns/cul/scv/hydra/thumbnails.rb +3 -59
- data/app/models/concept.rb +1 -1
- data/app/models/concerns/cul/hydra/models.rb +24 -0
- data/app/models/concerns/cul/hydra/models/aggregator.rb +121 -0
- data/app/models/concerns/cul/hydra/models/common.rb +220 -0
- data/app/models/concerns/cul/hydra/models/image_resource.rb +78 -0
- data/app/models/concerns/cul/hydra/models/linkable_resources.rb +108 -0
- data/app/models/concerns/cul/hydra/models/resource.rb +87 -0
- data/app/models/concerns/cul/scv/hydra/models.rb +1 -13
- data/app/models/concerns/cul/scv/hydra/models/aggregator.rb +1 -116
- data/app/models/concerns/cul/scv/hydra/models/common.rb +1 -213
- data/app/models/concerns/cul/scv/hydra/models/image_resource.rb +3 -75
- data/app/models/concerns/cul/scv/hydra/models/linkable_resources.rb +3 -105
- data/app/models/concerns/cul/scv/hydra/models/resource.rb +2 -83
- data/app/models/cul/hydra/datastreams/dc_metadata.rb +107 -0
- data/app/models/cul/hydra/datastreams/mods_document.rb +195 -0
- data/app/models/cul/hydra/datastreams/struct_metadata.rb +176 -0
- data/app/models/cul/scv/hydra/datastreams/dc_metadata.rb +5 -104
- data/app/models/cul/scv/hydra/datastreams/mods_document.rb +5 -178
- data/app/models/cul/scv/hydra/datastreams/struct_metadata.rb +5 -174
- data/app/models/dc_document.rb +1 -1
- data/app/models/generic_aggregator.rb +5 -5
- data/app/models/generic_object.rb +2 -2
- data/app/models/generic_resource.rb +4 -4
- data/app/models/mets_structured_aggregator.rb +2 -2
- data/app/models/resource.rb +3 -3
- data/app/models/resource_aggregator.rb +3 -3
- data/fixtures/spec/CUL_MODS/mods-subjects.xml +24 -0
- data/lib/cul_hydra.rb +18 -0
- data/lib/cul_hydra/access_controls_enforcement.rb +53 -0
- data/lib/cul_hydra/controllers.rb +13 -0
- data/lib/cul_hydra/controllers/aggregates.rb +93 -0
- data/lib/cul_hydra/controllers/aggregator_controller_helper.rb +27 -0
- data/lib/cul_hydra/controllers/catalog.rb +12 -0
- data/lib/cul_hydra/controllers/content_aggregators.rb +81 -0
- data/lib/cul_hydra/controllers/datastreams.rb +145 -0
- data/lib/cul_hydra/controllers/helpers.rb +10 -0
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/active_fedora_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/application_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/dc_metadata_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_assets_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_autocomplete_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_uploader_helper_behavior.rb +1 -1
- data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/resources_helper_behavior.rb +1 -1
- data/lib/cul_hydra/controllers/resources.rb +161 -0
- data/lib/cul_hydra/controllers/static_image_aggregators.rb +105 -0
- data/lib/cul_hydra/controllers/suggestions.rb +126 -0
- data/lib/cul_hydra/controllers/terms.rb +205 -0
- data/lib/cul_hydra/engine.rb +31 -0
- data/lib/cul_hydra/fedora.rb +41 -0
- data/lib/cul_hydra/fedora/dummy_object.rb +37 -0
- data/lib/cul_hydra/fedora/rubydora_patch.rb +16 -0
- data/lib/cul_hydra/fedora/url_helper_behavior.rb +32 -0
- data/lib/cul_hydra/indexer.rb +84 -0
- data/lib/cul_hydra/om.rb +7 -0
- data/lib/cul_hydra/om/standard_mods.rb +115 -0
- data/lib/cul_hydra/risearch_members.rb +92 -0
- data/lib/cul_hydra/solrizer.rb +10 -0
- data/lib/cul_hydra/solrizer/extractor.rb +27 -0
- data/lib/cul_hydra/solrizer/mods_fieldable.rb +435 -0
- data/lib/cul_hydra/solrizer/terminology_based_solrizer.rb +35 -0
- data/lib/cul_hydra/solrizer/value_mapper.rb +46 -0
- data/lib/{cul_scv_hydra/solrizer/field_mapper.rb → cul_hydra/solrizer_patch.rb} +0 -0
- data/lib/cul_hydra/version.rb +8 -0
- data/lib/cul_hydra/version.rb~ +8 -0
- data/lib/cul_scv_fedora/dummy_object.rb +1 -30
- data/lib/cul_scv_fedora/rubydora_patch.rb +3 -7
- data/lib/cul_scv_fedora/url_helper_behavior.rb +3 -23
- data/lib/cul_scv_hydra.rb +5 -32
- data/lib/cul_scv_hydra/access_controls_enforcement.rb +3 -50
- data/lib/cul_scv_hydra/controllers.rb +10 -10
- data/lib/cul_scv_hydra/controllers/aggregates.rb +1 -86
- data/lib/cul_scv_hydra/controllers/aggregator_controller_helper.rb +4 -23
- data/lib/cul_scv_hydra/controllers/catalog.rb +5 -9
- data/lib/cul_scv_hydra/controllers/content_aggregators.rb +4 -77
- data/lib/cul_scv_hydra/controllers/datastreams.rb +3 -140
- data/lib/cul_scv_hydra/controllers/helpers.rb +44 -8
- data/lib/cul_scv_hydra/controllers/resources.rb +4 -157
- data/lib/cul_scv_hydra/controllers/static_image_aggregators.rb +4 -100
- data/lib/cul_scv_hydra/controllers/suggestions.rb +4 -122
- data/lib/cul_scv_hydra/controllers/terms.rb +4 -201
- data/lib/cul_scv_hydra/engine.rb +1 -1
- data/lib/cul_scv_hydra/indexer.rb +3 -82
- data/lib/cul_scv_hydra/om.rb +2 -2
- data/lib/cul_scv_hydra/om/standard_mods.rb +1 -108
- data/lib/cul_scv_hydra/risearch_members.rb +4 -89
- data/lib/cul_scv_hydra/solrizer.rb +5 -6
- data/lib/cul_scv_hydra/solrizer/extractor.rb +1 -25
- data/lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb +4 -429
- data/lib/cul_scv_hydra/solrizer/terminology_based_solrizer.rb +4 -32
- data/lib/cul_scv_hydra/solrizer/value_mapper.rb +1 -44
- data/lib/cul_scv_hydra/version.rb +5 -5
- data/lib/tasks/index.rake +2 -2
- data/lib/tasks/transform.rake +23 -0
- metadata +55 -12
data/lib/cul_scv_hydra/engine.rb
CHANGED
data/lib/cul_scv_hydra/indexer.rb
CHANGED
@@ -1,85 +1,6 @@
|
|
1
1
|
module Cul::Scv::Hydra::Indexer
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
|
6
|
-
end
|
7
|
-
|
8
|
-
begin
|
9
|
-
|
10
|
-
unless ActiveFedora::Base.exists?(pid)
|
11
|
-
raise 'Could not find Fedora object with pid: ' + pid
|
12
|
-
end
|
13
|
-
|
14
|
-
if pids_to_omit.present? && pids_to_omit.include?(pid)
|
15
|
-
puts 'Skipping topmost object in this set (' + pid + ') because it has been intentionally omitted...' if verbose_output
|
16
|
-
else
|
17
|
-
puts 'Indexing topmost object in this set (' + pid + ')...' if verbose_output
|
18
|
-
puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output
|
19
|
-
|
20
|
-
yield pid
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
|
-
puts 'Recursively retreieving and indexing all members of ' + pid + '...'
|
25
|
-
|
26
|
-
unique_pids = Cul::Scv::Hydra::RisearchMembers.get_recursive_member_pids(pid, true)
|
27
|
-
|
28
|
-
total_number_of_members = unique_pids.length
|
29
|
-
puts 'Recursive search found ' + total_number_of_members.to_s + ' members.' if verbose_output
|
30
|
-
|
31
|
-
if pids_to_omit.present?
|
32
|
-
unique_pids = unique_pids - pids_to_omit
|
33
|
-
total_number_of_members = unique_pids.length
|
34
|
-
puts 'After checking against the list of omitted pids, the total number of objects to index will be: ' + total_number_of_members.to_s if verbose_output
|
35
|
-
end
|
36
|
-
|
37
|
-
i = 1
|
38
|
-
if total_number_of_members > 0
|
39
|
-
unique_pids.each {|pid|
|
40
|
-
|
41
|
-
puts 'Recursing on ' + i.to_s + ' of ' + total_number_of_members.to_s + ' members (' + pid + ')...' if verbose_output
|
42
|
-
|
43
|
-
yield pid
|
44
|
-
|
45
|
-
i += 1
|
46
|
-
}
|
47
|
-
end
|
48
|
-
|
49
|
-
rescue RestClient::Unauthorized => e
|
50
|
-
error_message = "Skipping #{pid} due to error: " + e.message + '. Problem with Fedora object?'
|
51
|
-
puts error_message
|
52
|
-
logger.error error_message if defined?(logger)
|
53
|
-
end
|
54
|
-
|
55
|
-
puts 'Recursion complete!'
|
56
|
-
|
2
|
+
extend ActiveSupport::Concern
|
3
|
+
included do
|
4
|
+
include Cul::Hydra::Indexer
|
57
5
|
end
|
58
|
-
def self.recursively_index_fedora_objects(top_pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
|
59
|
-
|
60
|
-
descend_from(top_pid, pids_to_omit, verbose_output) do |pid|
|
61
|
-
self.index_pid(pid, skip_generic_resources, verbose_output)
|
62
|
-
end
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
def self.index_pid(pid, skip_generic_resources=false, verbose_output=false)
|
67
|
-
# We found an object with the desired PID. Let's reindex it
|
68
|
-
begin
|
69
|
-
active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
|
70
|
-
|
71
|
-
if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
|
72
|
-
puts 'Object was skipped because GenericResources are being skipped and it is a GenericResource.'
|
73
|
-
else
|
74
|
-
active_fedora_object.update_index
|
75
|
-
puts 'done.' if verbose_output
|
76
|
-
end
|
77
|
-
rescue SystemExit, Interrupt => e
|
78
|
-
# Allow system interrupt (ctrl+c)
|
79
|
-
raise e
|
80
|
-
rescue Exception => e
|
81
|
-
puts "Encountered problem with #{pid}. Skipping record. Exception: #{e.message}"
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
6
|
end
|
data/lib/cul_scv_hydra/om.rb
CHANGED
data/lib/cul_scv_hydra/om/standard_mods.rb
CHANGED
@@ -1,115 +1,8 @@
|
|
1
|
-
require 'active-fedora'
|
2
|
-
require 'solrizer'
|
3
|
-
require 'cul_scv_hydra/solrizer'
|
4
1
|
module Cul
|
5
2
|
module Scv
|
6
3
|
module Hydra
|
7
4
|
module Om
|
8
|
-
class StandardMods < ::
|
9
|
-
|
10
|
-
set_terminology do |t|
|
11
|
-
t.root(:path=>"mods",
|
12
|
-
:xmlns=>"http://www.loc.gov/mods/v3",
|
13
|
-
:schema=>"http://www.loc.gov/standards/mods/v3/mods-3-4.xsd")
|
14
|
-
t.identifier(:path=>"identifier", :attributes=>{:type=>"local"}, :data_type=>:symbol)
|
15
|
-
t.clio(:path=>"identifier", :attributes=>{:type=>"CLIO"}, :data_type=>:symbol)
|
16
|
-
t.title_info(:path=>"titleInfo", :index_as=>[:not_searchable]) {
|
17
|
-
t.main_title(:path=>"title", :index_as=>[:not_searchable])
|
18
|
-
}
|
19
|
-
t.title(:path=>'mods/oxns:titleInfo/oxns:title', :index_as=>[:searchable,:displayable, :sortable])
|
20
|
-
t.abstract
|
21
|
-
t.subject {
|
22
|
-
t.topic
|
23
|
-
}
|
24
|
-
t.type_of_resource(:path=>"typeOfResource", :index_as=>[:not_searchable])
|
25
|
-
t.physical_description(:path=>"physicalDescription", :index_as=>[:not_searchable]){
|
26
|
-
t.form_marc(:path=>"form", :attributes=>{:authority=>"marcform"}, :index_as=>[:not_searchable])
|
27
|
-
t.form_nomarc(:path=>"form[@authority !='marcform']", :index_as=>[:not_searchable, :displayable, :facetable, :textable])
|
28
|
-
t.extent(:path=>"extent", :index_as=>[:not_searchable])
|
29
|
-
t.reformatting_quality(:path=>"reformattingQuality", :index_as=>[:not_searchable])
|
30
|
-
t.internet_media_type(:path=>"internetMediaType", :index_as=>[:not_searchable])
|
31
|
-
t.digital_origin(:path=>"digitalOrigin", :index_as=>[:not_searchable])
|
32
|
-
}
|
33
|
-
t.lib_format(proxy: [:physical_description, :form_nomarc] )
|
34
|
-
t.location(:path=>"location", :index_as=>[:not_searchable]){
|
35
|
-
t.repo_text(:path=>"physicalLocation",:attributes=>{:authority=>:none}, :index_as=>[:not_searchable])
|
36
|
-
t.repo_code(:path=>"physicalLocation",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
|
37
|
-
}
|
38
|
-
t.lib_repo_text(:ref=>[:location, :repo_text], :label=>"lib_repo", :index_as=>[:searchable])
|
39
|
-
t.lib_repo(:ref=>[:location, :repo_code], :index_as=>[:not_searchable,:facetable, :displayable])
|
40
|
-
t.project_host(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Project"}, :index_as=>[:not_searchable]){
|
41
|
-
t.p_title(:path=>'titleInfo',:index_as=>[:not_searchable])
|
42
|
-
}
|
43
|
-
t.lib_project(:proxy=>[:project_host, :p_title],:index_as=>[:facetable,:displayable, :not_searchable])
|
44
|
-
t.collection_host(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Collection"}, :index_as=>[:not_searchable]){
|
45
|
-
t.c_title(:path=>'titleInfo',:index_as=>[:not_searchable])
|
46
|
-
}
|
47
|
-
t.lib_project(:path=>"mods/oxns:relatedItem[@type='host'][@displayLabel='Project']/oxns:titleInfo/oxns:title",:index_as=>[:facetable,:displayable, :not_searchable])
|
48
|
-
t.lib_collection(:path=>"mods/oxns:relatedItem[@type='host'][@displayLabel='Collection']/oxns:titleInfo/oxns:title",:index_as=>[:facetable,:displayable, :not_searchable])
|
49
|
-
t.note(:path=>"note")
|
50
|
-
t.access_condition(:path=>"accessCondition", :attributes=>{:type=>"useAndReproduction"}, :index_as => [:searchable], :data_type => :symbol)
|
51
|
-
t.record_info(:path=>"recordInfo", :index_as=>[:not_searchable]) {
|
52
|
-
t.record_creation_date(:path=>"recordCreationDate",:attributes=>{:encoding=>"w3cdtf"}, :index_as=>[:not_searchable])
|
53
|
-
t.record_content_source(:path=>"recordContentSource",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
|
54
|
-
t.language_of_cataloging(:path=>"languageOfCataloging", :index_as=>[:not_searchable]){
|
55
|
-
t.language_term(:path=>"languageTerm", :index_as=>[:not_searchable], :attributes=>{:type=>:none})
|
56
|
-
t.language_code(:path=>"languageTerm",:attributes=>{:type=>'code',:authority=>"iso639-2b"}, :index_as=>[:not_searchable])
|
57
|
-
}
|
58
|
-
t.record_origin(:path=>"recordOrigin", :index_as=>[:not_searchable])
|
59
|
-
}
|
60
|
-
|
61
|
-
t.origin_info(:path=>"originInfo", :index_as=>[:not_searchable]){
|
62
|
-
t.date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf'}, :index_as=>[:not_searchable])
|
63
|
-
t.key_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:not_searchable])
|
64
|
-
t.start_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:not_searchable])
|
65
|
-
t.end_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:not_searchable])
|
66
|
-
}
|
67
|
-
end
|
68
|
-
|
69
|
-
def self.xml_template
|
70
|
-
builder = Nokogiri::XML::Builder.new do |xml|
|
71
|
-
xml.mods(:version=>"3.4",
|
72
|
-
"xmlns"=>"http://www.loc.gov/mods/v3",
|
73
|
-
"xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance"){
|
74
|
-
}
|
75
|
-
end
|
76
|
-
builder.doc.encoding = 'UTF-8'
|
77
|
-
builder.doc.root["xsi:schemaLocation"] = 'http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd'
|
78
|
-
return builder.doc
|
79
|
-
end
|
80
|
-
|
81
|
-
def prefix
|
82
|
-
#if ::ActiveFedora::VERSION >= '8'
|
83
|
-
# Rails.logger.warn("the prefix method of #{self.class.name} was overriden to maintain backwards compatibility")
|
84
|
-
#end
|
85
|
-
''
|
86
|
-
end
|
87
|
-
|
88
|
-
def method_missing method, *args
|
89
|
-
query = false
|
90
|
-
_mname = method.id2name
|
91
|
-
if _mname[-1,1] == '?'
|
92
|
-
query = true
|
93
|
-
_mname = _mname[0,_mname.length-1]
|
94
|
-
end
|
95
|
-
_msym = _mname.to_sym
|
96
|
-
begin
|
97
|
-
has_term = self.class.terminology.has_term?(_msym)
|
98
|
-
|
99
|
-
_r = (has_term)? find_by_terms(_msym, *args) : nil
|
100
|
-
if query
|
101
|
-
return !( _r.nil? || _r.size()==0)
|
102
|
-
else
|
103
|
-
return _r
|
104
|
-
end
|
105
|
-
rescue
|
106
|
-
super
|
107
|
-
end
|
108
|
-
end
|
109
|
-
def update_values(params)
|
110
|
-
super
|
111
|
-
self.dirty = true
|
112
|
-
end
|
5
|
+
class StandardMods < Cul::Hydra::Om::StandardMods
|
113
6
|
end
|
114
7
|
end
|
115
8
|
end
|
data/lib/cul_scv_hydra/risearch_members.rb
CHANGED
@@ -1,92 +1,7 @@
|
|
1
1
|
module Cul::Scv::Hydra::RisearchMembers
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
'select $child $parent from <#ri>
|
7
|
-
where
|
8
|
-
walk($child <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '> and $child <http://purl.oclc.org/NET/CUL/memberOf> $parent)'
|
9
|
-
|
10
|
-
unless cmodel_type == 'all'
|
11
|
-
recursive_member_query += ' and $child <fedora-model:hasModel> $cmodel'
|
12
|
-
recursive_member_query += ' and $cmodel <mulgara:is> <info:fedora/ldpd:' + cmodel_type + '>'
|
13
|
-
end
|
14
|
-
|
15
|
-
puts 'Performing query:' if verbose_output
|
16
|
-
puts recursive_member_query if verbose_output
|
17
|
-
|
18
|
-
search_response = JSON(Cul::Scv::Fedora.repository.find_by_itql(recursive_member_query, {
|
19
|
-
:type => 'tuples',
|
20
|
-
:format => 'json',
|
21
|
-
:limit => '',
|
22
|
-
:stream => 'on'
|
23
|
-
}))
|
24
|
-
|
25
|
-
unique_pids = search_response['results'].map{|result| result['child'].gsub('info:fedora/', '') }.uniq
|
26
|
-
|
27
|
-
return unique_pids
|
28
|
-
|
2
|
+
extend ActiveSupport::Concern
|
3
|
+
extend Cul::Hydra::RisearchMembers::ClassMethods
|
4
|
+
included do
|
5
|
+
include Cul::Hydra::RisearchMembers
|
29
6
|
end
|
30
|
-
|
31
|
-
def self.get_direct_member_results(pid, verbose_output=false, format='json')
|
32
|
-
|
33
|
-
direct_member_query =
|
34
|
-
'select $pid from <#ri>
|
35
|
-
where $pid <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '>'
|
36
|
-
|
37
|
-
puts 'Performing query:' if verbose_output
|
38
|
-
puts direct_member_query if verbose_output
|
39
|
-
|
40
|
-
search_response = JSON(Cul::Scv::Fedora.repository.find_by_itql(direct_member_query, {
|
41
|
-
:type => 'tuples',
|
42
|
-
:format => format,
|
43
|
-
:limit => '',
|
44
|
-
:stream => 'on'
|
45
|
-
}))
|
46
|
-
|
47
|
-
return search_response['results']
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.get_direct_member_pids(pid, verbose_output=false)
|
51
|
-
unique_pids = get_direct_member_results(pid,verbose_output,'json')
|
52
|
-
unique_pids.map{|result| result['pid'].gsub('info:fedora/', '') }.uniq
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.get_direct_member_count(pid, verbose_output=false)
|
56
|
-
count = get_direct_member_results(pid,verbose_output,'count/json')
|
57
|
-
return count.blank? ? 0 : count[0]['count'].to_i
|
58
|
-
end
|
59
|
-
|
60
|
-
|
61
|
-
#Project constituents
|
62
|
-
|
63
|
-
def self.get_project_constituent_results(pid, verbose_output=false, format='json')
|
64
|
-
|
65
|
-
project_constituent_query =
|
66
|
-
'select $pid from <#ri>
|
67
|
-
where $pid <info:fedora/fedora-system:def/relations-external#isConstituentOf> <fedora:' + pid + '>'
|
68
|
-
|
69
|
-
puts 'Performing query:' if verbose_output
|
70
|
-
puts project_constituent_query if verbose_output
|
71
|
-
|
72
|
-
search_response = JSON(Cul::Scv::Fedora.repository.find_by_itql(project_constituent_query, {
|
73
|
-
:type => 'tuples',
|
74
|
-
:format => format,
|
75
|
-
:limit => '',
|
76
|
-
:stream => 'on'
|
77
|
-
}))
|
78
|
-
|
79
|
-
return search_response['results']
|
80
|
-
end
|
81
|
-
|
82
|
-
def self.get_project_constituent_pids(pid, verbose_output=false)
|
83
|
-
unique_pids = get_project_constituent_results(pid,verbose_output,'json')
|
84
|
-
unique_pids.map{|result| result['pid'].gsub('info:fedora/', '') }.uniq
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.get_project_constituent_count(pid, verbose_output=false)
|
88
|
-
count = get_project_constituent_results(pid,verbose_output,'count/json')
|
89
|
-
return count.blank? ? 0 : count[0]['count'].to_i
|
90
|
-
end
|
91
|
-
|
92
7
|
end
|
data/lib/cul_scv_hydra/solrizer.rb
CHANGED
@@ -2,12 +2,11 @@ module Cul
|
|
2
2
|
module Scv
|
3
3
|
module Hydra
|
4
4
|
module Solrizer
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
autoload :Extractor, "cul_scv_hydra/solrizer/extractor"
|
6
|
+
autoload :TerminologyBasedSolrizer, "cul_scv_hydra/solrizer/terminology_based_solrizer"
|
7
|
+
autoload :ValueMapper, "cul_scv_hydra/solrizer/value_mapper"
|
8
|
+
autoload :ScvModsFieldable, "cul_scv_hydra/solrizer/scv_mods_fieldable"
|
9
9
|
end
|
10
10
|
end
|
11
11
|
end
|
12
|
-
end
|
13
|
-
require "cul_scv_hydra/solrizer/field_mapper"
|
12
|
+
end
|
data/lib/cul_scv_hydra/solrizer/extractor.rb
CHANGED
@@ -1,27 +1,3 @@
|
|
1
1
|
module Cul::Scv::Hydra::Solrizer
|
2
|
-
class Extractor < ::Solrizer
|
3
|
-
# Insert +field_value+ for +field_name+ into +solr_doc+
|
4
|
-
# Handles inserting new values into a Hash while ensuring that you don't destroy or overwrite any existing values in the hash.
|
5
|
-
# Ensures that field values are always appended to arrays within the values hash.
|
6
|
-
# Ensures that values are run through format_node_value
|
7
|
-
# Also ensures that values are unique if specified
|
8
|
-
# @param [Hash] solr_doc
|
9
|
-
# @param [String] field_name
|
10
|
-
# @param [String] field_value
|
11
|
-
# @param [boolean] unique
|
12
|
-
def self.insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
|
13
|
-
formatted_value = self.format_node_value(field_value)
|
14
|
-
if solr_doc.has_key?(field_name)
|
15
|
-
solr_doc[field_name] << formatted_value unless (unique and solr_doc[field_name].include? formatted_value)
|
16
|
-
else
|
17
|
-
solr_doc.merge!( {field_name => [formatted_value]} )
|
18
|
-
end
|
19
|
-
return solr_doc
|
20
|
-
end
|
21
|
-
|
22
|
-
# Instance Methods
|
23
|
-
def insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
|
24
|
-
Cul::Scv::Hydra::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value, unique)
|
25
|
-
end
|
26
|
-
end
|
2
|
+
# Keeps the legacy Cul::Scv::Hydra::Solrizer::Extractor constant available as a thin
# subclass of the relocated Cul::Hydra::Solrizer::Extractor implementation.
# NOTE: the superclass path must use `::` throughout; a single colon before
# `Extractor` is parsed as the method call `Cul::Hydra.Solrizer(:Extractor)` and
# raises NoMethodError as soon as this file is loaded.
class Extractor < Cul::Hydra::Solrizer::Extractor; end
|
27
3
|
end
|
data/lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb
CHANGED
@@ -1,434 +1,9 @@
|
|
1
1
|
module Cul::Scv::Hydra::Solrizer
|
2
|
-
|
2
|
+
module ScvModsFieldable
|
3
3
|
extend ActiveSupport::Concern
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
module ClassMethods
|
9
|
-
def value_mapper(maps=nil)
|
10
|
-
@value_mapper ||= ValueMapper.new(maps)
|
11
|
-
end
|
12
|
-
|
13
|
-
def map_field(field_key, map_key)
|
14
|
-
value_mapper.map_field(field_key, map_key)
|
15
|
-
end
|
16
|
-
|
17
|
-
def map_value(field_key, value_key)
|
18
|
-
value_mapper.map_value(field_key, value_key)
|
19
|
-
end
|
20
|
-
|
21
|
-
def maps_field?(field_key)
|
22
|
-
value_mapper.maps_field? field_key
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def mods
|
27
|
-
ng_xml.xpath('/mods:mods', MODS_NS).first
|
28
|
-
end
|
29
|
-
|
30
|
-
def projects
|
31
|
-
mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS).collect do |p_node|
|
32
|
-
ScvModsFieldable.normalize(main_title(p_node), true)
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def collections
|
37
|
-
mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS).collect do |p_node|
|
38
|
-
ScvModsFieldable.normalize(main_title(p_node), true)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def sort_title(node=mods)
|
43
|
-
# include only the untyped [!@type] titleInfo, exclude noSort
|
44
|
-
base_text = ''
|
45
|
-
t = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
|
46
|
-
if t
|
47
|
-
t.children.each do |child|
|
48
|
-
base_text << child.text unless child.name == 'nonSort'
|
49
|
-
end
|
50
|
-
end
|
51
|
-
base_text = ScvModsFieldable.normalize(base_text, true)
|
52
|
-
base_text = nil if base_text.empty?
|
53
|
-
base_text
|
54
|
-
end
|
55
|
-
|
56
|
-
def main_title(node=mods)
|
57
|
-
# include only the untyped [!@type] titleInfo
|
58
|
-
t = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
|
59
|
-
if t
|
60
|
-
ScvModsFieldable.normalize(t.text)
|
61
|
-
else
|
62
|
-
nil
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def titles(node=mods)
|
67
|
-
# all titles without descending into relatedItems
|
68
|
-
# For now, this only includes the main title and selected alternate_titles
|
69
|
-
all_titles = []
|
70
|
-
all_titles << main_title unless main_title.nil?
|
71
|
-
all_titles += alternative_titles unless alternative_titles.nil?
|
72
|
-
end
|
73
|
-
|
74
|
-
def alternative_titles(node=mods)
|
75
|
-
node.xpath('./mods:titleInfo[@type and (@type="alternative" or @type="abbreviated" or @type="translated" or @type="uniform")]', MODS_NS).collect do |t|
|
76
|
-
ScvModsFieldable.normalize(t.text)
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
def names(role_authority=nil, role=nil)
|
81
|
-
# get all the name nodes
|
82
|
-
# keep all child text except the role terms
|
83
|
-
xpath = "./mods:name"
|
84
|
-
unless role_authority.nil?
|
85
|
-
xpath << "/mods:role/mods:roleTerm[@authority='#{role_authority.to_s}'"
|
86
|
-
unless role.nil?
|
87
|
-
xpath << " and normalize-space(text()) = '#{role.to_s.strip}'"
|
88
|
-
end
|
89
|
-
xpath << "]/ancestor::mods:name"
|
90
|
-
end
|
91
|
-
names = mods.xpath(xpath, MODS_NS).collect do |node|
|
92
|
-
base_text = node.xpath('./mods:namePart', MODS_NS).collect { |c| c.text }.join(' ')
|
93
|
-
ScvModsFieldable.normalize(base_text, true)
|
94
|
-
end
|
95
|
-
|
96
|
-
# Note: Removing subject names from name field extraction.
|
97
|
-
# See: https://issues.cul.columbia.edu/browse/DCV-231 and https://issues.cul.columbia.edu/browse/SCV-102
|
98
|
-
#xpath = "./mods:subject" + xpath[1,xpath.length]
|
99
|
-
#mods.xpath(xpath, MODS_NS).each do |node|
|
100
|
-
# base_text = node.xpath('./mods:namePart', MODS_NS).collect { |c| c.text }.join(' ')
|
101
|
-
# names << ScvModsFieldable.normalize(base_text, true)
|
102
|
-
#end
|
103
|
-
|
104
|
-
names
|
105
|
-
end
|
106
|
-
|
107
|
-
def dates(node=mods)
|
108
|
-
# get all the dateIssued with keyDate = 'yes', but not point = 'end'
|
109
|
-
end
|
110
|
-
|
111
|
-
def formats(node=mods)
|
112
|
-
# get all the form values with authority != 'marcform'
|
113
|
-
node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS).collect do |n|
|
114
|
-
ScvModsFieldable.normalize(n.text)
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
def repository_code(node=mods)
|
119
|
-
# get the location/physicalLocation[@authority = 'marcorg']
|
120
|
-
repo_code_node = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS).first
|
121
|
-
|
122
|
-
if repo_code_node
|
123
|
-
ScvModsFieldable.normalize(repo_code_node.text)
|
124
|
-
else
|
125
|
-
return nil
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def repository_text(node=mods)
|
130
|
-
# get the location/physicalLocation[not(@authority)]
|
131
|
-
repo_text_node = node.xpath("./mods:location/mods:physicalLocation[not(@authority)]", MODS_NS).first
|
132
|
-
|
133
|
-
if repo_text_node
|
134
|
-
ScvModsFieldable.normalize(repo_text_node.text)
|
135
|
-
else
|
136
|
-
return nil
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
def translate_repo_marc_code(code, type)
|
141
|
-
#code = ScvModsFieldable.normalize(code)
|
142
|
-
|
143
|
-
if type == 'short'
|
144
|
-
return translate_with_default(SHORT_REPO, code, 'Non-Columbia Location')
|
145
|
-
elsif type == 'long'
|
146
|
-
return translate_with_default(LONG_REPO, code, 'Non-Columbia Location')
|
147
|
-
elsif type == 'full'
|
148
|
-
return translate_with_default(FULL_REPO, code, 'Non-Columbia Location')
|
149
|
-
end
|
150
|
-
|
151
|
-
return nil
|
152
|
-
end
|
153
|
-
|
154
|
-
def translate_project_title(project_title, type)
|
155
|
-
normalized_project_title = ScvModsFieldable.normalize(project_title)
|
156
|
-
|
157
|
-
if type == 'short'
|
158
|
-
return translate_with_default(SHORT_PROJ, normalized_project_title, normalized_project_title)
|
159
|
-
elsif type == 'full'
|
160
|
-
return translate_with_default(FULL_PROJ, normalized_project_title, normalized_project_title)
|
161
|
-
end
|
162
|
-
|
163
|
-
return nil
|
164
|
-
end
|
165
|
-
|
166
|
-
def shelf_locators(node=mods)
|
167
|
-
node.xpath("./mods:location/mods:shelfLocator", MODS_NS).collect do |n|
|
168
|
-
ScvModsFieldable.normalize(n.text, true)
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
def textual_dates(node=mods)
|
173
|
-
dates = []
|
174
|
-
node.xpath("./mods:originInfo/mods:dateCreated[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
|
175
|
-
dates << ScvModsFieldable.normalize(n.text, true)
|
176
|
-
end
|
177
|
-
node.xpath("./mods:originInfo/mods:dateIssued[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
|
178
|
-
dates << ScvModsFieldable.normalize(n.text, true)
|
179
|
-
end
|
180
|
-
node.xpath("./mods:originInfo/mods:dateOther[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
|
181
|
-
dates << ScvModsFieldable.normalize(n.text, true)
|
182
|
-
end
|
183
|
-
return dates
|
184
|
-
end
|
185
|
-
|
186
|
-
def date_range_to_textual_date(start_year, end_year)
|
187
|
-
start_year = start_year.to_i.to_s # Remove zero-padding if present
|
188
|
-
end_year = end_year.to_i.to_s # Remove zero-padding if present
|
189
|
-
|
190
|
-
if start_year == end_year
|
191
|
-
return [start_year]
|
192
|
-
else
|
193
|
-
return [('Between ' +
|
194
|
-
(start_year.to_i > 0 ? start_year : start_year[1,start_year.length] + ' BCE') +
|
195
|
-
' and ' +
|
196
|
-
(end_year.to_i > 0 ? (start_year.to_i > 0 ? end_year : end_year + ' CE') : end_year[1,end_year.length] + ' BCE')
|
197
|
-
)]
|
198
|
-
end
|
199
|
-
end
|
200
|
-
|
201
|
-
def date_notes(node=mods)
|
202
|
-
date_notes = []
|
203
|
-
node.xpath("./mods:note[@type = 'date' or @type = 'date source']", MODS_NS).collect do |n|
|
204
|
-
date_notes << ScvModsFieldable.normalize(n.text, true)
|
205
|
-
end
|
206
|
-
return date_notes
|
207
|
-
end
|
208
|
-
|
209
|
-
def non_date_notes(node=mods)
|
210
|
-
non_date_notes = []
|
211
|
-
node.xpath("./mods:note[not(@type) or (@type != 'date' and @type != 'date source')]", MODS_NS).collect do |n|
|
212
|
-
non_date_notes << ScvModsFieldable.normalize(n.text, true)
|
213
|
-
end
|
214
|
-
return non_date_notes
|
215
|
-
end
|
216
|
-
|
217
|
-
def item_in_context_url(node=mods)
|
218
|
-
item_in_context_url_val = []
|
219
|
-
node.xpath("./mods:location/mods:url[@access='object in context' and @usage='primary display']", MODS_NS).collect do |n|
|
220
|
-
item_in_context_url_val << ScvModsFieldable.normalize(n.text, true)
|
221
|
-
end
|
222
|
-
item_in_context_url_val
|
223
|
-
end
|
224
|
-
|
225
|
-
def project_url(node=mods)
|
226
|
-
project_url_val = []
|
227
|
-
node.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']/mods:location/mods:url", MODS_NS).collect do |n|
|
228
|
-
project_url_val << ScvModsFieldable.normalize(n.text, true)
|
229
|
-
end
|
230
|
-
project_url_val
|
231
|
-
end
|
232
|
-
|
233
|
-
def all_subjects(node=mods)
|
234
|
-
list_of_subjects = []
|
235
|
-
|
236
|
-
node.xpath("./mods:subject/mods:topic", MODS_NS).collect do |n|
|
237
|
-
list_of_subjects << ScvModsFieldable.normalize(n.text, true)
|
238
|
-
end
|
239
|
-
node.xpath("./mods:subject/mods:geographic", MODS_NS).collect do |n|
|
240
|
-
list_of_subjects << ScvModsFieldable.normalize(n.text, true)
|
241
|
-
end
|
242
|
-
node.xpath("./mods:subject/mods:name", MODS_NS).collect do |n|
|
243
|
-
list_of_subjects << ScvModsFieldable.normalize(n.text, true)
|
244
|
-
end
|
245
|
-
node.xpath("./mods:subject/mods:temporal", MODS_NS).collect do |n|
|
246
|
-
list_of_subjects << ScvModsFieldable.normalize(n.text, true)
|
247
|
-
end
|
248
|
-
node.xpath("./mods:subject/mods:titleInfo", MODS_NS).collect do |n|
|
249
|
-
list_of_subjects << ScvModsFieldable.normalize(n.text, true)
|
250
|
-
end
|
251
|
-
node.xpath("./mods:subject/mods:genre", MODS_NS).collect do |n|
|
252
|
-
list_of_subjects << ScvModsFieldable.normalize(n.text, true)
|
253
|
-
end
|
254
|
-
|
255
|
-
return list_of_subjects
|
256
|
-
end
|
257
|
-
|
258
|
-
def origin_info_place(node=mods)
|
259
|
-
places = []
|
260
|
-
node.xpath("./mods:originInfo/mods:place/mods:placeTerm", MODS_NS).collect do |n|
|
261
|
-
places << ScvModsFieldable.normalize(n.text, true)
|
262
|
-
end
|
263
|
-
return places
|
264
|
-
end
|
265
|
-
|
266
|
-
def origin_info_place_for_display(node=mods)
|
267
|
-
# If there are multiple origin_info place elements, choose only the ones without valueURI attributes. Otherwise show the others.
|
268
|
-
places_with_uri = []
|
269
|
-
places_without_uri = []
|
270
|
-
node.xpath("./mods:originInfo/mods:place/mods:placeTerm[@valueURI]", MODS_NS).collect do |n|
|
271
|
-
places_with_uri << ScvModsFieldable.normalize(n.text, true)
|
272
|
-
end
|
273
|
-
node.xpath("./mods:originInfo/mods:place/mods:placeTerm[not(@valueURI)]", MODS_NS).collect do |n|
|
274
|
-
places_without_uri << ScvModsFieldable.normalize(n.text, true)
|
275
|
-
end
|
276
|
-
|
277
|
-
return (places_without_uri.length > 0 ? places_without_uri : places_with_uri)
|
278
|
-
end
|
279
|
-
|
280
|
-
# Extracts "latitude,longitude" pairs from subject/cartographics/coordinates elements.
#
# Only whitespace is normalized before matching: the punctuation-stripping mode of
# ScvModsFieldable.normalize removes leading punctuation, and that includes the minus
# sign of a negative latitude. The stored value is the matched pair itself, so
# wrappers such as "(40.1,-73.5)" or surrounding words do not end up in the result.
#
# @param node [#xpath] XML node to search; defaults to the value returned by +mods+
# @return [Array<String>] the coordinate pairs found, in document order; elements
#   whose text contains no decimal pair are skipped
def coordinates(node=mods)
  # Expected coordinate format: 40.123456,-73.5678
  coordinate_pair = /-?\d+\.\d+\s*,\s*-?\d+\.\d+/
  coordinate_values = []
  node.xpath("./mods:subject/mods:cartographics/mods:coordinates", MODS_NS).each do |coordinates_node|
    pair = ScvModsFieldable.normalize(coordinates_node.text)[coordinate_pair]
    coordinate_values << pair if pair
  end
  coordinate_values
end
|
290
|
-
|
291
|
-
# Adds the MODS-derived fields of this object to a Solr document hash.
#
# The starting hash is the result of +super+ when a parent implementation exists,
# otherwise +solr_doc+ itself. When +mods+ is nil the hash is returned untouched.
# Besides copying title/name/subject/etc. values, this derives start and end years
# from the first originInfo date field already present in the hash, and finally
# passes every field the class declares via +maps_field?+ through +map_value+.
#
# @param solr_doc [Hash] document to extend (string keys)
# @return [Hash] the extended document
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  return solr_doc if mods.nil? # There is no mods. Return because there is nothing to process.

  solr_doc["all_text_teim"] ||= []

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["alternative_title_ssm"] = alternative_titles
  solr_doc["all_text_teim"] += solr_doc["alternative_title_ssm"]
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_teim"] = solr_doc["lib_name_sim"]
  solr_doc["all_text_teim"] += solr_doc["lib_name_teim"]
  solr_doc["lib_all_subjects_ssm"] = all_subjects
  solr_doc["lib_all_subjects_teim"] = solr_doc["lib_all_subjects_ssm"]
  solr_doc["all_text_teim"] += solr_doc["lib_all_subjects_teim"]
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_shelf_sim"] = shelf_locators
  solr_doc["lib_date_textual_ssm"] = textual_dates
  solr_doc["lib_date_notes_ssm"] = date_notes
  solr_doc["lib_non_date_notes_ssm"] = non_date_notes
  solr_doc["lib_item_in_context_url_ssm"] = item_in_context_url
  solr_doc["lib_project_url_ssm"] = project_url
  solr_doc["origin_info_place_ssm"] = origin_info_place
  solr_doc["origin_info_place_for_display_ssm"] = origin_info_place_for_display

  repo_marc_code = repository_code
  unless repo_marc_code.nil?
    solr_doc["lib_repo_short_ssim"] = [translate_repo_marc_code(repo_marc_code, 'short')]
    solr_doc["lib_repo_long_sim"] = [translate_repo_marc_code(repo_marc_code, 'long')]
    solr_doc["lib_repo_full_ssim"] = [translate_repo_marc_code(repo_marc_code, 'full')]
  end
  solr_doc["lib_repo_text_ssm"] = repository_text

  project_titles = projects
  unless project_titles.nil?
    solr_doc["lib_project_short_ssim"] = project_titles.map { |title| translate_project_title(title, 'short') }.uniq
    solr_doc["lib_project_full_ssim"] = project_titles.map { |title| translate_project_title(title, 'full') }.uniq
  end

  # Create convenient start and end date values based on one of the many possible originInfo/dateX elements.
  # The first key (in the order listed) that is present in the document wins.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']
  start_date_key = possible_start_date_fields.find { |key| solr_doc.key?(key) }
  end_date_key = possible_end_date_fields.find { |key| solr_doc.key?(key) }
  start_date = start_date_key && solr_doc[start_date_key][0]
  end_date = end_date_key && solr_doc[end_date_key][0]

  if start_date.present?

    end_date = start_date if end_date.blank?

    # A date contributes a year only when it begins with an (optionally negative) 1-4 digit number.
    year_regex = /^(-?\d{1,4}).*/
    start_year, end_year = [start_date, end_date].map do |date|
      year_match = date.match(year_regex)
      zero_pad_year(year_match.captures[0]) if year_match
    end

    # An end date without a leading year used to leave end_year nil, which made the
    # range concatenation below raise TypeError. Treat it like a blank end date and
    # fall back to the start year.
    end_year ||= start_year

    solr_doc["lib_start_date_year_itsi"] = start_year.to_i if start_year # TrieInt version for searches
    solr_doc["lib_end_date_year_itsi"] = end_year.to_i if end_year # TrieInt version for searches

    solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year

    # When no textual date is available, fall back to other date data (if available)
    if solr_doc["lib_date_textual_ssm"].blank?
      solr_doc["lib_date_textual_ssm"] = date_range_to_textual_date(start_year.to_i, end_year.to_i)
    end
  end

  # Geo data
  solr_doc["geo"] = coordinates

  # Only existing keys are reassigned here, so updating the hash while iterating is safe.
  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end
  solr_doc
end
|
401
|
-
|
402
|
-
# Left-pads a year with zeros to at least four digits, keeping a leading minus sign
# in front of the padding (e.g. "-5" becomes "-0005"). Years already four or more
# digits long come back unchanged.
#
# @param year [#to_s] year value, optionally prefixed with "-"
# @return [String] the padded year
def zero_pad_year(year)
  digits = year.to_s
  sign = ''
  if digits.start_with?('-')
    sign = '-'
    digits = digits[1..-1]
  end
  sign + digits.rjust(4, '0')
end
|
412
|
-
|
413
|
-
def self.normalize(t, strip_punctuation=false)
|
414
|
-
# strip whitespace
|
415
|
-
n_t = t.dup.strip
|
416
|
-
# collapse intermediate whitespace
|
417
|
-
n_t.gsub!(/\s+/, ' ')
|
418
|
-
# pull off paired punctuation, and any leading punctuation
|
419
|
-
if strip_punctuation
|
420
|
-
n_t = n_t.sub(/^\((.*)\)$/, "\\1")
|
421
|
-
n_t = n_t.sub(/^\{(.*)\}$/, "\\1")
|
422
|
-
n_t = n_t.sub(/^\[(.*)\]$/, "\\1")
|
423
|
-
n_t = n_t.sub(/^"(.*)"$/, "\\1")
|
424
|
-
n_t = n_t.sub(/^'(.*)'$/, "\\1")
|
425
|
-
n_t = n_t.sub(/^<(.*)>$/, "\\1")
|
426
|
-
#n_t = n_t.sub(/^\p{Ps}(.*)\p{Pe}/u, "\\1")
|
427
|
-
n_t = n_t.sub(/^[[:punct:]]+/, '')
|
428
|
-
# this may have 'created' leading/trailing space, so strip
|
429
|
-
n_t.strip!
|
430
|
-
end
|
431
|
-
n_t
|
4
|
+
extend Cul::Hydra::Solrizer::ModsFieldable::ClassMethods
|
5
|
+
included do
|
6
|
+
include Cul::Hydra::Solrizer::ModsFieldable
|
432
7
|
end
|
433
8
|
end
|
434
9
|
end
|