spotlight-oaipmh-resources 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +56 -0
  3. data/app/controllers/spotlight/resources/harvester_controller.rb +58 -0
  4. data/app/jobs/spotlight/resources/perform_harvests_job.rb +44 -0
  5. data/app/mailer/spotlight/harvesting_complete_mailer.rb +20 -0
  6. data/app/models/spotlight/resources/exceptions.rb +17 -0
  7. data/app/models/spotlight/resources/harvest_type.rb +7 -0
  8. data/app/models/spotlight/resources/harvester.rb +46 -0
  9. data/app/models/spotlight/resources/oaipmh_harvester.rb +41 -0
  10. data/app/models/spotlight/resources/oaipmh_mods_converter.rb +468 -0
  11. data/app/models/spotlight/resources/oaipmh_mods_item.rb +61 -0
  12. data/app/models/spotlight/resources/solr_converter.rb +180 -0
  13. data/app/models/spotlight/resources/solr_harvester.rb +42 -0
  14. data/app/models/spotlight/resources/solr_harvesting_item.rb +50 -0
  15. data/app/services/spotlight/resources/oaipmh_builder.rb +166 -0
  16. data/app/services/spotlight/resources/solr_harvesting_builder.rb +115 -0
  17. data/app/views/catalog/_show.html.erb +10 -0
  18. data/app/views/spotlight/harvesting_complete_mailer/harvest_failed.html.erb +6 -0
  19. data/app/views/spotlight/harvesting_complete_mailer/harvest_indexed.html.erb +13 -0
  20. data/app/views/spotlight/resources/harvester/_form.html.erb +36 -0
  21. data/config/default_solr_mapping.yml +20 -0
  22. data/config/locales/en.yml +32 -0
  23. data/config/mapping.yml +172 -0
  24. data/config/marc_mapping.yml +190 -0
  25. data/config/routes.rb +5 -0
  26. data/lib/generators/spotlight/oaipmh/resources/install_generator.rb +16 -0
  27. data/lib/spotlight/oaipmh/resources.rb +11 -0
  28. data/lib/spotlight/oaipmh/resources/engine.rb +23 -0
  29. data/lib/spotlight/oaipmh/resources/version.rb +8 -0
  30. metadata +253 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: '08cdf7cea5b6d5df0ff7731657f4b831b111f2e2cad7805ae50818c8985701c6'
4
+ data.tar.gz: 33420d3322230a3ef58a8505b4b863c626d78a50a12efa706446ae0f00bf70ef
5
+ SHA512:
6
+ metadata.gz: cf80c0ef442f69a2639b25bfa9cc5d8fc8664ebdf6fa919546ab336d1125bd87e604da159b259f51bfe20e93cab62c39f16bdb8bc290a9be709f94815524884d
7
+ data.tar.gz: a8a0ca1bc7420d0bef6ceb3db60a5201becee501c3310ecfb1a6da3adc2c258bc4265353b4f1f3f4b224b064f6c6cb3c1b018f0720f62b91e5fdf61a76648a9a
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
# Rake tasks for developing the engine: a `ci` task that runs the specs
# against a generated test application with a wrapped Solr instance, the Rails
# statistics tasks and the Bundler gem tasks.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'engine_cart/rake_task'

desc 'Run tests in generated test Rails app with generated Solr instance running'
task ci: %w[engine_cart:generate] do
  # Only needed while the ci task runs, so they are loaded here
  require 'solr_wrapper'
  require 'exhibits_solr_conf'

  ENV['environment'] = 'test'

  SolrWrapper.wrap(port: '8983') do |solr|
    # The specs run while the blacklight-core collection is available
    solr.with_collection(name: 'blacklight-core', dir: ExhibitsSolrConf.path) { Rake::Task['spec'].invoke }
  end
end

load 'rails/tasks/statistics.rake'

Bundler::GemHelper.install_tasks
56
+
@@ -0,0 +1,58 @@
1
+
2
module Spotlight::Resources
  ##
  # Receives the harvest form of an exhibit, stores an optional custom mapping
  # file and queues the harvest as a background job.
  class HarvesterController < Spotlight::ApplicationController
    load_and_authorize_resource :exhibit, class: Spotlight::Exhibit

    # POST /oaipmh_harvester
    #
    # Saves the uploaded mapping file (when one was sent), queues a
    # PerformHarvestsJob with the submitted parameters and redirects to the
    # exhibit's admin catalog.
    def create
      upload if custom_mapping?

      Spotlight::Resources::PerformHarvestsJob.perform_later(
        resource_params[:type],
        resource_params[:url],
        resource_params[:set],
        selected_mapping_file,
        current_exhibit,
        current_user,
        new_job_log_entry
      )
      flash[:notice] = t('spotlight.resources.harvester.performharvest.success', set: resource_params[:set])
      redirect_to spotlight.admin_exhibit_catalog_path(current_exhibit, sort: :timestamp)
    end

    private

    # Writes the uploaded mapping file below public/uploads, in the directory
    # that belongs to the selected harvest type.
    def upload
      Dir.mkdir('public/uploads') unless Dir.exist?('public/uploads')
      dir = solr_harvest? ? 'public/uploads/solrmapping' : 'public/uploads/modsmapping'
      Dir.mkdir(dir) unless Dir.exist?(dir)

      # Binary mode: the upload is stored byte for byte, without any transcoding
      File.open(File.join(dir, custom_mapping_name), 'wb') do |file|
        file.write(resource_params[:custom_mapping].read)
      end
    end

    # Name of the mapping file handed to the job: the uploaded file wins,
    # otherwise the selection that matches the harvest type is used.
    def selected_mapping_file
      return custom_mapping_name if custom_mapping?

      solr_harvest? ? resource_params[:solr_mapping_file] : resource_params[:mapping_file]
    end

    # File name of the upload without any directory part. The name comes from
    # the client, so directories (with either kind of slash) are stripped to
    # keep a name such as "../../config/x.yml" inside the mapping directory.
    def custom_mapping_name
      File.basename(resource_params[:custom_mapping].original_filename.to_s.tr('\\', '/'))
    end

    def custom_mapping?
      resource_params.has_key?(:custom_mapping)
    end

    def solr_harvest?
      resource_params[:type] == Spotlight::Resources::HarvestType::SOLR
    end

    def resource_params
      params.require(:resources_harvester).permit(:type, :url, :set, :mapping_file, :solr_mapping_file, :custom_mapping)
    end

    # Set the job status so users can view it
    def new_job_log_entry
      Spotlight::JobLogEntry.create(exhibit: current_exhibit, user: current_user, job_item_count: 0, job_status: 'unstarted', job_type: 'Harvesting')
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'oai'
2
+ require 'net/http'
3
+ require 'uri'
4
+ require_relative '../../../mailer/spotlight/harvesting_complete_mailer'
5
+ include Spotlight::Resources::Exceptions
6
+ # encoding: utf-8
7
module Spotlight::Resources
  ##
  # Background job for one harvest run: it creates a Harvester resource from
  # the job arguments and asks it to save and index itself.
  class PerformHarvestsJob < ActiveJob::Base
    queue_as :default

    # Mark the job log entry, when the job was given one, as in progress
    before_perform do |job|
      entry = log_entry(job)
      entry && entry.in_progress!
    end

    # Creates the Harvester with the given settings and indexes it.
    #
    # cursor, count and failed_items are stored in the harvester data next to
    # the other arguments.
    #
    # Raises HarvestingFailedException when save_and_index returns a falsy value.
    def perform(harvest_type, url, set, mapping_file, exhibit, _user, job_entry, cursor = nil, count = 0, failed_items = nil)
      harvest_data = {
        base_url: url,
        set: set,
        mapping_file: mapping_file,
        job_entry: job_entry,
        type: harvest_type,
        user: _user,
        cursor: cursor,
        count: count,
        failed_items: failed_items
      }
      harvester = Spotlight::Resources::Harvester.create(url: url, data: harvest_data, exhibit: exhibit)

      raise HarvestingFailedException unless harvester.save_and_index
    end

    private

    # The seventh job argument, but only when it is a Spotlight::JobLogEntry
    def log_entry(job)
      candidate = job.arguments[6]
      candidate if candidate.is_a?(Spotlight::JobLogEntry)
    end
  end
end
@@ -0,0 +1,20 @@
1
module Spotlight
  ##
  # Notifies a user by email that a harvest has been indexed or has failed
  class HarvestingCompleteMailer < ActionMailer::Base
    # Mail sent after a harvest has been indexed.
    #
    # set          - name of the harvested set, used in the subject (may be nil)
    # exhibit      - exposed to the template as @exhibit
    # user         - recipient; only its email is read
    # failed_items - exposed to the template as @failed_items
    def harvest_indexed(set, exhibit, user, failed_items)
      @set = set
      @exhibit = exhibit
      @failed_items = failed_items
      # Interpolation instead of String#+ so that a nil set cannot raise TypeError
      mail(to: user.email, from: 'oaiharvester@noreply.com', subject: "Harvest indexing complete for #{set}")
    end

    # Mail sent when a harvest failed.
    #
    # set     - name of the harvested set, used in the subject (may be nil)
    # exhibit - exposed to the template as @exhibit
    # user    - recipient; only its email is read
    # message - exposed to the template as @message
    def harvest_failed(set, exhibit, user, message)
      @set = set
      @exhibit = exhibit
      @message = message
      # Interpolation instead of String#+ so that a nil set cannot raise TypeError
      mail(to: user.email, from: 'oaiharvester@noreply.com', subject: "The harvest failed for #{set}")
    end
  end
end
@@ -0,0 +1,17 @@
1
module Spotlight
  module Resources
    ##
    # Error classes used by the harvesting code; all of them are plain
    # StandardError subclasses without extra behaviour.
    module Exceptions
      InvalidModsRecord = Class.new(StandardError)

      InvalidMappingFile = Class.new(StandardError)

      ModsPathDoesNotExist = Class.new(StandardError)

      HarvestingFailedException = Class.new(StandardError)
    end
  end
end
@@ -0,0 +1,7 @@
1
module Spotlight
  module Resources
    ##
    # Names of the supported harvest types. The values are frozen so that the
    # shared constants cannot be modified by accident.
    class HarvestType
      MODS = 'MODS'.freeze
      SOLR = 'Solr'.freeze
      # Every supported type
      HARVEST_TYPES = [MODS, SOLR].freeze
    end
  end
end
@@ -0,0 +1,46 @@
1
module Spotlight::Resources
  ##
  # Resource that harvests either through OAI-PMH or from Solr, depending on
  # the :type entry of its data.
  class Harvester < Spotlight::Resource
    attr_accessor :set, :base_url, :mapping_file, :solr_mapping_file, :user

    # Asks the type specific harvester for its harvests
    def harvests
      get_harvester.get_harvests
    end

    # Override the document builder since the builder has to be determined
    # after instantiation. A new builder is created on every call.
    def document_builder
      @document_builder =
        if data[:type] == Spotlight::Resources::HarvestType::SOLR
          Spotlight::Resources::SolrHarvestingBuilder.new(self)
        else
          Spotlight::Resources::OaipmhBuilder.new(self)
        end
    end

    # The harvester will know what type of token to expect
    def paginate(token)
      get_harvester.paginate(token)
    end

    # The job entry stored in the resource data
    def get_job_entry
      data[:job_entry]
    end

    private

    # Builds the type specific harvester on first use (selecting the matching
    # document builder class at the same time) and returns the cached instance
    # afterwards.
    def get_harvester
      return @harvester unless @harvester.nil?

      if data[:type] == Spotlight::Resources::HarvestType::SOLR
        self.document_builder_class = Spotlight::Resources::SolrHarvestingBuilder
        @harvester = SolrHarvester.new(data[:base_url], data[:set])
      else
        self.document_builder_class = Spotlight::Resources::OaipmhBuilder
        @harvester = OaipmhHarvester.new(data[:base_url], data[:set])
      end
      @harvester
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'oai'
2
+ require 'net/http'
3
+ require 'uri'
4
+
5
module Spotlight
  module Resources
    ##
    # Fetches MODS records of one set from an OAI-PMH endpoint
    class OaipmhHarvester
      # base_url - URL of the OAI-PMH endpoint
      # set      - name of the set to harvest (may be nil)
      def initialize(base_url, set)
        # Interpolation so that a nil set does not raise while building the URL
        @url = "#{base_url}?verb=ListRecords&metadataPrefix=mods&set=#{set}"
        @base_url = base_url
        @set = set
      end

      # Requests the first batch of MODS records of the set
      def get_harvests
        @client = OAI::Client.new @base_url
        @oaipmh_harvests = @client.list_records set: @set, metadata_prefix: 'mods'
      end

      # Requests the batch that belongs to the given resumption token; a client
      # is created first when get_harvests has not been called on this object.
      def paginate(token)
        @client ||= OAI::Client.new(@base_url)
        @oaipmh_harvests = @client.list_records resumption_token: token
      end

      # Choices for the mapping file selection: the two fixed entries followed
      # by the names found in public/uploads/modsmapping (sorted, so that the
      # order does not depend on the file system).
      def self.mapping_files
        dir = 'public/uploads/modsmapping'
        uploaded = Dir.exist?(dir) ? (Dir.entries(dir) - %w[. ..]).sort : []

        ['Default Mapping File', 'New Mapping File'] + uploaded
      end
    end
  end
end
@@ -0,0 +1,468 @@
1
+ include Spotlight::Resources::Exceptions
2
+ module Spotlight::Resources
3
+
4
# One XPath expression of a mapping entry, with an optional namespace
class XPathEntry
  # the XPath expression itself
  attr_accessor :xpath_string
  # namespace prefix used inside the expression
  attr_accessor :xpath_ns_prefix
  # namespace definition that belongs to the prefix
  attr_accessor :xpath_ns_def
end
7
# A "/" separated MODS path with optional subpaths
class ModsPath
  # the path itself
  attr_accessor :path
  # list of "/" separated subpaths below the path
  attr_accessor :subpaths
  # string used to join the values found for the subpaths
  attr_accessor :delimiter
end
10
# One mods entry of a mapping: the path to read plus the optional attribute
# and conditional restrictions on it
class ModsItem
  # ModsPath to read the values from
  attr_accessor :mods_path
  # name of the attribute a node is checked against
  attr_accessor :mods_attribute
  # value the attribute is compared with
  attr_accessor :mods_attribute_value
  # value the conditional path is compared with
  attr_accessor :conditional_mods_value
  # path whose text decides whether a value is accepted
  attr_accessor :conditional_mods_path
end
13
##
# One entry of the mapping file. It extracts the values for a single Spotlight
# field from a MODS record, either with XPath expressions (xpath_items) or
# with "/" separated MODS paths (mods_items).
class ConverterItem
  attr_accessor :spotlight_field, :mods_items, :default_value, :delimiter, :xpath_items, :multivalue_facets

  # The mods gem has special names for certain reserved words/paths
  RESERVED_WORDS = { 'name' => 'name_el', 'description' => 'description_el', 'type' => 'type_at' }.freeze

  # Paths that are used as written instead of being converted to snake_case
  TOP_LEVEL_ELEMENTS_SIMPLE = %w[
    abstract
    accessCondition
    classification
    extension
    genre
    identifier
    note
    tableOfContents
    targetAudience
    typeOfResource
  ].freeze

  # Values are joined with ", " unless another delimiter is assigned later.
  def initialize
    # Must be the instance variable: a bare `delimiter = ...` would only create
    # a local variable and leave the attribute nil.
    @delimiter = ', '
  end

  # Collects the values of all configured xpath and mods entries.
  #
  # modsrecord - object whose mods_ng_xml is searched
  #
  # Returns nil when nothing was found, the array of unique values when
  # multivalue_facets is set, and otherwise the unique values joined with the
  # delimiter.
  def extract_all_values(modsrecord)
    values = extract_xpath_values(modsrecord).concat(extract_mods_values(modsrecord)).uniq
    return nil if values.empty?

    # Keeping the array allows faceting on each value
    multivalue_facets ? values : values.join(delimiter)
  end

  private

  # Text of every node matched by the xpath entries; an entry without a match
  # contributes the default value (when there is one).
  def extract_xpath_values(modsrecord)
    values = []
    return values if xpath_items.nil?

    xpath_items.each do |item|
      node = modsrecord.mods_ng_xml
      matches =
        if item.xpath_ns_def.nil?
          node.xpath(item.xpath_string)
        else
          node.xpath(item.xpath_string, { item.xpath_ns_prefix => item.xpath_ns_def })
        end

      if matches.empty?
        values << default_value unless default_value.blank?
      else
        matches.each { |match| values << match.text }
      end
    end
    values
  end

  # Values found for the mods entries; an entry without a value contributes
  # the default value (when there is one). An entry whose path cannot be
  # followed is reported on stdout and skipped.
  def extract_mods_values(modsrecord)
    values = []
    return values if mods_items.nil?

    mods_items.each do |item|
      begin
        found = parse_paths(item, modsrecord.mods_ng_xml)
        if found.empty?
          values << default_value unless default_value.blank?
        else
          values.concat(found)
        end
      rescue NoMethodError => e
        # Best effort: a path that does not exist must not abort the record
        puts e.message
        puts e.backtrace
        puts "The path #{item.mods_path.path} does not exist\n"
      end
    end
    values
  end

  # Creates the proper path and subpath names to use since some words may be
  # reserved. It then uses these paths to search for the values below
  # parentnode.
  #
  # Without subpaths the text of every accepted node is returned; with
  # subpaths each accepted node yields one string made of its subpath values
  # joined with the path's delimiter.
  def parse_paths(item, parentnode)
    mods_path = item.mods_path
    names = method_names(mods_path.path, !TOP_LEVEL_ELEMENTS_SIMPLE.include?(mods_path.path))

    subpaths = []
    sub_delimiter = nil
    unless mods_path.subpaths.blank?
      sub_delimiter = mods_path.delimiter
      subpaths = mods_path.subpaths.map { |subpath| method_names(subpath, false) }
    end

    # eg: subject
    node = walk(parentnode, names)

    values = []
    if subpaths.empty?
      node.each do |subnode|
        next if subnode.text.blank?
        next unless check_attributes(subnode, item) && check_conditional_path(subnode, item, parentnode)

        values << subnode.text
      end
    else
      # subnodes when paths are stored in subpaths in the mapping file
      node.each do |subnode|
        next unless check_attributes(subnode, item)

        found = find_node_value(subnode, subpaths, [], 0)
        next if found.empty?
        next unless check_conditional_subpath(subnode, item, parentnode)

        values << found.join(sub_delimiter)
      end
    end
    values
  end

  # Walks the children of nodeset in document order and returns the text of
  # every element whose path (relative to nodeset) is one of the subpaths. It
  # is done this way to preserve the mods order of the subpath values.
  #
  # parentpathname is the path walked so far and is not modified. popcount is
  # only kept for compatibility with the previous signature.
  def find_node_value(nodeset, subpaths, parentpathname, popcount)
    values = []
    nodeset.children.each do |node|
      nodename = RESERVED_WORDS.fetch(node.name, node.name)
      next if nodename.eql?('text')

      # A fresh array per node: siblings and recursive calls never see each
      # other's path segments.
      pathname = parentpathname + [nodename]
      if subpaths.include?(pathname)
        values << node.text unless node.text.blank?
      elsif node.children.any? { |child| !child.name.eql?('text') }
        values += find_node_value(node, subpaths, pathname, popcount + 1)
      end
    end
    values
  end

  # Make sure that the attribute value matches (if supplied).
  #
  # An attribute name starting with "!" accepts nodes on which that attribute
  # is blank; an attribute value starting with "!" accepts nodes whose
  # attribute differs from the value.
  def check_attributes(node, item)
    attribute = item.mods_attribute
    return true if attribute.blank?
    return node[attribute.delete('!')].blank? if attribute[0].eql?('!')

    expected = item.mods_attribute_value
    actual = node[attribute]
    negated_match = !expected.blank? && expected[0].eql?('!') && !actual.eql?(expected.delete('!'))
    negated_match || (!actual.nil? && actual.eql?(expected))
  end

  # Make sure the conditional path value matches (if supplied); the
  # conditional path is followed from parentnode.
  def check_conditional_path(node, item, parentnode)
    conditional_value_accepted?(parentnode, item)
  end

  # Make sure the conditional path value matches (if supplied); the
  # conditional path is followed from node.
  def check_conditional_subpath(node, item, parentnode)
    conditional_value_accepted?(node, item)
  end

  # True when the item has no conditional value, or when the text found at the
  # conditional path (followed from start) equals the conditional value. A
  # conditional value starting with "!" is also accepted when the text differs
  # from the rest of the value.
  def conditional_value_accepted?(start, item)
    expected = item.conditional_mods_value
    return true if expected.blank?

    actual = walk(start, method_names(item.conditional_mods_path, true)).text
    (expected[0].eql?('!') && !actual.eql?(expected.delete('!'))) || actual.eql?(expected)
  end

  # Splits a "/" separated path into the method names to call: the first
  # segment is converted from camelCase to snake_case when requested and
  # reserved words are replaced by their special names.
  def method_names(path, snake_case_first)
    names = path.split('/')
    names[0] = names[0].split(/(?<!^)(?=[A-Z])/).join('_').downcase if snake_case_first
    names.map { |name| RESERVED_WORDS.fetch(name, name) }
  end

  # Follows the method names one after the other, starting at node. The names
  # come from the mapping file, so public_send is used: private methods (for
  # example Kernel#exit) raise NoMethodError instead of being executed.
  def walk(node, names)
    names.inject(node) { |current, name| current.public_send(name) }
  end
end
297
+
298
##
# Converts MODS records into a hash of Spotlight/Solr field values, driven by
# a YAML mapping file.
class OaipmhModsConverter
  # The mods gem has special names for certain reserved words/paths
  RESERVED_PATHS = {
    'name/namePart' => 'plain_name/namePart',
    'name/role/roleTerm' => 'plain_name/role/roleTerm'
  }.freeze

  # Fields that are used as they are; every other field gets the exhibit prefix
  STANDARD_SPOTLIGHT_FIELDS = %w[
    unique-id_tesim
    full_title_tesim
    spotlight_upload_description_tesim
    thumbnail_url_ssm
    full_image_url_ssm
    spotlight_upload_date_tesim
    spotlight_upload_attribution_tesim
  ].freeze

  attr_accessor :sidecar_hash

  # Initialize with the name of the set being converted.
  #
  # set          - name of the set
  # exhibitslug  - slug of the exhibit, used to prefix non-standard fields
  # mapping_file - name of an uploaded mapping file, or nil for the default
  def initialize(set, exhibitslug, mapping_file)
    @set = set
    @exhibitslug = exhibitslug
    @mapping_file = mapping_file
    @converter_items = []
    @sidecar_hash = {}
  end

  # Expects a Mods::Record parameter value.
  #
  # The mapping file is parsed on the first call. Returns a hash of Solr field
  # name => value and stores the same values under the plain field names in
  # sidecar_hash.
  def convert(modsrecord)
    parse_mapping_file(mapping_file) if @converter_items.empty?

    solr_hash = {}
    @converter_items.each do |item|
      value = item.extract_all_values(modsrecord)

      # Always assign, even when nothing was found: sidecar_hash survives
      # between records, so a missing value must overwrite the previous one.
      solr_hash[get_spotlight_field_name(item.spotlight_field)] = value
      @sidecar_hash[item.spotlight_field] = value
    end
    solr_hash
  end

  # Some spotlight fields use the exhibit slug, others do not
  def get_spotlight_field_name(spotlight_field)
    return spotlight_field if STANDARD_SPOTLIGHT_FIELDS.include?(spotlight_field)

    "exhibit_#{@exhibitslug}_#{spotlight_field}"
  end

  # Path of the mapping file: the engine's config/mapping.yml when no file was
  # given, otherwise the given name below public/uploads/modsmapping of the
  # application. The resolved path replaces the stored name.
  def mapping_file
    @mapping_file =
      if @mapping_file.nil?
        File.join(Spotlight::Oaipmh::Resources::Engine.root, 'config', 'mapping.yml')
      else
        Rails.root.join('public/uploads/modsmapping', @mapping_file)
      end
  end

  # Parses the mapping file into ConverterItem objects, appends them to the
  # converter's items and returns all items.
  #
  # Raises InvalidMappingFile when an entry is incomplete.
  def parse_mapping_file(file)
    # NOTE(review): mapping files can be uploaded through the harvest form, and
    # on older Psych versions YAML.load_file can instantiate arbitrary objects.
    # Consider YAML.safe_load(File.read(file)) once the supported Ruby versions
    # are confirmed.
    YAML.load_file(file).each do |field|
      @converter_items << build_converter_item(field)
    end
    @converter_items
  end

  private

  # Builds the ConverterItem for one entry of the mapping file
  def build_converter_item(field)
    if !field.key?('spotlight-field') || field['spotlight-field'].blank?
      raise InvalidMappingFile, 'spotlight-field is required for each entry'
    end

    item = ConverterItem.new
    item.spotlight_field = field['spotlight-field']
    item.delimiter = field['delimiter'] if field.key?('delimiter')
    item.default_value = field['default-value'] if field.key?('default-value')
    item.multivalue_facets = field['multivalue-breaks'] if field.key?('multivalue-breaks')

    # must have a mods or xpath
    if !field.key?('mods') && (!field.key?('xpath') || field['xpath'].blank?)
      raise InvalidMappingFile, 'mods or xpath is required for each entry'
    end

    # Can only have mods OR xpath
    # NOTE(review): Hash#key looks up a key by *value*, so this check does not
    # test for the 'xpath' key and in practice never raises. It is left
    # unchanged because entries with both keys are currently processed (both
    # value lists are read); decide whether they should really be rejected
    # before switching to key?.
    if field.key?('mods') && field.key('xpath')
      raise InvalidMappingFile, 'Use either mods OR xpath for each entry but not both'
    end

    item.xpath_items = build_xpath_items(field) if field.key?('xpath')
    item.mods_items = build_mods_items(field) if field.key?('mods')
    item
  end

  # Builds the XPathEntry list for the xpath part of a mapping entry
  def build_xpath_items(field)
    field['xpath'].map do |xpath_field|
      if !xpath_field.key?('xpath-value') || xpath_field['xpath-value'].blank?
        raise InvalidMappingFile, 'xpath_value is required for each xpath entry'
      end

      entry = XPathEntry.new
      entry.xpath_string = xpath_field['xpath-value']
      # The namespace is only used when both the prefix and the definition are given
      if xpath_field.key?('xpath-namespace-prefix') && xpath_field.key?('xpath-namespace-def')
        entry.xpath_ns_def = xpath_field['xpath-namespace-def']
        entry.xpath_ns_prefix = xpath_field['xpath-namespace-prefix']
      end
      entry
    end
  end

  # Builds the ModsItem list for the mods part of a mapping entry
  def build_mods_items(field)
    field['mods'].map { |mods_field| build_mods_item(field, mods_field) }
  end

  # Builds one ModsItem; field is the surrounding mapping entry and is only
  # used for error messages.
  def build_mods_item(field, mods_field)
    if !mods_field.key?('path') || mods_field['path'].blank?
      raise InvalidMappingFile, 'path is required for each mods entry'
    end

    modsitem = ModsItem.new
    modsitem.mods_path = ModsPath.new
    # The mods gem has special names for certain reserved words/paths
    modsitem.mods_path.path = RESERVED_PATHS.fetch(mods_field['path'], mods_field['path'])
    if mods_field.key?('subpaths')
      modsitem.mods_path.subpaths = mods_field['subpaths'].map { |subpath| subpath['subpath'] }
    end
    modsitem.mods_path.delimiter = mods_field['delimiter'] if mods_field.key?('delimiter')
    modsitem.conditional_mods_value = mods_field['mods-value']

    if mods_field.key?('attribute')
      unless mods_field.key?('attribute-value')
        raise InvalidMappingFile, "#{field['spotlight-field']} - #{mods_field['path']}: attribute-value is required if attribute is present"
      end
      modsitem.mods_attribute = mods_field['attribute']
      modsitem.mods_attribute_value = mods_field['attribute-value']
    end

    if mods_field.key?('mods-path')
      unless mods_field.key?('mods-value')
        raise InvalidMappingFile, "#{field['spotlight-field']} - #{mods_field['path']}: mods-value is required if mods-path is present"
      end
      modsitem.conditional_mods_path = RESERVED_PATHS.fetch(mods_field['mods-path'], mods_field['mods-path'])
      modsitem.conditional_mods_value = mods_field['mods-value']
    end

    modsitem
  end
end
468
+ end