spotlight-oaipmh-resources 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Rakefile +56 -0
- data/app/controllers/spotlight/resources/harvester_controller.rb +58 -0
- data/app/jobs/spotlight/resources/perform_harvests_job.rb +44 -0
- data/app/mailer/spotlight/harvesting_complete_mailer.rb +20 -0
- data/app/models/spotlight/resources/exceptions.rb +17 -0
- data/app/models/spotlight/resources/harvest_type.rb +7 -0
- data/app/models/spotlight/resources/harvester.rb +46 -0
- data/app/models/spotlight/resources/oaipmh_harvester.rb +41 -0
- data/app/models/spotlight/resources/oaipmh_mods_converter.rb +468 -0
- data/app/models/spotlight/resources/oaipmh_mods_item.rb +61 -0
- data/app/models/spotlight/resources/solr_converter.rb +180 -0
- data/app/models/spotlight/resources/solr_harvester.rb +42 -0
- data/app/models/spotlight/resources/solr_harvesting_item.rb +50 -0
- data/app/services/spotlight/resources/oaipmh_builder.rb +166 -0
- data/app/services/spotlight/resources/solr_harvesting_builder.rb +115 -0
- data/app/views/catalog/_show.html.erb +10 -0
- data/app/views/spotlight/harvesting_complete_mailer/harvest_failed.html.erb +6 -0
- data/app/views/spotlight/harvesting_complete_mailer/harvest_indexed.html.erb +13 -0
- data/app/views/spotlight/resources/harvester/_form.html.erb +36 -0
- data/config/default_solr_mapping.yml +20 -0
- data/config/locales/en.yml +32 -0
- data/config/mapping.yml +172 -0
- data/config/marc_mapping.yml +190 -0
- data/config/routes.rb +5 -0
- data/lib/generators/spotlight/oaipmh/resources/install_generator.rb +16 -0
- data/lib/spotlight/oaipmh/resources.rb +11 -0
- data/lib/spotlight/oaipmh/resources/engine.rb +23 -0
- data/lib/spotlight/oaipmh/resources/version.rb +8 -0
- metadata +253 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: '08cdf7cea5b6d5df0ff7731657f4b831b111f2e2cad7805ae50818c8985701c6'
|
4
|
+
data.tar.gz: 33420d3322230a3ef58a8505b4b863c626d78a50a12efa706446ae0f00bf70ef
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cf80c0ef442f69a2639b25bfa9cc5d8fc8664ebdf6fa919546ab336d1125bd87e604da159b259f51bfe20e93cab62c39f16bdb8bc290a9be709f94815524884d
|
7
|
+
data.tar.gz: a8a0ca1bc7420d0bef6ceb3db60a5201becee501c3310ecfb1a6da3adc2c258bc4265353b4f1f3f4b224b064f6c6cb3c1b018f0720f62b91e5fdf61a76648a9a
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Rake entry point for the engine: wires up Bundler, EngineCart and the
# Solr-backed `ci` task, then loads the Rails statistics and gem tasks.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'engine_cart/rake_task'

desc 'Run tests in generated test Rails app with generated Solr instance running'
task ci: %w[engine_cart:generate] do
  # Loaded lazily so the other rake tasks do not need the Solr tooling.
  require 'solr_wrapper'
  require 'exhibits_solr_conf'

  ENV['environment'] = 'test'

  SolrWrapper.wrap(port: '8983') do |solr|
    collection = { name: 'blacklight-core', dir: ExhibitsSolrConf.path }
    # Run the specs while the wrapped Solr collection exists.
    solr.with_collection(collection) { Rake::Task['spec'].invoke }
  end
end

# A Jettywrapper-based variant of the `ci` task and an RDoc task used to be
# kept here as commented-out code; see version control if they are needed.

load 'rails/tasks/statistics.rake'

Bundler::GemHelper.install_tasks
|
56
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
|
2
|
+
module Spotlight::Resources
  # Receives the harvester form for an exhibit, stores an optional custom
  # mapping file on disk and queues the harvest as a background job.
  class HarvesterController < Spotlight::ApplicationController
    load_and_authorize_resource :exhibit, class: Spotlight::Exhibit

    # POST /oaipmh_harvester
    #
    # Queues a PerformHarvestsJob for the submitted type/url/set and redirects
    # to the exhibit's admin catalog.
    def create
      # Read the permitted params once; every call to resource_params builds a
      # fresh object, so mutating one copy never affects the next.
      harvest_params = resource_params
      mapping_file = mapping_file_for(harvest_params)

      Spotlight::Resources::PerformHarvestsJob.perform_later(harvest_params[:type], harvest_params[:url], harvest_params[:set], mapping_file, current_exhibit, current_user, new_job_log_entry)
      flash[:notice] = t('spotlight.resources.harvester.performharvest.success', set: harvest_params[:set])
      redirect_to spotlight.admin_exhibit_catalog_path(current_exhibit, sort: :timestamp)
    end

    private

    # Chooses the mapping file name handed to the job: an uploaded custom
    # mapping wins, otherwise the Solr or MODS selection from the form.
    def mapping_file_for(harvest_params)
      return upload(harvest_params) if harvest_params.key?(:custom_mapping)
      return harvest_params[:solr_mapping_file] if solr_harvest?(harvest_params)

      harvest_params[:mapping_file]
    end

    def solr_harvest?(harvest_params)
      harvest_params[:type] == Spotlight::Resources::HarvestType::SOLR
    end

    # Writes the uploaded mapping file below public/uploads and returns the
    # file name it was stored under.
    def upload(harvest_params)
      custom_mapping = harvest_params[:custom_mapping]
      # original_filename is supplied by the client; keep only its last path
      # component so the write cannot escape the mapping directory.
      name = File.basename(custom_mapping.original_filename)

      Dir.mkdir("public/uploads") unless Dir.exist?("public/uploads")
      dir = solr_harvest?(harvest_params) ? "public/uploads/solrmapping" : "public/uploads/modsmapping"
      Dir.mkdir(dir) unless Dir.exist?(dir)

      # Binary mode copies the upload's bytes verbatim instead of transcoding.
      File.open(File.join(dir, name), "wb") { |f| f.write(custom_mapping.read) }
      name
    end

    def resource_params
      params.require(:resources_harvester).permit(:type, :url, :set, :mapping_file, :solr_mapping_file, :custom_mapping)
    end

    # Set the job status so users can view it
    def new_job_log_entry
      Spotlight::JobLogEntry.create(exhibit: current_exhibit, user: current_user, job_item_count: 0, job_status: 'unstarted', job_type: 'Harvesting')
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'oai'
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
require_relative '../../../mailer/spotlight/harvesting_complete_mailer'
|
5
|
+
include Spotlight::Resources::Exceptions
|
6
|
+
# encoding: utf-8
|
7
|
+
module Spotlight::Resources
  ##
  # Background job that runs one harvest: it creates a
  # Spotlight::Resources::Harvester for the given source and indexes it.
  class PerformHarvestsJob < ActiveJob::Base
    queue_as :default

    # Mark the job's log entry (when one was passed) as in progress.
    before_perform do |job|
      entry = log_entry(job)
      entry.in_progress! if entry
    end

    # Creates the harvester resource and indexes it.
    #
    # The positional arguments are stored in the harvester's `data` hash under
    # the keys shown below; `cursor`, `count` and `failed_items` default to a
    # fresh harvest.
    #
    # Raises HarvestingFailedException when `save_and_index` returns a falsy
    # value.
    def perform(harvest_type, url, set, mapping_file, exhibit, _user, job_entry, cursor = nil, count = 0, failed_items = nil)
      harvest_data = {
        base_url: url,
        set: set,
        mapping_file: mapping_file,
        job_entry: job_entry,
        type: harvest_type,
        user: _user,
        cursor: cursor,
        count: count,
        failed_items: failed_items
      }
      harvester = Spotlight::Resources::Harvester.create(url: url, data: harvest_data, exhibit: exhibit)

      # Fully qualified so the lookup does not depend on the Exceptions module
      # having been mixed into Object elsewhere.
      raise Spotlight::Resources::Exceptions::HarvestingFailedException unless harvester.save_and_index
    end

    private

    # The job log entry is the seventh positional argument of #perform.
    def log_entry(job)
      candidate = job.arguments[6]
      candidate if candidate.is_a?(Spotlight::JobLogEntry)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Spotlight
  ##
  # Notifies the given user by email that a harvest was indexed or failed.
  class HarvestingCompleteMailer < ActionMailer::Base
    default from: 'oaiharvester@noreply.com'

    # Mail sent after a harvest was indexed; the view receives @set, @exhibit
    # and @failed_items.
    def harvest_indexed(set, exhibit, user, failed_items)
      @set = set
      @exhibit = exhibit
      @failed_items = failed_items
      # Interpolation keeps the mail deliverable when no set name was given.
      mail(to: user.email, subject: "Harvest indexing complete for #{set}")
    end

    # Mail sent when a harvest failed; the view receives @set, @exhibit and
    # @message.
    def harvest_failed(set, exhibit, user, message)
      @set = set
      @exhibit = exhibit
      @message = message
      mail(to: user.email, subject: "The harvest failed for #{set}")
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Spotlight
  module Resources
    # Error classes shared by the harvesting code.
    module Exceptions
      # NOTE(review): presumably for MODS records that cannot be processed; no
      # raise site is visible in this file.
      class InvalidModsRecord < StandardError; end

      # Raised while parsing a mapping file that breaks the mapping rules.
      class InvalidMappingFile < StandardError; end

      # NOTE(review): presumably for mapping paths missing from a record; no
      # raise site is visible in this file.
      class ModsPathDoesNotExist < StandardError; end

      # Raised by the harvest job when the harvester cannot be saved/indexed.
      class HarvestingFailedException < StandardError; end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Spotlight::Resources
  # Spotlight resource for one harvest request. The harvest type stored in
  # `data[:type]` decides which harvester and document builder are used.
  class Harvester < Spotlight::Resource
    attr_accessor :set, :base_url, :mapping_file, :solr_mapping_file, :user

    # First batch of harvested records from the underlying harvester.
    def harvests
      get_harvester.get_harvests
    end

    # Overrides the document builder because the right builder is only known
    # after instantiation. A new builder is created on every call.
    def document_builder
      builder_class =
        if solr_harvest?
          Spotlight::Resources::SolrHarvestingBuilder
        else
          Spotlight::Resources::OaipmhBuilder
        end
      @document_builder = builder_class.new(self)
    end

    # Fetches the next batch; the harvester knows what kind of token to expect.
    def paginate(token)
      get_harvester.paginate(token)
    end

    def get_job_entry
      data[:job_entry]
    end

    private

    def solr_harvest?
      data[:type] == Spotlight::Resources::HarvestType::SOLR
    end

    # Lazily builds the harvester for this resource and records the matching
    # document builder class at the same time.
    def get_harvester
      return @harvester unless @harvester.nil?

      if solr_harvest?
        self.document_builder_class = Spotlight::Resources::SolrHarvestingBuilder
        @harvester = SolrHarvester.new(data[:base_url], data[:set])
      else
        self.document_builder_class = Spotlight::Resources::OaipmhBuilder
        @harvester = OaipmhHarvester.new(data[:base_url], data[:set])
      end
      @harvester
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'oai'
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
module Spotlight::Resources
  # Thin wrapper around OAI::Client that lists MODS records for one set of an
  # OAI-PMH endpoint.
  class OaipmhHarvester
    def initialize(base_url, set)
      @base_url = base_url
      @set = set
      @url = base_url + '?verb=ListRecords&metadataPrefix=mods&set=' + set
    end

    # Requests the first page of records for the configured set.
    def get_harvests
      @client = OAI::Client.new(@base_url)
      @oaipmh_harvests = @client.list_records(:set => @set, :metadata_prefix => 'mods')
    end

    # Requests the page identified by the given resumption token, creating the
    # client first when no request has been made yet.
    def paginate(token)
      @client ||= OAI::Client.new(@base_url)
      @oaipmh_harvests = @client.list_records(:resumption_token => token)
    end

    # Choices for the mapping-file selector: the two fixed entries followed by
    # every file found in public/uploads/modsmapping.
    def self.mapping_files
      uploaded =
        if Dir.exist?('public/uploads/modsmapping')
          Dir.entries('public/uploads/modsmapping').reject { |entry| entry == "." || entry == ".." }
        else
          []
        end

      ["Default Mapping File", "New Mapping File"] + uploaded
    end
  end
end
|
@@ -0,0 +1,468 @@
|
|
1
|
+
include Spotlight::Resources::Exceptions
|
2
|
+
module Spotlight::Resources
|
3
|
+
|
4
|
+
# One XPath lookup from a mapping entry: the expression plus an optional
# namespace prefix and namespace definition.
class XPathEntry
  attr_accessor :xpath_string
  attr_accessor :xpath_ns_prefix
  attr_accessor :xpath_ns_def
end

# A MODS path from a mapping entry with its optional subpaths and the
# delimiter used to join subpath values.
class ModsPath
  attr_accessor :path
  attr_accessor :subpaths
  attr_accessor :delimiter
end

# One `mods` entry of a mapping field: the path, an optional attribute filter
# and an optional conditional path/value filter.
class ModsItem
  attr_accessor :mods_path
  attr_accessor :mods_attribute, :mods_attribute_value
  attr_accessor :conditional_mods_value, :conditional_mods_path
end
|
13
|
+
class ConverterItem
|
14
|
+
attr_accessor :spotlight_field, :mods_items, :default_value, :delimiter, :xpath_items, :multivalue_facets
|
15
|
+
|
16
|
+
# Path segments that are replaced by the special names the mods gem uses for
# them. Frozen because the table is shared by every instance.
RESERVED_WORDS = {'name'=> "name_el", 'description' => 'description_el', 'type' => 'type_at'}.freeze

# Paths that are used exactly as written; any other path has its first
# segment converted from camelCase to snake_case before lookup.
TOP_LEVEL_ELEMENTS_SIMPLE = %w[
  abstract
  accessCondition
  classification
  extension
  genre
  identifier
  note
  tableOfContents
  targetAudience
  typeOfResource
].freeze
|
29
|
+
|
30
|
+
# Starts every converter item with ", " as the delimiter used to join
# multiple values; a mapping entry may replace it afterwards.
def initialize
  # Set the instance variable read by the `delimiter` accessor. A bare
  # `delimiter = ...` would only create a method-local variable and leave the
  # attribute nil.
  @delimiter = ", "
end
|
33
|
+
|
34
|
+
# Collects the values for this field from the record: XPath results first,
# then MODS path results, with duplicates removed.
#
# Returns nil when nothing was found, the array of values when
# `multivalue_facets` is set (so each value can be faceted on its own), and
# otherwise the values joined with `delimiter`.
def extract_all_values(modsrecord)
  collected = extract_xpath_values(modsrecord)
  collected.concat(extract_mods_values(modsrecord))

  distinct = collected.uniq
  return nil if distinct.empty?

  # NOTE(review): any value other than nil/false enables multi-value output,
  # including a quoted "no" string - confirm that is intended.
  multivalue_facets ? distinct : distinct.join(delimiter)
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
# Evaluates every configured XPath entry against the record's XML and returns
# the text of all matching nodes. An entry without matches contributes the
# default value, when one is configured.
def extract_xpath_values(modsrecord)
  return [] if xpath_items.nil?

  xpath_items.each_with_object([]) do |entry, found|
    document = modsrecord.mods_ng_xml
    matches =
      if entry.xpath_ns_def.nil?
        document.xpath(entry.xpath_string)
      else
        # Register the entry's namespace prefix for this query only.
        document.xpath(entry.xpath_string, {entry.xpath_ns_prefix => entry.xpath_ns_def})
      end

    if matches.empty?
      found << default_value unless default_value.blank?
    else
      matches.each { |match| found << match.text }
    end
  end
end
|
82
|
+
|
83
|
+
# Resolves every configured MODS item against the record and returns the
# values found. An item without values contributes the default value, when
# one is configured.
#
# A NoMethodError while resolving an item is reported on standard output and
# that item is skipped.
def extract_mods_values(modsrecord)
  gathered = []
  return gathered if mods_items.nil?

  mods_items.each do |mods_item|
    begin
      found = parse_paths(mods_item, modsrecord.mods_ng_xml)
      if found.empty?
        gathered << default_value unless default_value.blank?
      else
        gathered.concat(found)
      end
    rescue NoMethodError => e
      # Best effort: log the failing path and carry on with the next item.
      puts e.message
      puts e.backtrace
      puts "The path " + mods_item.mods_path.path + " does not exist\n"
    end
  end
  gathered
end
|
112
|
+
|
113
|
+
|
114
|
+
# Creates the proper path and subpath names to use, since some words are
# reserved, then walks those paths from `parentnode` to collect values.
#
# item       - the ModsItem describing path, subpaths and filters
# parentnode - the node the path is resolved from
#
# Returns an Array of Strings: one entry per matching node, or, when subpaths
# are configured, one entry per node with its subpath values joined by the
# path's delimiter.
def parse_paths(item, parentnode)
  mods_path = item.mods_path

  path_array = mods_path.path.split("/")
  unless TOP_LEVEL_ELEMENTS_SIMPLE.include?(mods_path.path)
    # camelCase -> snake_case for the first segment, e.g. "titleInfo" -> "title_info"
    path_array[0] = path_array[0].split(/(?<!^)(?=[A-Z])/).join("_").downcase
  end
  # The mods gem has special names for certain reserved words/paths
  path_array = path_array.map { |segment| RESERVED_WORDS.fetch(segment, segment) }

  subpaths = []
  sub_delimiter = nil
  unless mods_path.subpaths.blank?
    sub_delimiter = mods_path.delimiter
    subpaths = mods_path.subpaths.map do |subpath|
      subpath.split("/").map { |segment| RESERVED_WORDS.fetch(segment, segment) }
    end
  end

  # eg: subject
  # Path segments come from the mapping file, which can be user-uploaded, so
  # only public methods may be invoked on the node.
  node = path_array.inject(parentnode) { |current, segment| current.public_send(segment) }

  values = []
  if subpaths.empty?
    node.each do |subnode|
      next if subnode.text.blank?
      next unless check_attributes(subnode, item) && check_conditional_path(subnode, item, parentnode)

      values << subnode.text
    end
  else
    # subnodes when paths are stored in subpaths in the mapping file
    node.each do |subnode|
      next unless check_attributes(subnode, item)

      subpath_values = find_node_value(subnode, subpaths, [], 0)
      next if subpath_values.empty?

      values << subpath_values.join(sub_delimiter) if check_conditional_subpath(subnode, item, parentnode)
    end
  end
  values
end
|
184
|
+
|
185
|
+
# Loops through the child nodes to find the supplied subpaths. It is done this
# way to preserve the MODS order of the subpath values.
#
# nodeset        - node whose children are inspected
# subpaths       - Array of paths, each an Array of segment names
# parentpathname - path (Array of names) accumulated so far; mutated in place
# popcount       - number of trailing names to drop from the path when
#                  backing out
#
# Returns an Array with the non-blank text of every child whose accumulated
# path is one of `subpaths`.
def find_node_value(nodeset, subpaths, parentpathname, popcount)
  values = []
  # Deliberately the same Array object as the caller's, so pops performed here
  # are visible to the caller as well.
  pathname = parentpathname

  nodeset.children.each do |node|
    nodename = RESERVED_WORDS.fetch(node.name, node.name)
    next if nodename.eql?('text')

    pathname << nodename
    popcount += 1
    # NOTE(review): a name that neither matches nor recurses stays on
    # `pathname` for the following siblings - confirm this is intended.
    if subpaths.include?(pathname)
      values << node.text unless node.text.blank?
      # If the paths have multiple levels, then we have to back out to the original nodepath.
      pathname.pop(popcount)
      popcount = 0
    elsif node.children.count > 1 || (node.children.count == 1 && !node.children.first.name.eql?('text'))
      # Descend when the node has element children. The single-child case used
      # to compare the child itself with 1, which is never true, so a wrapper
      # with exactly one element child was skipped.
      values += find_node_value(node, subpaths, pathname, popcount + 1)
      pathname.pop(popcount)
      popcount = 0
    end
  end
  values
end
|
220
|
+
|
221
|
+
# Make sure that the attribute value matches (if supplied).
#
# Rules, in order:
# * no attribute configured         -> accepted
# * attribute written as "!name"    -> accepted only when the node's `name`
#                                      attribute is blank
# * attribute value written "!val"  -> accepted when the node's attribute
#                                      differs from `val`
# * otherwise                       -> accepted when the node's attribute is
#                                      present and equals the configured value
def check_attributes(node, item)
  attribute = item.mods_attribute
  return true if attribute.blank?

  return node[attribute.delete("!")].blank? if attribute[0].eql?("!")

  expected = item.mods_attribute_value
  actual = node[attribute]
  negated = !expected.blank? && expected[0].eql?("!")
  return true if negated && !actual.eql?(expected.delete("!"))

  !actual.nil? && actual.eql?(expected)
end
|
239
|
+
|
240
|
+
# Make sure the conditional path value matches (if supplied). The conditional
# path is resolved from `parentnode`; `node` is accepted for signature
# compatibility only.
def check_conditional_path(node, item, parentnode)
  conditional_value_matches?(parentnode, item)
end

# Make sure the conditional path value matches (if supplied). The conditional
# path is resolved from `node`; `parentnode` is accepted for signature
# compatibility only.
def check_conditional_subpath(node, item, parentnode)
  conditional_value_matches?(node, item)
end

# Shared check behind both conditional filters.
#
# Returns true when the item has no conditional value. Otherwise walks the
# item's conditional path from `startnode` and compares the text found there:
# a value written as "!val" is accepted when the text differs from `val`, any
# other value when the text equals it.
def conditional_value_matches?(startnode, item)
  expected = item.conditional_mods_value
  return true if expected.blank?

  segments = item.conditional_mods_path.split("/")
  # camelCase -> snake_case for the first segment
  segments[0] = segments[0].split(/(?<!^)(?=[A-Z])/).join("_").downcase
  # The mods gem has special names for certain reserved words/paths
  segments = segments.map { |segment| RESERVED_WORDS.fetch(segment, segment) }

  # Path segments come from the mapping file, which can be user-uploaded, so
  # only public methods may be invoked on the node.
  target = segments.inject(startnode) { |current, segment| current.public_send(segment) }
  actual = target.text

  return true if expected[0].eql?("!") && !actual.eql?(expected.delete("!"))

  actual.eql?(expected)
end
|
296
|
+
end
|
297
|
+
|
298
|
+
class OaipmhModsConverter
|
299
|
+
# Mapping-file paths that are rewritten to the path names used for lookup.
RESERVED_PATHS = {'name/namePart'=> "plain_name/namePart", "name/role/roleTerm" => "plain_name/role/roleTerm"}.freeze

# Spotlight fields that keep their name as-is; every other field is prefixed
# with the exhibit slug. (The date field previously carried a stray trailing
# double quote and therefore never matched.)
STANDARD_SPOTLIGHT_FIELDS = [
  'unique-id_tesim',
  'full_title_tesim',
  'spotlight_upload_description_tesim',
  'thumbnail_url_ssm',
  'full_image_url_ssm',
  'spotlight_upload_date_tesim',
  'spotlight_upload_attribution_tesim'
].freeze
|
301
|
+
|
302
|
+
attr_accessor :sidecar_hash
|
303
|
+
|
304
|
+
# Initialize with the name of the set being converted, the slug of the
# exhibit the records belong to and the mapping file name (nil selects the
# default mapping). The mapping itself is parsed lazily on first conversion.
def initialize(set, exhibitslug, mapping_file)
  @set, @exhibitslug, @mapping_file = set, exhibitslug, mapping_file
  @converter_items = []
  @sidecar_hash = {}
end
|
312
|
+
|
313
|
+
# Expects a Mods::Record parameter value.
#
# Parses the mapping file on first use, then extracts one value per mapped
# field. Returns a Hash keyed by the Solr field name; the same values are
# stored in `sidecar_hash` under the plain field name.
def convert(modsrecord)
  parse_mapping_file(mapping_file) if @converter_items.empty?

  @converter_items.each_with_object({}) do |item, solr_hash|
    value = item.extract_all_values(modsrecord)

    # Always assign, even when the value is nil: `sidecar_hash` lives on the
    # converter across calls, so a skipped field would keep the value of an
    # earlier record.
    solr_hash[get_spotlight_field_name(item.spotlight_field)] = value
    @sidecar_hash[item.spotlight_field] = value
  end
end
|
332
|
+
|
333
|
+
# Some spotlight fields use the exhibit slug, others do not: standard fields
# are returned unchanged, every other field becomes
# "exhibit_<slug>_<field>".
def get_spotlight_field_name(spotlight_field)
  return spotlight_field if STANDARD_SPOTLIGHT_FIELDS.include?(spotlight_field)

  'exhibit_' + @exhibitslug + '_' + spotlight_field
end
|
340
|
+
|
341
|
+
|
342
|
+
# Retrieves the path of the mapping file to use: the engine's generic
# config/mapping.yml when no file name was given, otherwise the named file
# inside public/uploads/modsmapping.
#
# The resolved path is memoized separately so the name passed to the
# constructor is never overwritten and repeated calls return the same path.
def mapping_file
  @resolved_mapping_file ||=
    if @mapping_file.nil?
      File.join(Spotlight::Oaipmh::Resources::Engine.root, 'config', 'mapping.yml')
    else
      # The name originates from a form parameter; keep only its last path
      # component so it cannot point outside the upload directory.
      Rails.root.join("public/uploads/modsmapping", File.basename(@mapping_file.to_s))
    end
end
|
352
|
+
|
353
|
+
|
354
|
+
#private
|
355
|
+
|
356
|
+
# Parses the mapping file into a model: one ConverterItem per entry, appended
# to @converter_items.
#
# Returns @converter_items. Raises InvalidMappingFile when an entry breaks the
# mapping rules.
def parse_mapping_file(file)
  # NOTE(review): mapping files can be user-uploaded; consider YAML.safe_load
  # if the YAML library in use does not already restrict object loading.
  mapping_config = YAML.load_file(file)
  mapping_config.each do |field|
    @converter_items << build_converter_item(field)
  end
  @converter_items
end

# Builds the ConverterItem for one top-level mapping entry.
def build_converter_item(field)
  item = ConverterItem.new

  # validate the spotlight-field is not null
  if !field.key?("spotlight-field") || field['spotlight-field'].blank?
    raise InvalidMappingFile, "spotlight-field is required for each entry"
  end
  item.spotlight_field = field['spotlight-field']

  item.delimiter = field["delimiter"] if field.key?("delimiter")
  item.default_value = field["default-value"] if field.key?("default-value")
  item.multivalue_facets = field["multivalue-breaks"] if field.key?("multivalue-breaks")

  # must have a mods or xpath
  if !field.key?("mods") && (!field.key?('xpath') || field['xpath'].blank?)
    raise InvalidMappingFile, "mods or xpath is required for each entry"
  end

  # Can only have mods OR xpath. (`key?` tests for the key; `key` would look
  # up a key by value and made this check a no-op.)
  if field.key?('mods') && field.key?('xpath')
    raise InvalidMappingFile, "Use either mods OR xpath for each entry but not both"
  end

  item.xpath_items = field['xpath'].map { |xpath_field| build_xpath_entry(xpath_field) } if field.key?('xpath')
  item.mods_items = field['mods'].map { |mods_field| build_mods_item(field, mods_field) } if field.key?('mods')
  item
end

# Builds the XPathEntry for one element of an entry's `xpath` list.
def build_xpath_entry(xpath_field)
  if !xpath_field.key?("xpath-value") || xpath_field['xpath-value'].blank?
    raise InvalidMappingFile, "xpath_value is required for each xpath entry"
  end

  entry = XPathEntry.new
  entry.xpath_string = xpath_field['xpath-value']
  # A namespace is only used when both the prefix and its definition are given.
  if xpath_field.key?('xpath-namespace-prefix') && xpath_field.key?('xpath-namespace-def')
    entry.xpath_ns_def = xpath_field['xpath-namespace-def']
    entry.xpath_ns_prefix = xpath_field['xpath-namespace-prefix']
  end
  entry
end

# Builds the ModsItem for one element of an entry's `mods` list. `field` is
# the enclosing entry and is only used for error messages.
def build_mods_item(field, mods_field)
  # validate the path is not null
  if !mods_field.key?("path") || mods_field['path'].blank?
    raise InvalidMappingFile, "path is required for each mods entry"
  end

  modsitem = ModsItem.new
  modsitem.mods_path = ModsPath.new
  # The mods gem has special names for certain reserved words/paths
  modsitem.mods_path.path = RESERVED_PATHS.fetch(mods_field['path'], mods_field['path'])

  if mods_field.key?('subpaths')
    modsitem.mods_path.subpaths = mods_field['subpaths'].map { |subpath| subpath['subpath'] }
  end
  modsitem.mods_path.delimiter = mods_field['delimiter'] if mods_field.key?('delimiter')
  modsitem.conditional_mods_value = mods_field['mods-value']

  if mods_field.key?('attribute')
    unless mods_field.key?('attribute-value')
      raise InvalidMappingFile, field['spotlight-field'] + " - " + mods_field['path'] + ": attribute-value is required if attribute is present"
    end
    modsitem.mods_attribute = mods_field['attribute']
    modsitem.mods_attribute_value = mods_field['attribute-value']
  end

  if mods_field.key?('mods-path')
    unless mods_field.key?('mods-value')
      raise InvalidMappingFile, field['spotlight-field'] + " - " + mods_field['path'] + ": mods-value is required if mods-path is present"
    end
    modsitem.conditional_mods_path = RESERVED_PATHS.fetch(mods_field['mods-path'], mods_field['mods-path'])
    modsitem.conditional_mods_value = mods_field['mods-value']
  end

  modsitem
end

private :build_converter_item, :build_xpath_entry, :build_mods_item
|
465
|
+
|
466
|
+
|
467
|
+
end
|
468
|
+
end
|