arclight 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +6 -47
- data/.rubocop_todo.yml +259 -0
- data/.travis.yml +15 -20
- data/README.md +17 -4
- data/app/assets/images/blacklight/compact.svg +15 -15
- data/app/assets/javascripts/arclight/arclight.js +1 -0
- data/app/assets/javascripts/arclight/collection_navigation.js +5 -2
- data/app/assets/javascripts/arclight/oembed_viewer.js +11 -4
- data/app/assets/javascripts/arclight/search_results.js +15 -0
- data/app/assets/stylesheets/arclight/modules/hierarchy_and_online_contents.scss +6 -3
- data/app/assets/stylesheets/arclight/modules/layout.scss +24 -0
- data/app/assets/stylesheets/arclight/modules/mastheads.scss +33 -0
- data/app/helpers/arclight_helper.rb +1 -1
- data/app/models/concerns/arclight/search_behavior.rb +1 -1
- data/app/views/arclight/repositories/_in_person_repository.html.erb +1 -1
- data/app/views/catalog/_component_contents.html.erb +16 -0
- data/app/views/catalog/_component_overview.html.erb +0 -6
- data/app/views/catalog/_context_card.html.erb +1 -1
- data/app/views/catalog/_custom_metadata.html.erb +1 -1
- data/app/views/catalog/_index_default.html.erb +1 -1
- data/app/views/catalog/_index_header.html.erb +2 -2
- data/app/views/catalog/_index_header_hierarchy_default.html.erb +2 -2
- data/app/views/catalog/_index_hierarchy_default.html.erb +1 -1
- data/app/views/catalog/_results_histogram.html.erb +6 -1
- data/app/views/catalog/_show_breadcrumbs_default.html.erb +19 -5
- data/app/views/catalog/_show_default.html.erb +10 -0
- data/app/views/catalog/_show_sidebar.html.erb +0 -8
- data/app/views/catalog/_show_upper_metadata_collection.html.erb +1 -0
- data/app/views/catalog/_show_upper_metadata_default.html.erb +14 -0
- data/app/views/shared/_header_navbar.html.erb +56 -44
- data/app/views/shared/_main_menu_links.html.erb +1 -1
- data/arclight.gemspec +11 -7
- data/config/i18n-tasks.yml +132 -0
- data/config/locales/arclight.en.yml +53 -52
- data/lib/arclight/engine.rb +1 -0
- data/lib/arclight/hash_absolute_xpath.rb +57 -0
- data/lib/arclight/missing_id_strategy.rb +21 -0
- data/lib/arclight/normalized_date.rb +19 -10
- data/lib/arclight/repository.rb +3 -20
- data/lib/arclight/shared_indexing_behavior.rb +1 -1
- data/lib/arclight/solr_ead_indexer_ext.rb +5 -9
- data/lib/arclight/traject/ead2_config.rb +475 -0
- data/lib/arclight/version.rb +1 -1
- data/lib/generators/arclight/install_generator.rb +14 -0
- data/lib/generators/arclight/templates/catalog_controller.rb +43 -40
- data/lib/tasks/index.rake +4 -2
- data/solr/conf/schema.xml +7 -2
- data/tasks/arclight.rake +5 -1
- data/template.rb +1 -1
- metadata +94 -28
- data/app/views/catalog/_arclight_document_show_header.html.erb +0 -15
- data/app/views/catalog/_arclight_document_show_header_collection.html.erb +0 -12
- data/app/views/catalog/_search_within_form.html.erb +0 -16
- data/app/views/catalog/_show_header.html.erb +0 -5
@@ -83,7 +83,7 @@ module Arclight
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def add_normalized_title(solr_doc)
|
86
|
-
dates = Arclight::NormalizedDate.new(unitdate_inclusive
|
86
|
+
dates = Arclight::NormalizedDate.new(unitdate_inclusive, unitdate_bulk, unitdate_other).to_s
|
87
87
|
title = Arclight::NormalizedTitle.new(solr_doc['title_ssm'].try(:first), dates).to_s
|
88
88
|
solr_doc['normalized_title_ssm'] = [title]
|
89
89
|
solr_doc['normalized_date_ssm'] = [dates]
|
@@ -102,16 +102,12 @@ module Arclight
|
|
102
102
|
|
103
103
|
# TODO: these xpaths should be DRY'd up -- they're in both terminologies
|
104
104
|
def extract_title_and_dates(node, prefix = nil)
|
105
|
-
|
106
|
-
title: node.at_xpath("#{prefix}did/unittitle"),
|
107
|
-
unitdate_inclusive: node.
|
108
|
-
unitdate_bulk: node.
|
109
|
-
unitdate_other: node.
|
105
|
+
{
|
106
|
+
title: node.at_xpath("#{prefix}did/unittitle").try(:text),
|
107
|
+
unitdate_inclusive: node.xpath("#{prefix}did/unitdate[@type=\"inclusive\"]").map(&:text),
|
108
|
+
unitdate_bulk: node.xpath("#{prefix}did/unitdate[@type=\"bulk\"]").map(&:text),
|
109
|
+
unitdate_other: node.xpath("#{prefix}did/unitdate[not(@type)]").map(&:text)
|
110
110
|
}
|
111
|
-
data.each do |k, v|
|
112
|
-
data[k] = v.text if v
|
113
|
-
end
|
114
|
-
data
|
115
111
|
end
|
116
112
|
|
117
113
|
def normalized_component_id(node)
|
@@ -0,0 +1,475 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'logger'
|
4
|
+
require 'traject'
|
5
|
+
require 'traject/nokogiri_reader'
|
6
|
+
require 'traject_plus'
|
7
|
+
require 'traject_plus/macros'
|
8
|
+
require 'arclight/normalized_date'
|
9
|
+
require 'arclight/normalized_title'
|
10
|
+
require 'active_model/conversion' ## Needed for Arclight::Repository
|
11
|
+
require 'active_support/core_ext/array/wrap'
|
12
|
+
require 'arclight/digital_object'
|
13
|
+
require 'arclight/year_range'
|
14
|
+
require 'arclight/repository'
|
15
|
+
require 'arclight/missing_id_strategy'
|
16
|
+
|
17
|
+
NAME_ELEMENTS = %w[corpname famname name persname].freeze
|
18
|
+
|
19
|
+
# rubocop:disable Style/MixinUsage
|
20
|
+
extend TrajectPlus::Macros
|
21
|
+
# rubocop:enable Style/MixinUsage
|
22
|
+
|
23
|
+
SEARCHABLE_NOTES_FIELDS = %w[
|
24
|
+
accessrestrict
|
25
|
+
accruals
|
26
|
+
altformavail
|
27
|
+
appraisal
|
28
|
+
arrangement
|
29
|
+
bibliography
|
30
|
+
bioghist
|
31
|
+
custodhist
|
32
|
+
fileplan
|
33
|
+
note
|
34
|
+
odd
|
35
|
+
originalsloc
|
36
|
+
otherfindaid
|
37
|
+
phystech
|
38
|
+
prefercite
|
39
|
+
processinfo
|
40
|
+
relatedmaterial
|
41
|
+
scopecontent
|
42
|
+
separatedmaterial
|
43
|
+
userestrict
|
44
|
+
].freeze
|
45
|
+
|
46
|
+
DID_SEARCHABLE_NOTES_FIELDS = %w[
|
47
|
+
abstract
|
48
|
+
materialspec
|
49
|
+
physloc
|
50
|
+
].freeze
|
51
|
+
|
52
|
+
settings do
|
53
|
+
provide 'nokogiri.namespaces',
|
54
|
+
'xmlns' => 'urn:isbn:1-931666-22-9'
|
55
|
+
provide 'solr_writer.commit_on_close', 'true'
|
56
|
+
provide 'repository', ENV['REPOSITORY_ID']
|
57
|
+
provide 'logger', Logger.new($stderr)
|
58
|
+
end
|
59
|
+
|
60
|
+
each_record do |_record, context|
|
61
|
+
next unless settings['repository']
|
62
|
+
|
63
|
+
context.clipboard[:repository] = Arclight::Repository.find_by(
|
64
|
+
slug: settings['repository']
|
65
|
+
).name
|
66
|
+
end
|
67
|
+
|
68
|
+
# Top level
|
69
|
+
to_field 'id', extract_xpath('//xmlns:eadid'), strip, gsub('.', '-')
|
70
|
+
to_field 'title_filing_si', extract_xpath('//xmlns:titleproper[@type="filing"]')
|
71
|
+
to_field 'title_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unittitle')
|
72
|
+
to_field 'title_teim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unittitle')
|
73
|
+
to_field 'ead_ssi', extract_xpath('//xmlns:eadid')
|
74
|
+
|
75
|
+
to_field 'unitdate_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate')
|
76
|
+
to_field 'unitdate_bulk_ssim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[@type="bulk"]')
|
77
|
+
to_field 'unitdate_inclusive_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[@type="inclusive"]')
|
78
|
+
to_field 'unitdate_other_ssim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[not(@type)]')
|
79
|
+
|
80
|
+
# Indexes the value of the level attribute of the first <archdesc> element.
# Safe navigation is used so that a document without <archdesc>, or without a
# level attribute on it, is indexed with no level rather than aborting with a
# NoMethodError on nil.
to_field 'level_ssm' do |record, accumulator|
  level = record.at_xpath('//xmlns:archdesc')&.attribute('level')&.value
  accumulator << level if level
end
|
83
|
+
|
84
|
+
# Indexes the capitalized value of the level attribute of the first
# <archdesc> element. The nil check is applied to the lookups that can
# actually return nil (the element and the attribute), so a missing level is
# skipped instead of raising NoMethodError.
to_field 'level_sim' do |record, accumulator|
  level = record.at_xpath('//xmlns:archdesc')&.attribute('level')&.value
  accumulator << level.capitalize if level
end
|
87
|
+
|
88
|
+
to_field 'unitid_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitid')
|
89
|
+
to_field 'unitid_teim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitid')
|
90
|
+
|
91
|
+
# Builds the collection's display title from its first unittitle and the
# normalized form of its inclusive, bulk and untyped unitdates, all read back
# from fields already present in the output hash.
to_field 'normalized_title_ssm' do |_record, accumulator, context|
  dates = Arclight::NormalizedDate.new(
    context.output_hash['unitdate_inclusive_ssm'],
    context.output_hash['unitdate_bulk_ssim'],
    context.output_hash['unitdate_other_ssim']
  ).to_s
  # title_ssm is absent from the output hash when no unittitle was extracted;
  # use safe navigation, as the component-level version of this field does.
  title = context.output_hash['title_ssm']&.first
  accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
end
|
100
|
+
|
101
|
+
to_field 'normalized_date_ssm' do |_record, accumulator, context|
|
102
|
+
accumulator << Arclight::NormalizedDate.new(
|
103
|
+
context.output_hash['unitdate_inclusive_ssm'],
|
104
|
+
context.output_hash['unitdate_bulk_ssim'],
|
105
|
+
context.output_hash['unitdate_other_ssim']
|
106
|
+
).to_s
|
107
|
+
end
|
108
|
+
|
109
|
+
to_field 'collection_ssm' do |_record, accumulator, context|
|
110
|
+
accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
|
111
|
+
end
|
112
|
+
to_field 'collection_sim' do |_record, accumulator, context|
|
113
|
+
accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
|
114
|
+
end
|
115
|
+
|
116
|
+
to_field 'repository_ssm' do |_record, accumulator, context|
|
117
|
+
accumulator << context.clipboard[:repository]
|
118
|
+
end
|
119
|
+
|
120
|
+
to_field 'repository_sim' do |_record, accumulator, context|
|
121
|
+
accumulator << context.clipboard[:repository]
|
122
|
+
end
|
123
|
+
|
124
|
+
to_field 'geogname_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
|
125
|
+
|
126
|
+
to_field 'geogname_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
|
127
|
+
|
128
|
+
to_field 'creator_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
|
129
|
+
to_field 'creator_sim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
|
130
|
+
to_field 'creator_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
|
131
|
+
to_field 'creator_sort' do |record, accumulator|
|
132
|
+
accumulator << record.xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']").map { |c| c.text.strip }.join(', ')
|
133
|
+
end
|
134
|
+
|
135
|
+
to_field 'creator_persname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:persname")
|
136
|
+
to_field 'creator_persname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:persname")
|
137
|
+
to_field 'creator_corpname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
|
138
|
+
to_field 'creator_corpname_sim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
|
139
|
+
to_field 'creator_corpname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
|
140
|
+
to_field 'creator_famname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:famname")
|
141
|
+
to_field 'creator_famname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:famname")
|
142
|
+
|
143
|
+
to_field 'persname_sim', extract_xpath('//xmlns:persname')
|
144
|
+
|
145
|
+
to_field 'creators_ssim' do |_record, accumulator, context|
|
146
|
+
accumulator.concat context.output_hash['creator_persname_ssm'] if context.output_hash['creator_persname_ssm']
|
147
|
+
accumulator.concat context.output_hash['creator_corpname_ssm'] if context.output_hash['creator_corpname_ssm']
|
148
|
+
accumulator.concat context.output_hash['creator_famname_ssm'] if context.output_hash['creator_famname_ssm']
|
149
|
+
end
|
150
|
+
|
151
|
+
to_field 'places_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
|
152
|
+
to_field 'places_ssim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
|
153
|
+
to_field 'places_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
|
154
|
+
|
155
|
+
to_field 'access_terms_ssm', extract_xpath('//xmlns:archdesc/xmlns:userestrict/xmlns:p')
|
156
|
+
|
157
|
+
# Indexes the acquisition group information into the notes field
|
158
|
+
# Please see https://www.loc.gov/ead/tglib/elements/acqinfo.html
|
159
|
+
to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:acqinfo/*[local-name()!="head"]')
|
160
|
+
to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
|
161
|
+
to_field 'acqinfo_ssim', extract_xpath('./xmlns:acqinfo/*[local-name()!="head"]')
|
162
|
+
to_field 'acqinfo_ssim', extract_xpath('./xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
|
163
|
+
to_field 'acqinfo_ssm' do |_record, accumulator, context|
|
164
|
+
accumulator.concat(context.output_hash.fetch('acqinfo_ssim', []))
|
165
|
+
end
|
166
|
+
|
167
|
+
# Indexes only specified controlled terms for archival description into the access_subject field
|
168
|
+
to_field 'access_subjects_ssim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess', to_text: false) do |_record, accumulator|
|
169
|
+
accumulator.map! do |element|
|
170
|
+
%w[subject function occupation genreform].map do |selector|
|
171
|
+
element.xpath(".//xmlns:#{selector}").map(&:text)
|
172
|
+
end
|
173
|
+
end.flatten!
|
174
|
+
end
|
175
|
+
|
176
|
+
to_field 'access_subjects_ssm' do |_record, accumulator, context|
|
177
|
+
accumulator.concat Array.wrap(context.output_hash['access_subjects_ssim'])
|
178
|
+
end
|
179
|
+
|
180
|
+
to_field 'has_online_content_ssim', extract_xpath('.//xmlns:dao') do |_record, accumulator|
|
181
|
+
accumulator.replace([accumulator.any?])
|
182
|
+
end
|
183
|
+
|
184
|
+
to_field 'extent_ssm', extract_xpath('//xmlns:did/xmlns:physdesc/xmlns:extent')
|
185
|
+
to_field 'extent_teim', extract_xpath('//xmlns:did/xmlns:physdesc/xmlns:extent')
|
186
|
+
to_field 'genreform_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:genreform')
|
187
|
+
to_field 'genreform_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:genreform')
|
188
|
+
|
189
|
+
to_field 'date_range_sim', extract_xpath('.//xmlns:did/xmlns:unitdate/@normal', to_text: false) do |_record, accumulator|
|
190
|
+
range = Arclight::YearRange.new
|
191
|
+
next range.years if accumulator.blank?
|
192
|
+
|
193
|
+
ranges = accumulator.map(&:to_s)
|
194
|
+
range << range.parse_ranges(ranges)
|
195
|
+
accumulator.replace range.years
|
196
|
+
end
|
197
|
+
|
198
|
+
SEARCHABLE_NOTES_FIELDS.map do |selector|
|
199
|
+
to_field "#{selector}_ssm", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/*[local-name()!='head']")
|
200
|
+
to_field "#{selector}_heading_ssm", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/xmlns:head") unless selector == 'prefercite'
|
201
|
+
to_field "#{selector}_teim", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/*[local-name()!='head']")
|
202
|
+
end
|
203
|
+
|
204
|
+
DID_SEARCHABLE_NOTES_FIELDS.map do |selector|
|
205
|
+
to_field "#{selector}_ssm", extract_xpath("//xmlns:did/xmlns:#{selector}")
|
206
|
+
end
|
207
|
+
NAME_ELEMENTS.map do |selector|
|
208
|
+
to_field 'names_coll_ssim', extract_xpath("/xmlns:ead/xmlns:archdesc/xmlns:controlaccess/xmlns:#{selector}")
|
209
|
+
to_field 'names_ssim', extract_xpath("//xmlns:#{selector}")
|
210
|
+
to_field "#{selector}_ssm", extract_xpath("//xmlns:#{selector}")
|
211
|
+
end
|
212
|
+
to_field 'corpname_sim', extract_xpath('//xmlns:corpname')
|
213
|
+
|
214
|
+
to_field 'language_sim', extract_xpath('//xmlns:did/xmlns:langmaterial')
|
215
|
+
to_field 'language_ssm', extract_xpath('//xmlns:did/xmlns:langmaterial')
|
216
|
+
|
217
|
+
# Each component child document
|
218
|
+
# <c> <c01> <c12>
|
219
|
+
compose 'components', ->(record, accumulator, _context) { accumulator.concat record.xpath('//*[is_component(.)]', NokogiriXpathExtensions.new) } do
|
220
|
+
# Reference ID of the component. When the element carries an id attribute its
# value is used (stripped, with "." replaced by "-"). Otherwise an ID is
# minted with the strategy returned by Arclight::MissingIdStrategy.selected
# and a warning is logged.
to_field 'ref_ssi' do |record, accumulator, context|
  accumulator << if record.attribute('id').blank?
                   strategy = Arclight::MissingIdStrategy.selected
                   hexdigest = strategy.new(record).to_hexdigest
                   parent_id = context.clipboard[:parent].output_hash['id'].first
                   logger.warn('MISSING ID WARNING') do
                     [
                       "A component in #{parent_id} did not have an ID so one was minted using the #{strategy} strategy.",
                       "The ID of this document will be #{parent_id}#{hexdigest}."
                     ].join(' ')
                   end
                   # The branch must evaluate to the minted ID; without this
                   # line its value would be whatever logger.warn returns.
                   hexdigest
                 else
                   record.attribute('id')&.value&.strip&.gsub('.', '-')
                 end
end
|
235
|
+
to_field 'ref_ssm' do |_record, accumulator, context|
|
236
|
+
accumulator.concat context.output_hash['ref_ssi']
|
237
|
+
end
|
238
|
+
|
239
|
+
to_field 'id' do |_record, accumulator, context|
|
240
|
+
accumulator << [
|
241
|
+
context.clipboard[:parent].output_hash['id'],
|
242
|
+
context.output_hash['ref_ssi']
|
243
|
+
].join('')
|
244
|
+
end
|
245
|
+
|
246
|
+
to_field 'ead_ssi' do |_record, accumulator, context|
|
247
|
+
accumulator << context.clipboard[:parent].output_hash['ead_ssi'].first
|
248
|
+
end
|
249
|
+
|
250
|
+
to_field 'title_filing_si', extract_xpath('./xmlns:did/xmlns:unittitle'), first_only
|
251
|
+
to_field 'title_ssm', extract_xpath('./xmlns:did/xmlns:unittitle')
|
252
|
+
to_field 'title_teim', extract_xpath('./xmlns:did/xmlns:unittitle')
|
253
|
+
|
254
|
+
to_field 'unitdate_bulk_ssim', extract_xpath('./xmlns:did/xmlns:unitdate[@type="bulk"]')
|
255
|
+
to_field 'unitdate_inclusive_ssm', extract_xpath('./xmlns:did/xmlns:unitdate[@type="inclusive"]')
|
256
|
+
to_field 'unitdate_other_ssim', extract_xpath('./xmlns:did/xmlns:unitdate[not(@type)]')
|
257
|
+
|
258
|
+
to_field 'normalized_title_ssm' do |_record, accumulator, context|
|
259
|
+
dates = Arclight::NormalizedDate.new(
|
260
|
+
context.output_hash['unitdate_inclusive_ssm'],
|
261
|
+
context.output_hash['unitdate_bulk_ssim'],
|
262
|
+
context.output_hash['unitdate_other_ssim']
|
263
|
+
).to_s
|
264
|
+
title = context.output_hash['title_ssm']&.first
|
265
|
+
accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
|
266
|
+
end
|
267
|
+
|
268
|
+
to_field 'normalized_date_ssm' do |_record, accumulator, context|
|
269
|
+
accumulator << Arclight::NormalizedDate.new(
|
270
|
+
context.output_hash['unitdate_inclusive_ssm'],
|
271
|
+
context.output_hash['unitdate_bulk_ssim'],
|
272
|
+
context.output_hash['unitdate_other_ssim']
|
273
|
+
).to_s
|
274
|
+
end
|
275
|
+
|
276
|
+
# Depth of the component: 1 plus the number of its ancestors that are
# themselves components. Ancestors are tested with
# NokogiriXpathExtensions#is_component so that numbered elements (<c01> ..
# <c12>) are counted as well as unnumbered <c> elements.
to_field 'component_level_isim' do |record, accumulator|
  accumulator << 1 + NokogiriXpathExtensions.new.is_component(record.ancestors).length
end
|
279
|
+
|
280
|
+
to_field 'parent_ssm' do |record, accumulator, context|
|
281
|
+
accumulator << context.clipboard[:parent].output_hash['id'].first
|
282
|
+
accumulator.concat NokogiriXpathExtensions.new.is_component(record.ancestors).reverse.map { |n| n.attribute('id').value }
|
283
|
+
end
|
284
|
+
|
285
|
+
to_field 'parent_ssi' do |_record, accumulator, context|
|
286
|
+
accumulator << context.output_hash['parent_ssm'].last
|
287
|
+
end
|
288
|
+
|
289
|
+
to_field 'parent_unittitles_ssm' do |_record, accumulator, context|
|
290
|
+
## Top level document
|
291
|
+
accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
|
292
|
+
## Other components
|
293
|
+
context.output_hash['parent_ssm']&.drop(1)&.each do |id|
|
294
|
+
accumulator.concat Array
|
295
|
+
.wrap(context.clipboard[:parent].output_hash['components'])
|
296
|
+
.find { |c| c['ref_ssi'] == [id] }&.[]('normalized_title_ssm')
|
297
|
+
end
|
298
|
+
end
|
299
|
+
to_field 'parent_unittitles_teim' do |_record, accumulator, context|
|
300
|
+
accumulator.concat context.output_hash['parent_unittitles_ssm']
|
301
|
+
end
|
302
|
+
|
303
|
+
to_field 'unitid_ssm', extract_xpath('./xmlns:did/xmlns:unitid')
|
304
|
+
to_field 'repository_ssm' do |_record, accumulator, context|
|
305
|
+
accumulator << context.clipboard[:parent].clipboard[:repository]
|
306
|
+
end
|
307
|
+
to_field 'repository_sim' do |_record, accumulator, context|
|
308
|
+
accumulator << context.clipboard[:parent].clipboard[:repository]
|
309
|
+
end
|
310
|
+
to_field 'collection_ssm' do |_record, accumulator, context|
|
311
|
+
accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
|
312
|
+
end
|
313
|
+
to_field 'collection_sim' do |_record, accumulator, context|
|
314
|
+
accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
|
315
|
+
end
|
316
|
+
|
317
|
+
to_field 'extent_ssm', extract_xpath('./xmlns:did/xmlns:physdesc/xmlns:extent')
|
318
|
+
to_field 'creator_ssm', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
|
319
|
+
to_field 'creator_ssim', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
|
320
|
+
to_field 'creators_ssim', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
|
321
|
+
to_field 'creator_sort' do |record, accumulator|
|
322
|
+
accumulator << record.xpath("./xmlns:did/xmlns:origination[@label='creator']").map(&:text).join(', ')
|
323
|
+
end
|
324
|
+
to_field 'collection_creator_ssm' do |_record, accumulator, context|
|
325
|
+
accumulator.concat Array.wrap(context.clipboard[:parent].output_hash['creator_ssm'])
|
326
|
+
end
|
327
|
+
to_field 'has_online_content_ssim', extract_xpath('.//xmlns:dao') do |_record, accumulator|
|
328
|
+
accumulator.replace([accumulator.any?])
|
329
|
+
end
|
330
|
+
# Number of direct child components. Children are tested with
# NokogiriXpathExtensions#is_component so that numbered children (<c01> ..
# <c12>) are counted as well as unnumbered <c> elements; a component without
# child components yields 0.
to_field 'child_component_count_isim' do |record, accumulator|
  accumulator << NokogiriXpathExtensions.new.is_component(record.children).length
end
|
333
|
+
|
334
|
+
to_field 'ref_ssm' do |record, accumulator|
|
335
|
+
accumulator << record.attribute('id')
|
336
|
+
end
|
337
|
+
|
338
|
+
# Display level of the component. A level of "otherlevel" is replaced by the
# otherlevel attribute when that is present and by "Other" when it is not;
# any other non-blank level is capitalized. With no level the value is nil.
to_field 'level_ssm' do |record, accumulator|
  declared = record.attribute('level')&.value
  custom = record.attribute('otherlevel')&.value

  label =
    if declared == 'otherlevel'
      custom.present? ? custom : 'Other'
    elsif declared.present?
      declared.capitalize
    end

  accumulator << label
end
|
349
|
+
|
350
|
+
to_field 'level_sim' do |_record, accumulator, context|
|
351
|
+
next unless context.output_hash['level_ssm']
|
352
|
+
|
353
|
+
accumulator.concat context.output_hash['level_ssm']&.map(&:capitalize)
|
354
|
+
end
|
355
|
+
|
356
|
+
to_field 'parent_access_restrict_ssm', extract_xpath('./xmlns:accessrestrict/xmlns:p')
|
357
|
+
|
358
|
+
to_field 'parent_access_restrict_ssm' do |_record, accumulator, context|
|
359
|
+
next unless context.output_hash['accessrestrict_ssm'].nil?
|
360
|
+
|
361
|
+
context.output_hash['parent_ssm']&.each do |id|
|
362
|
+
accumulator.concat Array
|
363
|
+
.wrap(context.clipboard[:parent]&.output_hash&.[]('components'))
|
364
|
+
.select { |c| c['ref_ssi'] == [id] }.map { |c| c['accessrestrict_ssm'] }
|
365
|
+
end
|
366
|
+
end
|
367
|
+
|
368
|
+
to_field 'parent_access_restrict_ssm' do |_record, accumulator, context|
|
369
|
+
next unless context.output_hash['parent_access_restrict_ssm'].nil?
|
370
|
+
|
371
|
+
accumulator.concat Array.wrap(context.clipboard[:parent]&.output_hash&.[]('accessrestrict_ssm'))
|
372
|
+
end
|
373
|
+
|
374
|
+
to_field 'parent_access_terms_ssm', extract_xpath('xmlns:userestrict/xmlns:p')
|
375
|
+
|
376
|
+
to_field 'parent_access_terms_ssm' do |_record, accumulator, context|
|
377
|
+
next unless context.output_hash['userestrict_ssm'].nil?
|
378
|
+
|
379
|
+
context.output_hash['parent_ssm']&.each do |id|
|
380
|
+
accumulator.concat Array
|
381
|
+
.wrap(context.clipboard[:parent]&.output_hash&.[]('components'))
|
382
|
+
.select { |c| c['ref_ssi'] == [id] }.map { |c| c['userestrict_ssm'] }
|
383
|
+
end
|
384
|
+
end
|
385
|
+
|
386
|
+
to_field 'parent_access_terms_ssm' do |_record, accumulator, context|
|
387
|
+
next unless context.output_hash['parent_access_terms_ssm'].nil?
|
388
|
+
|
389
|
+
accumulator << context.clipboard[:parent]&.output_hash&.[]('access_terms_ssm')&.first
|
390
|
+
end
|
391
|
+
|
392
|
+
# Serializes every <dao> at or below the component as an
# Arclight::DigitalObject JSON string. The label is the dao's title attribute,
# falling back to the text of daodesc/p; the href is the href or xlink:href
# attribute.
to_field 'digital_objects_ssm', extract_xpath('./xmlns:dao') do |record, accumulator|
  digital_objects = record.xpath('.//xmlns:dao', xmlns: 'urn:isbn:1-931666-22-9').map do |dao|
    label = dao.attributes['title']&.value ||
            dao.xpath('xmlns:daodesc/xmlns:p', xmlns: 'urn:isbn:1-931666-22-9')&.text
    href = (dao.attributes['href'] || dao.attributes['xlink:href'])&.value
    Arclight::DigitalObject.new(label: label, href: href).to_json
  end
  # Replace rather than append: the accumulator may already hold the plain
  # text extracted from ./dao, which would otherwise be indexed next to the
  # JSON entries in the same field.
  accumulator.replace(digital_objects)
end
|
400
|
+
|
401
|
+
to_field 'date_range_sim', extract_xpath('.//xmlns:did/xmlns:unitdate/@normal', to_text: false) do |_record, accumulator|
|
402
|
+
range = Arclight::YearRange.new
|
403
|
+
next range.years if accumulator.blank?
|
404
|
+
|
405
|
+
ranges = accumulator.map(&:to_s)
|
406
|
+
range << range.parse_ranges(ranges)
|
407
|
+
accumulator.replace range.years
|
408
|
+
end
|
409
|
+
|
410
|
+
NAME_ELEMENTS.map do |selector|
|
411
|
+
to_field 'names_ssim', extract_xpath("./xmlns:controlaccess/xmlns:#{selector}")
|
412
|
+
to_field "#{selector}_ssm", extract_xpath(".//xmlns:#{selector}")
|
413
|
+
end
|
414
|
+
|
415
|
+
to_field 'geogname_sim', extract_xpath('./xmlns:controlaccess/xmlns:geogname')
|
416
|
+
to_field 'geogname_ssm', extract_xpath('./xmlns:controlaccess/xmlns:geogname')
|
417
|
+
to_field 'places_ssim', extract_xpath('xmlns:controlaccess/xmlns:geogname')
|
418
|
+
|
419
|
+
# Indexes only specified controlled terms for archival description into the access_subject field
|
420
|
+
to_field 'access_subjects_ssim', extract_xpath('./xmlns:controlaccess', to_text: false) do |_record, accumulator|
|
421
|
+
accumulator.map! do |element|
|
422
|
+
%w[subject function occupation genreform].map do |selector|
|
423
|
+
element.xpath(".//xmlns:#{selector}").map(&:text)
|
424
|
+
end
|
425
|
+
end.flatten!
|
426
|
+
end
|
427
|
+
|
428
|
+
to_field 'access_subjects_ssm' do |_record, accumulator, context|
|
429
|
+
accumulator.concat(context.output_hash.fetch('access_subjects_ssim', []))
|
430
|
+
end
|
431
|
+
|
432
|
+
# Indexes the acquisition group information into the notes field
|
433
|
+
# Please see https://www.loc.gov/ead/tglib/elements/acqinfo.html
|
434
|
+
to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:acqinfo/*[local-name()!="head"]')
|
435
|
+
to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
|
436
|
+
to_field 'acqinfo_ssim', extract_xpath('./xmlns:acqinfo/*[local-name()!="head"]')
|
437
|
+
to_field 'acqinfo_ssim', extract_xpath('./xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
|
438
|
+
to_field 'acqinfo_ssm' do |_record, accumulator, context|
|
439
|
+
accumulator.concat(context.output_hash.fetch('acqinfo_ssim', []))
|
440
|
+
end
|
441
|
+
|
442
|
+
to_field 'language_ssm', extract_xpath('./xmlns:did/xmlns:langmaterial')
|
443
|
+
to_field 'containers_ssim' do |record, accumulator|
|
444
|
+
record.xpath('./xmlns:did/xmlns:container').each do |node|
|
445
|
+
accumulator << [node.attribute('type'), node.text].join(' ').strip
|
446
|
+
end
|
447
|
+
end
|
448
|
+
# Registers, for every searchable note element, the component-level fields
# holding the note content (_ssm and _teim) and the note heading
# (_heading_ssm).
SEARCHABLE_NOTES_FIELDS.each do |selector|
  to_field "#{selector}_ssm", extract_xpath(".//xmlns:#{selector}/*[local-name()!='head']")
  # The heading is looked up relative to the component, like the content
  # fields around it; a path through <archdesc> cannot match from inside a
  # component, so it is not part of the expression.
  to_field "#{selector}_heading_ssm", extract_xpath(".//xmlns:#{selector}/xmlns:head")
  to_field "#{selector}_teim", extract_xpath(".//xmlns:#{selector}/*[local-name()!='head']")
end
|
453
|
+
DID_SEARCHABLE_NOTES_FIELDS.map do |selector|
|
454
|
+
to_field "#{selector}_ssm", extract_xpath(".//xmlns:did/xmlns:#{selector}")
|
455
|
+
end
|
456
|
+
to_field 'did_note_ssm', extract_xpath('.//xmlns:did/xmlns:note')
|
457
|
+
end
|
458
|
+
|
459
|
+
each_record do |_record, context|
|
460
|
+
context.output_hash['components'] &&= context.output_hash['components'].select { |c| c.keys.any? }
|
461
|
+
end
|
462
|
+
|
463
|
+
##
# Handler object for custom XPath functions. Its is_component method backs
# XPath expressions such as `//*[is_component(.)]` and can also be called
# directly with a collection of nodes.
class NokogiriXpathExtensions
  # Element names treated as components: the unnumbered "c" plus the numbered
  # "c01" through "c12". Built once rather than for every node tested.
  COMPONENT_ELEMENTS = ['c', *(1..12).map { |i| format('c%02d', i) }].freeze

  # rubocop:disable Naming/PredicateName
  # Keeps only the component elements of a collection of nodes.
  #
  # @param node_set [Enumerable] objects responding to #name
  # @return the nodes whose name is "c" or "c01".."c12", in their original
  #   order, as returned by node_set.find_all
  def is_component(node_set)
    node_set.find_all { |node| COMPONENT_ELEMENTS.include?(node.name) }
  end
  # rubocop:enable Naming/PredicateName
end
|