arclight 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +6 -47
  4. data/.rubocop_todo.yml +259 -0
  5. data/.travis.yml +15 -20
  6. data/README.md +17 -4
  7. data/app/assets/images/blacklight/compact.svg +15 -15
  8. data/app/assets/javascripts/arclight/arclight.js +1 -0
  9. data/app/assets/javascripts/arclight/collection_navigation.js +5 -2
  10. data/app/assets/javascripts/arclight/oembed_viewer.js +11 -4
  11. data/app/assets/javascripts/arclight/search_results.js +15 -0
  12. data/app/assets/stylesheets/arclight/modules/hierarchy_and_online_contents.scss +6 -3
  13. data/app/assets/stylesheets/arclight/modules/layout.scss +24 -0
  14. data/app/assets/stylesheets/arclight/modules/mastheads.scss +33 -0
  15. data/app/helpers/arclight_helper.rb +1 -1
  16. data/app/models/concerns/arclight/search_behavior.rb +1 -1
  17. data/app/views/arclight/repositories/_in_person_repository.html.erb +1 -1
  18. data/app/views/catalog/_component_contents.html.erb +16 -0
  19. data/app/views/catalog/_component_overview.html.erb +0 -6
  20. data/app/views/catalog/_context_card.html.erb +1 -1
  21. data/app/views/catalog/_custom_metadata.html.erb +1 -1
  22. data/app/views/catalog/_index_default.html.erb +1 -1
  23. data/app/views/catalog/_index_header.html.erb +2 -2
  24. data/app/views/catalog/_index_header_hierarchy_default.html.erb +2 -2
  25. data/app/views/catalog/_index_hierarchy_default.html.erb +1 -1
  26. data/app/views/catalog/_results_histogram.html.erb +6 -1
  27. data/app/views/catalog/_show_breadcrumbs_default.html.erb +19 -5
  28. data/app/views/catalog/_show_default.html.erb +10 -0
  29. data/app/views/catalog/_show_sidebar.html.erb +0 -8
  30. data/app/views/catalog/_show_upper_metadata_collection.html.erb +1 -0
  31. data/app/views/catalog/_show_upper_metadata_default.html.erb +14 -0
  32. data/app/views/shared/_header_navbar.html.erb +56 -44
  33. data/app/views/shared/_main_menu_links.html.erb +1 -1
  34. data/arclight.gemspec +11 -7
  35. data/config/i18n-tasks.yml +132 -0
  36. data/config/locales/arclight.en.yml +53 -52
  37. data/lib/arclight/engine.rb +1 -0
  38. data/lib/arclight/hash_absolute_xpath.rb +57 -0
  39. data/lib/arclight/missing_id_strategy.rb +21 -0
  40. data/lib/arclight/normalized_date.rb +19 -10
  41. data/lib/arclight/repository.rb +3 -20
  42. data/lib/arclight/shared_indexing_behavior.rb +1 -1
  43. data/lib/arclight/solr_ead_indexer_ext.rb +5 -9
  44. data/lib/arclight/traject/ead2_config.rb +475 -0
  45. data/lib/arclight/version.rb +1 -1
  46. data/lib/generators/arclight/install_generator.rb +14 -0
  47. data/lib/generators/arclight/templates/catalog_controller.rb +43 -40
  48. data/lib/tasks/index.rake +4 -2
  49. data/solr/conf/schema.xml +7 -2
  50. data/tasks/arclight.rake +5 -1
  51. data/template.rb +1 -1
  52. metadata +94 -28
  53. data/app/views/catalog/_arclight_document_show_header.html.erb +0 -15
  54. data/app/views/catalog/_arclight_document_show_header_collection.html.erb +0 -12
  55. data/app/views/catalog/_search_within_form.html.erb +0 -16
  56. data/app/views/catalog/_show_header.html.erb +0 -5
@@ -83,7 +83,7 @@ module Arclight
83
83
  end
84
84
 
85
85
  def add_normalized_title(solr_doc)
86
- dates = Arclight::NormalizedDate.new(unitdate_inclusive.first, unitdate_bulk.first, unitdate_other.first).to_s
86
+ dates = Arclight::NormalizedDate.new(unitdate_inclusive, unitdate_bulk, unitdate_other).to_s
87
87
  title = Arclight::NormalizedTitle.new(solr_doc['title_ssm'].try(:first), dates).to_s
88
88
  solr_doc['normalized_title_ssm'] = [title]
89
89
  solr_doc['normalized_date_ssm'] = [dates]
@@ -102,16 +102,12 @@ module Arclight
102
102
 
103
103
  # TODO: these xpaths should be DRY'd up -- they're in both terminologies
104
104
  def extract_title_and_dates(node, prefix = nil)
105
- data = {
106
- title: node.at_xpath("#{prefix}did/unittitle"),
107
- unitdate_inclusive: node.at_xpath("#{prefix}did/unitdate[@type=\"inclusive\"]"),
108
- unitdate_bulk: node.at_xpath("#{prefix}did/unitdate[@type=\"bulk\"]"),
109
- unitdate_other: node.at_xpath("#{prefix}did/unitdate[not(@type)]")
105
+ {
106
+ title: node.at_xpath("#{prefix}did/unittitle").try(:text),
107
+ unitdate_inclusive: node.xpath("#{prefix}did/unitdate[@type=\"inclusive\"]").map(&:text),
108
+ unitdate_bulk: node.xpath("#{prefix}did/unitdate[@type=\"bulk\"]").map(&:text),
109
+ unitdate_other: node.xpath("#{prefix}did/unitdate[not(@type)]").map(&:text)
110
110
  }
111
- data.each do |k, v|
112
- data[k] = v.text if v
113
- end
114
- data
115
111
  end
116
112
 
117
113
  def normalized_component_id(node)
@@ -0,0 +1,475 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+ require 'traject'
5
+ require 'traject/nokogiri_reader'
6
+ require 'traject_plus'
7
+ require 'traject_plus/macros'
8
+ require 'arclight/normalized_date'
9
+ require 'arclight/normalized_title'
10
+ require 'active_model/conversion' ## Needed for Arclight::Repository
11
+ require 'active_support/core_ext/array/wrap'
12
+ require 'arclight/digital_object'
13
+ require 'arclight/year_range'
14
+ require 'arclight/repository'
15
+ require 'arclight/missing_id_strategy'
16
+
17
+ NAME_ELEMENTS = %w[corpname famname name persname].freeze
18
+
19
+ # rubocop:disable Style/MixinUsage
20
+ extend TrajectPlus::Macros
21
+ # rubocop:enable Style/MixinUsage
22
+
23
+ SEARCHABLE_NOTES_FIELDS = %w[
24
+ accessrestrict
25
+ accruals
26
+ altformavail
27
+ appraisal
28
+ arrangement
29
+ bibliography
30
+ bioghist
31
+ custodhist
32
+ fileplan
33
+ note
34
+ odd
35
+ originalsloc
36
+ otherfindaid
37
+ phystech
38
+ prefercite
39
+ processinfo
40
+ relatedmaterial
41
+ scopecontent
42
+ separatedmaterial
43
+ userestrict
44
+ ].freeze
45
+
46
+ DID_SEARCHABLE_NOTES_FIELDS = %w[
47
+ abstract
48
+ materialspec
49
+ physloc
50
+ ].freeze
51
+
52
+ settings do
53
+ provide 'nokogiri.namespaces',
54
+ 'xmlns' => 'urn:isbn:1-931666-22-9'
55
+ provide 'solr_writer.commit_on_close', 'true'
56
+ provide 'repository', ENV['REPOSITORY_ID']
57
+ provide 'logger', Logger.new($stderr)
58
+ end
59
+
60
+ each_record do |_record, context|
61
+ next unless settings['repository']
62
+
63
+ context.clipboard[:repository] = Arclight::Repository.find_by(
64
+ slug: settings['repository']
65
+ ).name
66
+ end
67
+
68
+ # Top level
69
+ to_field 'id', extract_xpath('//xmlns:eadid'), strip, gsub('.', '-')
70
+ to_field 'title_filing_si', extract_xpath('//xmlns:titleproper[@type="filing"]')
71
+ to_field 'title_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unittitle')
72
+ to_field 'title_teim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unittitle')
73
+ to_field 'ead_ssi', extract_xpath('//xmlns:eadid')
74
+
75
+ to_field 'unitdate_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate')
76
+ to_field 'unitdate_bulk_ssim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[@type="bulk"]')
77
+ to_field 'unitdate_inclusive_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[@type="inclusive"]')
78
+ to_field 'unitdate_other_ssim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[not(@type)]')
79
+
80
# Collection-level `level_ssm`: the raw value of <archdesc level="...">.
# Safe navigation keeps a document with no <archdesc>, or an <archdesc>
# without a `level` attribute, from raising NoMethodError; in that case
# nothing is added to the field.
to_field 'level_ssm' do |record, accumulator|
  level = record.at_xpath('//xmlns:archdesc')&.attribute('level')&.value
  accumulator << level if level
end
83
+
84
# Collection-level `level_sim`: the capitalized value of <archdesc level="...">.
# The nil guard sits on the element/attribute lookup (the part that can
# actually be nil); when either is missing nothing is added to the field.
to_field 'level_sim' do |record, accumulator|
  level = record.at_xpath('//xmlns:archdesc')&.attribute('level')&.value
  accumulator << level.capitalize if level
end
87
+
88
+ to_field 'unitid_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitid')
89
+ to_field 'unitid_teim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitid')
90
+
91
# Collection-level normalized title: combines the already-extracted unit
# title with the normalized date string built from the inclusive, bulk and
# untyped unitdate fields.
to_field 'normalized_title_ssm' do |_record, accumulator, context|
  dates = Arclight::NormalizedDate.new(
    context.output_hash['unitdate_inclusive_ssm'],
    context.output_hash['unitdate_bulk_ssim'],
    context.output_hash['unitdate_other_ssim']
  ).to_s
  # `title_ssm` may be absent from the output hash (no <unittitle> matched),
  # so use `&.first` -- the same guard the component-level rule applies.
  title = context.output_hash['title_ssm']&.first
  accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
end
100
+
101
+ to_field 'normalized_date_ssm' do |_record, accumulator, context|
102
+ accumulator << Arclight::NormalizedDate.new(
103
+ context.output_hash['unitdate_inclusive_ssm'],
104
+ context.output_hash['unitdate_bulk_ssim'],
105
+ context.output_hash['unitdate_other_ssim']
106
+ ).to_s
107
+ end
108
+
109
+ to_field 'collection_ssm' do |_record, accumulator, context|
110
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
111
+ end
112
+ to_field 'collection_sim' do |_record, accumulator, context|
113
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
114
+ end
115
+
116
+ to_field 'repository_ssm' do |_record, accumulator, context|
117
+ accumulator << context.clipboard[:repository]
118
+ end
119
+
120
+ to_field 'repository_sim' do |_record, accumulator, context|
121
+ accumulator << context.clipboard[:repository]
122
+ end
123
+
124
+ to_field 'geogname_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
125
+
126
+ to_field 'geogname_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
127
+
128
+ to_field 'creator_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
129
+ to_field 'creator_sim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
130
+ to_field 'creator_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
131
+ to_field 'creator_sort' do |record, accumulator|
132
+ accumulator << record.xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']").map { |c| c.text.strip }.join(', ')
133
+ end
134
+
135
+ to_field 'creator_persname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:persname")
136
+ to_field 'creator_persname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:persname")
137
+ to_field 'creator_corpname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
138
+ to_field 'creator_corpname_sim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
139
+ to_field 'creator_corpname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
140
+ to_field 'creator_famname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:famname")
141
+ to_field 'creator_famname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:famname")
142
+
143
+ to_field 'persname_sim', extract_xpath('//xmlns:persname')
144
+
145
+ to_field 'creators_ssim' do |_record, accumulator, context|
146
+ accumulator.concat context.output_hash['creator_persname_ssm'] if context.output_hash['creator_persname_ssm']
147
+ accumulator.concat context.output_hash['creator_corpname_ssm'] if context.output_hash['creator_corpname_ssm']
148
+ accumulator.concat context.output_hash['creator_famname_ssm'] if context.output_hash['creator_famname_ssm']
149
+ end
150
+
151
+ to_field 'places_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
152
+ to_field 'places_ssim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
153
+ to_field 'places_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
154
+
155
+ to_field 'access_terms_ssm', extract_xpath('//xmlns:archdesc/xmlns:userestrict/xmlns:p')
156
+
157
+ # Indexes the acquisition group information into the notes field
158
+ # Please see https://www.loc.gov/ead/tglib/elements/acqinfo.html
159
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:acqinfo/*[local-name()!="head"]')
160
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
161
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:acqinfo/*[local-name()!="head"]')
162
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
163
+ to_field 'acqinfo_ssm' do |_record, accumulator, context|
164
+ accumulator.concat(context.output_hash.fetch('acqinfo_ssim', []))
165
+ end
166
+
167
+ # Indexes only specified controlled terms for archival description into the access_subject field
168
+ to_field 'access_subjects_ssim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess', to_text: false) do |_record, accumulator|
169
+ accumulator.map! do |element|
170
+ %w[subject function occupation genreform].map do |selector|
171
+ element.xpath(".//xmlns:#{selector}").map(&:text)
172
+ end
173
+ end.flatten!
174
+ end
175
+
176
+ to_field 'access_subjects_ssm' do |_record, accumulator, context|
177
+ accumulator.concat Array.wrap(context.output_hash['access_subjects_ssim'])
178
+ end
179
+
180
+ to_field 'has_online_content_ssim', extract_xpath('.//xmlns:dao') do |_record, accumulator|
181
+ accumulator.replace([accumulator.any?])
182
+ end
183
+
184
+ to_field 'extent_ssm', extract_xpath('//xmlns:did/xmlns:physdesc/xmlns:extent')
185
+ to_field 'extent_teim', extract_xpath('//xmlns:did/xmlns:physdesc/xmlns:extent')
186
+ to_field 'genreform_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:genreform')
187
+ to_field 'genreform_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:genreform')
188
+
189
+ to_field 'date_range_sim', extract_xpath('.//xmlns:did/xmlns:unitdate/@normal', to_text: false) do |_record, accumulator|
190
+ range = Arclight::YearRange.new
191
+ next range.years if accumulator.blank?
192
+
193
+ ranges = accumulator.map(&:to_s)
194
+ range << range.parse_ranges(ranges)
195
+ accumulator.replace range.years
196
+ end
197
+
198
+ SEARCHABLE_NOTES_FIELDS.map do |selector|
199
+ to_field "#{selector}_ssm", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/*[local-name()!='head']")
200
+ to_field "#{selector}_heading_ssm", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/xmlns:head") unless selector == 'prefercite'
201
+ to_field "#{selector}_teim", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/*[local-name()!='head']")
202
+ end
203
+
204
+ DID_SEARCHABLE_NOTES_FIELDS.map do |selector|
205
+ to_field "#{selector}_ssm", extract_xpath("//xmlns:did/xmlns:#{selector}")
206
+ end
207
+ NAME_ELEMENTS.map do |selector|
208
+ to_field 'names_coll_ssim', extract_xpath("/xmlns:ead/xmlns:archdesc/xmlns:controlaccess/xmlns:#{selector}")
209
+ to_field 'names_ssim', extract_xpath("//xmlns:#{selector}")
210
+ to_field "#{selector}_ssm", extract_xpath("//xmlns:#{selector}")
211
+ end
212
+ to_field 'corpname_sim', extract_xpath('//xmlns:corpname')
213
+
214
+ to_field 'language_sim', extract_xpath('//xmlns:did/xmlns:langmaterial')
215
+ to_field 'language_ssm', extract_xpath('//xmlns:did/xmlns:langmaterial')
216
+
217
+ # Each component child document
218
+ # <c> <c01> <c12>
219
+ compose 'components', ->(record, accumulator, _context) { accumulator.concat record.xpath('//*[is_component(.)]', NokogiriXpathExtensions.new) } do
220
# Component reference id.
#
# When the component carries an `id` attribute, the value is stripped and
# every '.' is replaced with '-'. When the attribute is blank, an id is
# minted with the configured Arclight::MissingIdStrategy and a warning is
# logged.
to_field 'ref_ssi' do |record, accumulator, context|
  accumulator << if record.attribute('id').blank?
                   strategy = Arclight::MissingIdStrategy.selected
                   hexdigest = strategy.new(record).to_hexdigest
                   parent_id = context.clipboard[:parent].output_hash['id'].first
                   logger.warn('MISSING ID WARNING') do
                     [
                       "A component in #{parent_id} did not have an ID so one was minted using the #{strategy} strategy.",
                       "The ID of this document will be #{parent_id}#{hexdigest}."
                     ].join(' ')
                   end
                   # The branch must evaluate to the minted digest; without this
                   # line its value would be whatever `logger.warn` returns.
                   hexdigest
                 else
                   record.attribute('id')&.value&.strip&.gsub('.', '-')
                 end
end
235
+ to_field 'ref_ssm' do |_record, accumulator, context|
236
+ accumulator.concat context.output_hash['ref_ssi']
237
+ end
238
+
239
+ to_field 'id' do |_record, accumulator, context|
240
+ accumulator << [
241
+ context.clipboard[:parent].output_hash['id'],
242
+ context.output_hash['ref_ssi']
243
+ ].join('')
244
+ end
245
+
246
+ to_field 'ead_ssi' do |_record, accumulator, context|
247
+ accumulator << context.clipboard[:parent].output_hash['ead_ssi'].first
248
+ end
249
+
250
+ to_field 'title_filing_si', extract_xpath('./xmlns:did/xmlns:unittitle'), first_only
251
+ to_field 'title_ssm', extract_xpath('./xmlns:did/xmlns:unittitle')
252
+ to_field 'title_teim', extract_xpath('./xmlns:did/xmlns:unittitle')
253
+
254
+ to_field 'unitdate_bulk_ssim', extract_xpath('./xmlns:did/xmlns:unitdate[@type="bulk"]')
255
+ to_field 'unitdate_inclusive_ssm', extract_xpath('./xmlns:did/xmlns:unitdate[@type="inclusive"]')
256
+ to_field 'unitdate_other_ssim', extract_xpath('./xmlns:did/xmlns:unitdate[not(@type)]')
257
+
258
+ to_field 'normalized_title_ssm' do |_record, accumulator, context|
259
+ dates = Arclight::NormalizedDate.new(
260
+ context.output_hash['unitdate_inclusive_ssm'],
261
+ context.output_hash['unitdate_bulk_ssim'],
262
+ context.output_hash['unitdate_other_ssim']
263
+ ).to_s
264
+ title = context.output_hash['title_ssm']&.first
265
+ accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
266
+ end
267
+
268
+ to_field 'normalized_date_ssm' do |_record, accumulator, context|
269
+ accumulator << Arclight::NormalizedDate.new(
270
+ context.output_hash['unitdate_inclusive_ssm'],
271
+ context.output_hash['unitdate_bulk_ssim'],
272
+ context.output_hash['unitdate_other_ssim']
273
+ ).to_s
274
+ end
275
+
276
+ to_field 'component_level_isim' do |record, accumulator|
277
+ accumulator << 1 + record.ancestors.count { |node| node.name == 'c' }
278
+ end
279
+
280
# Ancestor chain for a component: the collection document id first, then the
# ids of every enclosing component element, outermost first.
to_field 'parent_ssm' do |record, accumulator, context|
  accumulator << context.clipboard[:parent].output_hash['id'].first
  enclosing = NokogiriXpathExtensions.new.is_component(record.ancestors).reverse
  # Normalize ancestor ids the same way `ref_ssi` does (strip, '.' -> '-') so
  # that later `ref_ssi == [id]` lookups can match them. Ancestors without an
  # `id` attribute are skipped instead of raising NoMethodError on nil.
  ancestor_ids = enclosing.map { |node| node.attribute('id')&.value&.strip&.gsub('.', '-') }
  accumulator.concat ancestor_ids.compact
end
284
+
285
+ to_field 'parent_ssi' do |_record, accumulator, context|
286
+ accumulator << context.output_hash['parent_ssm'].last
287
+ end
288
+
289
+ to_field 'parent_unittitles_ssm' do |_record, accumulator, context|
290
+ ## Top level document
291
+ accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
292
+ ## Other components
293
+ context.output_hash['parent_ssm']&.drop(1)&.each do |id|
294
+ accumulator.concat Array
295
+ .wrap(context.clipboard[:parent].output_hash['components'])
296
+ .find { |c| c['ref_ssi'] == [id] }&.[]('normalized_title_ssm')
297
+ end
298
+ end
299
+ to_field 'parent_unittitles_teim' do |_record, accumulator, context|
300
+ accumulator.concat context.output_hash['parent_unittitles_ssm']
301
+ end
302
+
303
+ to_field 'unitid_ssm', extract_xpath('./xmlns:did/xmlns:unitid')
304
+ to_field 'repository_ssm' do |_record, accumulator, context|
305
+ accumulator << context.clipboard[:parent].clipboard[:repository]
306
+ end
307
+ to_field 'repository_sim' do |_record, accumulator, context|
308
+ accumulator << context.clipboard[:parent].clipboard[:repository]
309
+ end
310
+ to_field 'collection_ssm' do |_record, accumulator, context|
311
+ accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
312
+ end
313
+ to_field 'collection_sim' do |_record, accumulator, context|
314
+ accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
315
+ end
316
+
317
+ to_field 'extent_ssm', extract_xpath('./xmlns:did/xmlns:physdesc/xmlns:extent')
318
+ to_field 'creator_ssm', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
319
+ to_field 'creator_ssim', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
320
+ to_field 'creators_ssim', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
321
+ to_field 'creator_sort' do |record, accumulator|
322
+ accumulator << record.xpath("./xmlns:did/xmlns:origination[@label='creator']").map(&:text).join(', ')
323
+ end
324
+ to_field 'collection_creator_ssm' do |_record, accumulator, context|
325
+ accumulator.concat Array.wrap(context.clipboard[:parent].output_hash['creator_ssm'])
326
+ end
327
+ to_field 'has_online_content_ssim', extract_xpath('.//xmlns:dao') do |_record, accumulator|
328
+ accumulator.replace([accumulator.any?])
329
+ end
330
+ to_field 'child_component_count_isim', extract_xpath('xmlns:c') do |_record, accumulator|
331
+ accumulator.replace([accumulator.length])
332
+ end
333
+
334
# Adds the component's raw `id` attribute to `ref_ssm` as a plain String.
# The attribute's value is appended rather than the attribute node itself,
# and nothing is appended when the component has no `id` attribute.
to_field 'ref_ssm' do |record, accumulator|
  raw_id = record.attribute('id')&.value
  accumulator << raw_id if raw_id
end
337
+
338
# Component-level `level_ssm`.
#
# * level="otherlevel": use the `otherlevel` attribute when it has content,
#   otherwise the literal 'Other'.
# * any other non-blank level: the capitalized level.
# * blank/missing level: nil is appended.
to_field 'level_ssm' do |record, accumulator|
  declared = record.attribute('level')&.value
  custom = record.attribute('otherlevel')&.value

  label =
    if declared == 'otherlevel'
      custom.presence || 'Other'
    elsif declared.present?
      declared.capitalize
    end

  accumulator << label
end
349
+
350
+ to_field 'level_sim' do |_record, accumulator, context|
351
+ next unless context.output_hash['level_ssm']
352
+
353
+ accumulator.concat context.output_hash['level_ssm']&.map(&:capitalize)
354
+ end
355
+
356
+ to_field 'parent_access_restrict_ssm', extract_xpath('./xmlns:accessrestrict/xmlns:p')
357
+
358
+ to_field 'parent_access_restrict_ssm' do |_record, accumulator, context|
359
+ next unless context.output_hash['accessrestrict_ssm'].nil?
360
+
361
+ context.output_hash['parent_ssm']&.each do |id|
362
+ accumulator.concat Array
363
+ .wrap(context.clipboard[:parent]&.output_hash&.[]('components'))
364
+ .select { |c| c['ref_ssi'] == [id] }.map { |c| c['accessrestrict_ssm'] }
365
+ end
366
+ end
367
+
368
+ to_field 'parent_access_restrict_ssm' do |_record, accumulator, context|
369
+ next unless context.output_hash['parent_access_restrict_ssm'].nil?
370
+
371
+ accumulator.concat Array.wrap(context.clipboard[:parent]&.output_hash&.[]('accessrestrict_ssm'))
372
+ end
373
+
374
+ to_field 'parent_access_terms_ssm', extract_xpath('xmlns:userestrict/xmlns:p')
375
+
376
+ to_field 'parent_access_terms_ssm' do |_record, accumulator, context|
377
+ next unless context.output_hash['userestrict_ssm'].nil?
378
+
379
+ context.output_hash['parent_ssm']&.each do |id|
380
+ accumulator.concat Array
381
+ .wrap(context.clipboard[:parent]&.output_hash&.[]('components'))
382
+ .select { |c| c['ref_ssi'] == [id] }.map { |c| c['userestrict_ssm'] }
383
+ end
384
+ end
385
+
386
+ to_field 'parent_access_terms_ssm' do |_record, accumulator, context|
387
+ next unless context.output_hash['parent_access_terms_ssm'].nil?
388
+
389
+ accumulator << context.clipboard[:parent]&.output_hash&.[]('access_terms_ssm')&.first
390
+ end
391
+
392
# One JSON-serialized Arclight::DigitalObject per <dao> found anywhere below
# the component. The label is the dao's `title` attribute, falling back to
# the text of daodesc/p; the href is read from the `href` attribute.
#
# No `extract_xpath` macro is used here: it would first push the raw text of
# the matched <dao> elements into the accumulator, mixing plain text in with
# the JSON values appended below.
to_field 'digital_objects_ssm' do |record, accumulator|
  record.xpath('.//xmlns:dao', xmlns: 'urn:isbn:1-931666-22-9').each do |dao|
    label = dao.attributes['title']&.value ||
            dao.xpath('xmlns:daodesc/xmlns:p', xmlns: 'urn:isbn:1-931666-22-9').text
    href = (dao.attributes['href'] || dao.attributes['xlink:href'])&.value
    accumulator << Arclight::DigitalObject.new(label: label, href: href).to_json
  end
end
400
+
401
+ to_field 'date_range_sim', extract_xpath('.//xmlns:did/xmlns:unitdate/@normal', to_text: false) do |_record, accumulator|
402
+ range = Arclight::YearRange.new
403
+ next range.years if accumulator.blank?
404
+
405
+ ranges = accumulator.map(&:to_s)
406
+ range << range.parse_ranges(ranges)
407
+ accumulator.replace range.years
408
+ end
409
+
410
+ NAME_ELEMENTS.map do |selector|
411
+ to_field 'names_ssim', extract_xpath("./xmlns:controlaccess/xmlns:#{selector}")
412
+ to_field "#{selector}_ssm", extract_xpath(".//xmlns:#{selector}")
413
+ end
414
+
415
+ to_field 'geogname_sim', extract_xpath('./xmlns:controlaccess/xmlns:geogname')
416
+ to_field 'geogname_ssm', extract_xpath('./xmlns:controlaccess/xmlns:geogname')
417
+ to_field 'places_ssim', extract_xpath('xmlns:controlaccess/xmlns:geogname')
418
+
419
+ # Indexes only specified controlled terms for archival description into the access_subject field
420
+ to_field 'access_subjects_ssim', extract_xpath('./xmlns:controlaccess', to_text: false) do |_record, accumulator|
421
+ accumulator.map! do |element|
422
+ %w[subject function occupation genreform].map do |selector|
423
+ element.xpath(".//xmlns:#{selector}").map(&:text)
424
+ end
425
+ end.flatten!
426
+ end
427
+
428
+ to_field 'access_subjects_ssm' do |_record, accumulator, context|
429
+ accumulator.concat(context.output_hash.fetch('access_subjects_ssim', []))
430
+ end
431
+
432
+ # Indexes the acquisition group information into the notes field
433
+ # Please see https://www.loc.gov/ead/tglib/elements/acqinfo.html
434
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:acqinfo/*[local-name()!="head"]')
435
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
436
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:acqinfo/*[local-name()!="head"]')
437
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
438
+ to_field 'acqinfo_ssm' do |_record, accumulator, context|
439
+ accumulator.concat(context.output_hash.fetch('acqinfo_ssim', []))
440
+ end
441
+
442
+ to_field 'language_ssm', extract_xpath('./xmlns:did/xmlns:langmaterial')
443
+ to_field 'containers_ssim' do |record, accumulator|
444
+ record.xpath('./xmlns:did/xmlns:container').each do |node|
445
+ accumulator << [node.attribute('type'), node.text].join(' ').strip
446
+ end
447
+ end
448
# Component-level searchable notes: for each note element name, index the
# note body (everything except <head>) into `<name>_ssm` / `<name>_teim` and
# the <head> text into `<name>_heading_ssm`.
#
# The heading XPath is rooted at the same `.//<name>` elements as the body
# XPaths. It must not go through `archdesc`: these rules run against a
# component element, and <archdesc> is never a descendant of a component.
SEARCHABLE_NOTES_FIELDS.each do |selector|
  to_field "#{selector}_ssm", extract_xpath(".//xmlns:#{selector}/*[local-name()!='head']")
  to_field "#{selector}_heading_ssm", extract_xpath(".//xmlns:#{selector}/xmlns:head")
  to_field "#{selector}_teim", extract_xpath(".//xmlns:#{selector}/*[local-name()!='head']")
end
453
+ DID_SEARCHABLE_NOTES_FIELDS.map do |selector|
454
+ to_field "#{selector}_ssm", extract_xpath(".//xmlns:did/xmlns:#{selector}")
455
+ end
456
+ to_field 'did_note_ssm', extract_xpath('.//xmlns:did/xmlns:note')
457
+ end
458
+
459
+ each_record do |_record, context|
460
+ context.output_hash['components'] &&= context.output_hash['components'].select { |c| c.keys.any? }
461
+ end
462
+
463
##
# Custom XPath function handler. An instance is passed as the handler
# argument to `xpath` so that expressions such as `//*[is_component(.)]`
# can select EAD component elements; `is_component` is also called directly
# on a node's ancestors.
class NokogiriXpathExtensions
  # Element names treated as components: numbered c01..c12 plus unnumbered c.
  # Built once instead of once per node examined.
  COMPONENT_ELEMENTS = ((1..12).map { |i| format('c%02d', i) } << 'c').freeze

  # rubocop:disable Naming/PredicateName
  # @param node_set [Enumerable] objects responding to `name`
  # @return [Array] the members of +node_set+ whose name is a component element name
  def is_component(node_set)
    node_set.find_all { |node| COMPONENT_ELEMENTS.include?(node.name) }
  end
  # rubocop:enable Naming/PredicateName
end