arclight 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56)
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +6 -47
  4. data/.rubocop_todo.yml +259 -0
  5. data/.travis.yml +15 -20
  6. data/README.md +17 -4
  7. data/app/assets/images/blacklight/compact.svg +15 -15
  8. data/app/assets/javascripts/arclight/arclight.js +1 -0
  9. data/app/assets/javascripts/arclight/collection_navigation.js +5 -2
  10. data/app/assets/javascripts/arclight/oembed_viewer.js +11 -4
  11. data/app/assets/javascripts/arclight/search_results.js +15 -0
  12. data/app/assets/stylesheets/arclight/modules/hierarchy_and_online_contents.scss +6 -3
  13. data/app/assets/stylesheets/arclight/modules/layout.scss +24 -0
  14. data/app/assets/stylesheets/arclight/modules/mastheads.scss +33 -0
  15. data/app/helpers/arclight_helper.rb +1 -1
  16. data/app/models/concerns/arclight/search_behavior.rb +1 -1
  17. data/app/views/arclight/repositories/_in_person_repository.html.erb +1 -1
  18. data/app/views/catalog/_component_contents.html.erb +16 -0
  19. data/app/views/catalog/_component_overview.html.erb +0 -6
  20. data/app/views/catalog/_context_card.html.erb +1 -1
  21. data/app/views/catalog/_custom_metadata.html.erb +1 -1
  22. data/app/views/catalog/_index_default.html.erb +1 -1
  23. data/app/views/catalog/_index_header.html.erb +2 -2
  24. data/app/views/catalog/_index_header_hierarchy_default.html.erb +2 -2
  25. data/app/views/catalog/_index_hierarchy_default.html.erb +1 -1
  26. data/app/views/catalog/_results_histogram.html.erb +6 -1
  27. data/app/views/catalog/_show_breadcrumbs_default.html.erb +19 -5
  28. data/app/views/catalog/_show_default.html.erb +10 -0
  29. data/app/views/catalog/_show_sidebar.html.erb +0 -8
  30. data/app/views/catalog/_show_upper_metadata_collection.html.erb +1 -0
  31. data/app/views/catalog/_show_upper_metadata_default.html.erb +14 -0
  32. data/app/views/shared/_header_navbar.html.erb +56 -44
  33. data/app/views/shared/_main_menu_links.html.erb +1 -1
  34. data/arclight.gemspec +11 -7
  35. data/config/i18n-tasks.yml +132 -0
  36. data/config/locales/arclight.en.yml +53 -52
  37. data/lib/arclight/engine.rb +1 -0
  38. data/lib/arclight/hash_absolute_xpath.rb +57 -0
  39. data/lib/arclight/missing_id_strategy.rb +21 -0
  40. data/lib/arclight/normalized_date.rb +19 -10
  41. data/lib/arclight/repository.rb +3 -20
  42. data/lib/arclight/shared_indexing_behavior.rb +1 -1
  43. data/lib/arclight/solr_ead_indexer_ext.rb +5 -9
  44. data/lib/arclight/traject/ead2_config.rb +475 -0
  45. data/lib/arclight/version.rb +1 -1
  46. data/lib/generators/arclight/install_generator.rb +14 -0
  47. data/lib/generators/arclight/templates/catalog_controller.rb +43 -40
  48. data/lib/tasks/index.rake +4 -2
  49. data/solr/conf/schema.xml +7 -2
  50. data/tasks/arclight.rake +5 -1
  51. data/template.rb +1 -1
  52. metadata +94 -28
  53. data/app/views/catalog/_arclight_document_show_header.html.erb +0 -15
  54. data/app/views/catalog/_arclight_document_show_header_collection.html.erb +0 -12
  55. data/app/views/catalog/_search_within_form.html.erb +0 -16
  56. data/app/views/catalog/_show_header.html.erb +0 -5
@@ -83,7 +83,7 @@ module Arclight
83
83
  end
84
84
 
85
85
  def add_normalized_title(solr_doc)
86
- dates = Arclight::NormalizedDate.new(unitdate_inclusive.first, unitdate_bulk.first, unitdate_other.first).to_s
86
+ dates = Arclight::NormalizedDate.new(unitdate_inclusive, unitdate_bulk, unitdate_other).to_s
87
87
  title = Arclight::NormalizedTitle.new(solr_doc['title_ssm'].try(:first), dates).to_s
88
88
  solr_doc['normalized_title_ssm'] = [title]
89
89
  solr_doc['normalized_date_ssm'] = [dates]
@@ -102,16 +102,12 @@ module Arclight
102
102
 
103
103
  # TODO: these xpaths should be DRY'd up -- they're in both terminologies
104
104
  def extract_title_and_dates(node, prefix = nil)
105
- data = {
106
- title: node.at_xpath("#{prefix}did/unittitle"),
107
- unitdate_inclusive: node.at_xpath("#{prefix}did/unitdate[@type=\"inclusive\"]"),
108
- unitdate_bulk: node.at_xpath("#{prefix}did/unitdate[@type=\"bulk\"]"),
109
- unitdate_other: node.at_xpath("#{prefix}did/unitdate[not(@type)]")
105
+ {
106
+ title: node.at_xpath("#{prefix}did/unittitle").try(:text),
107
+ unitdate_inclusive: node.xpath("#{prefix}did/unitdate[@type=\"inclusive\"]").map(&:text),
108
+ unitdate_bulk: node.xpath("#{prefix}did/unitdate[@type=\"bulk\"]").map(&:text),
109
+ unitdate_other: node.xpath("#{prefix}did/unitdate[not(@type)]").map(&:text)
110
110
  }
111
- data.each do |k, v|
112
- data[k] = v.text if v
113
- end
114
- data
115
111
  end
116
112
 
117
113
  def normalized_component_id(node)
@@ -0,0 +1,475 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+ require 'traject'
5
+ require 'traject/nokogiri_reader'
6
+ require 'traject_plus'
7
+ require 'traject_plus/macros'
8
+ require 'arclight/normalized_date'
9
+ require 'arclight/normalized_title'
10
+ require 'active_model/conversion' ## Needed for Arclight::Repository
11
+ require 'active_support/core_ext/array/wrap'
12
+ require 'arclight/digital_object'
13
+ require 'arclight/year_range'
14
+ require 'arclight/repository'
15
+ require 'arclight/missing_id_strategy'
16
+
17
+ NAME_ELEMENTS = %w[corpname famname name persname].freeze
18
+
19
+ # rubocop:disable Style/MixinUsage
20
+ extend TrajectPlus::Macros
21
+ # rubocop:enable Style/MixinUsage
22
+
23
+ SEARCHABLE_NOTES_FIELDS = %w[
24
+ accessrestrict
25
+ accruals
26
+ altformavail
27
+ appraisal
28
+ arrangement
29
+ bibliography
30
+ bioghist
31
+ custodhist
32
+ fileplan
33
+ note
34
+ odd
35
+ originalsloc
36
+ otherfindaid
37
+ phystech
38
+ prefercite
39
+ processinfo
40
+ relatedmaterial
41
+ scopecontent
42
+ separatedmaterial
43
+ userestrict
44
+ ].freeze
45
+
46
+ DID_SEARCHABLE_NOTES_FIELDS = %w[
47
+ abstract
48
+ materialspec
49
+ physloc
50
+ ].freeze
51
+
52
+ settings do
53
+ provide 'nokogiri.namespaces',
54
+ 'xmlns' => 'urn:isbn:1-931666-22-9'
55
+ provide 'solr_writer.commit_on_close', 'true'
56
+ provide 'repository', ENV['REPOSITORY_ID']
57
+ provide 'logger', Logger.new($stderr)
58
+ end
59
+
60
+ each_record do |_record, context|
61
+ next unless settings['repository']
62
+
63
+ context.clipboard[:repository] = Arclight::Repository.find_by(
64
+ slug: settings['repository']
65
+ ).name
66
+ end
67
+
68
+ # Top level
69
+ to_field 'id', extract_xpath('//xmlns:eadid'), strip, gsub('.', '-')
70
+ to_field 'title_filing_si', extract_xpath('//xmlns:titleproper[@type="filing"]')
71
+ to_field 'title_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unittitle')
72
+ to_field 'title_teim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unittitle')
73
+ to_field 'ead_ssi', extract_xpath('//xmlns:eadid')
74
+
75
+ to_field 'unitdate_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate')
76
+ to_field 'unitdate_bulk_ssim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[@type="bulk"]')
77
+ to_field 'unitdate_inclusive_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[@type="inclusive"]')
78
+ to_field 'unitdate_other_ssim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitdate[not(@type)]')
79
+
80
# Collection-level `level_ssm`: the raw `level` attribute of <archdesc>.
# Safe navigation sits directly after `attribute('level')` because that call
# returns nil when the attribute is absent; the value is then nil instead of
# raising NoMethodError.
to_field 'level_ssm' do |record, accumulator|
  accumulator << record.at_xpath('//xmlns:archdesc').attribute('level')&.value
end

# Collection-level `level_sim`: the same attribute value, capitalized.
to_field 'level_sim' do |record, accumulator|
  accumulator << record.at_xpath('//xmlns:archdesc').attribute('level')&.value&.capitalize
end
87
+
88
+ to_field 'unitid_ssm', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitid')
89
+ to_field 'unitid_teim', extract_xpath('//xmlns:archdesc/xmlns:did/xmlns:unitid')
90
+
91
+ to_field 'normalized_title_ssm' do |_record, accumulator, context|
92
+ dates = Arclight::NormalizedDate.new(
93
+ context.output_hash['unitdate_inclusive_ssm'],
94
+ context.output_hash['unitdate_bulk_ssim'],
95
+ context.output_hash['unitdate_other_ssim']
96
+ ).to_s
97
+ title = context.output_hash['title_ssm'].first
98
+ accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
99
+ end
100
+
101
+ to_field 'normalized_date_ssm' do |_record, accumulator, context|
102
+ accumulator << Arclight::NormalizedDate.new(
103
+ context.output_hash['unitdate_inclusive_ssm'],
104
+ context.output_hash['unitdate_bulk_ssim'],
105
+ context.output_hash['unitdate_other_ssim']
106
+ ).to_s
107
+ end
108
+
109
+ to_field 'collection_ssm' do |_record, accumulator, context|
110
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
111
+ end
112
+ to_field 'collection_sim' do |_record, accumulator, context|
113
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
114
+ end
115
+
116
+ to_field 'repository_ssm' do |_record, accumulator, context|
117
+ accumulator << context.clipboard[:repository]
118
+ end
119
+
120
+ to_field 'repository_sim' do |_record, accumulator, context|
121
+ accumulator << context.clipboard[:repository]
122
+ end
123
+
124
+ to_field 'geogname_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
125
+
126
+ to_field 'geogname_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
127
+
128
+ to_field 'creator_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
129
+ to_field 'creator_sim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
130
+ to_field 'creator_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']")
131
+ to_field 'creator_sort' do |record, accumulator|
132
+ accumulator << record.xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']").map { |c| c.text.strip }.join(', ')
133
+ end
134
+
135
+ to_field 'creator_persname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:persname")
136
+ to_field 'creator_persname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:persname")
137
+ to_field 'creator_corpname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
138
+ to_field 'creator_corpname_sim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
139
+ to_field 'creator_corpname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:corpname")
140
+ to_field 'creator_famname_ssm', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:famname")
141
+ to_field 'creator_famname_ssim', extract_xpath("//xmlns:archdesc/xmlns:did/xmlns:origination[@label='creator']/xmlns:famname")
142
+
143
+ to_field 'persname_sim', extract_xpath('//xmlns:persname')
144
+
145
+ to_field 'creators_ssim' do |_record, accumulator, context|
146
+ accumulator.concat context.output_hash['creator_persname_ssm'] if context.output_hash['creator_persname_ssm']
147
+ accumulator.concat context.output_hash['creator_corpname_ssm'] if context.output_hash['creator_corpname_ssm']
148
+ accumulator.concat context.output_hash['creator_famname_ssm'] if context.output_hash['creator_famname_ssm']
149
+ end
150
+
151
+ to_field 'places_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
152
+ to_field 'places_ssim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
153
+ to_field 'places_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:geogname')
154
+
155
+ to_field 'access_terms_ssm', extract_xpath('//xmlns:archdesc/xmlns:userestrict/xmlns:p')
156
+
157
+ # Indexes the acquisition group information into the notes field
158
+ # Please see https://www.loc.gov/ead/tglib/elements/acqinfo.html
159
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:acqinfo/*[local-name()!="head"]')
160
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
161
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:acqinfo/*[local-name()!="head"]')
162
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
163
+ to_field 'acqinfo_ssm' do |_record, accumulator, context|
164
+ accumulator.concat(context.output_hash.fetch('acqinfo_ssim', []))
165
+ end
166
+
167
+ # Indexes only specified controlled terms for archival description into the access_subject field
168
+ to_field 'access_subjects_ssim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess', to_text: false) do |_record, accumulator|
169
+ accumulator.map! do |element|
170
+ %w[subject function occupation genreform].map do |selector|
171
+ element.xpath(".//xmlns:#{selector}").map(&:text)
172
+ end
173
+ end.flatten!
174
+ end
175
+
176
+ to_field 'access_subjects_ssm' do |_record, accumulator, context|
177
+ accumulator.concat Array.wrap(context.output_hash['access_subjects_ssim'])
178
+ end
179
+
180
+ to_field 'has_online_content_ssim', extract_xpath('.//xmlns:dao') do |_record, accumulator|
181
+ accumulator.replace([accumulator.any?])
182
+ end
183
+
184
+ to_field 'extent_ssm', extract_xpath('//xmlns:did/xmlns:physdesc/xmlns:extent')
185
+ to_field 'extent_teim', extract_xpath('//xmlns:did/xmlns:physdesc/xmlns:extent')
186
+ to_field 'genreform_sim', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:genreform')
187
+ to_field 'genreform_ssm', extract_xpath('//xmlns:archdesc/xmlns:controlaccess/xmlns:genreform')
188
+
189
+ to_field 'date_range_sim', extract_xpath('.//xmlns:did/xmlns:unitdate/@normal', to_text: false) do |_record, accumulator|
190
+ range = Arclight::YearRange.new
191
+ next range.years if accumulator.blank?
192
+
193
+ ranges = accumulator.map(&:to_s)
194
+ range << range.parse_ranges(ranges)
195
+ accumulator.replace range.years
196
+ end
197
+
198
+ SEARCHABLE_NOTES_FIELDS.map do |selector|
199
+ to_field "#{selector}_ssm", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/*[local-name()!='head']")
200
+ to_field "#{selector}_heading_ssm", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/xmlns:head") unless selector == 'prefercite'
201
+ to_field "#{selector}_teim", extract_xpath("//xmlns:archdesc/xmlns:#{selector}/*[local-name()!='head']")
202
+ end
203
+
204
+ DID_SEARCHABLE_NOTES_FIELDS.map do |selector|
205
+ to_field "#{selector}_ssm", extract_xpath("//xmlns:did/xmlns:#{selector}")
206
+ end
207
+ NAME_ELEMENTS.map do |selector|
208
+ to_field 'names_coll_ssim', extract_xpath("/xmlns:ead/xmlns:archdesc/xmlns:controlaccess/xmlns:#{selector}")
209
+ to_field 'names_ssim', extract_xpath("//xmlns:#{selector}")
210
+ to_field "#{selector}_ssm", extract_xpath("//xmlns:#{selector}")
211
+ end
212
+ to_field 'corpname_sim', extract_xpath('//xmlns:corpname')
213
+
214
+ to_field 'language_sim', extract_xpath('//xmlns:did/xmlns:langmaterial')
215
+ to_field 'language_ssm', extract_xpath('//xmlns:did/xmlns:langmaterial')
216
+
217
+ # Each component child document
218
+ # <c> <c01> <c12>
219
+ compose 'components', ->(record, accumulator, _context) { accumulator.concat record.xpath('//*[is_component(.)]', NokogiriXpathExtensions.new) } do
220
# Component `ref_ssi`: the component's own identifier.
# Uses the element's `id` attribute (stripped, '.' replaced with '-'). When the
# attribute is blank, an identifier is minted with the selected
# Arclight::MissingIdStrategy and a warning is logged.
to_field 'ref_ssi' do |record, accumulator, context|
  accumulator << if record.attribute('id').blank?
                   strategy = Arclight::MissingIdStrategy.selected
                   hexdigest = strategy.new(record).to_hexdigest
                   parent_id = context.clipboard[:parent].output_hash['id'].first
                   logger.warn('MISSING ID WARNING') do
                     [
                       "A component in #{parent_id} did not have an ID so one was minted using the #{strategy} strategy.",
                       "The ID of this document will be #{parent_id}#{hexdigest}."
                     ].join(' ')
                   end
                   # The branch must end with the minted ID; if the logger call were the
                   # last expression, its return value would be indexed instead.
                   hexdigest
                 else
                   record.attribute('id')&.value&.strip&.gsub('.', '-')
                 end
end
235
+ to_field 'ref_ssm' do |_record, accumulator, context|
236
+ accumulator.concat context.output_hash['ref_ssi']
237
+ end
238
+
239
+ to_field 'id' do |_record, accumulator, context|
240
+ accumulator << [
241
+ context.clipboard[:parent].output_hash['id'],
242
+ context.output_hash['ref_ssi']
243
+ ].join('')
244
+ end
245
+
246
+ to_field 'ead_ssi' do |_record, accumulator, context|
247
+ accumulator << context.clipboard[:parent].output_hash['ead_ssi'].first
248
+ end
249
+
250
+ to_field 'title_filing_si', extract_xpath('./xmlns:did/xmlns:unittitle'), first_only
251
+ to_field 'title_ssm', extract_xpath('./xmlns:did/xmlns:unittitle')
252
+ to_field 'title_teim', extract_xpath('./xmlns:did/xmlns:unittitle')
253
+
254
+ to_field 'unitdate_bulk_ssim', extract_xpath('./xmlns:did/xmlns:unitdate[@type="bulk"]')
255
+ to_field 'unitdate_inclusive_ssm', extract_xpath('./xmlns:did/xmlns:unitdate[@type="inclusive"]')
256
+ to_field 'unitdate_other_ssim', extract_xpath('./xmlns:did/xmlns:unitdate[not(@type)]')
257
+
258
+ to_field 'normalized_title_ssm' do |_record, accumulator, context|
259
+ dates = Arclight::NormalizedDate.new(
260
+ context.output_hash['unitdate_inclusive_ssm'],
261
+ context.output_hash['unitdate_bulk_ssim'],
262
+ context.output_hash['unitdate_other_ssim']
263
+ ).to_s
264
+ title = context.output_hash['title_ssm']&.first
265
+ accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
266
+ end
267
+
268
+ to_field 'normalized_date_ssm' do |_record, accumulator, context|
269
+ accumulator << Arclight::NormalizedDate.new(
270
+ context.output_hash['unitdate_inclusive_ssm'],
271
+ context.output_hash['unitdate_bulk_ssim'],
272
+ context.output_hash['unitdate_other_ssim']
273
+ ).to_s
274
+ end
275
+
276
# Component `component_level_isim`: nesting depth of this component, i.e. one
# more than the number of ancestor component elements. Ancestors are tested
# with NokogiriXpathExtensions#is_component so that numbered components
# (<c01>..<c12>) are counted as well as unnumbered <c>.
to_field 'component_level_isim' do |record, accumulator|
  accumulator << 1 + NokogiriXpathExtensions.new.is_component(record.ancestors).count
end
279
+
280
+ to_field 'parent_ssm' do |record, accumulator, context|
281
+ accumulator << context.clipboard[:parent].output_hash['id'].first
282
+ accumulator.concat NokogiriXpathExtensions.new.is_component(record.ancestors).reverse.map { |n| n.attribute('id').value }
283
+ end
284
+
285
+ to_field 'parent_ssi' do |_record, accumulator, context|
286
+ accumulator << context.output_hash['parent_ssm'].last
287
+ end
288
+
289
+ to_field 'parent_unittitles_ssm' do |_record, accumulator, context|
290
+ ## Top level document
291
+ accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
292
+ ## Other components
293
+ context.output_hash['parent_ssm']&.drop(1)&.each do |id|
294
+ accumulator.concat Array
295
+ .wrap(context.clipboard[:parent].output_hash['components'])
296
+ .find { |c| c['ref_ssi'] == [id] }&.[]('normalized_title_ssm')
297
+ end
298
+ end
299
+ to_field 'parent_unittitles_teim' do |_record, accumulator, context|
300
+ accumulator.concat context.output_hash['parent_unittitles_ssm']
301
+ end
302
+
303
+ to_field 'unitid_ssm', extract_xpath('./xmlns:did/xmlns:unitid')
304
+ to_field 'repository_ssm' do |_record, accumulator, context|
305
+ accumulator << context.clipboard[:parent].clipboard[:repository]
306
+ end
307
+ to_field 'repository_sim' do |_record, accumulator, context|
308
+ accumulator << context.clipboard[:parent].clipboard[:repository]
309
+ end
310
+ to_field 'collection_ssm' do |_record, accumulator, context|
311
+ accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
312
+ end
313
+ to_field 'collection_sim' do |_record, accumulator, context|
314
+ accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
315
+ end
316
+
317
+ to_field 'extent_ssm', extract_xpath('./xmlns:did/xmlns:physdesc/xmlns:extent')
318
+ to_field 'creator_ssm', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
319
+ to_field 'creator_ssim', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
320
+ to_field 'creators_ssim', extract_xpath("./xmlns:did/xmlns:origination[@label='creator']")
321
+ to_field 'creator_sort' do |record, accumulator|
322
+ accumulator << record.xpath("./xmlns:did/xmlns:origination[@label='creator']").map(&:text).join(', ')
323
+ end
324
+ to_field 'collection_creator_ssm' do |_record, accumulator, context|
325
+ accumulator.concat Array.wrap(context.clipboard[:parent].output_hash['creator_ssm'])
326
+ end
327
+ to_field 'has_online_content_ssim', extract_xpath('.//xmlns:dao') do |_record, accumulator|
328
+ accumulator.replace([accumulator.any?])
329
+ end
330
+ to_field 'child_component_count_isim', extract_xpath('xmlns:c') do |_record, accumulator|
331
+ accumulator.replace([accumulator.length])
332
+ end
333
+
334
+ to_field 'ref_ssm' do |record, accumulator|
335
+ accumulator << record.attribute('id')
336
+ end
337
+
338
# Component `level_ssm`: a display label derived from the `level` attribute.
# - level="otherlevel": the `otherlevel` attribute value, or 'Other' when blank
# - any other non-blank level: that level, capitalized
# - blank level: nil is appended
to_field 'level_ssm' do |record, accumulator|
  level = record.attribute('level')&.value
  other_level = record.attribute('otherlevel')&.value

  label =
    if level == 'otherlevel'
      other_level.present? ? other_level : 'Other'
    elsif level.present?
      level.capitalize
    end

  accumulator << label
end
349
+
350
+ to_field 'level_sim' do |_record, accumulator, context|
351
+ next unless context.output_hash['level_ssm']
352
+
353
+ accumulator.concat context.output_hash['level_ssm']&.map(&:capitalize)
354
+ end
355
+
356
+ to_field 'parent_access_restrict_ssm', extract_xpath('./xmlns:accessrestrict/xmlns:p')
357
+
358
+ to_field 'parent_access_restrict_ssm' do |_record, accumulator, context|
359
+ next unless context.output_hash['accessrestrict_ssm'].nil?
360
+
361
+ context.output_hash['parent_ssm']&.each do |id|
362
+ accumulator.concat Array
363
+ .wrap(context.clipboard[:parent]&.output_hash&.[]('components'))
364
+ .select { |c| c['ref_ssi'] == [id] }.map { |c| c['accessrestrict_ssm'] }
365
+ end
366
+ end
367
+
368
+ to_field 'parent_access_restrict_ssm' do |_record, accumulator, context|
369
+ next unless context.output_hash['parent_access_restrict_ssm'].nil?
370
+
371
+ accumulator.concat Array.wrap(context.clipboard[:parent]&.output_hash&.[]('accessrestrict_ssm'))
372
+ end
373
+
374
+ to_field 'parent_access_terms_ssm', extract_xpath('xmlns:userestrict/xmlns:p')
375
+
376
+ to_field 'parent_access_terms_ssm' do |_record, accumulator, context|
377
+ next unless context.output_hash['userestrict_ssm'].nil?
378
+
379
+ context.output_hash['parent_ssm']&.each do |id|
380
+ accumulator.concat Array
381
+ .wrap(context.clipboard[:parent]&.output_hash&.[]('components'))
382
+ .select { |c| c['ref_ssi'] == [id] }.map { |c| c['userestrict_ssm'] }
383
+ end
384
+ end
385
+
386
+ to_field 'parent_access_terms_ssm' do |_record, accumulator, context|
387
+ next unless context.output_hash['parent_access_terms_ssm'].nil?
388
+
389
+ accumulator << context.clipboard[:parent]&.output_hash&.[]('access_terms_ssm')&.first
390
+ end
391
+
392
+ to_field 'digital_objects_ssm', extract_xpath('./xmlns:dao') do |record, accumulator|
393
+ accumulator.concat(record.xpath('.//xmlns:dao', xmlns: 'urn:isbn:1-931666-22-9').map do |dao|
394
+ label = dao.attributes['title']&.value ||
395
+ dao.xpath('xmlns:daodesc/xmlns:p', xmlns: 'urn:isbn:1-931666-22-9')&.text
396
+ href = (dao.attributes['href'] || dao.attributes['xlink:href'])&.value
397
+ Arclight::DigitalObject.new(label: label, href: href).to_json
398
+ end.to_a)
399
+ end
400
+
401
+ to_field 'date_range_sim', extract_xpath('.//xmlns:did/xmlns:unitdate/@normal', to_text: false) do |_record, accumulator|
402
+ range = Arclight::YearRange.new
403
+ next range.years if accumulator.blank?
404
+
405
+ ranges = accumulator.map(&:to_s)
406
+ range << range.parse_ranges(ranges)
407
+ accumulator.replace range.years
408
+ end
409
+
410
+ NAME_ELEMENTS.map do |selector|
411
+ to_field 'names_ssim', extract_xpath("./xmlns:controlaccess/xmlns:#{selector}")
412
+ to_field "#{selector}_ssm", extract_xpath(".//xmlns:#{selector}")
413
+ end
414
+
415
+ to_field 'geogname_sim', extract_xpath('./xmlns:controlaccess/xmlns:geogname')
416
+ to_field 'geogname_ssm', extract_xpath('./xmlns:controlaccess/xmlns:geogname')
417
+ to_field 'places_ssim', extract_xpath('xmlns:controlaccess/xmlns:geogname')
418
+
419
+ # Indexes only specified controlled terms for archival description into the access_subject field
420
+ to_field 'access_subjects_ssim', extract_xpath('./xmlns:controlaccess', to_text: false) do |_record, accumulator|
421
+ accumulator.map! do |element|
422
+ %w[subject function occupation genreform].map do |selector|
423
+ element.xpath(".//xmlns:#{selector}").map(&:text)
424
+ end
425
+ end.flatten!
426
+ end
427
+
428
+ to_field 'access_subjects_ssm' do |_record, accumulator, context|
429
+ accumulator.concat(context.output_hash.fetch('access_subjects_ssim', []))
430
+ end
431
+
432
+ # Indexes the acquisition group information into the notes field
433
+ # Please see https://www.loc.gov/ead/tglib/elements/acqinfo.html
434
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:acqinfo/*[local-name()!="head"]')
435
+ to_field 'acqinfo_ssim', extract_xpath('/xmlns:ead/xmlns:archdesc/xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
436
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:acqinfo/*[local-name()!="head"]')
437
+ to_field 'acqinfo_ssim', extract_xpath('./xmlns:descgrp/xmlns:acqinfo/*[local-name()!="head"]')
438
+ to_field 'acqinfo_ssm' do |_record, accumulator, context|
439
+ accumulator.concat(context.output_hash.fetch('acqinfo_ssim', []))
440
+ end
441
+
442
+ to_field 'language_ssm', extract_xpath('./xmlns:did/xmlns:langmaterial')
443
+ to_field 'containers_ssim' do |record, accumulator|
444
+ record.xpath('./xmlns:did/xmlns:container').each do |node|
445
+ accumulator << [node.attribute('type'), node.text].join(' ').strip
446
+ end
447
+ end
448
# Component note fields: for every searchable note element, index the note body
# (everything except <head>) for display and full text, plus its heading.
# The heading selector mirrors the body selectors; the XPath is evaluated
# relative to the component, where an <archdesc> step can never match.
SEARCHABLE_NOTES_FIELDS.map do |selector|
  to_field "#{selector}_ssm", extract_xpath(".//xmlns:#{selector}/*[local-name()!='head']")
  to_field "#{selector}_heading_ssm", extract_xpath(".//xmlns:#{selector}/xmlns:head")
  to_field "#{selector}_teim", extract_xpath(".//xmlns:#{selector}/*[local-name()!='head']")
end
453
+ DID_SEARCHABLE_NOTES_FIELDS.map do |selector|
454
+ to_field "#{selector}_ssm", extract_xpath(".//xmlns:did/xmlns:#{selector}")
455
+ end
456
+ to_field 'did_note_ssm', extract_xpath('.//xmlns:did/xmlns:note')
457
+ end
458
+
459
+ each_record do |_record, context|
460
+ context.output_hash['components'] &&= context.output_hash['components'].select { |c| c.keys.any? }
461
+ end
462
+
463
##
# Custom XPath function handler. An instance is passed as the handler argument
# to `xpath` so that an expression such as `//*[is_component(.)]` can call
# `is_component`; it is also called directly on a node's ancestors.
class NokogiriXpathExtensions
  # Element names treated as components: unnumbered <c> plus <c01>..<c12>.
  # Built once here instead of once per node inside the filter.
  COMPONENT_ELEMENTS = ['c', *(1..12).map { |i| format('c%02d', i) }].freeze

  # rubocop:disable Naming/PredicateName
  # Keeps only the nodes whose element name is a component name.
  #
  # @param node_set [#find_all] collection of nodes responding to `name`
  # @return the nodes named `c` or `c01`..`c12`, in their original order
  def is_component(node_set)
    node_set.find_all { |node| COMPONENT_ELEMENTS.include?(node.name) }
  end
  # rubocop:enable Naming/PredicateName
end