arclight 0.1.4 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. checksums.yaml +5 -5
  2. data/.all-contributorsrc +450 -0
  3. data/.babelrc +3 -0
  4. data/.codeclimate.yml +5 -0
  5. data/.eslintrc +3 -0
  6. data/.gitignore +1 -0
  7. data/.rubocop.yml +20 -42
  8. data/.rubocop_todo.yml +139 -0
  9. data/.travis.yml +24 -20
  10. data/CONTRIBUTORS.md +79 -0
  11. data/README.md +34 -23
  12. data/Rakefile +0 -1
  13. data/app/assets/images/blacklight/bookmark.svg +1 -0
  14. data/app/assets/images/blacklight/collection.svg +5 -0
  15. data/app/assets/images/blacklight/compact.svg +1 -25
  16. data/app/assets/images/blacklight/container.svg +5 -0
  17. data/app/assets/images/blacklight/ead.svg +1 -0
  18. data/app/assets/images/blacklight/file.svg +5 -0
  19. data/app/assets/images/blacklight/folder.svg +1 -0
  20. data/app/assets/images/blacklight/list.svg +1 -0
  21. data/app/assets/images/blacklight/minus.svg +1 -0
  22. data/app/assets/images/blacklight/online.svg +5 -0
  23. data/app/assets/images/blacklight/pdf.svg +1 -0
  24. data/app/assets/images/blacklight/plus.svg +1 -0
  25. data/app/assets/images/blacklight/repository.svg +1 -0
  26. data/app/assets/javascripts/arclight/arclight.js +1 -2
  27. data/app/assets/javascripts/arclight/collection_navigation.js +36 -50
  28. data/app/assets/javascripts/arclight/collection_scrollspy.js +1 -1
  29. data/app/assets/javascripts/arclight/context_navigation.js +386 -0
  30. data/app/assets/javascripts/arclight/oembed_viewer.js +11 -4
  31. data/app/assets/javascripts/arclight/truncator.js.erb +8 -2
  32. data/app/assets/stylesheets/arclight/application.scss +4 -1
  33. data/app/assets/stylesheets/arclight/bootstrap_overrides.scss +23 -0
  34. data/app/assets/stylesheets/arclight/modules/context_navigation.scss +75 -0
  35. data/app/assets/stylesheets/arclight/modules/hierarchy_and_online_contents.scss +34 -38
  36. data/app/assets/stylesheets/arclight/modules/highlights.scss +2 -1
  37. data/app/assets/stylesheets/arclight/modules/icons.scss +4 -0
  38. data/app/assets/stylesheets/arclight/modules/layout.scss +149 -11
  39. data/app/assets/stylesheets/arclight/modules/mastheads.scss +60 -5
  40. data/app/assets/stylesheets/arclight/modules/repositories.scss +1 -5
  41. data/app/assets/stylesheets/arclight/modules/repository_card.scss +6 -7
  42. data/app/assets/stylesheets/arclight/modules/search_results.scss +145 -24
  43. data/app/assets/stylesheets/arclight/modules/show_collection.scss +38 -59
  44. data/app/assets/stylesheets/arclight/responsive.scss +13 -0
  45. data/app/assets/stylesheets/arclight/variables.scss +21 -1
  46. data/app/controllers/concerns/arclight/ead_format_helpers.rb +225 -0
  47. data/app/controllers/concerns/arclight/field_config_helpers.rb +23 -7
  48. data/app/factories/blacklight_field_configuration_factory.rb +1 -0
  49. data/app/helpers/arclight_helper.rb +198 -36
  50. data/app/models/arclight/document_downloads.rb +125 -0
  51. data/app/models/arclight/parent.rb +4 -2
  52. data/app/models/arclight/parents.rb +6 -4
  53. data/app/models/arclight/requests/aeon_external_request.rb +42 -0
  54. data/app/models/arclight/requests/aeon_web_ead.rb +47 -0
  55. data/app/models/arclight/requests/google_form.rb +2 -2
  56. data/app/models/concerns/arclight/catalog.rb +14 -2
  57. data/app/models/concerns/arclight/search_behavior.rb +27 -12
  58. data/app/models/concerns/arclight/solr_document.rb +29 -7
  59. data/app/views/arclight/_requests.html.erb +7 -0
  60. data/app/views/arclight/repositories/_in_person_repository.html.erb +2 -2
  61. data/app/views/arclight/repositories/_repository.html.erb +2 -2
  62. data/app/views/arclight/repositories/_repository_contact.html.erb +9 -0
  63. data/app/views/arclight/repositories/index.html.erb +3 -0
  64. data/app/views/arclight/repositories/show.html.erb +5 -4
  65. data/app/views/arclight/requests/_aeon_external_request_endpoint.html.erb +9 -0
  66. data/app/views/arclight/requests/_aeon_web_ead.html.erb +7 -0
  67. data/app/views/arclight/requests/_google_form.html.erb +2 -1
  68. data/app/views/arclight/viewers/_oembed.html.erb +2 -1
  69. data/app/views/catalog/_access_contents.html.erb +15 -0
  70. data/app/views/catalog/_arclight_abstract_or_scope.html.erb +5 -0
  71. data/app/views/catalog/_arclight_bookmark_control.html.erb +38 -0
  72. data/app/views/catalog/_arclight_document_header_icon.html.erb +1 -0
  73. data/app/views/catalog/_arclight_index_compact_default.html.erb +18 -11
  74. data/app/views/catalog/_arclight_index_default.html.erb +45 -0
  75. data/app/views/catalog/_arclight_index_group_document_compact_default.html.erb +19 -0
  76. data/app/views/catalog/_arclight_index_group_document_default.html.erb +18 -0
  77. data/app/views/catalog/_arclight_online_content_indicator.html.erb +1 -3
  78. data/app/views/catalog/_collection_contents.html.erb +2 -10
  79. data/app/views/catalog/_collection_context.html.erb +15 -0
  80. data/app/views/catalog/_collection_context_nav.html.erb +12 -0
  81. data/app/views/catalog/_collection_online_contents.html.erb +3 -3
  82. data/app/views/catalog/_component_context.html.erb +5 -0
  83. data/app/views/catalog/_containers.html.erb +3 -0
  84. data/app/views/catalog/_context_card.html.erb +1 -1
  85. data/app/views/catalog/_context_sidebar.html.erb +2 -2
  86. data/app/views/catalog/_custom_metadata.html.erb +1 -1
  87. data/app/views/catalog/_document_downloads.html.erb +14 -0
  88. data/app/views/catalog/_group.html.erb +21 -0
  89. data/app/views/catalog/_group_header_compact_default.html.erb +15 -0
  90. data/app/views/catalog/_group_header_default.html.erb +20 -0
  91. data/app/views/catalog/_group_toggle.html.erb +10 -0
  92. data/app/views/catalog/_home.html.erb +1 -1
  93. data/app/views/catalog/_index_breadcrumb_default.html.erb +5 -2
  94. data/app/views/catalog/_index_collection_context_default.html.erb +53 -0
  95. data/app/views/catalog/_index_default.html.erb +1 -1
  96. data/app/views/catalog/_index_header.html.erb +3 -3
  97. data/app/views/catalog/_index_online_contents_default.html.erb +1 -1
  98. data/app/views/catalog/_online_content_label.html.erb +5 -0
  99. data/app/views/catalog/_search_form.html.erb +34 -0
  100. data/app/views/catalog/_search_results.html.erb +1 -4
  101. data/app/views/catalog/_show_actions_box_default.html.erb +27 -0
  102. data/app/views/catalog/_show_breadcrumbs_default.html.erb +5 -6
  103. data/app/views/catalog/_show_collection.html.erb +42 -24
  104. data/app/views/catalog/_show_default.html.erb +64 -26
  105. data/app/views/catalog/_show_upper_metadata_collection.html.erb +1 -0
  106. data/app/views/catalog/_show_upper_metadata_default.html.erb +14 -0
  107. data/app/views/catalog/_sort_and_per_page.html.erb +8 -0
  108. data/app/views/catalog/_within_collection_dropdown.html.erb +26 -0
  109. data/app/views/shared/_breadcrumbs.html.erb +4 -4
  110. data/app/views/shared/_context_sidebar.html.erb +2 -2
  111. data/app/views/shared/_header_navbar.html.erb +51 -43
  112. data/app/views/shared/_main_menu_links.html.erb +1 -1
  113. data/app/views/shared/_show_breadcrumbs.html.erb +27 -0
  114. data/arclight.gemspec +15 -12
  115. data/config/i18n-tasks.yml +133 -0
  116. data/config/locales/arclight.en.yml +89 -55
  117. data/config/repositories.yml +0 -0
  118. data/lib/arclight/engine.rb +23 -12
  119. data/lib/arclight/hash_absolute_xpath.rb +61 -0
  120. data/lib/arclight/level_label.rb +46 -0
  121. data/lib/arclight/missing_id_strategy.rb +21 -0
  122. data/lib/arclight/normalized_date.rb +21 -12
  123. data/lib/arclight/normalized_id.rb +1 -0
  124. data/lib/arclight/normalized_title.rb +1 -0
  125. data/lib/arclight/repository.rb +61 -25
  126. data/lib/arclight/traject/ead2_config.rb +495 -0
  127. data/lib/arclight/traject/nokogiri_namespaceless_reader.rb +22 -0
  128. data/lib/arclight/version.rb +1 -1
  129. data/lib/arclight/viewers/oembed.rb +1 -0
  130. data/lib/arclight/year_range.rb +9 -1
  131. data/lib/generators/arclight/install_generator.rb +34 -3
  132. data/lib/generators/arclight/templates/catalog_controller.rb +162 -131
  133. data/lib/generators/arclight/templates/config/downloads.yml +12 -0
  134. data/lib/generators/arclight/templates/config/repositories.yml +20 -2
  135. data/lib/generators/arclight/update_generator.rb +1 -1
  136. data/lib/tasks/index.rake +19 -19
  137. data/package.json +8 -1
  138. data/solr/conf/schema.xml +56 -292
  139. data/solr/conf/solrconfig.xml +40 -125
  140. data/tasks/arclight.rake +6 -1
  141. data/template.rb +1 -1
  142. data/vendor/assets/javascripts/responsiveTruncator.js +2 -2
  143. metadata +159 -60
  144. data/app/assets/javascripts/arclight/collection_context.js +0 -18
  145. data/app/assets/javascripts/arclight/component_ancestors.js +0 -56
  146. data/app/assets/stylesheets/arclight/modules/sidebar.scss +0 -21
  147. data/app/views/catalog/_arclight_document_show_header.html.erb +0 -15
  148. data/app/views/catalog/_arclight_document_show_header_collection.html.erb +0 -12
  149. data/app/views/catalog/_collection_count.html.erb +0 -7
  150. data/app/views/catalog/_collection_downloads.html.erb +0 -15
  151. data/app/views/catalog/_collection_overview.html.erb +0 -7
  152. data/app/views/catalog/_component_overview.html.erb +0 -46
  153. data/app/views/catalog/_index_header_hierarchy_default.html.erb +0 -42
  154. data/app/views/catalog/_index_hierarchy_default.html.erb +0 -28
  155. data/app/views/catalog/_results_histogram.html.erb +0 -10
  156. data/app/views/catalog/_search_within_form.html.erb +0 -16
  157. data/app/views/catalog/_show_component_sidebar.html.erb +0 -12
  158. data/app/views/catalog/_show_header.html.erb +0 -5
  159. data/app/views/catalog/_show_sidebar.html.erb +0 -30
  160. data/lib/arclight/custom_component.rb +0 -99
  161. data/lib/arclight/custom_document.rb +0 -93
  162. data/lib/arclight/indexer.rb +0 -9
  163. data/lib/arclight/shared_indexing_behavior.rb +0 -97
  164. data/lib/arclight/shared_terminology_behavior.rb +0 -65
  165. data/lib/arclight/solr_ead_indexer_ext.rb +0 -159
  166. data/lib/generators/arclight/templates/arclight.js +0 -2
File without changes
@@ -1,33 +1,36 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'sprockets/bumble_d'
3
4
  require 'blacklight'
4
- require 'solr_ead'
5
+ require 'traject'
6
+ require 'active_model'
5
7
  require 'arclight/exceptions'
6
8
  require 'arclight/normalized_date'
7
9
  require 'arclight/normalized_id'
8
10
  require 'arclight/normalized_title'
9
11
  require 'arclight/digital_object'
10
- require 'arclight/shared_indexing_behavior'
11
- require 'arclight/shared_terminology_behavior'
12
- require 'arclight/custom_document'
13
- require 'arclight/custom_component'
14
- require 'arclight/solr_ead_indexer_ext'
15
- require 'arclight/indexer'
16
12
  require 'arclight/viewer'
17
13
 
18
14
  module Arclight
19
15
  ##
20
16
  # This is the defining class for the Arclight Rails Engine
21
17
  class Engine < ::Rails::Engine
18
+ extend ::Sprockets::BumbleD::DSL
19
+
20
+ # We're not sure this is right, but we aren't doing module imports
21
+ # at the moment anyway
22
+ register_umd_globals :arclight,
23
+ 'blacklight' => 'Blacklight'
24
+
22
25
  config.viewer_class = Arclight::Viewers::OEmbed
23
26
  config.oembed_resource_exclude_patterns = [/\.pdf$/, /\.ppt$/]
24
27
 
25
28
  Arclight::Engine.config.catalog_controller_field_accessors = %i[
26
29
  summary_field
27
30
  access_field
31
+ contact_field
28
32
  background_field
29
33
  related_field
30
- admin_info_field
31
34
  terms_field
32
35
  cite_field
33
36
  indexed_terms_field
@@ -38,6 +41,18 @@ module Arclight
38
41
  component_indexed_terms_field
39
42
  ]
40
43
 
44
+ Arclight::Engine.config.catalog_controller_group_query_params = {
45
+ group: true,
46
+ 'group.field': 'collection_ssi',
47
+ 'group.ngroups': true,
48
+ 'group.limit': 3,
49
+ fl: '*,parent:[subquery]',
50
+ 'parent.fl': '*',
51
+ 'parent.q': '{!term f=collection_sim v=$row.collection_ssi}',
52
+ 'parent.fq': '{!term f=level_sim v="Collection"}',
53
+ 'parent.defType': 'lucene'
54
+ }
55
+
41
56
  initializer 'arclight.fields' do
42
57
  Arclight::Engine.config.catalog_controller_field_accessors.each do |field|
43
58
  Blacklight::Configuration.define_field_access field
@@ -47,9 +62,5 @@ module Arclight
47
62
  initializer 'arclight.helpers' do
48
63
  ActionView::Base.send :include, ArclightHelper
49
64
  end
50
-
51
- initializer 'arclight.views' do
52
- Blacklight::Configuration.default_values[:view].hierarchy
53
- end
54
65
  end
55
66
  end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+
5
module Arclight
  ##
  # Take a Nokogiri node and get its absolute path (inserting our own indexes for component levels)
  # and hash that output. This is intended as a potential strategy for handling missing IDs in EADs.
  class HashAbsoluteXpath
    # Matches component element names: "c" followed by at most two digits (c, c1, c01 ... c12).
    # Anchored with \A and \z so the entire name has to match, not merely one line of it.
    COMPONENT_NODE_NAME_REGEX = /\Ac\d{,2}\z/.freeze

    class << self
      # Assign any object responding to `hexdigest`; assign nil to go back to the default.
      attr_writer :hash_algorithm

      # @return [#hexdigest] the configured digest, or Digest::SHA1 when none is set
      def hash_algorithm
        @hash_algorithm || Digest::SHA1
      end
    end

    attr_reader :node

    # @param node the node to identify; it is asked for `name`, `parent` and `ancestors`
    def initialize(node)
      @node = node
    end

    # @return [String] the digest of #absolute_xpath, prefixed with "al_"
    def to_hexdigest
      "al_#{self.class.hash_algorithm.hexdigest(absolute_xpath)}"
    end

    # Joins the names of all ancestors (outermost first) and of the node itself with "/".
    # Component ancestors and the node carry their index among their component siblings.
    #
    # @return [String] e.g. "ead/archdesc/dsc/c011/c021"
    def absolute_xpath
      ancestor_steps = node.ancestors.map { |ancestor| ancestor_name_and_index(ancestor) }.reverse

      "#{[*ancestor_steps, node.name].join('/')}#{current_index}"
    end

    private

    # Position of the node among its component siblings.
    # nil (rendered as an empty string) when the node itself is not a component.
    def current_index
      siblings.index(node)
    end

    # All children of the given node's parent whose name looks like a component.
    def component_siblings_for_node(xml_node)
      xml_node.parent.children.select { |sibling| COMPONENT_NODE_NAME_REGEX.match?(sibling.name) }
    end

    def siblings
      @siblings ||= component_siblings_for_node(node)
    end

    # Component ancestors get their sibling index appended; every other ancestor is
    # represented by its bare name.
    def ancestor_name_and_index(ancestor)
      return ancestor.name unless COMPONENT_NODE_NAME_REGEX.match?(ancestor.name)

      "#{ancestor.name}#{component_siblings_for_node(ancestor).index(ancestor)}"
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Arclight
  ##
  # A utility class to return a human-readable label for an EAD @level code.
  # Can use the value from @otherlevel if provided.
  # Examples from @level: recordgrp = "Record Group"
  #                       collection = "Collection"
  #                       subseries = "Subseries"
  #                       otherlevel = (text provided in @otherlevel)
  class LevelLabel
    # Level codes whose label is not simply the capitalized code.
    CUSTOM_LEVEL_LABELS = {
      recordgrp: 'Record Group',
      subgrp: 'Subgroup'
    }.freeze

    # @param [String] `level` from the collection or component @level
    # @param [String] `other_level` from the collection or component @otherlevel
    def initialize(level, other_level = nil)
      @level = level
      # Always initialised; a blank @otherlevel is treated like a missing one.
      @other_level = other_level.present? ? other_level : nil
    end

    # @return [String, nil] the human-readable label; nil when `level` is blank
    def to_s
      human_readable_level
    end

    private

    attr_reader :level, :other_level

    def human_readable_level
      return alternative_level if level == 'otherlevel'
      return unless level.present?

      CUSTOM_LEVEL_LABELS.fetch(level.to_sym) { level.capitalize }.to_s
    end

    # Label for level "otherlevel": the capitalized @otherlevel text, or "Other" without one.
    def alternative_level
      other_level ? other_level.capitalize : 'Other'
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'arclight/hash_absolute_xpath'
4
+
5
module Arclight
  ##
  # A class to configure a selected MissingIdStrategy.
  # Defaults to Arclight::HashAbsoluteXpath
  # This can be updated in an initializer to be any other class
  class MissingIdStrategy
    class << self
      # Assign the strategy class to use; assign nil to go back to the default.
      attr_writer :selected

      # @return [Class] the configured strategy, or Arclight::HashAbsoluteXpath when none is set
      def selected
        @selected || Arclight::HashAbsoluteXpath
      end
    end
  end
end
@@ -7,16 +7,21 @@ module Arclight
7
7
  # @see http://www2.archivists.org/standards/DACS/part_I/chapter_2/4_date
8
8
  class NormalizedDate
9
9
  # @param [String | Array<String>] `inclusive` from the `unitdate`
10
- # @param [String] `bulk` from the `unitdate`
11
- # @param [String] `other` from the `unitdate` when type is not specified
12
- def initialize(inclusive, bulk = nil, other = nil)
13
- if inclusive.is_a? Array # of YYYY-YYYY for ranges
14
- @inclusive = YearRange.new(inclusive.include?('/') ? inclusive : inclusive.map { |v| v.tr('-', '/') }).to_s
15
- elsif inclusive.present?
16
- @inclusive = inclusive.strip
17
- end
18
- @bulk = bulk.strip if bulk.present?
19
- @other = other.strip if other.present?
10
+ # @param [Array<String>] `bulk` from the `unitdate`
11
+ # @param [Array<String>] `other` from the `unitdate` when type is not specified
12
# Each argument may be a single value or an array of values; every one is stored
# as a single comma-separated string.
def initialize(inclusive, bulk = [], other = [])
  # Array.wrap lets a bare String (allowed by the documented signature) through,
  # and nil becomes an empty list.
  inclusive_dates = Array.wrap(inclusive).map do |inclusive_text|
    if inclusive_text.is_a? Array # of YYYY-YYYY for ranges
      # NOTE: This code is not routable AFAICT in actual indexing.
      # We pass arrays of strings (or xml nodes) here, and never a multidimensional array
      year_range(inclusive_text)
    elsif inclusive_text.present?
      inclusive_text.strip
    end
  end

  # Blank entries map to nil above; drop them so they do not leave stray separators.
  @inclusive = inclusive_dates.compact.join(', ')
  @bulk = Array.wrap(bulk).compact.map(&:strip).join(', ')
  @other = Array.wrap(other).compact.map(&:strip).join(', ')
end
21
26
 
22
27
  # @return [String] the normalized title/date
@@ -28,6 +33,10 @@ module Arclight
28
33
 
29
34
  attr_reader :inclusive, :bulk, :other
30
35
 
36
+ def year_range(date_array)
37
+ YearRange.new(date_array.include?('/') ? date_array : date_array.map { |v| v.tr('-', '/') }).to_s
38
+ end
39
+
31
40
  # @see http://www2.archivists.org/standards/DACS/part_I/chapter_2/4_date for rules
32
41
  def normalize
33
42
  if inclusive.present?
@@ -38,8 +47,8 @@ module Arclight
38
47
  else
39
48
  result = nil
40
49
  end
41
- return if result.blank?
42
- result.strip
50
+
51
+ result&.strip
43
52
  end
44
53
  end
45
54
  end
@@ -19,6 +19,7 @@ module Arclight
19
19
 
20
20
  def normalize
21
21
  raise Arclight::Exceptions::IDNotFound if id.blank?
22
+
22
23
  id.strip.tr('.', '-')
23
24
  end
24
25
  end
@@ -24,6 +24,7 @@ module Arclight
24
24
  def normalize
25
25
  result = [title, date].compact.join(', ')
26
26
  raise Arclight::Exceptions::TitleNotFound if result.blank?
27
+
27
28
  result
28
29
  end
29
30
  end
@@ -7,31 +7,14 @@ module Arclight
7
7
  class Repository
8
8
  include ActiveModel::Conversion # for to_partial_path
9
9
 
10
- FIELDS = %i[name
11
- description
12
- visit_note
13
- building
14
- address1
15
- address2
16
- city
17
- state
18
- zip
19
- country
20
- phone
21
- contact_info
22
- thumbnail_url
23
- google_request_url
24
- google_request_mappings
25
- collection_count].freeze
26
-
27
- attr_accessor :slug, *FIELDS
10
+ attr_accessor :slug, :collection_count
28
11
 
29
12
  # @param [String] `slug` the unique identifier for the repository
30
13
  # @param [Hash] `data`
31
14
  def initialize(slug, data = {})
32
15
  @slug = slug
33
- FIELDS.each do |field|
34
- value = data[field.to_s]
16
+ data.each do |field, value|
17
+ self.class.attr_accessor field.to_sym
35
18
  send("#{field}=", value) if value.present?
36
19
  end
37
20
  end
@@ -43,6 +26,57 @@ module Arclight
43
26
  [city, state_zip, country].compact.join(', ')
44
27
  end
45
28
 
29
+ # Why are we using self#respond_to? below?
30
+ #
31
+ # All the keys in the config hash from `repositories.yml` are
32
+ # on-the-fly added as attr_accessors up in #initialize. If the
33
+ # request_types key isn't present, the method won't be created.
34
+ #
35
+ # Since the original data is thrown away, this is the best way
36
+ # to see if that key was present.
37
# Whether a usable request configuration exists. Only the first entry of
# `request_types` is inspected; it needs both a request URL and request mappings.
#
# The `request_types` accessor only exists when that key appeared in the
# repository data, hence the respond_to? guard.
#
# @return [Boolean]
def request_config_present?
  return false unless respond_to?(:request_types) && request_types.present?

  request_config_complete?(request_types.values.first)
end

# @param [String] `type` a key of `request_types`
# @return [Boolean] whether that type has both a request URL and request mappings
def request_config_present_for_type?(type)
  return false unless type && request_config_present?

  request_config_complete?(request_types[type])
end

# @param [String] `type` a key of `request_types`
# @return [Hash, nil] the configuration for the type, nil unless it is complete
def request_config_for_type(type)
  request_types[type] if request_config_present_for_type?(type)
end

# @param [String] `type` a key of `request_types`
# @return [Object, nil] the `request_url` value, nil unless the type is completely configured
def request_url_for_type(type)
  request_config_for_type(type)&.fetch('request_url')
end

# @param [String] `type` a key of `request_types`
# @return [Object, nil] the `request_mappings` value, nil unless the type is completely configured
def request_mappings_for_type(type)
  request_config_for_type(type)&.fetch('request_mappings')
end

# @return [Array] the keys of `request_types`; empty when nothing is configured
def available_request_types
  return [] unless respond_to?(:request_types) && request_types.present?

  request_types.keys
end

# A config counts as complete when both 'request_url' and 'request_mappings' are present.
# Uses [] rather than fetch so a missing key yields false instead of raising KeyError.
def request_config_complete?(config)
  return false unless config

  config['request_url'].present? && config['request_mappings'].present?
end
private :request_config_complete?
79
+
46
80
  # Load repository information from a YAML file
47
81
  #
48
82
  # @param [String] `filename`
@@ -59,19 +93,20 @@ module Arclight
59
93
  # Mimics ActiveRecord's `all` behavior
60
94
  #
61
95
  # @return [Array<Repository>]
62
- def self.all
63
- from_yaml(ENV['REPOSITORY_FILE'] || 'config/repositories.yml').values
96
+ def self.all(yaml_file = nil)
97
+ yaml_file = ENV['REPOSITORY_FILE'] || 'config/repositories.yml' if yaml_file.nil?
98
+ from_yaml(yaml_file).values
64
99
  end
65
100
 
66
101
  # Mimics ActiveRecord dynamic `find_by` behavior for the slug or name
67
102
  #
68
103
  # @param [String] `slug` or `name`
69
104
  # @return [Repository]
70
- def self.find_by(slug: nil, name: nil)
105
+ def self.find_by(slug: nil, name: nil, yaml_file: nil)
71
106
  if slug
72
- all.find { |repo| repo.slug == slug }
107
+ all(yaml_file).find { |repo| repo.slug == slug }
73
108
  elsif name
74
- all.find { |repo| repo.name == name }
109
+ all(yaml_file).find { |repo| repo.name == name }
75
110
  else
76
111
  raise ArgumentError, 'Requires either slug or name parameters to find_by'
77
112
  end
@@ -85,6 +120,7 @@ module Arclight
85
120
  def self.find_by!(*args)
86
121
  repository = find_by(*args)
87
122
  raise ActiveRecord::RecordNotFound if repository.blank?
123
+
88
124
  repository
89
125
  end
90
126
  end
@@ -0,0 +1,495 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'logger'
4
+ require 'traject'
5
+ require 'traject/nokogiri_reader'
6
+ require 'traject_plus'
7
+ require 'traject_plus/macros'
8
+ require 'arclight/exceptions'
9
+ require 'arclight/level_label'
10
+ require 'arclight/normalized_date'
11
+ require 'arclight/normalized_title'
12
+ require 'active_model/conversion' ## Needed for Arclight::Repository
13
+ require 'active_support/core_ext/array/wrap'
14
+ require 'arclight/digital_object'
15
+ require 'arclight/year_range'
16
+ require 'arclight/repository'
17
+ require 'arclight/missing_id_strategy'
18
+ require 'arclight/traject/nokogiri_namespaceless_reader'
19
+
20
+ # rubocop:disable Style/MixinUsage
21
+ extend TrajectPlus::Macros
22
+ # rubocop:enable Style/MixinUsage
23
+
24
+ NAME_ELEMENTS = %w[corpname famname name persname].freeze
25
+
26
+ SEARCHABLE_NOTES_FIELDS = %w[
27
+ accessrestrict
28
+ accruals
29
+ altformavail
30
+ appraisal
31
+ arrangement
32
+ bibliography
33
+ bioghist
34
+ custodhist
35
+ fileplan
36
+ note
37
+ odd
38
+ originalsloc
39
+ otherfindaid
40
+ phystech
41
+ prefercite
42
+ processinfo
43
+ relatedmaterial
44
+ scopecontent
45
+ separatedmaterial
46
+ userestrict
47
+ ].freeze
48
+
49
+ DID_SEARCHABLE_NOTES_FIELDS = %w[
50
+ abstract
51
+ materialspec
52
+ physloc
53
+ ].freeze
54
+
55
+ settings do
56
+ provide 'reader_class_name', 'Arclight::Traject::NokogiriNamespacelessReader'
57
+ provide 'solr_writer.commit_on_close', 'true'
58
+ provide 'repository', ENV['REPOSITORY_ID']
59
+ provide 'logger', Logger.new($stderr)
60
+ end
61
+
62
# Resolve the configured repository slug (the 'repository' setting, provided from
# ENV['REPOSITORY_ID']) and stash the repository name on the clipboard for the
# repository_* fields. Records are left untouched when no repository is configured.
each_record do |_record, context|
  slug = settings['repository']
  next unless slug

  repository = Arclight::Repository.find_by(slug: slug)
  # find_by returns nil for an unknown slug; fail with a message that names it
  # instead of an opaque NoMethodError on nil.
  raise ArgumentError, "No repository with slug '#{slug}' was found in the repository configuration" unless repository

  context.clipboard[:repository] = repository.name
end
69
+
70
+ # ==================
71
+ # Top level document
72
+ # ==================
73
+
74
+ to_field 'id', extract_xpath('/ead/eadheader/eadid'), strip, gsub('.', '-')
75
+ to_field 'title_filing_si', extract_xpath('/ead/eadheader/filedesc/titlestmt/titleproper[@type="filing"]')
76
+ to_field 'title_ssm', extract_xpath('/ead/archdesc/did/unittitle')
77
+ to_field 'title_teim', extract_xpath('/ead/archdesc/did/unittitle')
78
+ to_field 'ead_ssi', extract_xpath('/ead/eadheader/eadid')
79
+
80
+ to_field 'unitdate_ssm', extract_xpath('/ead/archdesc/did/unitdate')
81
+ to_field 'unitdate_bulk_ssim', extract_xpath('/ead/archdesc/did/unitdate[@type="bulk"]')
82
+ to_field 'unitdate_inclusive_ssm', extract_xpath('/ead/archdesc/did/unitdate[@type="inclusive"]')
83
+ to_field 'unitdate_other_ssim', extract_xpath('/ead/archdesc/did/unitdate[not(@type)]')
84
+
85
+ # All top-level docs treated as 'collection' for routing / display purposes
86
+ to_field 'level_ssm' do |_record, accumulator|
87
+ accumulator << 'collection'
88
+ end
89
+
90
+ # Keep the original top-level archdesc/@level for Level facet in addition to 'Collection'
91
+ to_field 'level_sim' do |record, accumulator|
92
+ level = record.at_xpath('/ead/archdesc').attribute('level')&.value
93
+ other_level = record.at_xpath('/ead/archdesc').attribute('otherlevel')&.value
94
+
95
+ accumulator << Arclight::LevelLabel.new(level, other_level).to_s
96
+ accumulator << 'Collection' unless level == 'collection'
97
+ end
98
+
99
+ to_field 'unitid_ssm', extract_xpath('/ead/archdesc/did/unitid')
100
+ to_field 'unitid_teim', extract_xpath('/ead/archdesc/did/unitid')
101
+ to_field 'collection_unitid_ssm', extract_xpath('/ead/archdesc/did/unitid')
102
+
103
+ to_field 'normalized_title_ssm' do |_record, accumulator, context|
104
+ dates = Arclight::NormalizedDate.new(
105
+ context.output_hash['unitdate_inclusive_ssm'],
106
+ context.output_hash['unitdate_bulk_ssim'],
107
+ context.output_hash['unitdate_other_ssim']
108
+ ).to_s
109
+ title = context.output_hash['title_ssm'].first
110
+ accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
111
+ end
112
+
113
+ to_field 'normalized_date_ssm' do |_record, accumulator, context|
114
+ accumulator << Arclight::NormalizedDate.new(
115
+ context.output_hash['unitdate_inclusive_ssm'],
116
+ context.output_hash['unitdate_bulk_ssim'],
117
+ context.output_hash['unitdate_other_ssim']
118
+ ).to_s
119
+ end
120
+
121
+ to_field 'collection_ssm' do |_record, accumulator, context|
122
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
123
+ end
124
+ to_field 'collection_sim' do |_record, accumulator, context|
125
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
126
+ end
127
+ to_field 'collection_ssi' do |_record, accumulator, context|
128
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
129
+ end
130
+ to_field 'collection_title_tesim' do |_record, accumulator, context|
131
+ accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
132
+ end
133
+
134
+ to_field 'repository_ssm' do |_record, accumulator, context|
135
+ accumulator << context.clipboard[:repository]
136
+ end
137
+
138
+ to_field 'repository_sim' do |_record, accumulator, context|
139
+ accumulator << context.clipboard[:repository]
140
+ end
141
+
142
+ to_field 'geogname_ssm', extract_xpath('/ead/archdesc/controlaccess/geogname')
143
+ to_field 'geogname_sim', extract_xpath('/ead/archdesc/controlaccess/geogname')
144
+
145
+ to_field 'creator_ssm', extract_xpath('/ead/archdesc/did/origination')
146
+ to_field 'creator_sim', extract_xpath('/ead/archdesc/did/origination')
147
+ to_field 'creator_ssim', extract_xpath('/ead/archdesc/did/origination')
148
+ to_field 'creator_sort' do |record, accumulator|
149
+ accumulator << record.xpath('/ead/archdesc/did/origination').map { |c| c.text.strip }.join(', ')
150
+ end
151
+
152
+ to_field 'creator_persname_ssm', extract_xpath('/ead/archdesc/did/origination/persname')
153
+ to_field 'creator_persname_ssim', extract_xpath('/ead/archdesc/did/origination/persname')
154
+ to_field 'creator_corpname_ssm', extract_xpath('/ead/archdesc/did/origination/corpname')
155
+ to_field 'creator_corpname_sim', extract_xpath('/ead/archdesc/did/origination/corpname')
156
+ to_field 'creator_corpname_ssim', extract_xpath('/ead/archdesc/did/origination/corpname')
157
+ to_field 'creator_famname_ssm', extract_xpath('/ead/archdesc/did/origination/famname')
158
+ to_field 'creator_famname_ssim', extract_xpath('/ead/archdesc/did/origination/famname')
159
+
160
+ to_field 'persname_sim', extract_xpath('//persname')
161
+
162
+ to_field 'creators_ssim' do |_record, accumulator, context|
163
+ accumulator.concat context.output_hash['creator_persname_ssm'] if context.output_hash['creator_persname_ssm']
164
+ accumulator.concat context.output_hash['creator_corpname_ssm'] if context.output_hash['creator_corpname_ssm']
165
+ accumulator.concat context.output_hash['creator_famname_ssm'] if context.output_hash['creator_famname_ssm']
166
+ end
167
+
168
+ to_field 'places_sim', extract_xpath('/ead/archdesc/controlaccess/geogname')
169
+ to_field 'places_ssim', extract_xpath('/ead/archdesc/controlaccess/geogname')
170
+ to_field 'places_ssm', extract_xpath('/ead/archdesc/controlaccess/geogname')
171
+
172
+ to_field 'access_terms_ssm', extract_xpath('/ead/archdesc/userestrict/*[local-name()!="head"]')
173
+
174
+ to_field 'acqinfo_ssim', extract_xpath('/ead/archdesc/acqinfo/*[local-name()!="head"]')
175
+ to_field 'acqinfo_ssim', extract_xpath('/ead/archdesc/descgrp/acqinfo/*[local-name()!="head"]')
176
+
177
+ to_field 'access_subjects_ssim', extract_xpath('/ead/archdesc/controlaccess', to_text: false) do |_record, accumulator|
178
+ accumulator.map! do |element|
179
+ %w[subject function occupation genreform].map do |selector|
180
+ element.xpath(".//#{selector}").map(&:text)
181
+ end
182
+ end.flatten!
183
+ end
184
+
185
+ to_field 'access_subjects_ssm' do |_record, accumulator, context|
186
+ accumulator.concat Array.wrap(context.output_hash['access_subjects_ssim'])
187
+ end
188
+
189
+ to_field 'has_online_content_ssim', extract_xpath('.//dao') do |_record, accumulator|
190
+ accumulator.replace([accumulator.any?])
191
+ end
192
+
193
+ to_field 'digital_objects_ssm', extract_xpath('/ead/archdesc/did/dao|/ead/archdesc/dao', to_text: false) do |_record, accumulator|
194
+ accumulator.map! do |dao|
195
+ label = dao.attributes['title']&.value ||
196
+ dao.xpath('daodesc/p')&.text
197
+ href = (dao.attributes['href'] || dao.attributes['xlink:href'])&.value
198
+ Arclight::DigitalObject.new(label: label, href: href).to_json
199
+ end
200
+ end
201
+
202
+ to_field 'extent_ssm', extract_xpath('/ead/archdesc/did/physdesc/extent')
203
+ to_field 'extent_teim', extract_xpath('/ead/archdesc/did/physdesc/extent')
204
+ to_field 'genreform_sim', extract_xpath('/ead/archdesc/controlaccess/genreform')
205
+ to_field 'genreform_ssm', extract_xpath('/ead/archdesc/controlaccess/genreform')
206
+
207
# Expand the @normal attributes of the collection-level <unitdate> elements into
# the years reported by Arclight::YearRange. Nothing is indexed when no @normal
# attribute was extracted.
to_field 'date_range_sim', extract_xpath('/ead/archdesc/did/unitdate/@normal', to_text: false) do |_record, accumulator|
  year_range = Arclight::YearRange.new

  if accumulator.blank?
    year_range.years
  else
    normalized_dates = accumulator.map(&:to_s)
    year_range << year_range.parse_ranges(normalized_dates)
    accumulator.replace(year_range.years)
  end
end
215
+
216
# For every searchable note element directly under <archdesc>, register:
#   <name>_ssm          the non-<head> children as nodes (to_text: false)
#   <name>_heading_ssm  the <head> text (skipped for 'prefercite')
#   <name>_teim         the non-<head> children as text
SEARCHABLE_NOTES_FIELDS.each do |note|
  note_path = "/ead/archdesc/#{note}"
  body_xpath = "#{note_path}/*[local-name()!='head']"

  to_field "#{note}_ssm", extract_xpath(body_xpath, to_text: false)
  to_field "#{note}_heading_ssm", extract_xpath("#{note_path}/head") unless note == 'prefercite'
  to_field "#{note}_teim", extract_xpath(body_xpath)
end

# Notes that live inside the collection-level <did> are indexed as nodes only.
DID_SEARCHABLE_NOTES_FIELDS.each do |note|
  to_field "#{note}_ssm", extract_xpath("/ead/archdesc/did/#{note}", to_text: false)
end
225
+
226
# For every name element type, register:
#   names_coll_ssim  occurrences under the collection-level <controlaccess>
#   names_ssim       occurrences anywhere in the document
#   <element>_ssm    occurrences anywhere in the document, one field per element type
NAME_ELEMENTS.each do |name_element|
  anywhere = "//#{name_element}"

  to_field 'names_coll_ssim', extract_xpath("/ead/archdesc/controlaccess/#{name_element}")
  to_field 'names_ssim', extract_xpath(anywhere)
  to_field "#{name_element}_ssm", extract_xpath(anywhere)
end
231
+
232
# Every <corpname> anywhere in the document.
to_field 'corpname_sim', extract_xpath('//corpname')

# Collection-level language of materials, indexed into two fields.
%w[language_sim language_ssm].each do |field_name|
  to_field field_name, extract_xpath('/ead/archdesc/did/langmaterial')
end

# Descriptive rules declared in the EAD header.
to_field 'descrules_ssm', extract_xpath('/ead/eadheader/profiledesc/descrules')
238
+
239
+ # =============================
240
+ # Each component child document
241
+ # <c> <c01> <c12>
242
+ # =============================
243
+
244
+ compose 'components', ->(record, accumulator, _context) { accumulator.concat record.xpath('//*[is_component(.)]', NokogiriXpathExtensions.new) } do
245
# Component reference: the component's own `id` attribute, stripped and with
# '.' replaced by '-'. When the component has no usable id, one is minted with
# the selected Arclight::MissingIdStrategy, written back onto the XML node
# (so later lookups of this node's `id` see it) and a warning is logged.
#
# The check is made on the stripped attribute *value*: an `id` attribute that
# is present but empty or whitespace-only is not nil, so testing the attribute
# node itself would let it through and produce an empty reference, making the
# component's id identical to the collection's id.
to_field 'ref_ssi' do |record, accumulator, context|
  existing_id = record.attribute('id')&.value&.strip

  accumulator << if existing_id.blank?
                   strategy = Arclight::MissingIdStrategy.selected
                   hexdigest = strategy.new(record).to_hexdigest
                   parent_id = context.clipboard[:parent].output_hash['id'].first
                   logger.warn('MISSING ID WARNING') do
                     [
                       "A component in #{parent_id} did not have an ID so one was minted using the #{strategy} strategy.",
                       "The ID of this document will be #{parent_id}#{hexdigest}."
                     ].join(' ')
                   end
                   record['id'] = hexdigest
                   hexdigest
                 else
                   existing_id.gsub('.', '-')
                 end
end
262
# Copy of the reference computed for ref_ssi.
to_field 'ref_ssm' do |_record, accumulator, context|
  component_ref = context.output_hash['ref_ssi']
  accumulator.concat(component_ref)
end

# Document id: the parent (collection) id immediately followed by this
# component's reference, with no separator.
to_field 'id' do |_record, accumulator, context|
  collection_id = context.clipboard[:parent].output_hash['id']
  component_ref = context.output_hash['ref_ssi']
  accumulator << [collection_id, component_ref].join('')
end

# The component carries the first ead_ssi value of its parent document.
to_field 'ead_ssi' do |_record, accumulator, context|
  parent_fields = context.clipboard[:parent].output_hash
  accumulator << parent_fields['ead_ssi'].first
end
276
+
277
# Component title: only the first <unittitle> is kept for the filing field,
# all of them for the stored and text fields.
to_field 'title_filing_si', extract_xpath('./did/unittitle'), first_only
%w[title_ssm title_teim].each do |field_name|
  to_field field_name, extract_xpath('./did/unittitle')
end

# Component <unitdate> values, split by their @type (or its absence).
{
  'unitdate_bulk_ssim' => './did/unitdate[@type="bulk"]',
  'unitdate_inclusive_ssm' => './did/unitdate[@type="inclusive"]',
  'unitdate_other_ssim' => './did/unitdate[not(@type)]'
}.each do |field_name, unitdate_xpath|
  to_field field_name, extract_xpath(unitdate_xpath)
end
284
+
285
# Title built by Arclight::NormalizedTitle from the first title_ssm value and
# the normalized date string (inclusive, bulk and untyped unit dates).
to_field 'normalized_title_ssm' do |_record, accumulator, context|
  fields = context.output_hash
  normalized_date = Arclight::NormalizedDate.new(
    fields['unitdate_inclusive_ssm'],
    fields['unitdate_bulk_ssim'],
    fields['unitdate_other_ssim']
  ).to_s
  first_title = fields['title_ssm']&.first

  accumulator << Arclight::NormalizedTitle.new(first_title, normalized_date).to_s
end

# Date string built by Arclight::NormalizedDate from the inclusive, bulk and
# untyped unit dates already indexed for this component.
to_field 'normalized_date_ssm' do |_record, accumulator, context|
  fields = context.output_hash
  normalized_date = Arclight::NormalizedDate.new(
    fields['unitdate_inclusive_ssm'],
    fields['unitdate_bulk_ssim'],
    fields['unitdate_other_ssim']
  )

  accumulator << normalized_date.to_s
end
302
+
303
# Nesting depth of the component: one more than its number of component ancestors.
to_field 'component_level_isim' do |record, accumulator|
  ancestor_components = NokogiriXpathExtensions.new.is_component(record.ancestors)
  accumulator << 1 + ancestor_components.count
end

# Ancestry of the component: the parent document's id first, followed by the
# `id` attribute of each component ancestor in reversed `record.ancestors` order.
# An ancestor without an `id` attribute contributes nil.
to_field 'parent_ssim' do |record, accumulator, context|
  collection_id = context.clipboard[:parent].output_hash['id'].first
  accumulator << collection_id

  ancestor_components = NokogiriXpathExtensions.new.is_component(record.ancestors)
  ancestor_components.reverse_each do |ancestor|
    accumulator << ancestor.attribute('id')&.value
  end
end

# Immediate parent: the last entry of parent_ssim.
to_field 'parent_ssi' do |_record, accumulator, context|
  lineage = context.output_hash['parent_ssim']
  accumulator << lineage.last
end
315
+
316
# Normalized titles of everything above this component: the top-level document
# first, then every already-indexed component whose ref_ssi appears in this
# component's parent_ssim (ignoring its first entry, the collection id).
to_field 'parent_unittitles_ssm' do |_rec, accumulator, context|
  collection = context.clipboard[:parent].output_hash

  # Top-level document
  accumulator.concat collection['normalized_title_ssm']

  lineage = context.output_hash['parent_ssim']
  collection_components = collection['components']

  # Ancestor components
  if lineage && collection_components
    # ref_ssi is stored as a one-element array, so wrap each id to compare like with like.
    ancestor_refs = lineage.drop(1).map { |ref| [ref] }
    ancestor_components = collection_components.select do |component|
      ancestor_refs.include?(component['ref_ssi'])
    end
    accumulator.concat(ancestor_components.flat_map { |component| component['normalized_title_ssm'] })
  end
end

# Text-searchable copy of parent_unittitles_ssm.
to_field 'parent_unittitles_teim' do |_record, accumulator, context|
  ancestor_titles = context.output_hash['parent_unittitles_ssm']
  accumulator.concat(ancestor_titles)
end
332
+
333
# Level labels of everything above this component: the top-level document
# first, then, for each ancestor id in parent_ssim (after the collection id),
# the level_ssm of the already-indexed component with that ref_ssi.
to_field 'parent_levels_ssm' do |_record, accumulator, context|
  collection = context.clipboard[:parent].output_hash

  # Top-level document
  accumulator.concat collection['level_ssm']

  # Ancestor components
  ancestor_refs = context.output_hash['parent_ssim']&.drop(1)
  ancestor_refs&.each do |ref|
    matching = Array.wrap(collection['components']).select do |component|
      component['ref_ssi'] == [ref]
    end
    accumulator.concat matching.map { |component| component['level_ssm'] }.flatten
  end
end
343
+
344
# The component's own <unitid>.
to_field 'unitid_ssm', extract_xpath('./did/unitid')

# The parent document's unitid_ssm values (none when the parent has no such field).
to_field 'collection_unitid_ssm' do |_record, accumulator, context|
  collection_unitids = context.clipboard[:parent].output_hash['unitid_ssm']
  accumulator.concat(Array.wrap(collection_unitids))
end
348
# The repository stored on the parent context's clipboard, indexed into two fields.
%w[repository_ssm repository_sim].each do |field_name|
  to_field field_name do |_record, accumulator, context|
    accumulator << context.clipboard[:parent].clipboard[:repository]
  end
end

# The parent document's normalized title, indexed into three fields.
%w[collection_ssm collection_sim collection_ssi].each do |field_name|
  to_field field_name do |_record, accumulator, context|
    accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
  end
end
363
+
364
# Component extent, indexed into two fields from the same XPath.
%w[extent_ssm extent_teim].each do |field_name|
  to_field field_name, extract_xpath('./did/physdesc/extent')
end

# Component <origination>, indexed into three fields from the same XPath.
%w[creator_ssm creator_ssim creators_ssim].each do |field_name|
  to_field field_name, extract_xpath('./did/origination')
end

# Single sort value: the text of all <origination> elements joined with ', '.
to_field 'creator_sort' do |record, accumulator|
  origination_texts = record.xpath('./did/origination').map(&:text)
  accumulator << origination_texts.join(', ')
end

# The parent document's creator_ssm values (none when the parent has no such field).
to_field 'collection_creator_ssm' do |_record, accumulator, context|
  collection_creators = context.clipboard[:parent].output_hash['creator_ssm']
  accumulator.concat(Array.wrap(collection_creators))
end
376
# Reduce the extracted <dao> matches to a single boolean: true when at least
# one was found anywhere below the component, false otherwise.
to_field 'has_online_content_ssim', extract_xpath('.//dao') do |_record, accumulator|
  dao_found = accumulator.any?
  accumulator.replace([dao_found])
end

# Number of direct children of this node that are themselves components.
to_field 'child_component_count_isim' do |record, accumulator|
  child_components = NokogiriXpathExtensions.new.is_component(record.children)
  accumulator << child_components.count
end
382
+
383
# Adds the component's raw `id` attribute value to ref_ssm.
# The string value is pushed rather than the Nokogiri attribute node, so the
# output hash holds plain strings instead of XML objects. When the attribute
# is absent, nil is pushed, exactly as before.
to_field 'ref_ssm' do |record, accumulator|
  accumulator << record.attribute('id')&.value
end
386
+
387
# Display label built by Arclight::LevelLabel from the component's
# `level` and `otherlevel` attributes (either may be absent).
to_field 'level_ssm' do |record, accumulator|
  level_attr = record.attribute('level')
  other_level_attr = record.attribute('otherlevel')
  label = Arclight::LevelLabel.new(level_attr&.value, other_level_attr&.value)
  accumulator << label.to_s
end

# Capitalized copy of level_ssm; nothing is indexed when level_ssm is absent.
to_field 'level_sim' do |_record, accumulator, context|
  level_labels = context.output_hash['level_ssm']
  accumulator.concat(level_labels.map(&:capitalize)) if level_labels
end

# Sort key: the position reported by the indexing context.
to_field 'sort_ii' do |_record, accumulator, context|
  position = context.position
  accumulator.replace([position])
end
402
+
403
# Inherited access restrictions: of all <accessrestrict> elements that are
# children of an ancestor (top-level included, the component itself excluded),
# take the last one and index the text of its non-<head> children.
to_field 'parent_access_restrict_ssm' do |record, accumulator|
  inherited_nodes = record.xpath('(./ancestor::*/accessrestrict)[last()]/*[local-name()!="head"]')
  accumulator.concat Array.wrap(inherited_nodes.map(&:text))
end

# Applicable use restrictions: same lookup for <userestrict>, except that the
# component's own <userestrict> is also considered (ancestor-or-self).
to_field 'parent_access_terms_ssm' do |record, accumulator|
  applicable_nodes = record.xpath('(./ancestor-or-self::*/userestrict)[last()]/*[local-name()!="head"]')
  accumulator.concat Array.wrap(applicable_nodes.map(&:text))
end
416
+
417
# Serialize each <dao> of the component (direct child or inside <did>) as an
# Arclight::DigitalObject JSON string. The label is the `title` attribute when
# present, otherwise the <daodesc>/<p> text.
to_field 'digital_objects_ssm', extract_xpath('./dao|./did/dao', to_text: false) do |_record, accumulator|
  serialized = accumulator.map do |dao_node|
    attrs = dao_node.attributes
    link_attr = attrs['href'] || attrs['xlink:href']
    label = attrs['title']&.value || dao_node.xpath('daodesc/p')&.text

    Arclight::DigitalObject.new(label: label, href: link_attr&.value).to_json
  end
  accumulator.replace(serialized)
end
425
+
426
# Expand the @normal attributes of the component's <unitdate> elements into the
# years reported by Arclight::YearRange. Nothing is indexed when no @normal
# attribute was extracted.
to_field 'date_range_sim', extract_xpath('./did/unitdate/@normal', to_text: false) do |_record, accumulator|
  year_range = Arclight::YearRange.new

  if accumulator.blank?
    year_range.years
  else
    normalized_dates = accumulator.map(&:to_s)
    year_range << year_range.parse_ranges(normalized_dates)
    accumulator.replace(year_range.years)
  end
end
434
+
435
# For every name element type, register:
#   names_ssim     occurrences under the component's own <controlaccess>
#   <element>_ssm  occurrences anywhere below the component
NAME_ELEMENTS.each do |name_element|
  to_field 'names_ssim', extract_xpath("./controlaccess/#{name_element}")
  to_field "#{name_element}_ssm", extract_xpath(".//#{name_element}")
end

# Geographic names under the component's <controlaccess>, indexed into three fields.
%w[geogname_sim geogname_ssm places_ssim].each do |field_name|
  to_field field_name, extract_xpath('./controlaccess/geogname')
end
443
+
444
# Collect the text of every <subject>, <function>, <occupation> and <genreform>
# found anywhere below each <controlaccess> of the component, as one flat list.
to_field 'access_subjects_ssim', extract_xpath('./controlaccess', to_text: false) do |_record, accumulator|
  subject_terms = accumulator.flat_map do |controlaccess|
    %w[subject function occupation genreform].flat_map do |tag|
      controlaccess.xpath(".//#{tag}").map(&:text)
    end
  end
  accumulator.replace(subject_terms)
end

# Copy of access_subjects_ssim (empty when that field was never populated).
to_field 'access_subjects_ssm' do |_record, accumulator, context|
  indexed_subjects = context.output_hash.fetch('access_subjects_ssim', [])
  accumulator.concat(indexed_subjects)
end
455
+
456
# Acquisition information for the component: the two absolute paths pick up
# the collection-level <acqinfo>, the two relative ones the component's own.
# In each case the element may sit directly under its parent or inside a <descgrp>.
%w[
  /ead/archdesc/acqinfo
  /ead/archdesc/descgrp/acqinfo
  ./acqinfo
  ./descgrp/acqinfo
].each do |acqinfo_path|
  to_field 'acqinfo_ssim', extract_xpath(%(#{acqinfo_path}/*[local-name()!="head"]))
end

# Language of materials declared in the component's <did>.
to_field 'language_ssm', extract_xpath('./did/langmaterial')

# One entry per <container>: its `type` attribute and its text separated by a
# space, with surrounding whitespace stripped (so a missing type leaves just the text).
to_field 'containers_ssim' do |record, accumulator|
  record.xpath('./did/container').each do |container|
    label_parts = [container.attribute('type'), container.text]
    accumulator << label_parts.join(' ').strip
  end
end
467
+
468
# For every searchable note element directly under the component, register:
#   <name>_ssm          the non-<head> children as nodes (to_text: false)
#   <name>_heading_ssm  the <head> text
#   <name>_teim         the non-<head> children as text
SEARCHABLE_NOTES_FIELDS.each do |note|
  body_xpath = "./#{note}/*[local-name()!='head']"

  to_field "#{note}_ssm", extract_xpath(body_xpath, to_text: false)
  to_field "#{note}_heading_ssm", extract_xpath("./#{note}/head")
  to_field "#{note}_teim", extract_xpath(body_xpath)
end

# Notes that live inside the component's <did> are indexed as nodes only.
DID_SEARCHABLE_NOTES_FIELDS.each do |note|
  to_field "#{note}_ssm", extract_xpath("./did/#{note}", to_text: false)
end

# Plain <note> inside the component's <did>.
to_field 'did_note_ssm', extract_xpath('./did/note')
477
+ end
478
+
479
# After each record is mapped, drop component entries that have no keys at all.
# A record without a 'components' entry is left untouched.
each_record do |_record, context|
  output = context.output_hash
  components = output['components']
  output['components'] = components.select { |component| component.keys.any? } if components
end
482
+
483
##
# Custom XPath function handler used to recognise EAD component elements:
# the unnumbered <c> and the numbered <c01> through <c12>.
class NokogiriXpathExtensions
  # rubocop:disable Naming/PredicateName, Style/FormatString

  # Filters a set of nodes down to those that are EAD components.
  #
  # @param node_set [#find_all] nodes responding to `name`
  # @return [Array] the nodes whose name is 'c' or 'c01'..'c12', in their original order
  def is_component(node_set)
    # The list of component names does not depend on the node, so build it once per call.
    component_elements = (1..12).map { |i| "c#{'%02d' % i}" }
    component_elements.push 'c'

    node_set.find_all { |node| component_elements.include? node.name }
  end
  # rubocop:enable Naming/PredicateName, Style/FormatString
end