arclight 0.1.4 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.all-contributorsrc +450 -0
- data/.babelrc +3 -0
- data/.codeclimate.yml +5 -0
- data/.eslintrc +3 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +20 -42
- data/.rubocop_todo.yml +139 -0
- data/.travis.yml +24 -20
- data/CONTRIBUTORS.md +79 -0
- data/README.md +34 -23
- data/Rakefile +0 -1
- data/app/assets/images/blacklight/bookmark.svg +1 -0
- data/app/assets/images/blacklight/collection.svg +5 -0
- data/app/assets/images/blacklight/compact.svg +1 -25
- data/app/assets/images/blacklight/container.svg +5 -0
- data/app/assets/images/blacklight/ead.svg +1 -0
- data/app/assets/images/blacklight/file.svg +5 -0
- data/app/assets/images/blacklight/folder.svg +1 -0
- data/app/assets/images/blacklight/list.svg +1 -0
- data/app/assets/images/blacklight/minus.svg +1 -0
- data/app/assets/images/blacklight/online.svg +5 -0
- data/app/assets/images/blacklight/pdf.svg +1 -0
- data/app/assets/images/blacklight/plus.svg +1 -0
- data/app/assets/images/blacklight/repository.svg +1 -0
- data/app/assets/javascripts/arclight/arclight.js +1 -2
- data/app/assets/javascripts/arclight/collection_navigation.js +36 -50
- data/app/assets/javascripts/arclight/collection_scrollspy.js +1 -1
- data/app/assets/javascripts/arclight/context_navigation.js +386 -0
- data/app/assets/javascripts/arclight/oembed_viewer.js +11 -4
- data/app/assets/javascripts/arclight/truncator.js.erb +8 -2
- data/app/assets/stylesheets/arclight/application.scss +4 -1
- data/app/assets/stylesheets/arclight/bootstrap_overrides.scss +23 -0
- data/app/assets/stylesheets/arclight/modules/context_navigation.scss +75 -0
- data/app/assets/stylesheets/arclight/modules/hierarchy_and_online_contents.scss +34 -38
- data/app/assets/stylesheets/arclight/modules/highlights.scss +2 -1
- data/app/assets/stylesheets/arclight/modules/icons.scss +4 -0
- data/app/assets/stylesheets/arclight/modules/layout.scss +149 -11
- data/app/assets/stylesheets/arclight/modules/mastheads.scss +60 -5
- data/app/assets/stylesheets/arclight/modules/repositories.scss +1 -5
- data/app/assets/stylesheets/arclight/modules/repository_card.scss +6 -7
- data/app/assets/stylesheets/arclight/modules/search_results.scss +145 -24
- data/app/assets/stylesheets/arclight/modules/show_collection.scss +38 -59
- data/app/assets/stylesheets/arclight/responsive.scss +13 -0
- data/app/assets/stylesheets/arclight/variables.scss +21 -1
- data/app/controllers/concerns/arclight/ead_format_helpers.rb +225 -0
- data/app/controllers/concerns/arclight/field_config_helpers.rb +23 -7
- data/app/factories/blacklight_field_configuration_factory.rb +1 -0
- data/app/helpers/arclight_helper.rb +198 -36
- data/app/models/arclight/document_downloads.rb +125 -0
- data/app/models/arclight/parent.rb +4 -2
- data/app/models/arclight/parents.rb +6 -4
- data/app/models/arclight/requests/aeon_external_request.rb +42 -0
- data/app/models/arclight/requests/aeon_web_ead.rb +47 -0
- data/app/models/arclight/requests/google_form.rb +2 -2
- data/app/models/concerns/arclight/catalog.rb +14 -2
- data/app/models/concerns/arclight/search_behavior.rb +27 -12
- data/app/models/concerns/arclight/solr_document.rb +29 -7
- data/app/views/arclight/_requests.html.erb +7 -0
- data/app/views/arclight/repositories/_in_person_repository.html.erb +2 -2
- data/app/views/arclight/repositories/_repository.html.erb +2 -2
- data/app/views/arclight/repositories/_repository_contact.html.erb +9 -0
- data/app/views/arclight/repositories/index.html.erb +3 -0
- data/app/views/arclight/repositories/show.html.erb +5 -4
- data/app/views/arclight/requests/_aeon_external_request_endpoint.html.erb +9 -0
- data/app/views/arclight/requests/_aeon_web_ead.html.erb +7 -0
- data/app/views/arclight/requests/_google_form.html.erb +2 -1
- data/app/views/arclight/viewers/_oembed.html.erb +2 -1
- data/app/views/catalog/_access_contents.html.erb +15 -0
- data/app/views/catalog/_arclight_abstract_or_scope.html.erb +5 -0
- data/app/views/catalog/_arclight_bookmark_control.html.erb +38 -0
- data/app/views/catalog/_arclight_document_header_icon.html.erb +1 -0
- data/app/views/catalog/_arclight_index_compact_default.html.erb +18 -11
- data/app/views/catalog/_arclight_index_default.html.erb +45 -0
- data/app/views/catalog/_arclight_index_group_document_compact_default.html.erb +19 -0
- data/app/views/catalog/_arclight_index_group_document_default.html.erb +18 -0
- data/app/views/catalog/_arclight_online_content_indicator.html.erb +1 -3
- data/app/views/catalog/_collection_contents.html.erb +2 -10
- data/app/views/catalog/_collection_context.html.erb +15 -0
- data/app/views/catalog/_collection_context_nav.html.erb +12 -0
- data/app/views/catalog/_collection_online_contents.html.erb +3 -3
- data/app/views/catalog/_component_context.html.erb +5 -0
- data/app/views/catalog/_containers.html.erb +3 -0
- data/app/views/catalog/_context_card.html.erb +1 -1
- data/app/views/catalog/_context_sidebar.html.erb +2 -2
- data/app/views/catalog/_custom_metadata.html.erb +1 -1
- data/app/views/catalog/_document_downloads.html.erb +14 -0
- data/app/views/catalog/_group.html.erb +21 -0
- data/app/views/catalog/_group_header_compact_default.html.erb +15 -0
- data/app/views/catalog/_group_header_default.html.erb +20 -0
- data/app/views/catalog/_group_toggle.html.erb +10 -0
- data/app/views/catalog/_home.html.erb +1 -1
- data/app/views/catalog/_index_breadcrumb_default.html.erb +5 -2
- data/app/views/catalog/_index_collection_context_default.html.erb +53 -0
- data/app/views/catalog/_index_default.html.erb +1 -1
- data/app/views/catalog/_index_header.html.erb +3 -3
- data/app/views/catalog/_index_online_contents_default.html.erb +1 -1
- data/app/views/catalog/_online_content_label.html.erb +5 -0
- data/app/views/catalog/_search_form.html.erb +34 -0
- data/app/views/catalog/_search_results.html.erb +1 -4
- data/app/views/catalog/_show_actions_box_default.html.erb +27 -0
- data/app/views/catalog/_show_breadcrumbs_default.html.erb +5 -6
- data/app/views/catalog/_show_collection.html.erb +42 -24
- data/app/views/catalog/_show_default.html.erb +64 -26
- data/app/views/catalog/_show_upper_metadata_collection.html.erb +1 -0
- data/app/views/catalog/_show_upper_metadata_default.html.erb +14 -0
- data/app/views/catalog/_sort_and_per_page.html.erb +8 -0
- data/app/views/catalog/_within_collection_dropdown.html.erb +26 -0
- data/app/views/shared/_breadcrumbs.html.erb +4 -4
- data/app/views/shared/_context_sidebar.html.erb +2 -2
- data/app/views/shared/_header_navbar.html.erb +51 -43
- data/app/views/shared/_main_menu_links.html.erb +1 -1
- data/app/views/shared/_show_breadcrumbs.html.erb +27 -0
- data/arclight.gemspec +15 -12
- data/config/i18n-tasks.yml +133 -0
- data/config/locales/arclight.en.yml +89 -55
- data/config/repositories.yml +0 -0
- data/lib/arclight/engine.rb +23 -12
- data/lib/arclight/hash_absolute_xpath.rb +61 -0
- data/lib/arclight/level_label.rb +46 -0
- data/lib/arclight/missing_id_strategy.rb +21 -0
- data/lib/arclight/normalized_date.rb +21 -12
- data/lib/arclight/normalized_id.rb +1 -0
- data/lib/arclight/normalized_title.rb +1 -0
- data/lib/arclight/repository.rb +61 -25
- data/lib/arclight/traject/ead2_config.rb +495 -0
- data/lib/arclight/traject/nokogiri_namespaceless_reader.rb +22 -0
- data/lib/arclight/version.rb +1 -1
- data/lib/arclight/viewers/oembed.rb +1 -0
- data/lib/arclight/year_range.rb +9 -1
- data/lib/generators/arclight/install_generator.rb +34 -3
- data/lib/generators/arclight/templates/catalog_controller.rb +162 -131
- data/lib/generators/arclight/templates/config/downloads.yml +12 -0
- data/lib/generators/arclight/templates/config/repositories.yml +20 -2
- data/lib/generators/arclight/update_generator.rb +1 -1
- data/lib/tasks/index.rake +19 -19
- data/package.json +8 -1
- data/solr/conf/schema.xml +56 -292
- data/solr/conf/solrconfig.xml +40 -125
- data/tasks/arclight.rake +6 -1
- data/template.rb +1 -1
- data/vendor/assets/javascripts/responsiveTruncator.js +2 -2
- metadata +159 -60
- data/app/assets/javascripts/arclight/collection_context.js +0 -18
- data/app/assets/javascripts/arclight/component_ancestors.js +0 -56
- data/app/assets/stylesheets/arclight/modules/sidebar.scss +0 -21
- data/app/views/catalog/_arclight_document_show_header.html.erb +0 -15
- data/app/views/catalog/_arclight_document_show_header_collection.html.erb +0 -12
- data/app/views/catalog/_collection_count.html.erb +0 -7
- data/app/views/catalog/_collection_downloads.html.erb +0 -15
- data/app/views/catalog/_collection_overview.html.erb +0 -7
- data/app/views/catalog/_component_overview.html.erb +0 -46
- data/app/views/catalog/_index_header_hierarchy_default.html.erb +0 -42
- data/app/views/catalog/_index_hierarchy_default.html.erb +0 -28
- data/app/views/catalog/_results_histogram.html.erb +0 -10
- data/app/views/catalog/_search_within_form.html.erb +0 -16
- data/app/views/catalog/_show_component_sidebar.html.erb +0 -12
- data/app/views/catalog/_show_header.html.erb +0 -5
- data/app/views/catalog/_show_sidebar.html.erb +0 -30
- data/lib/arclight/custom_component.rb +0 -99
- data/lib/arclight/custom_document.rb +0 -93
- data/lib/arclight/indexer.rb +0 -9
- data/lib/arclight/shared_indexing_behavior.rb +0 -97
- data/lib/arclight/shared_terminology_behavior.rb +0 -65
- data/lib/arclight/solr_ead_indexer_ext.rb +0 -159
- data/lib/generators/arclight/templates/arclight.js +0 -2
|
File without changes
|
data/lib/arclight/engine.rb
CHANGED
|
@@ -1,33 +1,36 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'sprockets/bumble_d'
|
|
3
4
|
require 'blacklight'
|
|
4
|
-
require '
|
|
5
|
+
require 'traject'
|
|
6
|
+
require 'active_model'
|
|
5
7
|
require 'arclight/exceptions'
|
|
6
8
|
require 'arclight/normalized_date'
|
|
7
9
|
require 'arclight/normalized_id'
|
|
8
10
|
require 'arclight/normalized_title'
|
|
9
11
|
require 'arclight/digital_object'
|
|
10
|
-
require 'arclight/shared_indexing_behavior'
|
|
11
|
-
require 'arclight/shared_terminology_behavior'
|
|
12
|
-
require 'arclight/custom_document'
|
|
13
|
-
require 'arclight/custom_component'
|
|
14
|
-
require 'arclight/solr_ead_indexer_ext'
|
|
15
|
-
require 'arclight/indexer'
|
|
16
12
|
require 'arclight/viewer'
|
|
17
13
|
|
|
18
14
|
module Arclight
|
|
19
15
|
##
|
|
20
16
|
# This is the defining class for the Arclight Rails Engine
|
|
21
17
|
class Engine < ::Rails::Engine
|
|
18
|
+
extend ::Sprockets::BumbleD::DSL
|
|
19
|
+
|
|
20
|
+
# We're not sure this is right, but we aren't doing module imports
|
|
21
|
+
# at the moment anyway
|
|
22
|
+
register_umd_globals :arclight,
|
|
23
|
+
'blacklight' => 'Blacklight'
|
|
24
|
+
|
|
22
25
|
config.viewer_class = Arclight::Viewers::OEmbed
|
|
23
26
|
config.oembed_resource_exclude_patterns = [/\.pdf$/, /\.ppt$/]
|
|
24
27
|
|
|
25
28
|
Arclight::Engine.config.catalog_controller_field_accessors = %i[
|
|
26
29
|
summary_field
|
|
27
30
|
access_field
|
|
31
|
+
contact_field
|
|
28
32
|
background_field
|
|
29
33
|
related_field
|
|
30
|
-
admin_info_field
|
|
31
34
|
terms_field
|
|
32
35
|
cite_field
|
|
33
36
|
indexed_terms_field
|
|
@@ -38,6 +41,18 @@ module Arclight
|
|
|
38
41
|
component_indexed_terms_field
|
|
39
42
|
]
|
|
40
43
|
|
|
44
|
+
Arclight::Engine.config.catalog_controller_group_query_params = {
|
|
45
|
+
group: true,
|
|
46
|
+
'group.field': 'collection_ssi',
|
|
47
|
+
'group.ngroups': true,
|
|
48
|
+
'group.limit': 3,
|
|
49
|
+
fl: '*,parent:[subquery]',
|
|
50
|
+
'parent.fl': '*',
|
|
51
|
+
'parent.q': '{!term f=collection_sim v=$row.collection_ssi}',
|
|
52
|
+
'parent.fq': '{!term f=level_sim v="Collection"}',
|
|
53
|
+
'parent.defType': 'lucene'
|
|
54
|
+
}
|
|
55
|
+
|
|
41
56
|
initializer 'arclight.fields' do
|
|
42
57
|
Arclight::Engine.config.catalog_controller_field_accessors.each do |field|
|
|
43
58
|
Blacklight::Configuration.define_field_access field
|
|
@@ -47,9 +62,5 @@ module Arclight
|
|
|
47
62
|
initializer 'arclight.helpers' do
|
|
48
63
|
ActionView::Base.send :include, ArclightHelper
|
|
49
64
|
end
|
|
50
|
-
|
|
51
|
-
initializer 'arclight.views' do
|
|
52
|
-
Blacklight::Configuration.default_values[:view].hierarchy
|
|
53
|
-
end
|
|
54
65
|
end
|
|
55
66
|
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
5
|
+
module Arclight
|
|
6
|
+
##
|
|
7
|
+
# Take a Nokogiri node and get its absolute path (inserting our own indexes for component levels)
|
|
8
|
+
# and hash that output. This is intended as a potential strategy for handling missing IDs in EADs.
|
|
9
|
+
class HashAbsoluteXpath
|
|
10
|
+
class << self
|
|
11
|
+
attr_writer :hash_algorithm
|
|
12
|
+
|
|
13
|
+
def hash_algorithm
|
|
14
|
+
return Digest::SHA1 unless defined? @hash_algorithm
|
|
15
|
+
|
|
16
|
+
@hash_algorithm
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
COMPONENT_NODE_NAME_REGEX = /^c\d{,2}$/.freeze
|
|
21
|
+
attr_reader :node
|
|
22
|
+
def initialize(node)
|
|
23
|
+
@node = node
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def to_hexdigest
|
|
27
|
+
self.class.hash_algorithm.hexdigest(absolute_xpath).prepend('al_')
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def absolute_xpath
|
|
31
|
+
ancestor_tree = node.ancestors.map do |ancestor|
|
|
32
|
+
ancestor_name_and_index(ancestor)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
"#{[ancestor_tree.reverse, node.name].flatten.join('/')}#{current_index}"
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
def current_index
|
|
41
|
+
siblings.index(node)
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def component_siblings_for_node(xml_node)
|
|
45
|
+
xml_node.parent.children.select { |n| n.name =~ COMPONENT_NODE_NAME_REGEX }
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def siblings
|
|
49
|
+
@siblings ||= component_siblings_for_node(node)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def ancestor_name_and_index(ancestor)
|
|
53
|
+
if ancestor.name =~ COMPONENT_NODE_NAME_REGEX
|
|
54
|
+
index = component_siblings_for_node(ancestor).index(ancestor)
|
|
55
|
+
"#{ancestor.name}#{index}"
|
|
56
|
+
else
|
|
57
|
+
ancestor.name
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Arclight
|
|
4
|
+
##
|
|
5
|
+
# A utility class to return a human-readable label for an EAD @level code.
|
|
6
|
+
# Can use the value from @otherlevel if provided.
|
|
7
|
+
# Examples from @level: recordgrp = "Record Group"
|
|
8
|
+
# collection = "Collection"
|
|
9
|
+
# subseries = "Subseries"
|
|
10
|
+
# otherlevel = (text provided in @otherlevel)
|
|
11
|
+
class LevelLabel
|
|
12
|
+
# @param [String] `level` from the collection or component @level
|
|
13
|
+
# @param [String] `other_level` from the collection or component @otherlevel
|
|
14
|
+
def initialize(level, other_level = nil)
|
|
15
|
+
@level = level
|
|
16
|
+
@other_level = other_level if other_level.present?
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# @return [String] the human-readable label
|
|
20
|
+
def to_s
|
|
21
|
+
human_readable_level
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
private
|
|
25
|
+
|
|
26
|
+
attr_reader :level, :other_level
|
|
27
|
+
|
|
28
|
+
CUSTOM_LEVEL_LABELS = {
|
|
29
|
+
recordgrp: 'Record Group',
|
|
30
|
+
subgrp: 'Subgroup'
|
|
31
|
+
}.freeze
|
|
32
|
+
|
|
33
|
+
def human_readable_level
|
|
34
|
+
if level == 'otherlevel'
|
|
35
|
+
alternative_level
|
|
36
|
+
elsif level.present?
|
|
37
|
+
CUSTOM_LEVEL_LABELS.fetch(level.to_sym, level.capitalize).to_s
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def alternative_level
|
|
42
|
+
alternative_level = other_level if other_level
|
|
43
|
+
alternative_level.present? ? alternative_level.capitalize : 'Other'
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'arclight/hash_absolute_xpath'
|
|
4
|
+
|
|
5
|
+
module Arclight
|
|
6
|
+
##
|
|
7
|
+
# A class to configure a selected MissingIdStrategy.
|
|
8
|
+
# Defaults to Arclight::HashAbsoluteXpath
|
|
9
|
+
# This can be updated in an initializer to be any other class
|
|
10
|
+
class MissingIdStrategy
|
|
11
|
+
class << self
|
|
12
|
+
attr_writer :selected
|
|
13
|
+
|
|
14
|
+
def selected
|
|
15
|
+
return Arclight::HashAbsoluteXpath unless defined? @selected
|
|
16
|
+
|
|
17
|
+
@selected
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -7,16 +7,21 @@ module Arclight
|
|
|
7
7
|
# @see http://www2.archivists.org/standards/DACS/part_I/chapter_2/4_date
|
|
8
8
|
class NormalizedDate
|
|
9
9
|
# @param [String | Array<String>] `inclusive` from the `unitdate`
|
|
10
|
-
# @param [String] `bulk` from the `unitdate`
|
|
11
|
-
# @param [String] `other` from the `unitdate` when type is not specified
|
|
12
|
-
def initialize(inclusive, bulk =
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
10
|
+
# @param [Array<String>] `bulk` from the `unitdate`
|
|
11
|
+
# @param [Array<String>] `other` from the `unitdate` when type is not specified
|
|
12
|
+
def initialize(inclusive, bulk = [], other = [])
|
|
13
|
+
@inclusive = (inclusive || []).map do |inclusive_text|
|
|
14
|
+
if inclusive_text.is_a? Array # of YYYY-YYYY for ranges
|
|
15
|
+
# NOTE: This code is not routable AFAICT in actual indexing.
|
|
16
|
+
# We pass arrays of strings (or xml nodes) here, and never a multidimensional array
|
|
17
|
+
year_range(inclusive_text)
|
|
18
|
+
elsif inclusive_text.present?
|
|
19
|
+
inclusive_text.strip
|
|
20
|
+
end
|
|
21
|
+
end&.join(', ')
|
|
22
|
+
|
|
23
|
+
@bulk = Array.wrap(bulk).compact.map(&:strip).join(', ')
|
|
24
|
+
@other = Array.wrap(other).compact.map(&:strip).join(', ')
|
|
20
25
|
end
|
|
21
26
|
|
|
22
27
|
# @return [String] the normalized title/date
|
|
@@ -28,6 +33,10 @@ module Arclight
|
|
|
28
33
|
|
|
29
34
|
attr_reader :inclusive, :bulk, :other
|
|
30
35
|
|
|
36
|
+
def year_range(date_array)
|
|
37
|
+
YearRange.new(date_array.include?('/') ? date_array : date_array.map { |v| v.tr('-', '/') }).to_s
|
|
38
|
+
end
|
|
39
|
+
|
|
31
40
|
# @see http://www2.archivists.org/standards/DACS/part_I/chapter_2/4_date for rules
|
|
32
41
|
def normalize
|
|
33
42
|
if inclusive.present?
|
|
@@ -38,8 +47,8 @@ module Arclight
|
|
|
38
47
|
else
|
|
39
48
|
result = nil
|
|
40
49
|
end
|
|
41
|
-
|
|
42
|
-
result
|
|
50
|
+
|
|
51
|
+
result&.strip
|
|
43
52
|
end
|
|
44
53
|
end
|
|
45
54
|
end
|
data/lib/arclight/repository.rb
CHANGED
|
@@ -7,31 +7,14 @@ module Arclight
|
|
|
7
7
|
class Repository
|
|
8
8
|
include ActiveModel::Conversion # for to_partial_path
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
description
|
|
12
|
-
visit_note
|
|
13
|
-
building
|
|
14
|
-
address1
|
|
15
|
-
address2
|
|
16
|
-
city
|
|
17
|
-
state
|
|
18
|
-
zip
|
|
19
|
-
country
|
|
20
|
-
phone
|
|
21
|
-
contact_info
|
|
22
|
-
thumbnail_url
|
|
23
|
-
google_request_url
|
|
24
|
-
google_request_mappings
|
|
25
|
-
collection_count].freeze
|
|
26
|
-
|
|
27
|
-
attr_accessor :slug, *FIELDS
|
|
10
|
+
attr_accessor :slug, :collection_count
|
|
28
11
|
|
|
29
12
|
# @param [String] `slug` the unique identifier for the repository
|
|
30
13
|
# @param [Hash] `data`
|
|
31
14
|
def initialize(slug, data = {})
|
|
32
15
|
@slug = slug
|
|
33
|
-
|
|
34
|
-
|
|
16
|
+
data.each do |field, value|
|
|
17
|
+
self.class.attr_accessor field.to_sym
|
|
35
18
|
send("#{field}=", value) if value.present?
|
|
36
19
|
end
|
|
37
20
|
end
|
|
@@ -43,6 +26,57 @@ module Arclight
|
|
|
43
26
|
[city, state_zip, country].compact.join(', ')
|
|
44
27
|
end
|
|
45
28
|
|
|
29
|
+
# Why are we using self#respond_to? below?
|
|
30
|
+
#
|
|
31
|
+
# All the keys in the config hash from `repositories.yml` are
|
|
32
|
+
# on-the-fly added as attr_accessors up in #initialize. If the
|
|
33
|
+
# request_types key isn't present, the method won't be created.
|
|
34
|
+
#
|
|
35
|
+
# Since the original data is thrown away, this is the best way
|
|
36
|
+
# to see if that key was present.
|
|
37
|
+
def request_config_present?
|
|
38
|
+
return false unless respond_to? :request_types
|
|
39
|
+
return false if request_types.nil? || request_types.empty?
|
|
40
|
+
|
|
41
|
+
request_configs = request_types.map { |_k, v| v }
|
|
42
|
+
request_configs[0]&.fetch('request_url').present? &&
|
|
43
|
+
request_configs[0]&.fetch('request_mappings').present?
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def request_config_present_for_type?(type)
|
|
47
|
+
return false unless type && request_config_present?
|
|
48
|
+
|
|
49
|
+
config = request_types[type]
|
|
50
|
+
config&.fetch('request_url').present? &&
|
|
51
|
+
config&.fetch('request_mappings').present?
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def request_config_for_type(type)
|
|
55
|
+
return nil unless type && request_config_present_for_type?(type)
|
|
56
|
+
|
|
57
|
+
request_types[type]
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def request_url_for_type(type)
|
|
61
|
+
return nil unless type && request_config_present_for_type?(type)
|
|
62
|
+
|
|
63
|
+
config = request_config_for_type(type)
|
|
64
|
+
config.fetch('request_url')
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def request_mappings_for_type(type)
|
|
68
|
+
return nil unless type && request_config_present_for_type?(type)
|
|
69
|
+
|
|
70
|
+
config = request_config_for_type(type)
|
|
71
|
+
config.fetch('request_mappings')
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def available_request_types
|
|
75
|
+
return [] unless request_types.present?
|
|
76
|
+
|
|
77
|
+
request_types.keys
|
|
78
|
+
end
|
|
79
|
+
|
|
46
80
|
# Load repository information from a YAML file
|
|
47
81
|
#
|
|
48
82
|
# @param [String] `filename`
|
|
@@ -59,19 +93,20 @@ module Arclight
|
|
|
59
93
|
# Mimics ActiveRecord's `all` behavior
|
|
60
94
|
#
|
|
61
95
|
# @return [Array<Repository>]
|
|
62
|
-
def self.all
|
|
63
|
-
|
|
96
|
+
def self.all(yaml_file = nil)
|
|
97
|
+
yaml_file = ENV['REPOSITORY_FILE'] || 'config/repositories.yml' if yaml_file.nil?
|
|
98
|
+
from_yaml(yaml_file).values
|
|
64
99
|
end
|
|
65
100
|
|
|
66
101
|
# Mimics ActiveRecord dynamic `find_by` behavior for the slug or name
|
|
67
102
|
#
|
|
68
103
|
# @param [String] `slug` or `name`
|
|
69
104
|
# @return [Repository]
|
|
70
|
-
def self.find_by(slug: nil, name: nil)
|
|
105
|
+
def self.find_by(slug: nil, name: nil, yaml_file: nil)
|
|
71
106
|
if slug
|
|
72
|
-
all.find { |repo| repo.slug == slug }
|
|
107
|
+
all(yaml_file).find { |repo| repo.slug == slug }
|
|
73
108
|
elsif name
|
|
74
|
-
all.find { |repo| repo.name == name }
|
|
109
|
+
all(yaml_file).find { |repo| repo.name == name }
|
|
75
110
|
else
|
|
76
111
|
raise ArgumentError, 'Requires either slug or name parameters to find_by'
|
|
77
112
|
end
|
|
@@ -85,6 +120,7 @@ module Arclight
|
|
|
85
120
|
def self.find_by!(*args)
|
|
86
121
|
repository = find_by(*args)
|
|
87
122
|
raise ActiveRecord::RecordNotFound if repository.blank?
|
|
123
|
+
|
|
88
124
|
repository
|
|
89
125
|
end
|
|
90
126
|
end
|
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
require 'traject'
|
|
5
|
+
require 'traject/nokogiri_reader'
|
|
6
|
+
require 'traject_plus'
|
|
7
|
+
require 'traject_plus/macros'
|
|
8
|
+
require 'arclight/exceptions'
|
|
9
|
+
require 'arclight/level_label'
|
|
10
|
+
require 'arclight/normalized_date'
|
|
11
|
+
require 'arclight/normalized_title'
|
|
12
|
+
require 'active_model/conversion' ## Needed for Arclight::Repository
|
|
13
|
+
require 'active_support/core_ext/array/wrap'
|
|
14
|
+
require 'arclight/digital_object'
|
|
15
|
+
require 'arclight/year_range'
|
|
16
|
+
require 'arclight/repository'
|
|
17
|
+
require 'arclight/missing_id_strategy'
|
|
18
|
+
require 'arclight/traject/nokogiri_namespaceless_reader'
|
|
19
|
+
|
|
20
|
+
# rubocop:disable Style/MixinUsage
|
|
21
|
+
extend TrajectPlus::Macros
|
|
22
|
+
# rubocop:enable Style/MixinUsage
|
|
23
|
+
|
|
24
|
+
NAME_ELEMENTS = %w[corpname famname name persname].freeze
|
|
25
|
+
|
|
26
|
+
SEARCHABLE_NOTES_FIELDS = %w[
|
|
27
|
+
accessrestrict
|
|
28
|
+
accruals
|
|
29
|
+
altformavail
|
|
30
|
+
appraisal
|
|
31
|
+
arrangement
|
|
32
|
+
bibliography
|
|
33
|
+
bioghist
|
|
34
|
+
custodhist
|
|
35
|
+
fileplan
|
|
36
|
+
note
|
|
37
|
+
odd
|
|
38
|
+
originalsloc
|
|
39
|
+
otherfindaid
|
|
40
|
+
phystech
|
|
41
|
+
prefercite
|
|
42
|
+
processinfo
|
|
43
|
+
relatedmaterial
|
|
44
|
+
scopecontent
|
|
45
|
+
separatedmaterial
|
|
46
|
+
userestrict
|
|
47
|
+
].freeze
|
|
48
|
+
|
|
49
|
+
DID_SEARCHABLE_NOTES_FIELDS = %w[
|
|
50
|
+
abstract
|
|
51
|
+
materialspec
|
|
52
|
+
physloc
|
|
53
|
+
].freeze
|
|
54
|
+
|
|
55
|
+
settings do
|
|
56
|
+
provide 'reader_class_name', 'Arclight::Traject::NokogiriNamespacelessReader'
|
|
57
|
+
provide 'solr_writer.commit_on_close', 'true'
|
|
58
|
+
provide 'repository', ENV['REPOSITORY_ID']
|
|
59
|
+
provide 'logger', Logger.new($stderr)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
each_record do |_record, context|
|
|
63
|
+
next unless settings['repository']
|
|
64
|
+
|
|
65
|
+
context.clipboard[:repository] = Arclight::Repository.find_by(
|
|
66
|
+
slug: settings['repository']
|
|
67
|
+
).name
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# ==================
|
|
71
|
+
# Top level document
|
|
72
|
+
# ==================
|
|
73
|
+
|
|
74
|
+
to_field 'id', extract_xpath('/ead/eadheader/eadid'), strip, gsub('.', '-')
|
|
75
|
+
to_field 'title_filing_si', extract_xpath('/ead/eadheader/filedesc/titlestmt/titleproper[@type="filing"]')
|
|
76
|
+
to_field 'title_ssm', extract_xpath('/ead/archdesc/did/unittitle')
|
|
77
|
+
to_field 'title_teim', extract_xpath('/ead/archdesc/did/unittitle')
|
|
78
|
+
to_field 'ead_ssi', extract_xpath('/ead/eadheader/eadid')
|
|
79
|
+
|
|
80
|
+
to_field 'unitdate_ssm', extract_xpath('/ead/archdesc/did/unitdate')
|
|
81
|
+
to_field 'unitdate_bulk_ssim', extract_xpath('/ead/archdesc/did/unitdate[@type="bulk"]')
|
|
82
|
+
to_field 'unitdate_inclusive_ssm', extract_xpath('/ead/archdesc/did/unitdate[@type="inclusive"]')
|
|
83
|
+
to_field 'unitdate_other_ssim', extract_xpath('/ead/archdesc/did/unitdate[not(@type)]')
|
|
84
|
+
|
|
85
|
+
# All top-level docs treated as 'collection' for routing / display purposes
|
|
86
|
+
to_field 'level_ssm' do |_record, accumulator|
|
|
87
|
+
accumulator << 'collection'
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Keep the original top-level archdesc/@level for Level facet in addition to 'Collection'
|
|
91
|
+
to_field 'level_sim' do |record, accumulator|
|
|
92
|
+
level = record.at_xpath('/ead/archdesc').attribute('level')&.value
|
|
93
|
+
other_level = record.at_xpath('/ead/archdesc').attribute('otherlevel')&.value
|
|
94
|
+
|
|
95
|
+
accumulator << Arclight::LevelLabel.new(level, other_level).to_s
|
|
96
|
+
accumulator << 'Collection' unless level == 'collection'
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
to_field 'unitid_ssm', extract_xpath('/ead/archdesc/did/unitid')
|
|
100
|
+
to_field 'unitid_teim', extract_xpath('/ead/archdesc/did/unitid')
|
|
101
|
+
to_field 'collection_unitid_ssm', extract_xpath('/ead/archdesc/did/unitid')
|
|
102
|
+
|
|
103
|
+
to_field 'normalized_title_ssm' do |_record, accumulator, context|
|
|
104
|
+
dates = Arclight::NormalizedDate.new(
|
|
105
|
+
context.output_hash['unitdate_inclusive_ssm'],
|
|
106
|
+
context.output_hash['unitdate_bulk_ssim'],
|
|
107
|
+
context.output_hash['unitdate_other_ssim']
|
|
108
|
+
).to_s
|
|
109
|
+
title = context.output_hash['title_ssm'].first
|
|
110
|
+
accumulator << Arclight::NormalizedTitle.new(title, dates).to_s
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
to_field 'normalized_date_ssm' do |_record, accumulator, context|
|
|
114
|
+
accumulator << Arclight::NormalizedDate.new(
|
|
115
|
+
context.output_hash['unitdate_inclusive_ssm'],
|
|
116
|
+
context.output_hash['unitdate_bulk_ssim'],
|
|
117
|
+
context.output_hash['unitdate_other_ssim']
|
|
118
|
+
).to_s
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
to_field 'collection_ssm' do |_record, accumulator, context|
|
|
122
|
+
accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
|
|
123
|
+
end
|
|
124
|
+
to_field 'collection_sim' do |_record, accumulator, context|
|
|
125
|
+
accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
|
|
126
|
+
end
|
|
127
|
+
to_field 'collection_ssi' do |_record, accumulator, context|
|
|
128
|
+
accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
|
|
129
|
+
end
|
|
130
|
+
to_field 'collection_title_tesim' do |_record, accumulator, context|
|
|
131
|
+
accumulator.concat context.output_hash.fetch('normalized_title_ssm', [])
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
to_field 'repository_ssm' do |_record, accumulator, context|
|
|
135
|
+
accumulator << context.clipboard[:repository]
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
to_field 'repository_sim' do |_record, accumulator, context|
|
|
139
|
+
accumulator << context.clipboard[:repository]
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
to_field 'geogname_ssm', extract_xpath('/ead/archdesc/controlaccess/geogname')
|
|
143
|
+
to_field 'geogname_sim', extract_xpath('/ead/archdesc/controlaccess/geogname')
|
|
144
|
+
|
|
145
|
+
to_field 'creator_ssm', extract_xpath('/ead/archdesc/did/origination')
|
|
146
|
+
to_field 'creator_sim', extract_xpath('/ead/archdesc/did/origination')
|
|
147
|
+
to_field 'creator_ssim', extract_xpath('/ead/archdesc/did/origination')
|
|
148
|
+
to_field 'creator_sort' do |record, accumulator|
|
|
149
|
+
accumulator << record.xpath('/ead/archdesc/did/origination').map { |c| c.text.strip }.join(', ')
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
to_field 'creator_persname_ssm', extract_xpath('/ead/archdesc/did/origination/persname')
|
|
153
|
+
to_field 'creator_persname_ssim', extract_xpath('/ead/archdesc/did/origination/persname')
|
|
154
|
+
to_field 'creator_corpname_ssm', extract_xpath('/ead/archdesc/did/origination/corpname')
|
|
155
|
+
to_field 'creator_corpname_sim', extract_xpath('/ead/archdesc/did/origination/corpname')
|
|
156
|
+
to_field 'creator_corpname_ssim', extract_xpath('/ead/archdesc/did/origination/corpname')
|
|
157
|
+
to_field 'creator_famname_ssm', extract_xpath('/ead/archdesc/did/origination/famname')
|
|
158
|
+
to_field 'creator_famname_ssim', extract_xpath('/ead/archdesc/did/origination/famname')
|
|
159
|
+
|
|
160
|
+
to_field 'persname_sim', extract_xpath('//persname')
|
|
161
|
+
|
|
162
|
+
to_field 'creators_ssim' do |_record, accumulator, context|
|
|
163
|
+
accumulator.concat context.output_hash['creator_persname_ssm'] if context.output_hash['creator_persname_ssm']
|
|
164
|
+
accumulator.concat context.output_hash['creator_corpname_ssm'] if context.output_hash['creator_corpname_ssm']
|
|
165
|
+
accumulator.concat context.output_hash['creator_famname_ssm'] if context.output_hash['creator_famname_ssm']
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
to_field 'places_sim', extract_xpath('/ead/archdesc/controlaccess/geogname')
|
|
169
|
+
to_field 'places_ssim', extract_xpath('/ead/archdesc/controlaccess/geogname')
|
|
170
|
+
to_field 'places_ssm', extract_xpath('/ead/archdesc/controlaccess/geogname')
|
|
171
|
+
|
|
172
|
+
to_field 'access_terms_ssm', extract_xpath('/ead/archdesc/userestrict/*[local-name()!="head"]')
|
|
173
|
+
|
|
174
|
+
to_field 'acqinfo_ssim', extract_xpath('/ead/archdesc/acqinfo/*[local-name()!="head"]')
|
|
175
|
+
to_field 'acqinfo_ssim', extract_xpath('/ead/archdesc/descgrp/acqinfo/*[local-name()!="head"]')
|
|
176
|
+
|
|
177
|
+
# Replaces each collection-level <controlaccess> node with the text of its
# descendant subject, function, occupation and genreform elements.
to_field 'access_subjects_ssim', extract_xpath('/ead/archdesc/controlaccess', to_text: false) do |_record, accumulator|
  terms = accumulator.flat_map do |controlaccess|
    %w[subject function occupation genreform].flat_map do |tag|
      controlaccess.xpath(".//#{tag}").map(&:text)
    end
  end
  accumulator.replace(terms)
end
|
|
184
|
+
|
|
185
|
+
# Mirrors whatever was collected for 'access_subjects_ssim' (nothing when
# that field is absent).
to_field 'access_subjects_ssm' do |_record, accumulator, context|
  subjects = context.output_hash['access_subjects_ssim']
  accumulator.concat Array.wrap(subjects)
end

# Collapses the extracted <dao> values into a single boolean: true when at
# least one was found.
to_field 'has_online_content_ssim', extract_xpath('.//dao') do |_record, accumulator|
  online = accumulator.any?
  accumulator.replace([online])
end
|
|
192
|
+
|
|
193
|
+
# Serializes each collection-level <dao> as an Arclight::DigitalObject JSON
# string. The label is the title attribute, falling back to the text of
# daodesc/p; the link is the href attribute, falling back to xlink:href.
to_field 'digital_objects_ssm', extract_xpath('/ead/archdesc/did/dao|/ead/archdesc/dao', to_text: false) do |_record, accumulator|
  accumulator.map! do |dao|
    attrs = dao.attributes
    label = attrs['title']&.value || dao.xpath('daodesc/p').text
    link = attrs['href'] || attrs['xlink:href']
    Arclight::DigitalObject.new(label: label, href: link&.value).to_json
  end
end
|
|
201
|
+
|
|
202
|
+
# Collection-level physical extent, stored and text-searchable variants.
%w[ssm teim].each do |suffix|
  to_field "extent_#{suffix}", extract_xpath('/ead/archdesc/did/physdesc/extent')
end

# Collection-level <genreform> access terms.
%w[sim ssm].each do |suffix|
  to_field "genreform_#{suffix}", extract_xpath('/ead/archdesc/controlaccess/genreform')
end
|
|
206
|
+
|
|
207
|
+
# Expands the unitdate @normal attributes into the list of years reported by
# Arclight::YearRange. When no @normal attribute was found the accumulator is
# left as it is.
to_field 'date_range_sim', extract_xpath('/ead/archdesc/did/unitdate/@normal', to_text: false) do |_record, accumulator|
  year_range = Arclight::YearRange.new
  unless accumulator.blank?
    normal_values = accumulator.map(&:to_s)
    year_range << year_range.parse_ranges(normal_values)
    accumulator.replace year_range.years
  end
end
|
|
215
|
+
|
|
216
|
+
# Note elements directly under <archdesc>: the non-<head> children are kept as
# nodes (_ssm) and as text (_teim); the <head> goes to a heading field for
# every note type except 'prefercite'.
SEARCHABLE_NOTES_FIELDS.each do |note|
  to_field "#{note}_ssm", extract_xpath("/ead/archdesc/#{note}/*[local-name()!='head']", to_text: false)
  to_field "#{note}_heading_ssm", extract_xpath("/ead/archdesc/#{note}/head") if note != 'prefercite'
  to_field "#{note}_teim", extract_xpath("/ead/archdesc/#{note}/*[local-name()!='head']")
end

# Note elements that live inside the collection-level <did>.
DID_SEARCHABLE_NOTES_FIELDS.each do |note|
  to_field "#{note}_ssm", extract_xpath("/ead/archdesc/did/#{note}", to_text: false)
end

# Name elements: collection-level <controlaccess> occurrences feed
# 'names_coll_ssim'; occurrences anywhere in the document feed 'names_ssim'
# and a per-element *_ssm field.
NAME_ELEMENTS.each do |name_element|
  to_field 'names_coll_ssim', extract_xpath("/ead/archdesc/controlaccess/#{name_element}")
  to_field 'names_ssim', extract_xpath("//#{name_element}")
  to_field "#{name_element}_ssm", extract_xpath("//#{name_element}")
end
|
|
231
|
+
|
|
232
|
+
# Every <corpname> found anywhere in the document.
to_field 'corpname_sim', extract_xpath('//corpname')

# Collection-level <langmaterial>.
%w[sim ssm].each do |suffix|
  to_field "language_#{suffix}", extract_xpath('/ead/archdesc/did/langmaterial')
end

# <descrules> from the EAD header's profile description.
to_field 'descrules_ssm', extract_xpath('/ead/eadheader/profiledesc/descrules')
|
|
238
|
+
|
|
239
|
+
# =============================
|
|
240
|
+
# Each component child document
|
|
241
|
+
# <c> <c01> <c12>
|
|
242
|
+
# =============================
|
|
243
|
+
|
|
244
|
+
compose 'components', ->(record, accumulator, _context) { accumulator.concat record.xpath('//*[is_component(.)]', NokogiriXpathExtensions.new) } do
|
|
245
|
+
# Reference id of this component. When the element's id attribute is blank,
# an id is minted with the selected Arclight::MissingIdStrategy, a warning is
# logged, and the minted value is written back onto the element's id
# attribute. Otherwise the existing id is stripped and its dots are replaced
# with dashes.
to_field 'ref_ssi' do |record, accumulator, context|
  ref =
    if record.attribute('id').blank?
      strategy = Arclight::MissingIdStrategy.selected
      minted_id = strategy.new(record).to_hexdigest
      collection_id = context.clipboard[:parent].output_hash['id'].first
      logger.warn('MISSING ID WARNING') do
        "A component in #{collection_id} did not have an ID so one was minted using the #{strategy} strategy. " \
          "The ID of this document will be #{collection_id}#{minted_id}."
      end
      record['id'] = minted_id
      minted_id
    else
      record.attribute('id')&.value&.strip&.gsub('.', '-')
    end
  accumulator << ref
end
|
|
262
|
+
# Copy of the value(s) computed for 'ref_ssi'.
to_field 'ref_ssm' do |_record, accumulator, context|
  refs = context.output_hash['ref_ssi']
  accumulator.concat refs
end

# Document id: the collection's id joined directly (no separator) with this
# component's 'ref_ssi'.
to_field 'id' do |_record, accumulator, context|
  collection_id = context.clipboard[:parent].output_hash['id']
  component_ref = context.output_hash['ref_ssi']
  accumulator << [collection_id, component_ref].join('')
end

# First 'ead_ssi' value of the parent (collection) document.
to_field 'ead_ssi' do |_record, accumulator, context|
  collection = context.clipboard[:parent]
  accumulator << collection.output_hash['ead_ssi'].first
end
|
|
276
|
+
|
|
277
|
+
# Component <unittitle>: a single-valued filing field plus stored and
# text-searchable variants.
to_field 'title_filing_si', extract_xpath('./did/unittitle'), first_only
%w[ssm teim].each do |suffix|
  to_field "title_#{suffix}", extract_xpath('./did/unittitle')
end

# Component <unitdate> values split by their type attribute: bulk, inclusive,
# and those with no type at all.
{
  'unitdate_bulk_ssim' => '[@type="bulk"]',
  'unitdate_inclusive_ssm' => '[@type="inclusive"]',
  'unitdate_other_ssim' => '[not(@type)]'
}.each do |field, predicate|
  to_field field, extract_xpath("./did/unitdate#{predicate}")
end
|
|
284
|
+
|
|
285
|
+
# Title built by Arclight::NormalizedTitle from the first 'title_ssm' value and
# the Arclight::NormalizedDate string for the three unitdate fields.
to_field 'normalized_title_ssm' do |_record, accumulator, context|
  output = context.output_hash
  date_label = Arclight::NormalizedDate.new(
    output['unitdate_inclusive_ssm'],
    output['unitdate_bulk_ssim'],
    output['unitdate_other_ssim']
  ).to_s
  unittitle = output['title_ssm']&.first
  accumulator << Arclight::NormalizedTitle.new(unittitle, date_label).to_s
end

# The Arclight::NormalizedDate string on its own, built from the same three
# unitdate fields (inclusive, bulk, other).
to_field 'normalized_date_ssm' do |_record, accumulator, context|
  output = context.output_hash
  normalized = Arclight::NormalizedDate.new(
    output['unitdate_inclusive_ssm'],
    output['unitdate_bulk_ssim'],
    output['unitdate_other_ssim']
  )
  accumulator << normalized.to_s
end
|
|
302
|
+
|
|
303
|
+
# Nesting depth of this component: the number of ancestor elements that are
# themselves components, plus one.
to_field 'component_level_isim' do |record, accumulator|
  component_ancestors = NokogiriXpathExtensions.new.is_component(record.ancestors)
  accumulator << component_ancestors.count + 1
end
|
|
306
|
+
|
|
307
|
+
# Ancestor chain for this component: the collection's id first, then the id of
# every enclosing component, in the reverse of the order returned by
# `record.ancestors`.
#
# Ancestor ids are normalized exactly as 'ref_ssi' normalizes a component's own
# id (whitespace stripped, '.' replaced with '-'). The parent title and level
# lookups compare these entries against 'ref_ssi' values, so an un-normalized
# id containing a dot or padding would never match its own component.
to_field 'parent_ssim' do |record, accumulator, context|
  accumulator << context.clipboard[:parent].output_hash['id'].first
  component_ancestors = NokogiriXpathExtensions.new.is_component(record.ancestors)
  accumulator.concat(
    component_ancestors.reverse.map { |node| node.attribute('id')&.value&.strip&.gsub('.', '-') }
  )
end
|
|
311
|
+
|
|
312
|
+
# Immediate parent: the last entry of the 'parent_ssim' chain.
to_field 'parent_ssi' do |_record, accumulator, context|
  chain = context.output_hash['parent_ssim']
  accumulator << chain.last
end

# Titles of everything above this component: the collection's normalized
# title, then the normalized titles of the already-indexed components whose
# 'ref_ssi' matches one of this component's ancestor ids.
to_field 'parent_unittitles_ssm' do |_rec, accumulator, context|
  collection = context.clipboard[:parent]

  # top level document
  accumulator.concat collection.output_hash['normalized_title_ssm']

  chain = context.output_hash['parent_ssim']
  siblings_and_ancestors = collection.output_hash['components']
  next unless chain && siblings_and_ancestors

  # other components: the first chain entry is the collection id, so skip it;
  # 'ref_ssi' values are single-element arrays, hence the wrapping
  ancestor_refs = chain.drop(1).map { |ref| [ref] }
  matching = siblings_and_ancestors.select { |component| ancestor_refs.include? component['ref_ssi'] }
  accumulator.concat(matching.flat_map { |component| component['normalized_title_ssm'] })
end
|
|
328
|
+
|
|
329
|
+
# Text-searchable copy of 'parent_unittitles_ssm'.
to_field 'parent_unittitles_teim' do |_record, accumulator, context|
  titles = context.output_hash['parent_unittitles_ssm']
  accumulator.concat titles
end

# Level labels of everything above this component: the collection's
# 'level_ssm', then the 'level_ssm' of each ancestor component looked up by
# its 'ref_ssi'.
to_field 'parent_levels_ssm' do |_record, accumulator, context|
  collection = context.clipboard[:parent]

  ## Top level document
  accumulator.concat collection.output_hash['level_ssm']

  ## Other components (the first 'parent_ssim' entry is the collection id)
  ancestor_ids = context.output_hash['parent_ssim']&.drop(1)
  ancestor_ids&.each do |ancestor_id|
    matches = Array.wrap(collection.output_hash['components']).select do |component|
      component['ref_ssi'] == [ancestor_id]
    end
    accumulator.concat(matches.map { |component| component['level_ssm'] }.flatten)
  end
end
|
|
343
|
+
|
|
344
|
+
# This component's own <unitid>.
to_field 'unitid_ssm', extract_xpath('./did/unitid')

# The collection's 'unitid_ssm' (nothing when the collection has none).
to_field 'collection_unitid_ssm' do |_record, accumulator, context|
  collection_unitids = context.clipboard[:parent].output_hash['unitid_ssm']
  accumulator.concat Array.wrap(collection_unitids)
end

# Repository value read from the parent context's clipboard.
%w[repository_ssm repository_sim].each do |field|
  to_field field do |_record, accumulator, context|
    accumulator << context.clipboard[:parent].clipboard[:repository]
  end
end

# The collection's normalized title, copied into three collection fields.
%w[collection_ssm collection_sim collection_ssi].each do |field|
  to_field field do |_record, accumulator, context|
    accumulator.concat context.clipboard[:parent].output_hash['normalized_title_ssm']
  end
end
|
|
363
|
+
|
|
364
|
+
# Component physical extent, stored and text-searchable variants.
%w[ssm teim].each do |suffix|
  to_field "extent_#{suffix}", extract_xpath('./did/physdesc/extent')
end

# Component <origination>, written to three creator fields.
%w[creator_ssm creator_ssim creators_ssim].each do |field|
  to_field field, extract_xpath('./did/origination')
end

# All <origination> texts joined into one comma-separated sort value.
to_field 'creator_sort' do |record, accumulator|
  originations = record.xpath('./did/origination').map(&:text)
  accumulator << originations.join(', ')
end

# The collection's 'creator_ssm' (nothing when the collection has none).
to_field 'collection_creator_ssm' do |_record, accumulator, context|
  collection_creators = context.clipboard[:parent].output_hash['creator_ssm']
  accumulator.concat Array.wrap(collection_creators)
end

# Single boolean: true when this component contains at least one <dao>.
to_field 'has_online_content_ssim', extract_xpath('.//dao') do |_record, accumulator|
  online = accumulator.any?
  accumulator.replace([online])
end

# Number of direct children that are themselves components.
to_field 'child_component_count_isim' do |record, accumulator|
  child_components = NokogiriXpathExtensions.new.is_component(record.children)
  accumulator << child_components.count
end
|
|
382
|
+
|
|
383
|
+
# Appends the element's id attribute to 'ref_ssm' as a plain String.
# Previously the Nokogiri attribute node itself was pushed, which put a
# non-String XML object into the output hash.
# NOTE(review): 'ref_ssm' is also populated from 'ref_ssi' earlier in this
# compose block, so this looks like it adds a second (un-normalized) copy of
# the id -- confirm whether this rule is still wanted.
to_field 'ref_ssm' do |record, accumulator|
  accumulator << record.attribute('id')&.value
end
|
|
386
|
+
|
|
387
|
+
# Level label produced by Arclight::LevelLabel from the element's level and
# otherlevel attributes (either may be missing).
to_field 'level_ssm' do |record, accumulator|
  level, other_level = %w[level otherlevel].map { |attr_name| record.attribute(attr_name)&.value }
  accumulator << Arclight::LevelLabel.new(level, other_level).to_s
end

# Capitalized copy of 'level_ssm'; skipped when that field is absent.
to_field 'level_sim' do |_record, accumulator, context|
  levels = context.output_hash['level_ssm']
  accumulator.concat levels.map(&:capitalize) if levels
end

# Sort key: the position reported by the mapping context.
to_field 'sort_ii' do |_record, accumulator, context|
  position = context.position
  accumulator.replace([position])
end
|
|
402
|
+
|
|
403
|
+
# Text of the non-<head> children of an ancestor's <accessrestrict>. When
# several ancestors (top-level included) have one, the XPath picks the last of
# them in document order.
to_field 'parent_access_restrict_ssm' do |record, accumulator|
  restrictions = record.xpath('(./ancestor::*/accessrestrict)[last()]/*[local-name()!="head"]')
  accumulator.concat(restrictions.map(&:text))
end

# Same lookup for <userestrict>, except that the component's own
# <userestrict> is also considered (ancestor-or-self).
to_field 'parent_access_terms_ssm' do |record, accumulator|
  terms = record.xpath('(./ancestor-or-self::*/userestrict)[last()]/*[local-name()!="head"]')
  accumulator.concat(terms.map(&:text))
end
|
|
416
|
+
|
|
417
|
+
# Serializes each <dao> directly under the component or under its <did> as an
# Arclight::DigitalObject JSON string. The label is the title attribute,
# falling back to the text of daodesc/p; the link is the href attribute,
# falling back to xlink:href.
to_field 'digital_objects_ssm', extract_xpath('./dao|./did/dao', to_text: false) do |_record, accumulator|
  accumulator.map! do |dao|
    attrs = dao.attributes
    label = attrs['title']&.value || dao.xpath('daodesc/p').text
    link = attrs['href'] || attrs['xlink:href']
    Arclight::DigitalObject.new(label: label, href: link&.value).to_json
  end
end
|
|
425
|
+
|
|
426
|
+
# Expands the component's unitdate @normal attributes into the list of years
# reported by Arclight::YearRange. When no @normal attribute was found the
# accumulator is left as it is.
to_field 'date_range_sim', extract_xpath('./did/unitdate/@normal', to_text: false) do |_record, accumulator|
  year_range = Arclight::YearRange.new
  unless accumulator.blank?
    normal_values = accumulator.map(&:to_s)
    year_range << year_range.parse_ranges(normal_values)
    accumulator.replace year_range.years
  end
end
|
|
434
|
+
|
|
435
|
+
# Name elements: those in the component's <controlaccess> feed 'names_ssim';
# those anywhere beneath the component feed a per-element *_ssm field.
NAME_ELEMENTS.each do |name_element|
  to_field 'names_ssim', extract_xpath("./controlaccess/#{name_element}")
  to_field "#{name_element}_ssm", extract_xpath(".//#{name_element}")
end

# Component <geogname> access terms, written to three fields.
%w[geogname_sim geogname_ssm places_ssim].each do |field|
  to_field field, extract_xpath('./controlaccess/geogname')
end
|
|
443
|
+
|
|
444
|
+
# Replaces each of the component's <controlaccess> nodes with the text of its
# descendant subject, function, occupation and genreform elements.
to_field 'access_subjects_ssim', extract_xpath('./controlaccess', to_text: false) do |_record, accumulator|
  terms = accumulator.flat_map do |controlaccess|
    %w[subject function occupation genreform].flat_map do |tag|
      controlaccess.xpath(".//#{tag}").map(&:text)
    end
  end
  accumulator.replace(terms)
end

# Mirrors 'access_subjects_ssim' (nothing when that field was never set).
to_field 'access_subjects_ssm' do |_record, accumulator, context|
  subjects = context.output_hash.fetch('access_subjects_ssim') { [] }
  accumulator.concat(subjects)
end
|
|
455
|
+
|
|
456
|
+
# Acquisition info (minus <head>): first from the absolute collection-level
# paths, then from the component itself, each with and without a wrapping
# <descgrp>.
%w[/ead/archdesc/acqinfo /ead/archdesc/descgrp/acqinfo ./acqinfo ./descgrp/acqinfo].each do |path|
  to_field 'acqinfo_ssim', extract_xpath(%(#{path}/*[local-name()!="head"]))
end

# Component <langmaterial>.
to_field 'language_ssm', extract_xpath('./did/langmaterial')

# One label per <container>: its type attribute and its text separated by a
# space, with surrounding whitespace trimmed.
to_field 'containers_ssim' do |record, accumulator|
  labels = record.xpath('./did/container').map do |container|
    "#{container.attribute('type')} #{container.text}".strip
  end
  accumulator.concat(labels)
end
|
|
467
|
+
|
|
468
|
+
# Note elements directly under the component: the non-<head> children are kept
# as nodes (_ssm) and as text (_teim); the <head> goes to a heading field.
SEARCHABLE_NOTES_FIELDS.each do |note|
  to_field "#{note}_ssm", extract_xpath("./#{note}/*[local-name()!='head']", to_text: false)
  to_field "#{note}_heading_ssm", extract_xpath("./#{note}/head")
  to_field "#{note}_teim", extract_xpath("./#{note}/*[local-name()!='head']")
end

# Note elements that live inside the component's <did>.
DID_SEARCHABLE_NOTES_FIELDS.each do |note|
  to_field "#{note}_ssm", extract_xpath("./did/#{note}", to_text: false)
end

# Generic <note> inside the component's <did>.
to_field 'did_note_ssm', extract_xpath('./did/note')
|
|
477
|
+
end
|
|
478
|
+
|
|
479
|
+
# Once a record has been mapped, drop component hashes that have no keys.
# A missing (nil) 'components' value is left untouched.
each_record do |_record, context|
  components = context.output_hash['components']
  context.output_hash['components'] = components.reject { |component| component.keys.none? } if components
end
|
|
482
|
+
|
|
483
|
+
##
# Used for evaluating XPath expressions that need to find EAD component
# elements. An instance is passed to `xpath` as the handler for the custom
# `is_component(.)` function, and the method is also called directly on node
# collections such as a record's ancestors or children.
class NokogiriXpathExtensions
  # rubocop:disable Naming/PredicateName

  # Keeps only the nodes whose element name is a component name: the
  # unnumbered `c` or one of the numbered `c01` through `c12`.
  #
  # @param node_set [Enumerable] nodes that respond to `name`
  # @return [Array] the matching nodes, in their original order
  def is_component(node_set)
    # Build the name list once per call instead of once per node.
    component_elements = (1..12).map { |i| format('c%02d', i) }
    component_elements.push 'c'

    node_set.find_all { |node| component_elements.include? node.name }
  end
  # rubocop:enable Naming/PredicateName
end
|