dor_indexing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +355 -0
  4. data/Gemfile +16 -0
  5. data/Gemfile.lock +218 -0
  6. data/README.md +33 -0
  7. data/Rakefile +11 -0
  8. data/dor_indexing.gemspec +40 -0
  9. data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
  10. data/lib/dor_indexing/builders/author_builder.rb +31 -0
  11. data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
  12. data/lib/dor_indexing/builders/document_builder.rb +106 -0
  13. data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
  14. data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
  15. data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
  16. data/lib/dor_indexing/builders/name_builder.rb +70 -0
  17. data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
  18. data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
  19. data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
  20. data/lib/dor_indexing/builders/topic_builder.rb +96 -0
  21. data/lib/dor_indexing/cocina_repository.rb +24 -0
  22. data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
  23. data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
  24. data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
  25. data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
  26. data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
  27. data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
  28. data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
  29. data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
  30. data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
  31. data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
  32. data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
  33. data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
  34. data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
  35. data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
  36. data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
  37. data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
  38. data/lib/dor_indexing/marc_country.rb +359 -0
  39. data/lib/dor_indexing/selectors/event_selector.rb +112 -0
  40. data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
  41. data/lib/dor_indexing/version.rb +5 -0
  42. data/lib/dor_indexing/workflow_fields.rb +63 -0
  43. data/lib/dor_indexing/workflow_solr_document.rb +93 -0
  44. data/lib/dor_indexing.rb +19 -0
  45. metadata +173 -0
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Collects the subject values that feed the topic-style fields of a solr document.
    class TopicBuilder
      # Convenience entry point: instantiate a builder and run it in one call.
      #
      # @param [Array] subjects the descriptive subjects to inspect
      # @param [String] filter the subject type to keep, e.g. 'topic' or 'name'
      # @param [Boolean] remove_trailing_punctuation whether to strip trailing
      #   spaces, commas, semicolons and backslashes from each value
      # @return [Array<String>] the distinct matching values
      def self.build(subjects, filter:, remove_trailing_punctuation: false)
        builder = new(filter: filter, remove_trailing_punctuation: remove_trailing_punctuation)
        builder.build(subjects)
      end

      def initialize(filter:, remove_trailing_punctuation:)
        @filter = filter
        @remove_trailing_punctuation = remove_trailing_punctuation
      end

      # @param [Array] subjects the descriptive subjects to inspect
      # @return [Array<String>] the distinct, non-nil values of the matching subjects
      def build(subjects)
        values = topics(subjects).flat_map { |topic| flat_topic(topic) }
        values.compact.uniq
      end

      private

      attr_reader :filter

      def remove_trailing_punctuation?
        @remove_trailing_punctuation
      end

      # Filter the subjects we are interested in.
      # Handles:
      #   parallelValue that contain structuredValue and the parallelValue has the type AND
      #   parallelValue that contain structuredValue each with their own type AND
      #   parallelValue that has a type conferred to the child AND
      #   structuredValue that contains structuredValue where the type can be at the higher or lower level.
      def topics(subjects)
        simple = subjects.flat_map { |subject| basic_value(subject) }
        structured = subjects.flat_map { |subject| structured_values(subject) }

        (simple + structured + parallel_subjects(subjects)).compact
      end

      # Subjects carrying a parallelValue: a typed parent is matched as a whole
      # against the filter, an untyped parent has its children matched individually.
      def parallel_subjects(subjects)
        typed, untyped = subjects.select(&:parallelValue).partition(&:type)

        typed.flat_map { |subject| parallel_with_type(subject, subject.type) } +
          untyped.flat_map { |subject| topics(subject.parallelValue) }
      end

      # Reduces one selected value (recursing into parallel values) to an array of strings.
      def flat_topic(value)
        return value.parallelValue.flat_map { |topic| flat_topic(topic) } if value.parallelValue.present?
        return Array(value.value) unless remove_trailing_punctuation?

        # trailing spaces, commas, semicolons, and backslashes are dropped
        Array(value.value&.sub(/[ ,;\\]+$/, ''))
      end

      # @return the item when the type inherited from its parent matches the filter, otherwise nil
      def parallel_with_type(item, type_from_parent)
        type_matches_filter?(type_from_parent) ? item : nil
      end

      # For the 'name' filter, person and title subjects are rebuilt into display
      # values; every other subject is kept as-is when its type matches.
      def basic_value(subject)
        if filter == 'name'
          return create_fullname(subject) if subject.type == 'person'
          return create_title(subject) if subject.type == 'title'
        end

        subject if type_matches_filter?(subject.type)
      end

      # Applies the selection again to the children of a structuredValue whose type matches.
      def structured_values(subject)
        children = Array(subject.structuredValue)
        topics(children.select { |child| type_matches_filter?(child.type) })
      end

      def create_title(title)
        built = Cocina::Models::Builders::TitleBuilder.build([title], strategy: :all, add_punctuation: false)
        built.map { |value| Cocina::Models::DescriptiveValue.new(value: value) }
      end

      def create_fullname(name)
        built = NameBuilder.build([name], strategy: :all)
        built.map { |value| Cocina::Models::DescriptiveValue.new(value: value) }
      end

      # A type matches when it equals the filter; the 'name' filter additionally
      # accepts person, organization, title and occupation.
      def type_matches_filter?(type)
        return true if type == filter

        filter == 'name' && %w[person organization title occupation].include?(type)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  # Abstract repository for looking up Cocina objects and their administrative tags.
  # Both methods raise NotImplementedError here; concrete implementations supply them.
  # In DSA, the concrete implementation backs this with CocinaObjectStore.
  # In DIA, the concrete implementation backs this with Dor Services Client.
  class CocinaRepository
    # Error that implementations raise when a lookup fails.
    RepositoryError = Class.new(StandardError)

    # Looks up the Cocina object for a druid.
    #
    # @param [String] druid
    # @return [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata]
    # @raise [RepositoryError] if the object is not found or other error occurs
    # @raise [NotImplementedError] always, in this abstract class
    def find(druid)
      raise ::NotImplementedError
    end

    # Looks up the administrative tags for a druid.
    #
    # @param [String] druid
    # @return [Array<String>] administrative tags
    # @raise [RepositoryError] if the object is not found or other error occurs
    # @raise [NotImplementedError] always, in this abstract class
    def administrative_tags(druid)
      raise ::NotImplementedError
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Indexers
    # Index administrative tags for an object.
    # NOTE: Most of this code was extracted from the dor-services gem:
    #       https://github.com/sul-dlss/dor-services/blob/v9.0.0/lib/dor/datastreams/identity_metadata_ds.rb#L196-L218
    class AdministrativeTagIndexer
      TAG_PART_DELIMITER = ' : '
      SPECIAL_TAG_TYPES_TO_INDEX = ['Project', 'Registered By'].freeze

      attr_reader :id

      # @param [String] id identifier of the object being indexed
      # @param [Array<String>] administrative_tags tags such as "Project : Foo : Bar"
      def initialize(id:, administrative_tags:, **)
        @id = id
        @administrative_tags = administrative_tags
      end

      # @return [Hash] the partial solr document for administrative tags
      def to_solr
        solr_doc = {
          'tag_ssim' => [],
          'tag_text_unstemmed_im' => [],
          'exploded_nonproject_tag_ssim' => []
        }
        administrative_tags.each { |tag| index_tag(solr_doc, tag) }
        solr_doc
      end

      private

      attr_reader :administrative_tags

      # Adds the fields derived from a single tag to solr_doc.
      def index_tag(solr_doc, tag)
        tag_prefix, rest = tag.split(TAG_PART_DELIMITER, 2)
        # An empty tag splits into no parts at all; there is nothing to index for it.
        return if tag_prefix.nil?

        prefix = tag_prefix.downcase.strip.gsub(/\s/, '_')

        solr_doc['tag_ssim'] << tag # for facet and display
        solr_doc['tag_text_unstemmed_im'] << tag # for search
        solr_doc['exploded_nonproject_tag_ssim'] += exploded_tags_from(tag) unless prefix == 'project'

        # Only the special tag types that have a value get their own <prefix>_tag_ssim field.
        return if rest.nil? || !SPECIAL_TAG_TYPES_TO_INDEX.include?(tag_prefix)

        (solr_doc["#{prefix}_tag_ssim"] ||= []) << rest.strip
        return unless prefix == 'project'

        solr_doc['exploded_project_tag_ssim'] ||= []
        solr_doc['exploded_project_tag_ssim'] += exploded_tags_from(rest.strip)
      end

      # solrize each possible prefix for the tag, inclusive of the full tag.
      # e.g., for a tag such as "A : B : C", this will solrize to an _ssim field
      # that contains ["A", "A : B", "A : B : C"].
      def exploded_tags_from(tag)
        tag_parts = tag.split(TAG_PART_DELIMITER)

        (1..tag_parts.size).map { |count| tag_parts.first(count).join(TAG_PART_DELIMITER) }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Indexers
    # Indexes the titles of the collections an object belongs to
    class CollectionTitleIndexer
      attr_reader :cocina, :parent_collections

      # @param cocina the object being indexed
      # @param parent_collections the collection objects whose titles are indexed
      def initialize(cocina:, parent_collections:, **)
        @cocina = cocina
        @parent_collections = parent_collections
      end

      # @return [Hash] the partial solr document holding the collection titles
      def to_solr
        parent_collections.each_with_object({}) do |collection, solr_doc|
          title = Cocina::Models::Builders::TitleBuilder.build(collection.description.title)

          # create/append collection_title_tesim and collection_title_ssim
          ::Solrizer.insert_field(solr_doc, 'collection_title', title, :stored_searchable, :symbol)
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Indexers
    # Combines several indexer classes so they can be instantiated and run as one.
    # Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb
    class CompositeIndexer
      attr_reader :indexers

      # @param [Array<Class>] indexers the indexer classes to combine
      def initialize(*indexers)
        @indexers = indexers
      end

      # Instantiates every combined indexer with the same keyword arguments.
      #
      # @return [Instance] an object whose #to_solr merges the output of all the indexers
      # @raise [ArgumentError] if one of the indexers cannot be initialized with the arguments
      def new(**options)
        Instance.new(indexers, **options)
      end

      # Instance for a composite indexer
      class Instance
        attr_reader :indexers

        # @param [Array<Class>] indexers the indexer classes to instantiate
        # @raise [ArgumentError] naming the indexer class that rejected the arguments
        def initialize(indexers, **options)
          # The keyword arguments are passed on by name: forwarding an anonymous `**`
          # from inside a block is rejected as a syntax error by some Ruby 3.3 releases.
          @indexers = indexers.map { |indexer_class| build_indexer(indexer_class, options) }
        end

        # @return [Hash] the merged solr document for all the sub-indexers;
        #   for duplicate keys the value from the later indexer wins
        def to_solr
          indexers.each_with_object({}) { |indexer, solr_doc| solr_doc.merge!(indexer.to_solr) }
        end

        private

        # Instantiates one indexer, re-raising argument errors with the class name added.
        def build_indexer(indexer_class, options)
          indexer_class.new(**options)
        rescue ArgumentError => e
          raise ArgumentError, "Unable to initialize #{indexer_class}. #{e.message}"
        end
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Indexers
    # Indexes the content metadata: content type plus counts, sizes, mimetypes and
    # roles of the files found in the object's file sets.
    class ContentMetadataIndexer
      # Solr content type for each Cocina object type; anything else is indexed as 'file'.
      TYPES = {
        Cocina::Models::ObjectType.image => 'image',
        Cocina::Models::ObjectType.manuscript => 'image',
        Cocina::Models::ObjectType.book => 'book',
        Cocina::Models::ObjectType.map => 'map',
        Cocina::Models::ObjectType.three_dimensional => '3d',
        Cocina::Models::ObjectType.media => 'media',
        Cocina::Models::ObjectType.webarchive_seed => 'webarchive-seed',
        Cocina::Models::ObjectType.webarchive_binary => 'webarchive-binary',
        Cocina::Models::ObjectType.geo => 'geo',
        Cocina::Models::ObjectType.document => 'document'
      }.freeze

      attr_reader :cocina

      def initialize(cocina:, **)
        @cocina = cocina
      end

      # @return [Hash] the partial solr document for contentMetadata
      def to_solr
        mimetypes = files.map(&:hasMimeType).uniq
        roles = files.filter_map(&:use)

        {
          'content_type_ssim' => type(cocina.type),
          'content_file_mimetypes_ssim' => mimetypes,
          'content_file_count_itsi' => files.size,
          'shelved_content_file_count_itsi' => shelved_files.size,
          'resource_count_itsi' => file_sets.size,
          # double (trie) to support very large sizes
          'preserved_size_dbtsi' => preserved_size,
          'content_file_roles_ssim' => roles,
          # first_shelved_image is neither indexed nor multiple
          'first_shelved_image_ss' => first_shelved_image
        }
      end

      private

      # @return [String, nil] filename of the first shelved file whose name ends in 'jp2'
      def first_shelved_image
        image = shelved_files.find { |file| file.filename.end_with?('jp2') }
        image&.filename
      end

      def shelved_files
        files.select { |file| file.administrative.shelve }
      end

      # Total size of the files flagged sdrPreserve; files without a size are left out.
      def preserved_size
        preserved = files.select { |file| file.administrative.sdrPreserve }
        preserved.filter_map(&:size).sum
      end

      # Every file of every file set, memoized.
      def files
        @files ||= file_sets.flat_map { |file_set| file_set.structural.contains }
      end

      # The object's file sets, or an empty array when there is no structural metadata.
      def file_sets
        @file_sets ||= Array(cocina.structural&.contains)
      end

      def type(object_type)
        TYPES.fetch(object_type, 'file')
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Indexers
    # Indexes the basic object data (id, version, label, timestamps, collection and
    # admin policy membership) along with the workflow fields.
    class DataIndexer
      attr_reader :cocina, :workflow_client

      def initialize(cocina:, workflow_client:, **)
        @cocina = cocina
        @workflow_client = workflow_client
      end

      # @return [Hash] the partial solr document, with every key converted to a String
      def to_solr
        object_fields = {
          :id => cocina.externalIdentifier,
          'current_version_isi' => cocina.version, # Argo Facet field "Version"
          'obj_label_tesim' => cocina.label,
          'modified_latest_dttsi' => modified_latest,
          'created_at_dttsi' => created_at,
          # is_member_of_collection_ssim is used by dor-services-app for querying for members of a
          # collection and it is a facet in Argo
          'is_member_of_collection_ssim' => legacy_collections,
          'is_governed_by_ssim' => legacy_apo, # Argo facet
          # Used so that DSA can generate public XML whereas a constituent can find the virtual object it is part of.
          'has_constituents_ssim' => virtual_object_constituents
        }
        workflow_fields = DorIndexing::WorkflowFields.for(druid: cocina.externalIdentifier,
                                                          version: cocina.version,
                                                          workflow_client: workflow_client)

        object_fields.merge(workflow_fields).transform_keys(&:to_s)
      end

      # @return [String] the modification time formatted as '%FT%TZ'
      def modified_latest
        cocina.modified.to_datetime.strftime('%FT%TZ')
      end

      # @return [String] the creation time formatted as '%FT%TZ'
      def created_at
        cocina.created.to_datetime.strftime('%FT%TZ')
      end

      # @return [Array<String>] "info:fedora/"-prefixed ids of the collections the object
      #   is a member of; always empty for admin policies and collections
      def legacy_collections
        container_types = [Cocina::Models::ObjectType.admin_policy, Cocina::Models::ObjectType.collection]
        return [] if container_types.any? { |container_type| container_type === cocina.type }

        Array(cocina.structural&.isMemberOf).map { |collection_id| "info:fedora/#{collection_id}" }
      end

      # @return [Array, nil] the members of the first member order; nil when the object
      #   is not a DRO or has no member orders
      def virtual_object_constituents
        return unless cocina.dro?

        member_orders = Array(cocina.structural&.hasMemberOrders)
        member_orders.first&.members
      end

      # @return [String] "info:fedora/"-prefixed id of the governing admin policy
      def legacy_apo
        "info:fedora/#{cocina.administrative.hasAdminPolicy}"
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Indexers
    # Indexes the default object rights, read from the accessTemplate of the
    # object's administrative metadata
    class DefaultObjectRightsIndexer
      attr_reader :cocina

      def initialize(cocina:, **)
        @cocina = cocina
      end

      # @return [Hash] the partial solr document for defaultObjectRights;
      #   empty when the object has no accessTemplate
      def to_solr
        return {} unless access_template

        default_rights = Cocina::Models::Builders::RightsDescriptionBuilder.build(cocina)

        {
          'use_statement_ssim' => use_statement,
          'copyright_ssim' => copyright,
          'rights_descriptions_ssim' => 'dark',
          'default_rights_descriptions_ssim' => default_rights
        }
      end

      private

      def access_template
        cocina.administrative.accessTemplate
      end

      def use_statement
        access_template.useAndReproductionStatement
      end

      def copyright
        access_template.copyright
      end
    end
  end
end
@@ -0,0 +1,226 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stanford-mods'
4
+
5
class DorIndexing
  module Indexers
    # rubocop:disable Metrics/ClassLength
    # Indexes the descriptive metadata, partly from the Cocina description itself and
    # partly from a Stanford::Mods::Record generated from that description.
    class DescriptiveMetadataIndexer
      # SearchWorks format for each (downcased) MODS resource type.
      # See https://github.com/sul-dlss/stanford-mods/blob/master/lib/stanford-mods/searchworks.rb#L244
      FORMAT = {
        'cartographic' => 'Map',
        'manuscript' => 'Archive/Manuscript',
        'mixed material' => 'Archive/Manuscript',
        'moving image' => 'Video',
        'notated music' => 'Music score',
        'software, multimedia' => 'Software/Multimedia',
        'sound recording-musical' => 'Music recording',
        'sound recording-nonmusical' => 'Sound recording',
        'sound recording' => 'Sound recording',
        'still image' => 'Image',
        'three dimensional object' => 'Object',
        'text' => 'Book'
      }.freeze

      attr_reader :cocina, :stanford_mods_record

      # Maps the Cocina description to MODS and wraps it in a Stanford::Mods::Record.
      def initialize(cocina:, **)
        @cocina = cocina
        mods_document = Cocina::Models::Mapping::ToMods::Description.transform(cocina.description,
                                                                               cocina.externalIdentifier)
        @stanford_mods_record = Stanford::Mods::Record.new.from_nk_node(mods_document.root)
      end

      # @return [Hash] the partial solr document for descriptive metadata; fields
      #   whose value is blank are left out
      # rubocop:disable Metrics/MethodLength
      # rubocop:disable Metrics/AbcSize
      def to_solr
        solr_doc = {
          # title
          'sw_display_title_tesim' => title,
          # contributor
          'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
          'sw_author_tesim' => author_primary, # used for author display in Argo
          'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
          'contributor_orcids_ssim' => orcids,
          # topic
          'topic_ssim' => stanford_mods_record.topic_facet&.uniq,
          'topic_tesim' => stemmable_topics,
          # publication
          'originInfo_date_created_tesim' => creation_date,
          'originInfo_publisher_tesim' => publisher_name,
          'originInfo_place_placeTerm_tesim' => event_place, # do we want this?
          'sw_pub_date_facet_ssi' => stanford_mods_record.pub_year_int.to_s, # SW Date facet

          'metadata_format_ssim' => 'mods', # no longer used? https://github.com/search?q=org%3Asul-dlss+metadata_format_ssim&type=code

          # SW facets plus a friend facet
          'sw_format_ssim' => sw_format, # SW Resource Type facet
          'mods_typeOfResource_ssim' => resource_type, # MODS Resource Type facet
          'sw_genre_ssim' => stanford_mods_record.sw_genre, # SW Genre facet
          'sw_language_ssim' => stanford_mods_record.sw_language_facet, # SW Language facet
          'sw_subject_temporal_ssim' => stanford_mods_record.era_facet, # SW Era facet
          'sw_subject_geographic_ssim' => subject_geographic, # SW Region facet

          # all the descriptive data that we want to search on, with different flavors for better recall and precision
          'descriptive_tiv' => all_search_text, # ICU tokenized, ICU folded
          'descriptive_text_nostem_i' => all_search_text, # whitespace tokenized, ICU folded, word delimited
          'descriptive_teiv' => all_search_text # ICU tokenized, ICU folded, minimal stemming
        }

        solr_doc.select { |_field, value| value.present? }
      end
      # rubocop:enable Metrics/MethodLength
      # rubocop:enable Metrics/AbcSize

      private

      def subject_temporal
        DorIndexing::Builders::TemporalBuilder.build(subjects)
      end

      def subject_geographic
        DorIndexing::Builders::GeographicBuilder.build(subjects)
      end

      def subjects
        @subjects ||= Array(cocina.description.subject)
      end

      def author_primary
        author_builder.build_primary
      end

      def author_all
        author_builder.build_all
      end

      def author_builder
        @author_builder ||= begin
          contributors = Array(cocina.description.contributor)
          DorIndexing::Builders::AuthorBuilder.new(contributors)
        end
      end

      def orcids
        contributors = Array(cocina.description.contributor)
        DorIndexing::Builders::OrcidBuilder.build(contributors)
      end

      def title
        Cocina::Models::Builders::TitleBuilder.build(cocina.description.title)
      end

      def forms
        @forms ||= Array(cocina.description.form)
      end

      # Values of the forms sourced from 'MODS resource types', leaving out
      # 'collection' and 'manuscript'.
      def resource_type
        @resource_type ||= begin
          mods_forms = forms.select do |form|
            form.source&.value == 'MODS resource types' && %w[collection manuscript].exclude?(form.value)
          end
          mods_forms.map(&:value)
        end
      end

      # Derives the SearchWorks format: specific combinations of resource type, genre,
      # issuance and frequency win; otherwise the mapped resource types and the
      # capitalized genres are combined.
      # rubocop:disable Metrics/CyclomaticComplexity
      # rubocop:disable Metrics/PerceivedComplexity
      # rubocop:disable Metrics/AbcSize
      def sw_format
        if resource_type?('software, multimedia')
          return ['Map'] if resource_type?('cartographic')
          return ['Dataset'] if genre?('dataset')
        end

        if resource_type?('text')
          return ['Archived website'] if genre?('archived website')
          return ['Book'] if issuance?('monographic')
          return ['Journal/Periodical'] if issuance?('continuing') || issuance?('serial') || frequency?
        end

        mapped_formats = flat_forms_for('resource type').map { |form| FORMAT[form.value&.downcase] }.uniq.compact
        # 'Archive/Manuscript' takes precedence over 'Book'
        mapped_formats.delete('Book') if mapped_formats.include?('Archive/Manuscript')

        # a lone 'Book' is returned without adding the genres
        return mapped_formats if mapped_formats == ['Book']

        capitalized_genres = flat_forms_for('genre').map { |form| form.value&.capitalize }.uniq

        (mapped_formats + capitalized_genres).presence
      end
      # rubocop:enable Metrics/CyclomaticComplexity
      # rubocop:enable Metrics/PerceivedComplexity
      # rubocop:enable Metrics/AbcSize

      def resource_type?(type)
        flat_forms_for('resource type').any? { |form| form.value == type }
      end

      def genre?(genre)
        flat_forms_for('genre').any? { |form| form.value == genre }
      end

      def issuance?(issuance)
        flat_event_notes.any? { |note| note.type == 'issuance' && note.value == issuance }
      end

      def frequency?
        flat_event_notes.any? { |note| note.type == 'frequency' }
      end

      # Flattened form values of the given type: all flattened values of a form that
      # has the type itself, otherwise only those flattened values that carry the type.
      def flat_forms_for(type)
        forms.flat_map do |form|
          flattened = flat_value(form)
          form.type == type ? flattened : flattened.select { |form_value| form_value.type == type }
        end
      end

      # Flattened notes of every event (or of its parallel events), memoized.
      def flat_event_notes
        @flat_event_notes ||= begin
          flattened_events = events.flat_map { |event| flat_event(event) }
          flattened_events.flat_map do |event|
            Array(event.note).flat_map { |note| flat_value(note) }
          end
        end
      end

      def pub_year
        DorIndexing::Selectors::PubYearSelector.build(events)
      end

      def creation_date
        @creation_date ||= DorIndexing::Builders::EventDateBuilder.build(creation_event, 'creation')
      end

      # Place of the first 'publication' event, falling back to the first event.
      def event_place
        publication = events.find { |event| event.type == 'publication' }
        DorIndexing::Builders::EventPlaceBuilder.build(publication || events.first)
      end

      # Publisher names, using the first parallel event of an event where there is one.
      def publisher_name
        publish_events = events.map { |event| event.parallelEvent&.first || event }
        return if publish_events.blank?

        DorIndexing::Builders::PublisherNameBuilder.build(publish_events)
      end

      def stemmable_topics
        all_subjects = Array(cocina.description.subject)
        DorIndexing::Builders::TopicBuilder.build(all_subjects, filter: 'topic')
      end

      def publication_event
        @publication_event ||= DorIndexing::Selectors::EventSelector.select(events, 'publication')
      end

      def creation_event
        @creation_event ||= DorIndexing::Selectors::EventSelector.select(events, 'creation')
      end

      def events
        @events ||= Array(cocina.description.event).compact
      end

      # The parallel events when present, otherwise the event wrapped by Array().
      def flat_event(event)
        event.parallelEvent.presence || Array(event)
      end

      # The first non-blank of parallelValue, groupedValue and structuredValue,
      # otherwise the value wrapped by Array().
      def flat_value(value)
        nested = value.parallelValue.presence || value.groupedValue.presence || value.structuredValue.presence
        nested || Array(value)
      end

      def all_search_text
        @all_search_text ||= DorIndexing::Builders::AllSearchTextBuilder.build(cocina.description)
      end
    end
    # rubocop:enable Metrics/ClassLength
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Indexers
    # Indexes the embargo status and release date of an object
    class EmbargoMetadataIndexer
      attr_reader :cocina

      def initialize(cocina:, **)
        @cocina = cocina
      end

      # These fields are used by the EmbargoReleaseService in dor-services-app
      # @return [Hash] the partial solr document for embargoMetadata; empty when
      #   the object has no embargo release date
      def to_solr
        release_date = embargo_release_date(cocina)
        return {} unless release_date.present?

        {
          'embargo_status_ssim' => ['embargoed'],
          'embargo_release_dtsim' => [release_date.utc.iso8601]
        }
      end

      private

      # @return the embargo release date, or nil when there is no embargo or the date is blank
      def embargo_release_date(cocina)
        release_date = cocina.access.embargo&.releaseDate
        release_date if release_date.present?
      end
    end
  end
end