dor_indexing 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +355 -0
  4. data/Gemfile +16 -0
  5. data/Gemfile.lock +218 -0
  6. data/README.md +33 -0
  7. data/Rakefile +11 -0
  8. data/dor_indexing.gemspec +40 -0
  9. data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
  10. data/lib/dor_indexing/builders/author_builder.rb +31 -0
  11. data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
  12. data/lib/dor_indexing/builders/document_builder.rb +106 -0
  13. data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
  14. data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
  15. data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
  16. data/lib/dor_indexing/builders/name_builder.rb +70 -0
  17. data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
  18. data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
  19. data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
  20. data/lib/dor_indexing/builders/topic_builder.rb +96 -0
  21. data/lib/dor_indexing/cocina_repository.rb +24 -0
  22. data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
  23. data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
  24. data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
  25. data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
  26. data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
  27. data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
  28. data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
  29. data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
  30. data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
  31. data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
  32. data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
  33. data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
  34. data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
  35. data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
  36. data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
  37. data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
  38. data/lib/dor_indexing/marc_country.rb +359 -0
  39. data/lib/dor_indexing/selectors/event_selector.rb +112 -0
  40. data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
  41. data/lib/dor_indexing/version.rb +5 -0
  42. data/lib/dor_indexing/workflow_fields.rb +63 -0
  43. data/lib/dor_indexing/workflow_solr_document.rb +93 -0
  44. data/lib/dor_indexing.rb +19 -0
  45. metadata +173 -0
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
+ # Builds the topic fields for a solr document
6
+ class TopicBuilder
7
+ # @param [Array] subjects
8
+ # @param [String] filter can either be 'topic' or 'name'
9
+ def self.build(subjects, filter:, remove_trailing_punctuation: false)
10
+ new(filter:, remove_trailing_punctuation:).build(subjects)
11
+ end
12
+
13
+ def initialize(filter:, remove_trailing_punctuation:)
14
+ @filter = filter
15
+ @remove_trailing_punctuation = remove_trailing_punctuation
16
+ end
17
+
18
+ def build(subjects)
19
+ topics(subjects).flat_map { |topic| flat_topic(topic) }.compact.uniq
20
+ end
21
+
22
+ private
23
+
24
+ attr_reader :filter
25
+
26
+ def remove_trailing_punctuation?
27
+ @remove_trailing_punctuation
28
+ end
29
+
30
+ # Filter the subjects we are interested in.
31
+ # Handles:
32
+ # parallelValue that contains structuredValue, where the parallelValue has the type, AND
33
+ # parallelValue that contains structuredValue, each with its own type, AND
34
+ # parallelValue that has a type conferred to its children, AND
35
+ # structuredValue that contains structuredValue, where the type can be at the higher or lower level.
36
+ def topics(subjects)
37
+ (
38
+ subjects.flat_map { |subject| basic_value(subject) } +
39
+ subjects.flat_map { |subject| structured_values(subject) } +
40
+ parallel_subjects(subjects)
41
+ ).compact
42
+ end
43
+
44
+ def parallel_subjects(subjects)
45
+ parallels = subjects.select(&:parallelValue)
46
+ parallels.flat_map { |subject| parallel_with_type(subject, subject.type) if subject.type } +
47
+ parallels.flat_map { |subject| topics(subject.parallelValue) unless subject.type }
48
+ end
49
+
50
+ def flat_topic(value)
51
+ if value.parallelValue.present?
52
+ value.parallelValue.flat_map { |topic| flat_topic(topic) }
53
+ elsif remove_trailing_punctuation?
54
+ # trailing spaces, commas, semicolons, and backslashes are dropped
55
+ Array(value.value&.sub(/[ ,;\\]+$/, ''))
56
+ else
57
+ Array(value.value)
58
+ end
59
+ end
60
+
61
+ def parallel_with_type(item, type_from_parent)
62
+ return unless type_matches_filter?(type_from_parent)
63
+
64
+ item
65
+ end
66
+
67
+ def basic_value(subject)
68
+ return create_fullname(subject) if filter == 'name' && subject.type == 'person'
69
+ return create_title(subject) if filter == 'name' && subject.type == 'title'
70
+
71
+ subject if type_matches_filter?(subject.type)
72
+ end
73
+
74
+ def structured_values(subject)
75
+ selected = Array(subject.structuredValue).select { |child| type_matches_filter?(child.type) }
76
+
77
+ topics(selected)
78
+ end
79
+
80
+ def create_title(title)
81
+ titles = Cocina::Models::Builders::TitleBuilder.build([title], strategy: :all, add_punctuation: false)
82
+ titles.map { |value| Cocina::Models::DescriptiveValue.new(value:) }
83
+ end
84
+
85
+ def create_fullname(name)
86
+ names = NameBuilder.build([name], strategy: :all)
87
+ names.map { |value| Cocina::Models::DescriptiveValue.new(value:) }
88
+ end
89
+
90
+ def type_matches_filter?(type)
91
+ (filter == 'name' && %w[person organization title occupation].include?(type)) ||
92
+ type == filter
93
+ end
94
+ end
95
+ end
96
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ # Interface for retrieving Cocina objects.
5
+ # In DSA, the concrete implementation backs this with CocinaObjectStore.
6
+ # In DIA, the concrete implementation backs this with Dor Services Client.
7
+ class CocinaRepository
8
+ class RepositoryError < StandardError; end
9
+
10
+ # @param [String] druid
11
+ # @return [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata]
12
+ # @raise [RepositoryError] if the object is not found or another error occurs
13
+ def find(druid)
14
+ raise NotImplementedError
15
+ end
16
+
17
+ # @param [String] druid
18
+ # @return [Array<String>] administrative tags
19
+ # @raise [RepositoryError] if the object is not found or another error occurs
20
+ def administrative_tags(druid)
21
+ raise NotImplementedError
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Indexers
5
+ # Index administrative tags for an object.
6
+ # NOTE: Most of this code was extracted from the dor-services gem:
7
+ # https://github.com/sul-dlss/dor-services/blob/v9.0.0/lib/dor/datastreams/identity_metadata_ds.rb#L196-L218
8
+ class AdministrativeTagIndexer
9
+ TAG_PART_DELIMITER = ' : '
10
+ SPECIAL_TAG_TYPES_TO_INDEX = ['Project', 'Registered By'].freeze
11
+
12
+ attr_reader :id
13
+
14
+ def initialize(id:, administrative_tags:, **)
15
+ @id = id
16
+ @administrative_tags = administrative_tags
17
+ end
18
+
19
+ # @return [Hash] the partial solr document for administrative tags
20
+ # rubocop:disable Metrics/MethodLength
21
+ # rubocop:disable Metrics/AbcSize
22
+ # rubocop:disable Metrics/CyclomaticComplexity
23
+ def to_solr
24
+ solr_doc = {
25
+ 'tag_ssim' => [],
26
+ 'tag_text_unstemmed_im' => [],
27
+ 'exploded_nonproject_tag_ssim' => []
28
+ }
29
+ administrative_tags.each do |tag|
30
+ tag_prefix, rest = tag.split(TAG_PART_DELIMITER, 2)
31
+ prefix = tag_prefix.downcase.strip.gsub(/\s/, '_')
32
+
33
+ solr_doc['tag_ssim'] << tag # for facet and display
34
+ solr_doc['tag_text_unstemmed_im'] << tag # for search
35
+
36
+ solr_doc['exploded_nonproject_tag_ssim'] += exploded_tags_from(tag) unless prefix == 'project'
37
+
38
+ next if SPECIAL_TAG_TYPES_TO_INDEX.exclude?(tag_prefix) || rest.nil?
39
+
40
+ (solr_doc["#{prefix}_tag_ssim"] ||= []) << rest.strip
41
+
42
+ if prefix == 'project'
43
+ solr_doc['exploded_project_tag_ssim'] ||= []
44
+ solr_doc['exploded_project_tag_ssim'] += exploded_tags_from(rest.strip)
45
+ end
46
+ end
47
+ solr_doc
48
+ end
49
+ # rubocop:enable Metrics/MethodLength
50
+ # rubocop:enable Metrics/AbcSize
51
+ # rubocop:enable Metrics/CyclomaticComplexity
52
+
53
+ private
54
+
55
+ attr_reader :administrative_tags
56
+
57
+ # solrize each possible prefix for the tag, inclusive of the full tag.
58
+ # e.g., for a tag such as "A : B : C", this will solrize to an _ssim field
59
+ # that contains ["A", "A : B", "A : B : C"].
60
+ def exploded_tags_from(tag)
61
+ tag_parts = tag.split(TAG_PART_DELIMITER)
62
+
63
+ 1.upto(tag_parts.count).map do |i|
64
+ tag_parts.take(i).join(TAG_PART_DELIMITER)
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Indexers
5
+ # Indexes the collection title
6
+ class CollectionTitleIndexer
7
+ attr_reader :cocina, :parent_collections
8
+
9
+ def initialize(cocina:, parent_collections:, **)
10
+ @cocina = cocina
11
+ @parent_collections = parent_collections
12
+ end
13
+
14
+ # @return [Hash] the partial solr document for the titles of the parent collections
15
+ def to_solr
16
+ {}.tap do |solr_doc|
17
+ parent_collections.each do |related_obj|
18
+ coll_title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
19
+
20
+ # create/append collection_title_tesim and collection_title_ssim
21
+ ::Solrizer.insert_field(solr_doc, 'collection_title', coll_title, :stored_searchable, :symbol)
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Indexers
5
+ # Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb
6
+ class CompositeIndexer
7
+ attr_reader :indexers
8
+
9
+ def initialize(*indexers)
10
+ @indexers = indexers
11
+ end
12
+
13
+ def new(**)
14
+ Instance.new(indexers, **)
15
+ end
16
+
17
+ # Instance for a composite indexer
18
+ class Instance
19
+ attr_reader :indexers
20
+
21
+ def initialize(indexers, **)
22
+ @indexers = indexers.map do |i|
23
+ i.new(**)
24
+ rescue ArgumentError => e
25
+ raise ArgumentError, "Unable to initialize #{i}. #{e.message}"
26
+ end
27
+ end
28
+
29
+ # @return [Hash] the merged solr document for all the sub-indexers
30
+ def to_solr
31
+ indexers.map(&:to_solr).inject({}, &:merge)
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Indexers
5
+ # Indexes the content metadata
6
+ class ContentMetadataIndexer
7
+ attr_reader :cocina
8
+
9
+ def initialize(cocina:, **)
10
+ @cocina = cocina
11
+ end
12
+
13
+ # @return [Hash] the partial solr document for contentMetadata
14
+ def to_solr
15
+ {
16
+ 'content_type_ssim' => type(cocina.type),
17
+ 'content_file_mimetypes_ssim' => files.map(&:hasMimeType).uniq,
18
+ 'content_file_count_itsi' => files.size,
19
+ 'shelved_content_file_count_itsi' => shelved_files.size,
20
+ 'resource_count_itsi' => file_sets.size,
21
+ 'preserved_size_dbtsi' => preserved_size, # double (trie) to support very large sizes
22
+ 'content_file_roles_ssim' => files.filter_map(&:use),
23
+ # first_shelved_image is neither indexed nor multiple
24
+ 'first_shelved_image_ss' => first_shelved_image
25
+ }
26
+ end
27
+
28
+ private
29
+
30
+ def first_shelved_image
31
+ shelved_files.find { |file| file.filename.end_with?('jp2') }&.filename
32
+ end
33
+
34
+ def shelved_files
35
+ files.select { |file| file.administrative.shelve }
36
+ end
37
+
38
+ def preserved_size
39
+ files.select { |file| file.administrative.sdrPreserve }
40
+ .filter_map(&:size).sum # filter out missing size
41
+ end
42
+
43
+ def files
44
+ @files ||= file_sets.flat_map { |fs| fs.structural.contains }
45
+ end
46
+
47
+ def file_sets
48
+ @file_sets ||= Array(cocina.structural&.contains)
49
+ end
50
+
51
+ TYPES = {
52
+ Cocina::Models::ObjectType.image => 'image',
53
+ Cocina::Models::ObjectType.manuscript => 'image',
54
+ Cocina::Models::ObjectType.book => 'book',
55
+ Cocina::Models::ObjectType.map => 'map',
56
+ Cocina::Models::ObjectType.three_dimensional => '3d',
57
+ Cocina::Models::ObjectType.media => 'media',
58
+ Cocina::Models::ObjectType.webarchive_seed => 'webarchive-seed',
59
+ Cocina::Models::ObjectType.webarchive_binary => 'webarchive-binary',
60
+ Cocina::Models::ObjectType.geo => 'geo',
61
+ Cocina::Models::ObjectType.document => 'document'
62
+ }.freeze
63
+
64
+ def type(object_type)
65
+ TYPES.fetch(object_type, 'file')
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Indexers
5
+ # Indexing provided by ActiveFedora
6
+ class DataIndexer
7
+ attr_reader :cocina, :workflow_client
8
+
9
+ def initialize(cocina:, workflow_client:, **)
10
+ @cocina = cocina
11
+ @workflow_client = workflow_client
12
+ end
13
+
14
+ # rubocop:disable Metrics/AbcSize
15
+ # rubocop:disable Metrics/MethodLength
16
+ def to_solr
17
+ {}.tap do |solr_doc|
18
+ solr_doc[:id] = cocina.externalIdentifier
19
+ solr_doc['current_version_isi'] = cocina.version # Argo Facet field "Version"
20
+ solr_doc['obj_label_tesim'] = cocina.label
21
+
22
+ solr_doc['modified_latest_dttsi'] = modified_latest
23
+ solr_doc['created_at_dttsi'] = created_at
24
+
25
+ # is_member_of_collection_ssim is used by dor-services-app for querying for members of a
26
+ # collection and it is a facet in Argo
27
+ solr_doc['is_member_of_collection_ssim'] = legacy_collections
28
+ solr_doc['is_governed_by_ssim'] = legacy_apo # Argo facet
29
+
30
+ # Used so that DSA can generate public XML whereby a constituent can find the virtual object it is part of.
31
+ solr_doc['has_constituents_ssim'] = virtual_object_constituents
32
+ end.merge(DorIndexing::WorkflowFields.for(druid: cocina.externalIdentifier, version: cocina.version, workflow_client:))
33
+ .transform_keys(&:to_s)
34
+ end
35
+ # rubocop:enable Metrics/AbcSize
36
+ # rubocop:enable Metrics/MethodLength
37
+
38
+ def modified_latest
39
+ cocina.modified.to_datetime.strftime('%FT%TZ')
40
+ end
41
+
42
+ def created_at
43
+ cocina.created.to_datetime.strftime('%FT%TZ')
44
+ end
45
+
46
+ def legacy_collections
47
+ case cocina.type
48
+ when Cocina::Models::ObjectType.admin_policy, Cocina::Models::ObjectType.collection
49
+ []
50
+ else
51
+ Array(cocina.structural&.isMemberOf).map { |col_id| "info:fedora/#{col_id}" }
52
+ end
53
+ end
54
+
55
+ def virtual_object_constituents
56
+ return unless cocina.dro?
57
+
58
+ Array(cocina.structural&.hasMemberOrders).first&.members
59
+ end
60
+
61
+ def legacy_apo
62
+ "info:fedora/#{cocina.administrative.hasAdminPolicy}"
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Indexers
5
+ # Indexes the default object rights
6
+ class DefaultObjectRightsIndexer
7
+ attr_reader :cocina
8
+
9
+ def initialize(cocina:, **)
10
+ @cocina = cocina
11
+ end
12
+
13
+ # @return [Hash] the partial solr document for defaultObjectRights
14
+ def to_solr
15
+ return {} unless cocina.administrative.accessTemplate
16
+
17
+ {
18
+ 'use_statement_ssim' => use_statement,
19
+ 'copyright_ssim' => copyright,
20
+ 'rights_descriptions_ssim' => 'dark',
21
+ 'default_rights_descriptions_ssim' => Cocina::Models::Builders::RightsDescriptionBuilder.build(cocina)
22
+ }
23
+ end
24
+
25
+ private
26
+
27
+ def use_statement
28
+ cocina.administrative.accessTemplate.useAndReproductionStatement
29
+ end
30
+
31
+ def copyright
32
+ cocina.administrative.accessTemplate.copyright
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,226 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stanford-mods'
4
+
5
+ class DorIndexing
6
+ module Indexers
7
+ # rubocop:disable Metrics/ClassLength
8
+ # Indexes the descriptive metadata
9
+ class DescriptiveMetadataIndexer
10
+ attr_reader :cocina, :stanford_mods_record
11
+
12
+ def initialize(cocina:, **)
13
+ @cocina = cocina
14
+ mods_ng = Cocina::Models::Mapping::ToMods::Description.transform(cocina.description, cocina.externalIdentifier)
15
+ @stanford_mods_record = Stanford::Mods::Record.new.from_nk_node(mods_ng.root)
16
+ end
17
+
18
+ # @return [Hash] the partial solr document for descriptive metadata
19
+ # rubocop:disable Metrics/MethodLength
20
+ # rubocop:disable Metrics/AbcSize
21
+ def to_solr
22
+ {
23
+ # title
24
+ 'sw_display_title_tesim' => title,
25
+ # contributor
26
+ 'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
27
+ 'sw_author_tesim' => author_primary, # used for author display in Argo
28
+ 'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
29
+ 'contributor_orcids_ssim' => orcids,
30
+ # topic
31
+ 'topic_ssim' => stanford_mods_record.topic_facet&.uniq,
32
+ 'topic_tesim' => stemmable_topics,
33
+ # publication
34
+ 'originInfo_date_created_tesim' => creation_date,
35
+ 'originInfo_publisher_tesim' => publisher_name,
36
+ 'originInfo_place_placeTerm_tesim' => event_place, # do we want this?
37
+ 'sw_pub_date_facet_ssi' => stanford_mods_record.pub_year_int.to_s, # SW Date facet
38
+
39
+ 'metadata_format_ssim' => 'mods', # no longer used? https://github.com/search?q=org%3Asul-dlss+metadata_format_ssim&type=code
40
+
41
+ # SW facets plus a friend facet
42
+ 'sw_format_ssim' => sw_format, # SW Resource Type facet
43
+ 'mods_typeOfResource_ssim' => resource_type, # MODS Resource Type facet
44
+ 'sw_genre_ssim' => stanford_mods_record.sw_genre, # SW Genre facet
45
+ 'sw_language_ssim' => stanford_mods_record.sw_language_facet, # SW Language facet
46
+ 'sw_subject_temporal_ssim' => stanford_mods_record.era_facet, # SW Era facet
47
+ 'sw_subject_geographic_ssim' => subject_geographic, # SW Region facet
48
+
49
+ # all the descriptive data that we want to search on, with different flavors for better recall and precision
50
+ 'descriptive_tiv' => all_search_text, # ICU tokenized, ICU folded
51
+ 'descriptive_text_nostem_i' => all_search_text, # whitespace tokenized, ICU folded, word delimited
52
+ 'descriptive_teiv' => all_search_text # ICU tokenized, ICU folded, minimal stemming
53
+ }.select { |_k, v| v.present? }
54
+ end
55
+ # rubocop:enable Metrics/MethodLength
56
+ # rubocop:enable Metrics/AbcSize
57
+
58
+ private
59
+
60
+ def subject_temporal
61
+ DorIndexing::Builders::TemporalBuilder.build(subjects)
62
+ end
63
+
64
+ def subject_geographic
65
+ DorIndexing::Builders::GeographicBuilder.build(subjects)
66
+ end
67
+
68
+ def subjects
69
+ @subjects ||= Array(cocina.description.subject)
70
+ end
71
+
72
+ def author_primary
73
+ author_builder.build_primary
74
+ end
75
+
76
+ def author_all
77
+ author_builder.build_all
78
+ end
79
+
80
+ def author_builder
81
+ @author_builder ||= DorIndexing::Builders::AuthorBuilder.new(Array(cocina.description.contributor))
82
+ end
83
+
84
+ def orcids
85
+ DorIndexing::Builders::OrcidBuilder.build(Array(cocina.description.contributor))
86
+ end
87
+
88
+ def title
89
+ Cocina::Models::Builders::TitleBuilder.build(cocina.description.title)
90
+ end
91
+
92
+ def forms
93
+ @forms ||= Array(cocina.description.form)
94
+ end
95
+
96
+ def resource_type
97
+ @resource_type ||= forms.select do |form|
98
+ form.source&.value == 'MODS resource types' &&
99
+ %w[collection manuscript].exclude?(form.value)
100
+ end.map(&:value)
101
+ end
102
+
103
+ # See https://github.com/sul-dlss/stanford-mods/blob/master/lib/stanford-mods/searchworks.rb#L244
104
+ FORMAT = {
105
+ 'cartographic' => 'Map',
106
+ 'manuscript' => 'Archive/Manuscript',
107
+ 'mixed material' => 'Archive/Manuscript',
108
+ 'moving image' => 'Video',
109
+ 'notated music' => 'Music score',
110
+ 'software, multimedia' => 'Software/Multimedia',
111
+ 'sound recording-musical' => 'Music recording',
112
+ 'sound recording-nonmusical' => 'Sound recording',
113
+ 'sound recording' => 'Sound recording',
114
+ 'still image' => 'Image',
115
+ 'three dimensional object' => 'Object',
116
+ 'text' => 'Book'
117
+ }.freeze
118
+
119
+ # rubocop:disable Metrics/CyclomaticComplexity
120
+ # rubocop:disable Metrics/PerceivedComplexity
121
+ # rubocop:disable Metrics/AbcSize
122
+ def sw_format
123
+ return ['Map'] if resource_type?('software, multimedia') && resource_type?('cartographic')
124
+ return ['Dataset'] if resource_type?('software, multimedia') && genre?('dataset')
125
+ return ['Archived website'] if resource_type?('text') && genre?('archived website')
126
+ return ['Book'] if resource_type?('text') && issuance?('monographic')
127
+ return ['Journal/Periodical'] if resource_type?('text') && (issuance?('continuing') || issuance?('serial') || frequency?)
128
+
129
+ resource_type_formats = flat_forms_for('resource type').map { |form| FORMAT[form.value&.downcase] }.uniq.compact
130
+ resource_type_formats.delete('Book') if resource_type_formats.include?('Archive/Manuscript')
131
+
132
+ return resource_type_formats if resource_type_formats == ['Book']
133
+
134
+ genre_formats = flat_forms_for('genre').map { |form| form.value&.capitalize }.uniq
135
+
136
+ (resource_type_formats + genre_formats).presence
137
+ end
138
+ # rubocop:enable Metrics/CyclomaticComplexity
139
+ # rubocop:enable Metrics/PerceivedComplexity
140
+ # rubocop:enable Metrics/AbcSize
141
+
142
+ def resource_type?(type)
143
+ flat_forms_for('resource type').any? { |form| form.value == type }
144
+ end
145
+
146
+ def genre?(genre)
147
+ flat_forms_for('genre').any? { |form| form.value == genre }
148
+ end
149
+
150
+ def issuance?(issuance)
151
+ flat_event_notes.any? { |note| note.type == 'issuance' && note.value == issuance }
152
+ end
153
+
154
+ def frequency?
155
+ flat_event_notes.any? { |note| note.type == 'frequency' }
156
+ end
157
+
158
+ def flat_forms_for(type)
159
+ forms.flat_map do |form|
160
+ if form.type == type
161
+ flat_value(form)
162
+ else
163
+ flat_value(form).select { |form_value| form_value.type == type }
164
+ end
165
+ end
166
+ end
167
+
168
+ def flat_event_notes
169
+ @flat_event_notes ||= events.flat_map { |event| flat_event(event) }.flat_map do |event|
170
+ Array(event.note).flat_map do |note|
171
+ flat_value(note)
172
+ end
173
+ end
174
+ end
175
+
176
+ def pub_year
177
+ DorIndexing::Selectors::PubYearSelector.build(events)
178
+ end
179
+
180
+ def creation_date
181
+ @creation_date ||= DorIndexing::Builders::EventDateBuilder.build(creation_event, 'creation')
182
+ end
183
+
184
+ def event_place
185
+ place_event = events.find { |event| event.type == 'publication' } || events.first
186
+ DorIndexing::Builders::EventPlaceBuilder.build(place_event)
187
+ end
188
+
189
+ def publisher_name
190
+ publish_events = events.map { |event| event.parallelEvent&.first || event }
191
+ return if publish_events.blank?
192
+
193
+ DorIndexing::Builders::PublisherNameBuilder.build(publish_events)
194
+ end
195
+
196
+ def stemmable_topics
197
+ DorIndexing::Builders::TopicBuilder.build(Array(cocina.description.subject), filter: 'topic')
198
+ end
199
+
200
+ def publication_event
201
+ @publication_event ||= DorIndexing::Selectors::EventSelector.select(events, 'publication')
202
+ end
203
+
204
+ def creation_event
205
+ @creation_event ||= DorIndexing::Selectors::EventSelector.select(events, 'creation')
206
+ end
207
+
208
+ def events
209
+ @events ||= Array(cocina.description.event).compact
210
+ end
211
+
212
+ def flat_event(event)
213
+ event.parallelEvent.presence || Array(event)
214
+ end
215
+
216
+ def flat_value(value)
217
+ value.parallelValue.presence || value.groupedValue.presence || value.structuredValue.presence || Array(value)
218
+ end
219
+
220
+ def all_search_text
221
+ @all_search_text ||= DorIndexing::Builders::AllSearchTextBuilder.build(cocina.description)
222
+ end
223
+ end
224
+ # rubocop:enable Metrics/ClassLength
225
+ end
226
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Indexers
5
+ # Indexes the embargo metadata
6
+ class EmbargoMetadataIndexer
7
+ attr_reader :cocina
8
+
9
+ def initialize(cocina:, **)
10
+ @cocina = cocina
11
+ end
12
+
13
+ # These fields are used by the EmbargoReleaseService in dor-services-app
14
+ # @return [Hash] the partial solr document for embargoMetadata
15
+ def to_solr
16
+ {}.tap do |solr_doc|
17
+ embargo_release_date = embargo_release_date(cocina)
18
+ if embargo_release_date.present?
19
+ solr_doc['embargo_status_ssim'] = ['embargoed']
20
+ solr_doc['embargo_release_dtsim'] = [embargo_release_date.utc.iso8601]
21
+ end
22
+ end
23
+ end
24
+
25
+ private
26
+
27
+ def embargo_release_date(cocina)
28
+ cocina.access.embargo.releaseDate if cocina.access.embargo&.releaseDate.present?
29
+ end
30
+ end
31
+ end
32
+ end