dor_indexing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +355 -0
  4. data/Gemfile +16 -0
  5. data/Gemfile.lock +218 -0
  6. data/README.md +33 -0
  7. data/Rakefile +11 -0
  8. data/dor_indexing.gemspec +40 -0
  9. data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
  10. data/lib/dor_indexing/builders/author_builder.rb +31 -0
  11. data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
  12. data/lib/dor_indexing/builders/document_builder.rb +106 -0
  13. data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
  14. data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
  15. data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
  16. data/lib/dor_indexing/builders/name_builder.rb +70 -0
  17. data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
  18. data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
  19. data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
  20. data/lib/dor_indexing/builders/topic_builder.rb +96 -0
  21. data/lib/dor_indexing/cocina_repository.rb +24 -0
  22. data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
  23. data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
  24. data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
  25. data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
  26. data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
  27. data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
  28. data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
  29. data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
  30. data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
  31. data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
  32. data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
  33. data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
  34. data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
  35. data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
  36. data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
  37. data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
  38. data/lib/dor_indexing/marc_country.rb +359 -0
  39. data/lib/dor_indexing/selectors/event_selector.rb +112 -0
  40. data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
  41. data/lib/dor_indexing/version.rb +5 -0
  42. data/lib/dor_indexing/workflow_fields.rb +63 -0
  43. data/lib/dor_indexing/workflow_solr_document.rb +93 -0
  44. data/lib/dor_indexing.rb +19 -0
  45. metadata +173 -0
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Gathers the searchable text of a Cocina description into one space-joined string.
class AllSearchTextBuilder
  # Attributes whose content is added to the search text.
  # this originally had displayLabel, but Arcadia recommends against it
  TEXT_KEYS = %i[value].freeze

  # Attributes that hold nested descriptive values; each one is walked recursively,
  # in the order listed here.
  RECURSE_KEYS = %i[
    structuredValue parallelValue groupedValue
    title contributor event form language note relatedResource subject
    name location
  ].freeze

  # @param cocina_description [Object] root node; attributes it does not respond to are skipped
  # @return [String] every non-blank text value found, joined by single spaces
  def self.build(cocina_description)
    new(cocina_description).build
  end

  def initialize(cocina_description)
    @cocina_description = cocina_description
  end

  # Walks the description depth-first and joins the collected values.
  # @return [String]
  def build
    @text = []
    recurse(cocina_description)
    text.join(' ')
  end

  private

  attr_reader :cocina_description, :text

  # Records this node's own text first, then descends into each nested value.
  def recurse(desc)
    text.concat(TEXT_KEYS.map { |key| desc.try(key) }.select(&:present?))

    children = RECURSE_KEYS.flat_map { |key| Array(desc.try(key)) }
    children.each { |child| recurse(child) }
  end
end
57
+ end
58
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Builds the author fields for a solr document by delegating name formatting to NameBuilder.
class AuthorBuilder
  # @param cocina_contributors [Array, nil] contributors responding to #name and #status;
  #   nil is treated as an empty list
  def initialize(cocina_contributors)
    @cocina_contributors = Array(cocina_contributors)
  end

  # Formats the name of the contributor flagged 'primary', falling back to the first contributor.
  # @return [Object, nil] whatever NameBuilder.build_primary_name returns; nil when there are no contributors
  def build_primary
    contributor = primary_cocina_contributor || cocina_contributors.first
    return unless contributor

    NameBuilder.build_primary_name(contributor.name)
  end

  # Formats the names of all contributors; contributors whose #name is nil are skipped.
  # @return [Object] whatever NameBuilder.build_all returns for the collected name lists
  def build_all
    NameBuilder.build_all(cocina_contributors.filter_map(&:name))
  end

  private

  attr_reader :cocina_contributors

  # @return [Object, nil] first contributor whose status is 'primary'
  def primary_cocina_contributor
    cocina_contributors.find { |cocina_contributor| cocina_contributor.status == 'primary' }
  end
end
30
+ end
31
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Derives the rights description of a collection from its access view.
class CollectionRightsDescriptionBuilder
  # @param cocina [Object] collection responding to #access, whose result responds to #view
  # @return [String] 'world' or 'dark'
  def self.build(cocina)
    new(cocina).build
  end

  def initialize(cocina)
    @cocina = cocina
  end

  # Any view other than 'world' is reported as 'dark'.
  # @return [String]
  def build
    view = cocina.access.view
    view == 'world' ? 'world' : 'dark'
  end

  private

  attr_reader :cocina
end
28
+ end
29
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Chooses the composite indexer that matches a Cocina object's type and instantiates it
# with the data the individual indexers need.
class DocumentBuilder
  ADMIN_POLICY_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
    DorIndexing::Indexers::AdministrativeTagIndexer,
    DorIndexing::Indexers::DataIndexer,
    DorIndexing::Indexers::RoleMetadataIndexer,
    DorIndexing::Indexers::DefaultObjectRightsIndexer,
    DorIndexing::Indexers::IdentityMetadataIndexer,
    DorIndexing::Indexers::DescriptiveMetadataIndexer,
    DorIndexing::Indexers::IdentifiableIndexer,
    DorIndexing::Indexers::WorkflowsIndexer
  )

  COLLECTION_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
    DorIndexing::Indexers::AdministrativeTagIndexer,
    DorIndexing::Indexers::DataIndexer,
    DorIndexing::Indexers::RightsMetadataIndexer,
    DorIndexing::Indexers::IdentityMetadataIndexer,
    DorIndexing::Indexers::DescriptiveMetadataIndexer,
    DorIndexing::Indexers::IdentifiableIndexer,
    DorIndexing::Indexers::ReleasableIndexer,
    DorIndexing::Indexers::WorkflowsIndexer
  )

  ITEM_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
    DorIndexing::Indexers::AdministrativeTagIndexer,
    DorIndexing::Indexers::DataIndexer,
    DorIndexing::Indexers::RightsMetadataIndexer,
    DorIndexing::Indexers::IdentityMetadataIndexer,
    DorIndexing::Indexers::DescriptiveMetadataIndexer,
    DorIndexing::Indexers::EmbargoMetadataIndexer,
    DorIndexing::Indexers::ContentMetadataIndexer,
    DorIndexing::Indexers::IdentifiableIndexer,
    DorIndexing::Indexers::CollectionTitleIndexer,
    DorIndexing::Indexers::ReleasableIndexer,
    DorIndexing::Indexers::WorkflowsIndexer
  )

  # Object types with a dedicated indexer; every other type falls back to ITEM_INDEXER.
  INDEXERS = {
    Cocina::Models::ObjectType.agreement => ITEM_INDEXER, # Agreement uses same indexer as item
    Cocina::Models::ObjectType.admin_policy => ADMIN_POLICY_INDEXER,
    Cocina::Models::ObjectType.collection => COLLECTION_INDEXER
  }.freeze

  # Class-wide cache of collections already fetched, keyed by collection druid.
  @@parent_collections = {} # rubocop:disable Style/ClassVars

  # Convenience entry point: builds an instance and returns the result of #for.
  def self.for(model:, workflow_client:, cocina_repository:)
    builder = new(model: model, workflow_client: workflow_client, cocina_repository: cocina_repository)
    builder.for
  end

  # Empties the class-wide parent collection cache.
  def self.reset_parent_collections
    @@parent_collections = {} # rubocop:disable Style/ClassVars
  end

  # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
  # @param workflow_client [Object] handed through unchanged to the indexer
  # @param cocina_repository [Object] used to look up parent collections and administrative tags
  def initialize(model:, workflow_client:, cocina_repository:)
    @model = model
    @workflow_client = workflow_client
    @cocina_repository = cocina_repository
  end

  # Instantiates the composite indexer registered for the model's type.
  # @return [Object] the result of calling #new on the selected composite indexer
  def for
    composite = INDEXERS.fetch(model.type, ITEM_INDEXER)
    composite.new(
      id: id,
      cocina: model,
      parent_collections: parent_collections,
      administrative_tags: administrative_tags,
      workflow_client: workflow_client,
      cocina_repository: cocina_repository
    )
  end

  private

  attr_reader :model, :workflow_client, :cocina_repository

  def id
    model.externalIdentifier
  end

  # Collections the model is a member of; always empty for anything that is not a DRO.
  # Collections that cannot be loaded are left out.
  def parent_collections
    return [] unless model.dro?

    member_of = Array(model.structural&.isMemberOf)
    member_of.filter_map { |rel_druid| cached_collection(rel_druid) }
  end

  # Fetches a collection through the class-wide cache.
  # @return [Object, nil] nil when the repository raises RepositoryError for the druid
  def cached_collection(rel_druid)
    @@parent_collections[rel_druid] ||= cocina_repository.find(rel_druid)
  rescue DorIndexing::CocinaRepository::RepositoryError
    # This may happen if the referenced Collection does not exist (bad data)
    Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
    nil
  end

  # Administrative tags for the model; a repository error yields an empty list.
  def administrative_tags
    cocina_repository.administrative_tags(id)
  rescue DorIndexing::CocinaRepository::RepositoryError
    []
  end
end
105
+ end
106
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Builds the event date fields for a solr document: picks one date string from an event.
class EventDateBuilder
  # Preference order: a date of the requested type flagged primary, then any date of the
  # requested type, then a date with no type at all. Dates of parallel events are considered too.
  #
  # @param [Cocina::Models::Event] event single selected event (nil is tolerated)
  # @param [String] date_type the date type to look for
  # @return [String, nil] the date value for Solr
  def self.build(event, date_type)
    own_dates = Array(event&.date)
    parallel_dates = Array(event&.parallelEvent&.map(&:date))
    # Normalise once here instead of in every helper.
    event_dates = (own_dates + parallel_dates).flatten.compact

    matching_date_value_with_status_primary(event_dates, date_type) ||
      matching_date_value(event_dates, date_type) ||
      untyped_date_value(event_dates)
  end

  # @return [String, nil] value of the first date of type date_type that has status primary
  def self.matching_date_value_with_status_primary(event_dates, date_type)
    date = event_dates.find do |candidate|
      candidate.type == date_type && DorIndexing::Selectors::EventSelector.date_status_primary(candidate)
    end
    date_value(date) if date
  end

  # @return [String, nil] value of the first date of type date_type
  def self.matching_date_value(event_dates, date_type)
    date = event_dates.find { |candidate| candidate.type == date_type }
    date_value(date) if date
  end

  # @return [String, nil] value of the first date without a type
  def self.untyped_date_value(event_dates)
    date = event_dates.find { |candidate| candidate.type.blank? }
    date_value(date) if date
  end

  # Looks for a non-blank value on the date itself, then on its structured values,
  # then on its parallel values, in that order.
  #
  # @param [Cocina::Models::DescriptiveValue] date a date object from an event
  # @return [String, nil] value from date object
  def self.date_value(date)
    return unless date

    candidates = [date, *Array(date.structuredValue), *Array(date.parallelValue)]
    candidates.each do |node|
      value = node&.value
      return value if value.present?
    end
    nil
  end

  private_class_method :matching_date_value_with_status_primary,
                       :matching_date_value,
                       :untyped_date_value,
                       :date_value
end
70
+ end
71
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Determines which place string to index for a publication event.
class EventPlaceBuilder
  # @param [Cocina::Models::Event] event
  # @return [String, nil] the place value for Solr; nil when there is no event or no usable location
  def self.build(event)
    new(event).build
  end

  def initialize(event)
    @event = event
  end

  # A location flagged primary is preferred; otherwise all locations are considered together.
  def build
    return unless event

    primary_location || location_from(flat_locations)
  end

  private

  attr_reader :event

  def primary_location
    primary = flat_locations.find { |location| location.status == 'primary' }
    return if primary.nil?

    location_from([primary])
  end

  # Tries plain values first, then MARC country text, then MARC country code/URI lookups.
  def location_from(locations)
    return if locations.empty?

    value_locations_for(locations) ||
      marccountry_text_for(locations) ||
      marccountry_code_for(locations)
  end

  # Locations with parallel or structured values are replaced by those nested values.
  def flat_locations
    @flat_locations ||= event_locations.flat_map do |location|
      location.parallelValue.presence || location.structuredValue.presence || location
    end
  end

  # When parallel events are present only their locations are used.
  def event_locations
    parallel_events = event.parallelEvent
    return Array(event.location) unless parallel_events.present?

    parallel_events.flat_map { |parallel_event| Array(parallel_event.location) }
  end

  def marccountry_text_for(locations)
    locations.each do |location|
      return location.value if marc_country?(location) && location.value
    end
    nil
  end

  def marccountry_code_for(locations)
    country_from_code(locations) || country_from_uri(locations)
  end

  def country_from_code(locations)
    coded = locations.find { |location| marc_country?(location) && location.code }
    DorIndexing::MarcCountry.from_code(coded&.code)
  end

  def country_from_uri(locations)
    with_uri = locations.find { |location| location.uri&.start_with?(DorIndexing::MarcCountry::MARC_COUNTRY_URI) }
    DorIndexing::MarcCountry.from_uri(with_uri&.uri)
  end

  # Joins the values of all non-MARC-country locations with ' : '.
  def value_locations_for(locations)
    values = locations.filter_map { |location| location.value unless marc_country?(location) }
    values.join(' : ').presence
  end

  def marc_country?(location)
    source = location.source
    source&.code == DorIndexing::MarcCountry::MARC_COUNTRY_CODE ||
      source&.uri == DorIndexing::MarcCountry::MARC_COUNTRY_URI
  end
end
72
+ end
73
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Builds the geographic fields for a solr document from subjects of type 'place'.
class GeographicBuilder
  # @param [Array<Cocina::Models::Subject>] subjects
  # @return [Array<String>] the geographic values for Solr
  def self.build(subjects)
    new(subjects).build
  end

  def initialize(subjects)
    @subjects = Array(subjects)
  end

  def build
    extract_place_from_subjects(subjects)
  end

  # Collects places from subjects typed 'place' and from the parallel/structured values
  # of untyped subjects. Duplicates are removed.
  # @return [Array<String>]
  # rubocop:disable Metrics/AbcSize
  def extract_place_from_subjects(local_subjects)
    (
      build_place_nodes(local_subjects.select { |node| node.type == 'place' }) +
      local_subjects.reject(&:type).flat_map do |subject|
        next extract_place_from_subjects(subject.parallelValue) if subject.parallelValue.present?

        build_place_nodes(Array(subject.structuredValue).select { |node| node.type == 'place' })
      end
    ).uniq
  end
  # rubocop:enable Metrics/AbcSize

  private

  attr_reader :subjects

  def build_place_nodes(nodes)
    Array(nodes).flat_map { |node| build_place(node) }
  end

  # Gathers the node's own value, the label looked up from its code, its structured values
  # joined by spaces, and the places of its parallel values.
  # @param [Cocina::Models::DescriptiveValue]
  # @return [Array<String>]
  def build_place(node)
    remove_trailing_punctuation(
      Array(node.value) +
      place_from_code(node) +
      build_hierarchical_subject(node) +
      Array(node.parallelValue).flat_map { |child| build_place(child) }
    )
  end

  # Looks the node's code up in the MARC vocabulary named by its source.
  # @return [Array<String>] one label, or empty when there is no code/source, the source is
  #   not marcgac/marccountry, or the lookup raises KeyError
  # rubocop:disable Metrics/MethodLength
  def place_from_code(node)
    return [] unless node.code && node.source

    code = node.code.gsub(/[^\w-]/, '') # remove any punctuation (except dash).
    case node.source.code
    when 'marcgac'
      [Marc::Vocab::GeographicArea.fetch(code)]
    when 'marccountry'
      [Marc::Vocab::Country.fetch(code)]
    else
      []
    end
  rescue KeyError
    # Per Arcadia, halt HB notification until after data clean-up.
    # Honeybadger.notify("[DATA ERROR] Unable to find \"#{code}\" in authority \"#{node.source.code}\"")
    []
  end
  # rubocop:enable Metrics/MethodLength

  def build_hierarchical_subject(node)
    Array(node.structuredValue&.map(&:value).presence&.join(' '))
  end

  # Strips spaces, commas, slashes, backslashes and semicolons from the end of each string.
  # \Z anchors at the end of the string (allowing one final newline); the previous `$`
  # anchored at the end of the first line, so multi-line values were trimmed mid-string.
  def remove_trailing_punctuation(strings)
    strings.map { |str| str.sub(%r{[ ,\\/;]+\Z}, '') }
  end
end
81
+ end
82
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Formats Cocina name values (plain, structured, grouped or parallel) as display strings.
class NameBuilder
  # Formats every name in every list; names that format to nil are dropped.
  #
  # @param cocina_contributors [Array<Array>] one list of name values per contributor
  #   (AuthorBuilder#build_all passes each contributor's `name` array)
  # @return [Array<String>] names
  def self.build_all(cocina_contributors)
    cocina_contributors
      .flat_map { |names| flat_names_for(names) }
      .filter_map { |name| build_name(name) }
  end

  # Picks one name: a name of type 'display' first, then a name with status 'primary'.
  #
  # @param names [Array, Object] name values; anything that is not an Array is coerced with Array()
  # @param [Symbol] strategy ":first" is the strategy for how to choose a name if primary and display name is not found
  # @return [String, nil] name
  def self.build_primary_name(names, strategy: :first)
    flat_names = flat_names_for(Array(names))
    name = display_name_for(flat_names) || primary_name_for(flat_names)
    name ||= flat_names.first if strategy == :first
    return build_name(name) if name

    # With any other strategy, fall back to the first name that formats to something.
    flat_names.filter_map { |one| build_name(one) }.first
  end

  # Formats a single name value.
  # - grouped value: the value of the first group member of type 'name'
  # - structured value: "name" parts, or "surname, forename", followed by term of address,
  #   life dates and activity dates
  # - otherwise: the plain value
  #
  # @return [String, nil]
  # rubocop:disable Metrics/MethodLength
  # rubocop:disable Metrics/AbcSize
  def self.build_name(name)
    if name.groupedValue.present?
      name.groupedValue.find { |grouped_value| grouped_value.type == 'name' }&.value
    elsif name.structuredValue.present?
      name_part = joined_name_parts(name, 'name', '. ').presence
      surname = joined_name_parts(name, 'surname', ' ')
      forename = joined_name_parts(name, 'forename', ' ')
      terms_of_address = joined_name_parts(name, 'term of address', ', ')
      life_dates = joined_name_parts(name, 'life dates', ', ')
      activity_dates = joined_name_parts(name, 'activity dates', ', ')
      joined_name = name_part || join_parts([surname, forename], ', ')
      joined_name = join_parts([joined_name, terms_of_address], ' ')
      joined_name = join_parts([joined_name, life_dates], ', ')
      join_parts([joined_name, activity_dates], ', ')
    else
      name.value
    end
  end
  # rubocop:enable Metrics/MethodLength
  # rubocop:enable Metrics/AbcSize

  # @return [Object, nil] first name whose type is 'display'
  def self.display_name_for(names)
    names.find { |name| name.type == 'display' }
  end

  # @return [Object, nil] first name whose status is 'primary'
  def self.primary_name_for(names)
    names.find { |name| name.status == 'primary' }
  end

  # Replaces each name that has parallel values by those parallel values.
  # @return [Array]
  def self.flat_names_for(names)
    names.flat_map { |name| name.parallelValue.presence || name }
  end

  # Joins the values of the structured parts of the given type.
  # @return [String] empty when no part of that type has a value
  def self.joined_name_parts(name, type, joiner)
    join_parts(name.structuredValue.select { |structured_value| structured_value.type == type }.map(&:value), joiner)
  end

  # Joins the non-blank parts.
  # @return [String]
  def self.join_parts(parts, joiner)
    parts.compact_blank.join(joiner)
  end
end
69
+ end
70
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Helper methods for working with Orcid in Cocina
class OrcidBuilder
  # NOTE: there is similar code in orcid_client which fetches
  # ORCIDs out of cocina. Consider consolidating at some point or keeping in sync.
  # see https://github.com/sul-dlss/orcid_client/blob/main/lib/sul_orcid_client/cocina_support.rb
  # and https://github.com/sul-dlss/dor_indexing_app/issues/1022

  # @param [Array<Cocina::Models::Contributor>] contributors
  # @return [Array<String>] the list of contributor ORCIDs to index into solr
  def self.build(contributors)
    new(contributors).build
  end

  def initialize(contributors)
    @contributors = Array(contributors)
  end

  # @return [Array<String>] ORCID URIs of the cited contributors that have an ORCID identifier
  def build
    cited_contributors.filter_map { |contributor| orcidid(contributor) }
  end

  private

  attr_reader :contributors

  # @return [Array<Cocina::Models::Contributor>] contributors who are listed as cited
  # Note that non-cited contributors are excluded.
  def cited_contributors
    contributors.select { |contributor| cited?(contributor) }
  end

  # @param [Cocina::Models::Contributor] contributor to check
  # @return [Boolean] true unless the contributor has a citation status of false
  def cited?(contributor)
    contributor.note.none? { |note| note.type == 'citation status' && note.value == 'false' }
  end

  # @param [Cocina::Models::Contributor] contributor to check
  # @return [String, nil] orcid id including host if present
  def orcidid(contributor)
    identifier = contributor.identifier.find { |id| id.type == 'ORCID' }
    return unless identifier

    # some records have the full ORCID URI in the data, just return it if so, e.g. druid:gf852zt8324
    return identifier.uri if identifier.uri
    return identifier.value if identifier.value.start_with?('https://orcid.org/')

    # some records have just the ORCIDID without the URL prefix, add it if so, e.g. druid:tp865ng1792
    # Such identifiers may carry no source at all, so the source is read with safe navigation
    # and a missing or blank source URI falls back to the public ORCID host.
    base_uri = identifier.source&.uri.presence || 'https://orcid.org/'
    URI.join(base_uri, identifier.value).to_s
  end
end
61
+ end
62
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Derives the publisher name to index from the contributors of a list of events.
class PublisherNameBuilder
  # @param events [Array] events responding to #contributor
  # @return [String, nil] publisher name(s); nil when no contributor has a publisher role
  def self.build(events)
    publisher_names_for(publisher_roles(events))
  end

  # Selects the contributors that carry a role whose value is "publisher" (case-insensitive).
  # @return [Array] matching contributors, possibly empty
  def self.publisher_roles(events)
    contributors = events.flat_map(&:contributor).compact

    contributors.select do |contributor|
      Array(contributor.role).any? { |role| role.value&.downcase == 'publisher' }
    end
  end

  # Returns the primary publisher if available, otherwise every publisher name joined by ' : '.
  # @return [String, nil]
  def self.publisher_names_for(publisher_roles)
    return if publisher_roles.blank?

    primary_publisher = publisher_roles.find { |role| role.status == 'primary' }

    if primary_publisher
      contributor_name(primary_publisher).first
    else
      publisher_roles.flat_map { |contributor| contributor_name(contributor) }.join(' : ')
    end
  end

  # @return [Array<String>] one formatted string per name of the contributor
  def self.contributor_name(contributor)
    contributor.name.flat_map { |name| flat_name(name) }
  end

  # For a name with parallel values: the primary parallel value if there is one, otherwise all
  # parallel values joined by ' : '. A name without parallel values is formatted directly.
  def self.flat_name(value)
    parallels = value.parallelValue
    primary_name = parallels&.find { |role| role.status == 'primary' }

    if primary_name
      name_for(primary_name)
    elsif parallels.present?
      parallel_name(parallels)
    else
      name_for(value)
    end
  end

  # Structured values are joined with '. '; otherwise the plain value is used.
  def self.name_for(name)
    return name.value unless name.structuredValue.present?

    name.structuredValue.map(&:value).join('. ')
  end

  def self.parallel_name(names)
    names.map { |single_name| name_for(single_name) }.join(' : ')
  end
end
52
+ end
53
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ module Builders
5
# Builds the temporal fields for a solr document from subjects of type 'time'.
class TemporalBuilder
  # @param [Array<Cocina::Models::Subject>] subjects
  # @return [Array<String>] the temporal values for Solr
  def self.build(subjects)
    new(subjects).build
  end

  def initialize(subjects)
    @subjects = Array(subjects)
  end

  def build
    extract_temporal_from_subjects(subjects)
  end

  # Collects values from subjects typed 'time' and from the parallel/structured values
  # of untyped subjects. Duplicates are removed.
  # @return [Array<String>]
  # rubocop:disable Metrics/AbcSize
  def extract_temporal_from_subjects(local_subjects)
    (
      build_temporal_nodes(local_subjects.select { |node| node.type == 'time' }) +
      local_subjects.reject(&:type).flat_map do |subject|
        next extract_temporal_from_subjects(subject.parallelValue) if subject.parallelValue.present?

        build_temporal_nodes(Array(subject.structuredValue).select { |node| node.type == 'time' })
      end
    ).uniq
  end
  # rubocop:enable Metrics/AbcSize

  private

  attr_reader :subjects

  def build_temporal_nodes(nodes)
    Array(nodes).flat_map { |node| build_temporal(node) }
  end

  # Gathers the node's own value, the values of its structured parts and the values of
  # its parallel values.
  # @param [Cocina::Models::DescriptiveValue]
  # @return [Array<String>]
  def build_temporal(node)
    remove_trailing_punctuation(
      Array(node.value) +
      # Structured parts without a value are skipped; a nil here used to raise NoMethodError below.
      Array(node.structuredValue).filter_map(&:value) +
      Array(node.parallelValue).flat_map { |child| build_temporal(child) }
    )
  end

  # Strips spaces, commas, slashes, backslashes and semicolons from the end of each string.
  # \Z anchors at the end of the string (allowing one final newline); the previous `$`
  # anchored at the end of the first line, so multi-line values were trimmed mid-string.
  def remove_trailing_punctuation(strings)
    strings.map { |str| str.sub(%r{[ ,\\/;]+\Z}, '') }
  end
end
55
+ end
56
+ end