dor_indexing 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +2 -0
  3. data/.rubocop.yml +355 -0
  4. data/Gemfile +16 -0
  5. data/Gemfile.lock +218 -0
  6. data/README.md +33 -0
  7. data/Rakefile +11 -0
  8. data/dor_indexing.gemspec +40 -0
  9. data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
  10. data/lib/dor_indexing/builders/author_builder.rb +31 -0
  11. data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
  12. data/lib/dor_indexing/builders/document_builder.rb +106 -0
  13. data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
  14. data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
  15. data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
  16. data/lib/dor_indexing/builders/name_builder.rb +70 -0
  17. data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
  18. data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
  19. data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
  20. data/lib/dor_indexing/builders/topic_builder.rb +96 -0
  21. data/lib/dor_indexing/cocina_repository.rb +24 -0
  22. data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
  23. data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
  24. data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
  25. data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
  26. data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
  27. data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
  28. data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
  29. data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
  30. data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
  31. data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
  32. data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
  33. data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
  34. data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
  35. data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
  36. data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
  37. data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
  38. data/lib/dor_indexing/marc_country.rb +359 -0
  39. data/lib/dor_indexing/selectors/event_selector.rb +112 -0
  40. data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
  41. data/lib/dor_indexing/version.rb +5 -0
  42. data/lib/dor_indexing/workflow_fields.rb +63 -0
  43. data/lib/dor_indexing/workflow_solr_document.rb +93 -0
  44. data/lib/dor_indexing.rb +19 -0
  45. metadata +173 -0
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Gathers every searchable text value found in a Cocina description
    class AllSearchTextBuilder
      # this originally had displayLabel, but Arcadia recommends against it
      TEXT_KEYS = %i[
        value
      ].freeze

      # Attributes that may hold nested descriptive values; visited in this order.
      RECURSE_KEYS = %i[
        structuredValue
        parallelValue
        groupedValue
        title
        contributor
        event
        form
        language
        note
        relatedResource
        subject
        name
        location
      ].freeze

      # @param cocina_description the description object to walk
      # @return [String] all text values joined by a single space
      def self.build(cocina_description)
        new(cocina_description).build
      end

      def initialize(cocina_description)
        @cocina_description = cocina_description
      end

      # Walks the description depth-first and joins whatever text was collected.
      # @return [String]
      def build
        @text = []
        recurse(cocina_description)
        text.join(' ')
      end

      private

      attr_reader :cocina_description, :text

      # Appends the node's own text first, then descends into its children
      # following the order of RECURSE_KEYS.
      def recurse(node)
        own_values = TEXT_KEYS.map { |key| node.try(key) }
        text.concat(own_values.select(&:present?))

        children = RECURSE_KEYS.flat_map { |key| Array(node.try(key)) }
        children.each { |child| recurse(child) }
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Builds the author fields for a solr document
    class AuthorBuilder
      # @param cocina_contributors contributors to read names from; nil is treated as an empty list
      def initialize(cocina_contributors)
        @cocina_contributors = Array(cocina_contributors)
      end

      # Builds the name of the contributor with status "primary", falling back
      # to the first contributor when none is marked primary.
      # @return the result of NameBuilder.build_primary_name, or nil when there are no contributors
      def build_primary
        contributor = primary_cocina_contributor || cocina_contributors.first

        NameBuilder.build_primary_name(contributor.name) if contributor
      end

      # Builds names for every contributor that has a name; contributors whose
      # name is nil are skipped.
      # @return the result of NameBuilder.build_all
      def build_all
        NameBuilder.build_all(cocina_contributors.filter_map(&:name))
      end

      private

      attr_reader :cocina_contributors

      # @return the first contributor whose status is "primary", or nil
      def primary_cocina_contributor
        cocina_contributors.find { |cocina_contributor| cocina_contributor.status == 'primary' }
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Rights description builder for collections
    class CollectionRightsDescriptionBuilder
      # @param cocina an object responding to #access, whose result responds to #view
      # @return [String] "world" or "dark"
      def self.build(cocina)
        new(cocina).build
      end

      def initialize(cocina)
        @cocina = cocina
      end

      # A collection is described as "world" only when its access view is
      # exactly "world"; every other view is reported as "dark".
      # @return [String]
      def build
        return 'world' if cocina.access.view == 'world'

        'dark'
      end

      private

      attr_reader :cocina
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Picks the composite indexer that matches a Cocina object's type and
    # instantiates it with the data the individual indexers need.
    class DocumentBuilder
      ADMIN_POLICY_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RoleMetadataIndexer,
        DorIndexing::Indexers::DefaultObjectRightsIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      COLLECTION_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RightsMetadataIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::ReleasableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      ITEM_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RightsMetadataIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::EmbargoMetadataIndexer,
        DorIndexing::Indexers::ContentMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::CollectionTitleIndexer,
        DorIndexing::Indexers::ReleasableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      # Object types with a dedicated indexer; any type not listed here is
      # indexed with ITEM_INDEXER (see #indexer_for_type).
      INDEXERS = {
        Cocina::Models::ObjectType.agreement => ITEM_INDEXER, # Agreement uses same indexer as item
        Cocina::Models::ObjectType.admin_policy => ADMIN_POLICY_INDEXER,
        Cocina::Models::ObjectType.collection => COLLECTION_INDEXER
      }.freeze

      # Class-wide cache of collections already fetched, keyed by druid.
      @@parent_collections = {} # rubocop:disable Style/ClassVars

      # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
      # @param workflow_client passed through to the indexer
      # @param cocina_repository used to look up parent collections and administrative tags
      # @return whatever the selected composite indexer's #new returns
      def self.for(model:, workflow_client:, cocina_repository:)
        new(model: model, workflow_client: workflow_client, cocina_repository: cocina_repository).for
      end

      # Empties the class-wide parent collection cache.
      def self.reset_parent_collections
        @@parent_collections = {} # rubocop:disable Style/ClassVars
      end

      # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
      def initialize(model:, workflow_client:, cocina_repository:)
        @model = model
        @workflow_client = workflow_client
        @cocina_repository = cocina_repository
      end

      # Instantiates the composite indexer chosen for the model's type.
      def for
        composite = indexer_for_type(model.type)
        composite.new(id: id,
                      cocina: model,
                      parent_collections: parent_collections,
                      administrative_tags: administrative_tags,
                      workflow_client: workflow_client,
                      cocina_repository: cocina_repository)
      end

      private

      attr_reader :model, :workflow_client, :cocina_repository

      def id
        model.externalIdentifier
      end

      # @return the indexer registered for the type, ITEM_INDEXER when none is registered
      def indexer_for_type(type)
        INDEXERS.fetch(type) { ITEM_INDEXER }
      end

      # Only DROs have parent collections; collections that cannot be loaded
      # are left out of the result.
      # @return [Array]
      def parent_collections
        return [] unless model.dro?

        Array(model.structural&.isMemberOf).filter_map { |rel_druid| cached_collection(rel_druid) }
      end

      # Fetches a collection through the class-wide cache. A repository error
      # is reported to Honeybadger and yields nil, so nothing is cached for it.
      def cached_collection(rel_druid)
        @@parent_collections[rel_druid] ||= cocina_repository.find(rel_druid)
      rescue DorIndexing::CocinaRepository::RepositoryError
        Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
        # This may happen if the referenced Collection does not exist (bad data)
        nil
      end

      # @return the administrative tags for this object, [] when the repository raises
      def administrative_tags
        cocina_repository.administrative_tags(id)
      rescue DorIndexing::CocinaRepository::RepositoryError
        []
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Builds the event date fields for a solr document
    class EventDateBuilder
      class << self
        # Picks one date value from the event (and its parallel events), trying in order:
        # a date of the requested type with primary status, any date of the
        # requested type, then any date without a type.
        #
        # @param [Cocina::Models::Event] event single selected event (nil yields nil)
        # @param [String] date_type the date type to look for
        # @return [String, nil] the date value for Solr
        def build(event, date_type)
          nested_dates = Array(event&.date) + Array(event&.parallelEvent&.map(&:date))
          event_dates = nested_dates.flatten.compact

          matching_date_value_with_status_primary(event_dates, date_type) ||
            matching_date_value(event_dates, date_type) ||
            untyped_date_value(event_dates)
        end

        private

        # NOTE: in each selector below only the FIRST matching date is consulted;
        # when that date carries no usable value the result is nil, even if a
        # later date would also match.

        # @return [String, nil] date.value from a date of type of date_type and of status primary
        def matching_date_value_with_status_primary(event_dates, date_type)
          date_value(
            event_dates.find do |date|
              date.type == date_type && DorIndexing::Selectors::EventSelector.date_status_primary(date)
            end
          )
        end

        # @return [String, nil] date.value from a date of type of date_type
        def matching_date_value(event_dates, date_type)
          date_value(event_dates.find { |date| date.type == date_type })
        end

        # @return [String, nil] date.value from a date without a type
        def untyped_date_value(event_dates)
          date_value(event_dates.find { |date| date.type.blank? })
        end

        # Looks for a value on the date itself, then on its structured values,
        # then on its parallel values, and returns the first one present.
        #
        # @param [Cocina::Models::DescriptiveValue, nil] date a date object from an event
        # @return [String, nil] value from date object
        def date_value(date)
          return if date.nil?

          candidates = [date, *Array(date.structuredValue), *Array(date.parallelValue)]
          candidates.find { |candidate| candidate&.value.present? }&.value
        end
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Finds the place to index from publication events
    class EventPlaceBuilder
      # @param [Cocina::Models::Event] event
      # @return [String, nil] the place value for Solr; nil when there is no event or no usable location
      def self.build(event)
        new(event).build
      end

      def initialize(event)
        @event = event
      end

      # Prefers the location flagged as primary; otherwise considers every location.
      def build
        return unless event

        primary_location || location_from(flat_locations)
      end

      private

      attr_reader :event

      # @return [String, nil] place derived from the first location with status "primary"
      def primary_location
        primary = flat_locations.find { |location| location.status == 'primary' }
        location_from(primary ? [primary] : [])
      end

      # Tries, in order: plain (non MARC country) values, MARC country text,
      # then a MARC country code or URI lookup.
      def location_from(locations)
        return if locations.empty?

        value_locations_for(locations) || marccountry_text_for(locations) || marccountry_code_for(locations)
      end

      # Each location is replaced by its parallel values, else its structured
      # values, else kept as is. Memoized.
      def flat_locations
        @flat_locations ||= event_locations.flat_map do |location|
          location.parallelValue.presence || location.structuredValue.presence || location
        end
      end

      # Locations come from the parallel events when there are any, otherwise
      # from the event itself.
      def event_locations
        return Array(event.location) if event.parallelEvent.blank?

        event.parallelEvent.flat_map { |parallel_event| Array(parallel_event.location) }
      end

      # @return [String, nil] value of the first MARC country location that has a value
      def marccountry_text_for(locations)
        with_text = locations.find { |location| marc_country?(location) && location.value }
        with_text&.value
      end

      # rubocop:disable Metrics/CyclomaticComplexity
      def marccountry_code_for(locations)
        coded = locations.find { |location| marc_country?(location) && location.code }
        DorIndexing::MarcCountry.from_code(coded&.code) ||
          DorIndexing::MarcCountry.from_uri(
            locations.find { |location| location.uri&.start_with?(DorIndexing::MarcCountry::MARC_COUNTRY_URI) }&.uri
          )
      end
      # rubocop:enable Metrics/CyclomaticComplexity

      # @return [String, nil] values of all non MARC country locations joined by " : "
      def value_locations_for(locations)
        values = locations.filter_map { |location| location.value if location.value && !marc_country?(location) }
        values.join(' : ').presence
      end

      # A location counts as MARC country when its source carries the MARC
      # country code or URI.
      def marc_country?(location)
        source = location.source
        source&.code == DorIndexing::MarcCountry::MARC_COUNTRY_CODE ||
          source&.uri == DorIndexing::MarcCountry::MARC_COUNTRY_URI
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Builds the geographic fields for a solr document
    class GeographicBuilder
      # @param [Array<Cocina::Models::Subject>] subjects
      # @return [Array<String>] the geographic values for Solr
      def self.build(subjects)
        new(subjects).build
      end

      def initialize(subjects)
        @subjects = Array(subjects)
      end

      def build
        extract_place_from_subjects(subjects)
      end

      # Collects places from subjects typed "place" and, for subjects without
      # a type, from their parallel values (recursively) or from their
      # structured values typed "place". Duplicates are removed.
      # rubocop:disable Metrics/AbcSize
      def extract_place_from_subjects(local_subjects)
        typed_places = build_place_nodes(local_subjects.select { |node| node.type == 'place' })

        nested_places = local_subjects.reject(&:type).flat_map do |subject|
          if subject.parallelValue.present?
            extract_place_from_subjects(subject.parallelValue)
          else
            build_place_nodes(Array(subject.structuredValue).select { |node| node.type == 'place' })
          end
        end

        (typed_places + nested_places).uniq
      end
      # rubocop:enable Metrics/AbcSize

      private

      attr_reader :subjects

      def build_place_nodes(nodes)
        Array(nodes).flat_map { |node| build_place(node) }
      end

      # Gathers the node's own value, the label looked up from its code, its
      # joined structured values and the places of its parallel values.
      # @param [Cocina::Models::DescriptiveValue]
      def build_place(node)
        from_parallels = Array(node.parallelValue).flat_map { |child| build_place(child) }
        candidates = Array(node.value) + place_from_code(node) + build_hierarchical_subject(node) + from_parallels
        remove_trailing_punctuation(candidates)
      end

      # Looks the code up in the vocabulary named by the source code; codes
      # missing from the vocabulary yield [].
      # @return [Array<String>]
      # rubocop:disable Metrics/MethodLength
      def place_from_code(node)
        return [] unless node.code && node.source

        code = node.code.gsub(/[^\w-]/, '') # remove any punctuation (except dash).
        authority = node.source.code
        return [Marc::Vocab::GeographicArea.fetch(code)] if authority == 'marcgac'
        return [Marc::Vocab::Country.fetch(code)] if authority == 'marccountry'

        []
      rescue KeyError
        # Per Arcadia, halt HB notification until after data clean-up.
        # Honeybadger.notify("[DATA ERROR] Unable to find \"#{code}\" in authority \"#{node.source.code}\"")
        []
      end
      # rubocop:enable Metrics/MethodLength

      # @return [Array<String>] the structured values joined by a space, or [] when there are none
      def build_hierarchical_subject(node)
        parts = node.structuredValue&.map(&:value)
        parts.present? ? Array(parts.join(' ')) : []
      end

      def remove_trailing_punctuation(strings)
        strings.map { |str| str.sub(%r{[ ,\\/;]+$}, '') }
      end
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Builds display strings from Cocina name values
    class NameBuilder
      # @param [Array<Array>] cocina_contributors one array of name values per contributor
      # @return [Array<String>] names; name values that build to nil are left out
      def self.build_all(cocina_contributors)
        flat_names = cocina_contributors.flat_map { |cocina_contributor| flat_names_for(cocina_contributor) }
        flat_names.filter_map { |name| build_name(name) }
      end

      # Chooses the name of type "display", else the one with status "primary".
      # With strategy :first the first name is used when neither exists; with
      # any other strategy the first name that builds to a non-nil string is used.
      #
      # @param names the name values of one contributor (nil is treated as an empty list)
      # @param [Symbol] strategy ":first" is the strategy for how to choose a name if primary and display name is not found
      # @return [String, nil] name
      def self.build_primary_name(names, strategy: :first)
        flat_names = flat_names_for(Array(names))
        name = display_name_for(flat_names) || primary_name_for(flat_names)
        name ||= flat_names.first if strategy == :first
        return build_name(name) if name

        flat_names.filter_map { |one| build_name(one) }.first
      end

      # Builds one string from a name value: the grouped value of type "name"
      # when grouped values exist, a string assembled from the structured
      # parts when they exist, otherwise the plain value.
      # @return [String, nil]
      # rubocop:disable Metrics/MethodLength
      # rubocop:disable Metrics/AbcSize
      def self.build_name(name)
        if name.groupedValue.present?
          name.groupedValue.find { |grouped_value| grouped_value.type == 'name' }&.value
        elsif name.structuredValue.present?
          name_part = joined_name_parts(name, 'name', '. ').presence
          surname = joined_name_parts(name, 'surname', ' ')
          forename = joined_name_parts(name, 'forename', ' ')
          terms_of_address = joined_name_parts(name, 'term of address', ', ')
          life_dates = joined_name_parts(name, 'life dates', ', ')
          activity_dates = joined_name_parts(name, 'activity dates', ', ')
          # "name" parts win over "surname, forename"; the remaining parts are appended in turn.
          joined_name = name_part || join_parts([surname, forename], ', ')
          joined_name = join_parts([joined_name, terms_of_address], ' ')
          joined_name = join_parts([joined_name, life_dates], ', ')
          join_parts([joined_name, activity_dates], ', ')
        else
          name.value
        end
      end
      # rubocop:enable Metrics/MethodLength
      # rubocop:enable Metrics/AbcSize

      # @return the first name whose type is "display", or nil
      def self.display_name_for(names)
        names.find { |name| name.type == 'display' }
      end

      # @return the first name whose status is "primary", or nil
      def self.primary_name_for(names)
        names.find { |name| name.status == 'primary' }
      end

      # Replaces each name that has parallel values by those parallel values.
      # @return [Array]
      def self.flat_names_for(names)
        names.flat_map { |name| name.parallelValue.presence || name }
      end

      # @return [String] the values of the structured parts of the given type, joined
      def self.joined_name_parts(name, type, joiner)
        join_parts(name.structuredValue.select { |structured_value| structured_value.type == type }.map(&:value), joiner)
      end

      # @return [String] the non-blank parts joined ("" when all are blank)
      def self.join_parts(parts, joiner)
        parts.compact_blank.join(joiner)
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Helper methods for working with Orcid in Cocina
    class OrcidBuilder
      # NOTE: there is similar code in orcid_client which fetches
      # ORCIDs out of cocina. Consider consolidating at some point or keeping in sync.
      # see https://github.com/sul-dlss/orcid_client/blob/main/lib/sul_orcid_client/cocina_support.rb
      # and https://github.com/sul-dlss/dor_indexing_app/issues/1022

      # @param [Array<Cocina::Models::Contributor>] contributors
      # @return [Array<String>] the list of contributor ORCIDs to index into solr
      def self.build(contributors)
        new(contributors).build
      end

      def initialize(contributors)
        @contributors = Array(contributors)
      end

      # @return [Array<String>] one ORCID per cited contributor that has one
      def build
        cited_contributors.filter_map { |contributor| orcidid(contributor) }
      end

      private

      attr_reader :contributors

      # @return [Array<Cocina::Models::Contributor>] contributors who are listed as cited
      # Note that non-cited contributors are excluded.
      def cited_contributors
        contributors.select { |contributor| cited?(contributor) }
      end

      # @param [Cocina::Models::Contributor] contributor to check
      # @return [Boolean] true unless the contributor has a citation status of false
      def cited?(contributor)
        Array(contributor.note).none? { |note| note.type == 'citation status' && note.value == 'false' }
      end

      # @param [Cocina::Models::Contributor] contributor to check
      # @return [String, nil] orcid id including host if present
      # rubocop:disable Metrics/AbcSize
      def orcidid(contributor)
        identifier = Array(contributor.identifier).find { |id| id.type == 'ORCID' }
        return unless identifier

        # some records have the full ORCID URI in the data, just return it if so, e.g. druid:gf852zt8324
        return identifier.uri if identifier.uri
        # an ORCID identifier carrying neither a uri nor a value has nothing to index
        return if identifier.value.blank?
        return identifier.value if identifier.value.start_with?('https://orcid.org/')

        # some records have just the ORCIDID without the URL prefix, add it if so, e.g. druid:tp865ng1792
        # (the source itself may be absent, hence the safe navigation)
        base_uri = identifier.source&.uri.presence || 'https://orcid.org/'
        URI.join(base_uri, identifier.value).to_s
      end
      # rubocop:enable Metrics/AbcSize
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Builds the publisher name for a solr document
    class PublisherNameBuilder
      # @param events objects responding to #contributor
      # @return [String, nil] the publisher name(s), nil when no contributor has a publisher role
      def self.build(events)
        publisher_names_for(publisher_roles(events))
      end

      # @return [Array] the event contributors having a role whose value is "publisher" (case-insensitive)
      def self.publisher_roles(events)
        events.flat_map(&:contributor).compact.select do |contributor|
          Array(contributor.role).any? { |role| role.value&.downcase == 'publisher' }
        end
      end

      # Returns the primary publisher if available; otherwise all publisher
      # names joined by " : ".
      def self.publisher_names_for(publisher_roles)
        return if publisher_roles.blank?

        primary_publisher = publisher_roles.find { |publisher| publisher.status == 'primary' }
        if primary_publisher
          contributor_name(primary_publisher).first
        else
          publisher_roles.flat_map { |publisher| contributor_name(publisher) }.join(' : ')
        end
      end

      # @return [Array<String>] one entry per name of the contributor
      def self.contributor_name(contributor)
        contributor.name.flat_map { |name| flat_name(name) }
      end

      # For a name with parallel values: the primary parallel value when one
      # is flagged, else all parallel values joined. Otherwise the name itself.
      def self.flat_name(value)
        parallels = value.parallelValue
        primary_name = parallels&.find { |parallel| parallel.status == 'primary' }

        if primary_name
          name_for(primary_name)
        elsif parallels.present?
          parallel_name(parallels)
        else
          name_for(value)
        end
      end

      # @return [String, nil] the structured values joined by ". " when present, else the plain value
      def self.name_for(name)
        return name.value unless name.structuredValue.present?

        name.structuredValue.map(&:value).join('. ')
      end

      def self.parallel_name(names)
        names.map { |single_name| name_for(single_name) }.join(' : ')
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
class DorIndexing
  module Builders
    # Builds the temporal fields for a solr document
    class TemporalBuilder
      # @param [Array<Cocina::Models::Subject>] subjects
      # @return [Array<String>] the temporal values for Solr
      def self.build(subjects)
        new(subjects).build
      end

      def initialize(subjects)
        @subjects = Array(subjects)
      end

      def build
        extract_temporal_from_subjects(subjects)
      end

      # Collects values from subjects typed "time" and, for subjects without a
      # type, from their parallel values (recursively) or from their
      # structured values typed "time". Duplicates are removed.
      # rubocop:disable Metrics/AbcSize
      def extract_temporal_from_subjects(local_subjects)
        typed_values = build_temporal_nodes(local_subjects.select { |node| node.type == 'time' })

        nested_values = local_subjects.reject(&:type).flat_map do |subject|
          if subject.parallelValue.present?
            extract_temporal_from_subjects(subject.parallelValue)
          else
            build_temporal_nodes(Array(subject.structuredValue).select { |node| node.type == 'time' })
          end
        end

        (typed_values + nested_values).uniq
      end
      # rubocop:enable Metrics/AbcSize

      private

      attr_reader :subjects

      def build_temporal_nodes(nodes)
        Array(nodes).flat_map { |node| build_temporal(node) }
      end

      # Gathers the node's own value, the values of its structured values and
      # the temporal values of its parallel values.
      # @param [Cocina::Models::DescriptiveValue]
      def build_temporal(node)
        own = Array(node.value)
        structured = Array(node.structuredValue).map(&:value)
        parallel = Array(node.parallelValue).flat_map { |child| build_temporal(child) }

        remove_trailing_punctuation(own + structured + parallel)
      end

      def remove_trailing_punctuation(strings)
        strings.map { |str| str.sub(%r{[ ,\\/;]+$}, '') }
      end
    end
  end
end