dor_indexing 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +355 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +218 -0
- data/README.md +33 -0
- data/Rakefile +11 -0
- data/dor_indexing.gemspec +40 -0
- data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
- data/lib/dor_indexing/builders/author_builder.rb +31 -0
- data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
- data/lib/dor_indexing/builders/document_builder.rb +106 -0
- data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
- data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
- data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
- data/lib/dor_indexing/builders/name_builder.rb +70 -0
- data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
- data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
- data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
- data/lib/dor_indexing/builders/topic_builder.rb +96 -0
- data/lib/dor_indexing/cocina_repository.rb +24 -0
- data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
- data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
- data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
- data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
- data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
- data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
- data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
- data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
- data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
- data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
- data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
- data/lib/dor_indexing/marc_country.rb +359 -0
- data/lib/dor_indexing/selectors/event_selector.rb +112 -0
- data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
- data/lib/dor_indexing/version.rb +5 -0
- data/lib/dor_indexing/workflow_fields.rb +63 -0
- data/lib/dor_indexing/workflow_solr_document.rb +93 -0
- data/lib/dor_indexing.rb +19 -0
- metadata +173 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Collects the searchable text found anywhere inside a Cocina description
    # and returns it as one space-separated string.
    class AllSearchTextBuilder
      # Attributes whose content is collected as text.
      # this originally had displayLabel, but Arcadia recommends against it
      TEXT_KEYS = %i[
        value
      ].freeze

      # Attributes that hold nested nodes which are walked in turn.
      RECURSE_KEYS = %i[
        structuredValue
        parallelValue
        groupedValue
        title
        contributor
        event
        form
        language
        note
        relatedResource
        subject
        name
        location
      ].freeze

      # Convenience entry point: builds the text for the given description.
      # @param cocina_description the description node to walk
      # @return [String] the collected values joined by single spaces
      def self.build(cocina_description)
        new(cocina_description).build
      end

      # @param cocina_description the description node to walk
      def initialize(cocina_description)
        @cocina_description = cocina_description
      end

      # Walks the description from the root and joins everything collected.
      # The accumulator is reset on every call.
      # @return [String]
      def build
        @text = []
        recurse(cocina_description)
        text.join(' ')
      end

      private

      attr_reader :cocina_description, :text

      # Appends the node's own text and then descends, depth first, into every
      # nested node reachable through RECURSE_KEYS (in the order listed there).
      def recurse(desc)
        TEXT_KEYS.each do |text_key|
          found = desc.try(text_key)
          text.push(found) if found.present?
        end

        children = RECURSE_KEYS.flat_map { |recurse_key| Array(desc.try(recurse_key)) }
        children.each { |child| recurse(child) }
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the author fields for a solr document
    class AuthorBuilder
      # @param cocina_contributors the contributors to take names from; nil is treated as no contributors
      def initialize(cocina_contributors)
        @cocina_contributors = Array(cocina_contributors)
      end

      # Builds the name of the contributor whose status is 'primary', falling back to the
      # first contributor when none is marked primary.
      # @return the result of NameBuilder.build_primary_name, or nil when there are no contributors
      def build_primary
        contributor = primary_cocina_contributor || cocina_contributors.first
        return unless contributor

        NameBuilder.build_primary_name(contributor.name)
      end

      # Builds the names of every contributor that has a name.
      # @return the result of NameBuilder.build_all for the contributors' name lists
      def build_all
        NameBuilder.build_all(cocina_contributors.filter_map(&:name))
      end

      private

      attr_reader :cocina_contributors

      # @return the first contributor whose status is 'primary', or nil
      def primary_cocina_contributor
        cocina_contributors.find { |cocina_contributor| cocina_contributor.status == 'primary' }
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Produces the rights description for a collection from its access settings.
    class CollectionRightsDescriptionBuilder
      # Convenience entry point.
      # @param cocina the collection; must respond to access.view
      # @return [String] 'world' or 'dark'
      def self.build(cocina)
        new(cocina).build
      end

      # @param cocina the collection; must respond to access.view
      def initialize(cocina)
        @cocina = cocina
      end

      # @return [String] 'world' when the access view is 'world', 'dark' for anything else
      def build
        cocina.access.view == 'world' ? 'world' : 'dark'
      end

      private

      attr_reader :cocina
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Selects the composite indexer matching a Cocina object's type and instantiates it
    # with the data it needs for indexing.
    class DocumentBuilder
      ADMIN_POLICY_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RoleMetadataIndexer,
        DorIndexing::Indexers::DefaultObjectRightsIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      COLLECTION_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RightsMetadataIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::ReleasableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      ITEM_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RightsMetadataIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::EmbargoMetadataIndexer,
        DorIndexing::Indexers::ContentMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::CollectionTitleIndexer,
        DorIndexing::Indexers::ReleasableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      # Object type => indexer. Any type not listed here falls back to ITEM_INDEXER.
      INDEXERS = {
        Cocina::Models::ObjectType.agreement => ITEM_INDEXER, # Agreement uses same indexer as item
        Cocina::Models::ObjectType.admin_policy => ADMIN_POLICY_INDEXER,
        Cocina::Models::ObjectType.collection => COLLECTION_INDEXER
      }.freeze

      # Collections already fetched, keyed by collection druid. Being a class variable, the
      # cache is shared by all builder instances until .reset_parent_collections is called.
      @@parent_collections = {} # rubocop:disable Style/ClassVars

      # Convenience entry point: builds the indexer instance for the given model.
      # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
      # @param workflow_client handed unchanged to the indexer
      # @param cocina_repository used to look up parent collections and administrative tags
      def self.for(model:, workflow_client:, cocina_repository:)
        builder = new(model:, workflow_client:, cocina_repository:)
        builder.for
      end

      # Empties the shared parent collection cache.
      def self.reset_parent_collections
        @@parent_collections = {} # rubocop:disable Style/ClassVars
      end

      # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
      # @param workflow_client handed unchanged to the indexer
      # @param cocina_repository used to look up parent collections and administrative tags
      def initialize(model:, workflow_client:, cocina_repository:)
        @model = model
        @workflow_client = workflow_client
        @cocina_repository = cocina_repository
      end

      # Instantiates the indexer registered for the model's type.
      # @return whatever the selected composite indexer's #new returns
      def for
        indexer_for_type(model.type).new(
          id:,
          cocina: model,
          parent_collections:,
          administrative_tags:,
          workflow_client:,
          cocina_repository:
        )
      end

      private

      attr_reader :model, :workflow_client, :cocina_repository

      def id
        model.externalIdentifier
      end

      def indexer_for_type(type)
        INDEXERS.fetch(type, ITEM_INDEXER)
      end

      # @return [Array] the collections the model is a member of; always empty unless model.dro?
      def parent_collections
        return [] unless model.dro?

        Array(model.structural&.isMemberOf).filter_map { |rel_druid| cached_parent_collection(rel_druid) }
      end

      # Looks a collection up through the shared cache, asking the repository only on a miss.
      # @return the collection, or nil (after notifying Honeybadger) when the repository raises
      def cached_parent_collection(rel_druid)
        @@parent_collections[rel_druid] ||= cocina_repository.find(rel_druid)
      rescue DorIndexing::CocinaRepository::RepositoryError
        Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
        # This may happen if the referenced Collection does not exist (bad data)
        nil
      end

      # @return the administrative tags for the model, or [] when the repository raises
      def administrative_tags
        cocina_repository.administrative_tags(id)
      rescue DorIndexing::CocinaRepository::RepositoryError
        []
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Picks the date value to index from a single event.
    class EventDateBuilder
      # Tries, in order: a date of the requested type with primary status, any date of the
      # requested type, and finally a date that has no type. Dates of the event's parallel
      # events are considered after the event's own dates.
      # @param [Cocina::Models::Event] event single selected event
      # @param date_type the date type to look for
      # @return [String, nil] the date value for Solr
      def self.build(event, date_type)
        parallel_dates = event&.parallelEvent&.map(&:date)
        event_dates = Array(event&.date) + Array(parallel_dates)

        matching_date_value_with_status_primary(event_dates, date_type) ||
          matching_date_value(event_dates, date_type) ||
          untyped_date_value(event_dates)
      end

      # @return [String, nil] date.value from the first date of type date_type with status primary
      def self.matching_date_value_with_status_primary(event_dates, date_type)
        selected = event_dates.flatten.compact.find do |date|
          date.type == date_type && DorIndexing::Selectors::EventSelector.date_status_primary(date)
        end
        date_value(selected)
      end
      private_class_method :matching_date_value_with_status_primary

      # @return [String, nil] date.value from the first date of type date_type
      def self.matching_date_value(event_dates, date_type)
        selected = event_dates.flatten.compact.find { |date| date.type == date_type }
        date_value(selected)
      end
      private_class_method :matching_date_value

      # @return [String, nil] date.value from the first date without a type
      def self.untyped_date_value(event_dates)
        selected = event_dates.flatten.compact.find { |date| date.type.blank? }
        date_value(selected)
      end
      private_class_method :untyped_date_value

      # Uses the date's own value when present; otherwise the first present value among its
      # structured values, then among its parallel values.
      # @param [Cocina::Models::DescriptiveValue] date a date object from an event (may be nil)
      # @return [String, nil] value from date object
      def self.date_value(date)
        return date.value if date&.value.present?

        nested_values = Array(date&.structuredValue) + Array(date&.parallelValue)
        nested_values.find { |nested| nested&.value.present? }&.value
      end
      private_class_method :date_value
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Finds the place to index from publication events
    class EventPlaceBuilder
      # @param [Cocina::Models::Event] event
      # @return [String, nil] the place value for Solr
      def self.build(event)
        new(event).build
      end

      # @param [Cocina::Models::Event] event
      def initialize(event)
        @event = event
      end

      # Prefers the location with status 'primary'; otherwise derives a place from all locations.
      # @return [String, nil] nil when there is no event or no usable location
      def build
        return unless event

        primary_location || location_from(flat_locations)
      end

      private

      attr_reader :event

      # @return [String, nil] the place taken from the first location whose status is 'primary'
      def primary_location
        primary = flat_locations.find { |location| location.status == 'primary' }
        location_from([primary]) if primary
      end

      # Order of preference: plain (non MARC country) values, then a MARC country text value,
      # then a MARC country resolved from a code or URI.
      def location_from(locations)
        return if locations.empty?

        value_locations_for(locations) ||
          marccountry_text_for(locations) ||
          marccountry_code_for(locations)
      end

      # Every location of the event, with parallel or structured locations replaced by their
      # members (parallel values win over structured values). Memoized.
      def flat_locations
        @flat_locations ||= event_locations.flat_map do |location|
          location.parallelValue.presence || location.structuredValue.presence || location
        end
      end

      # Locations of the parallel events when there are any, otherwise the event's own locations.
      def event_locations
        return Array(event.location) unless event.parallelEvent.present?

        event.parallelEvent.flat_map { |parallel_event| Array(parallel_event.location) }
      end

      # @return [String, nil] value of the first MARC country location that has a value
      def marccountry_text_for(locations)
        with_text = locations.find { |location| marc_country?(location) && location.value }
        with_text&.value
      end

      # @return the MarcCountry lookup by code, or failing that by URI
      def marccountry_code_for(locations)
        coded = locations.find { |location| marc_country?(location) && location.code }
        by_code = DorIndexing::MarcCountry.from_code(coded&.code)
        return by_code if by_code

        linked = locations.find { |location| location.uri&.start_with?(DorIndexing::MarcCountry::MARC_COUNTRY_URI) }
        DorIndexing::MarcCountry.from_uri(linked&.uri)
      end

      # @return [String, nil] the values of all non MARC country locations joined by ' : '
      def value_locations_for(locations)
        plain_values = locations.filter_map do |location|
          location.value if location.value && !marc_country?(location)
        end
        plain_values.join(' : ').presence
      end

      # True when the location's source is identified as MARC country by code or by URI.
      def marc_country?(location)
        source = location.source
        source&.code == DorIndexing::MarcCountry::MARC_COUNTRY_CODE ||
          source&.uri == DorIndexing::MarcCountry::MARC_COUNTRY_URI
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the geographic fields for a solr document
    class GeographicBuilder
      # A trailing run of spaces, commas, backslashes, slashes and semicolons.
      # Anchored with \Z (end of the string, allowing one final newline) rather than $,
      # because $ also matches at the end of every line inside a multi-line value.
      TRAILING_PUNCTUATION = %r{[ ,\\/;]+\Z}
      private_constant :TRAILING_PUNCTUATION

      # @param [Array<Cocina::Models::Subject>] subjects
      # @return [Array<String>] the geographic values for Solr
      def self.build(subjects)
        new(subjects).build
      end

      # @param [Array<Cocina::Models::Subject>] subjects nil is treated as no subjects
      def initialize(subjects)
        @subjects = Array(subjects)
      end

      # @return [Array<String>] the unique place strings found in the subjects
      def build
        extract_place_from_subjects(subjects)
      end

      # Collects places from subjects typed 'place' and, for subjects without a type, from
      # their parallel values (recursively) or from their structured values typed 'place'.
      # @return [Array<String>] unique place strings
      # rubocop:disable Metrics/AbcSize
      def extract_place_from_subjects(local_subjects)
        (
          build_place_nodes(local_subjects.select { |node| node.type == 'place' }) +
          local_subjects.reject(&:type).flat_map do |subject|
            next extract_place_from_subjects(subject.parallelValue) if subject.parallelValue.present?

            build_place_nodes(Array(subject.structuredValue).select { |node| node.type == 'place' })
          end
        ).uniq
      end
      # rubocop:enable Metrics/AbcSize

      private

      attr_reader :subjects

      def build_place_nodes(nodes)
        Array(nodes).flat_map { |node| build_place(node) }
      end

      # Gathers the node's value, the label looked up from its code, its structured values
      # joined by spaces, and the places of its parallel values.
      # @param [Cocina::Models::DescriptiveValue] node
      # @return [Array<String>] place strings with trailing punctuation removed
      def build_place(node)
        remove_trailing_punctuation(
          Array(node.value) +
          place_from_code(node) +
          build_hierarchical_subject(node) +
          Array(node.parallelValue).flat_map { |child| build_place(child) }
        )
      end

      # Looks the node's code up in the MARC vocabulary named by its source code.
      # @return [Array<String>] one label, or [] when the code, source or vocabulary entry is missing
      # rubocop:disable Metrics/MethodLength
      def place_from_code(node)
        return [] unless node.code && node.source

        code = node.code.gsub(/[^\w-]/, '') # remove any punctuation (except dash).
        case node.source.code
        when 'marcgac'
          [Marc::Vocab::GeographicArea.fetch(code)]
        when 'marccountry'
          [Marc::Vocab::Country.fetch(code)]
        else
          []
        end
      rescue KeyError
        # Per Arcadia, halt HB notification until after data clean-up.
        # Honeybadger.notify("[DATA ERROR] Unable to find \"#{code}\" in authority \"#{node.source.code}\"")
        []
      end
      # rubocop:enable Metrics/MethodLength

      # @return [Array<String>] the structured values joined by a space, or [] when there are none
      def build_hierarchical_subject(node)
        Array(node.structuredValue&.map(&:value).presence&.join(' '))
      end

      # @param [Array<String>] strings
      # @return [Array<String>] the strings with punctuation at the end of each string removed
      def remove_trailing_punctuation(strings)
        strings.map { |str| str.sub(TRAILING_PUNCTUATION, '') }
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Turns Cocina name values into display strings for a solr document
    class NameBuilder
      # @param [Array<Array>] cocina_contributors one list of name values per contributor
      # @return [Array<String>] names
      def self.build_all(cocina_contributors)
        cocina_contributors
          .flat_map { |contributor_names| flat_names_for(contributor_names) }
          .filter_map { |name| build_name(name) }
      end

      # Chooses the name of type 'display', then the name with status 'primary', then (with
      # the :first strategy) the first name. Failing all of those, the first name that
      # produces a value is used.
      # @param names a name value or a list of name values
      # @param [Symbol] strategy ":first" is the strategy for how to choose a name if primary and display name is not found
      # @return [String] name
      def self.build_primary_name(names, strategy: :first)
        flat_names = flat_names_for(Array(names))
        chosen = display_name_for(flat_names) || primary_name_for(flat_names)
        chosen ||= flat_names.first if strategy == :first
        return build_name(chosen) if chosen

        flat_names.lazy.filter_map { |candidate| build_name(candidate) }.first
      end

      # A grouped value yields the value of its member typed 'name'; a structured value is
      # assembled from its parts; anything else yields its plain value.
      # @return [String, nil]
      def self.build_name(name)
        if name.groupedValue.present?
          return name.groupedValue.find { |grouped_value| grouped_value.type == 'name' }&.value
        end
        return name.value unless name.structuredValue.present?

        structured_name(name)
      end

      def self.display_name_for(names)
        names.find { |name| name.type == 'display' }
      end

      def self.primary_name_for(names)
        names.find { |name| name.status == 'primary' }
      end

      # Replaces every name that has parallel values with those parallel values.
      def self.flat_names_for(names)
        names.flat_map { |name| name.parallelValue.presence || name }
      end

      # Joins the values of the structured parts of the given type.
      def self.joined_name_parts(name, type, joiner)
        typed_parts = name.structuredValue.select { |structured_value| structured_value.type == type }
        join_parts(typed_parts.map(&:value), joiner)
      end

      # Joins the non-blank parts.
      def self.join_parts(parts, joiner)
        parts.compact_blank.join(joiner)
      end

      # Assembles "name" (or "surname, forename") followed by the terms of address, the life
      # dates and the activity dates; blank pieces are left out.
      def self.structured_name(name)
        base = joined_name_parts(name, 'name', '. ').presence
        base ||= join_parts([joined_name_parts(name, 'surname', ' '), joined_name_parts(name, 'forename', ' ')], ', ')
        with_terms = join_parts([base, joined_name_parts(name, 'term of address', ', ')], ' ')
        with_life_dates = join_parts([with_terms, joined_name_parts(name, 'life dates', ', ')], ', ')
        join_parts([with_life_dates, joined_name_parts(name, 'activity dates', ', ')], ', ')
      end
      private_class_method :structured_name
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Helper methods for working with Orcid in Cocina
    class OrcidBuilder
      # NOTE: there is similar code in orcid_client which fetches
      # ORCIDs out of cocina. Consider consolidating at some point or keeping in sync.
      # see https://github.com/sul-dlss/orcid_client/blob/main/lib/sul_orcid_client/cocina_support.rb
      # and https://github.com/sul-dlss/dor_indexing_app/issues/1022

      # Host prepended to a bare ORCID iD when the identifier does not name its own source URI.
      DEFAULT_ORCID_URI = 'https://orcid.org/'
      private_constant :DEFAULT_ORCID_URI

      # @param [Array<Cocina::Models::Contributor>] contributors
      # @return [Array<String>] the list of contributor ORCIDs to index into solr
      def self.build(contributors)
        new(contributors).build
      end

      # @param [Array<Cocina::Models::Contributor>] contributors nil is treated as no contributors
      def initialize(contributors)
        @contributors = Array(contributors)
      end

      # @return [Array<String>] ORCID URIs of the cited contributors that have an ORCID identifier
      def build
        cited_contributors.filter_map { |contributor| orcidid(contributor) }
      end

      private

      attr_reader :contributors

      # @return [Array<Cocina::Models::Contributor>] the contributors who are listed as cited
      # Note that non-cited contributors are excluded.
      def cited_contributors
        contributors.select { |contributor| cited?(contributor) }
      end

      # @param [Cocina::Models::Contributor] contributor to check
      # @return [Boolean] true unless the contributor has a citation status of false
      def cited?(contributor)
        Array(contributor.note).none? { |note| note.type == 'citation status' && note.value == 'false' }
      end

      # @param [Cocina::Models::Contributor] contributor to check
      # @return [String, nil] orcid id including host if present
      def orcidid(contributor)
        identifier = Array(contributor.identifier).find { |id| id.type == 'ORCID' }
        return unless identifier

        # some records have the full ORCID URI in the data, just return it if so, e.g. druid:gf852zt8324
        return identifier.uri if identifier.uri

        value = identifier.value
        # an identifier with neither uri nor value has nothing to index
        return if value.blank?
        return value if value.start_with?(DEFAULT_ORCID_URI)

        # some records have just the ORCIDID without the URL prefix, add it if so, e.g. druid:tp865ng1792
        # the identifier may have no source at all, so the source is navigated nil-safely
        base_uri = identifier.source&.uri.presence || DEFAULT_ORCID_URI
        URI.join(base_uri, value).to_s
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the publisher name for a solr document
    class PublisherNameBuilder
      # @param events the events whose contributors are examined
      # @return [String, nil] the publisher name(s), or nil when no contributor has a publisher role
      def self.build(events)
        publisher_names_for(publisher_roles(events))
      end

      # @return [Array] the event contributors having a role whose value is 'publisher' (any letter case)
      def self.publisher_roles(events)
        events.flat_map(&:contributor).compact.select do |contributor|
          Array(contributor.role).any? { |role| role.value&.downcase == 'publisher' }
        end
      end

      # Returns the primary publisher if available.
      # Otherwise the names of all publishers are joined by ' : '.
      # @param publisher_roles the publisher contributors
      # @return [String, nil]
      def self.publisher_names_for(publisher_roles)
        return if publisher_roles.blank?

        primary_publisher = publisher_roles.find { |role| role.status == 'primary' }
        if primary_publisher
          contributor_name(primary_publisher).first
        else
          publisher_roles.flat_map { |contributor| contributor_name(contributor) }.join(' : ')
        end
      end

      # @return [Array<String>] one string per name of the contributor
      def self.contributor_name(contributor)
        contributor.name.flat_map { |name| flat_name(name) }
      end

      # With parallel values: the one with status 'primary', or else all of them joined.
      # Without parallel values: the name itself.
      def self.flat_name(value)
        parallel_values = value.parallelValue
        primary_name = parallel_values&.find { |role| role.status == 'primary' }
        return name_for(primary_name) if primary_name
        return parallel_name(parallel_values) if parallel_values.present?

        name_for(value)
      end

      # @return the structured values joined by '. ' when there are any, otherwise the plain value
      def self.name_for(name)
        if name.structuredValue.present?
          name.structuredValue.map(&:value).join('. ')
        else
          name.value
        end
      end

      # @return [String] the given names joined by ' : '
      def self.parallel_name(names)
        names.map { |single_name| name_for(single_name) }.join(' : ')
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the temporal fields for a solr document
    class TemporalBuilder
      # A trailing run of spaces, commas, backslashes, slashes and semicolons.
      # Anchored with \Z (end of the string, allowing one final newline) rather than $,
      # because $ also matches at the end of every line inside a multi-line value.
      TRAILING_PUNCTUATION = %r{[ ,\\/;]+\Z}
      private_constant :TRAILING_PUNCTUATION

      # @param [Array<Cocina::Models::Subject>] subjects
      # @return [Array<String>] the temporal values for Solr
      def self.build(subjects)
        new(subjects).build
      end

      # @param [Array<Cocina::Models::Subject>] subjects nil is treated as no subjects
      def initialize(subjects)
        @subjects = Array(subjects)
      end

      # @return [Array<String>] the unique temporal strings found in the subjects
      def build
        extract_temporal_from_subjects(subjects)
      end

      # Collects values from subjects typed 'time' and, for subjects without a type, from
      # their parallel values (recursively) or from their structured values typed 'time'.
      # @return [Array<String>] unique temporal strings
      # rubocop:disable Metrics/AbcSize
      def extract_temporal_from_subjects(local_subjects)
        (
          build_temporal_nodes(local_subjects.select { |node| node.type == 'time' }) +
          local_subjects.reject(&:type).flat_map do |subject|
            next extract_temporal_from_subjects(subject.parallelValue) if subject.parallelValue.present?

            build_temporal_nodes(Array(subject.structuredValue).select { |node| node.type == 'time' })
          end
        ).uniq
      end
      # rubocop:enable Metrics/AbcSize

      private

      attr_reader :subjects

      def build_temporal_nodes(nodes)
        Array(nodes).flat_map { |node| build_temporal(node) }
      end

      # Gathers the node's value, the values of its structured values and the temporal
      # strings of its parallel values.
      # @param [Cocina::Models::DescriptiveValue] node
      # @return [Array<String>] temporal strings with trailing punctuation removed
      def build_temporal(node)
        remove_trailing_punctuation(
          Array(node.value) +
          # a structured value may carry no value of its own; skip it instead of passing nil on
          Array(node.structuredValue).filter_map(&:value) +
          Array(node.parallelValue).flat_map { |child| build_temporal(child) }
        )
      end

      # @param [Array<String>] strings
      # @return [Array<String>] the strings with punctuation at the end of each string removed
      def remove_trailing_punctuation(strings)
        strings.map { |str| str.sub(TRAILING_PUNCTUATION, '') }
      end
    end
  end
end
|