dor_indexing 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +355 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +218 -0
- data/README.md +33 -0
- data/Rakefile +11 -0
- data/dor_indexing.gemspec +40 -0
- data/lib/dor_indexing/builders/all_search_text_builder.rb +58 -0
- data/lib/dor_indexing/builders/author_builder.rb +31 -0
- data/lib/dor_indexing/builders/collection_rights_description_builder.rb +29 -0
- data/lib/dor_indexing/builders/document_builder.rb +106 -0
- data/lib/dor_indexing/builders/event_date_builder.rb +71 -0
- data/lib/dor_indexing/builders/event_place_builder.rb +73 -0
- data/lib/dor_indexing/builders/geographic_builder.rb +82 -0
- data/lib/dor_indexing/builders/name_builder.rb +70 -0
- data/lib/dor_indexing/builders/orcid_builder.rb +62 -0
- data/lib/dor_indexing/builders/publisher_name_builder.rb +53 -0
- data/lib/dor_indexing/builders/temporal_builder.rb +56 -0
- data/lib/dor_indexing/builders/topic_builder.rb +96 -0
- data/lib/dor_indexing/cocina_repository.rb +24 -0
- data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/collection_title_indexer.rb +27 -0
- data/lib/dor_indexing/indexers/composite_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/content_metadata_indexer.rb +69 -0
- data/lib/dor_indexing/indexers/data_indexer.rb +66 -0
- data/lib/dor_indexing/indexers/default_object_rights_indexer.rb +36 -0
- data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +226 -0
- data/lib/dor_indexing/indexers/embargo_metadata_indexer.rb +32 -0
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +92 -0
- data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +85 -0
- data/lib/dor_indexing/indexers/process_indexer.rb +63 -0
- data/lib/dor_indexing/indexers/releasable_indexer.rb +62 -0
- data/lib/dor_indexing/indexers/rights_metadata_indexer.rb +59 -0
- data/lib/dor_indexing/indexers/role_metadata_indexer.rb +31 -0
- data/lib/dor_indexing/indexers/workflow_indexer.rb +51 -0
- data/lib/dor_indexing/indexers/workflows_indexer.rb +40 -0
- data/lib/dor_indexing/marc_country.rb +359 -0
- data/lib/dor_indexing/selectors/event_selector.rb +112 -0
- data/lib/dor_indexing/selectors/pub_year_selector.rb +119 -0
- data/lib/dor_indexing/version.rb +5 -0
- data/lib/dor_indexing/workflow_fields.rb +63 -0
- data/lib/dor_indexing/workflow_solr_document.rb +93 -0
- data/lib/dor_indexing.rb +19 -0
- metadata +173 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Gathers the searchable text of a Cocina description into one string.
    class AllSearchTextBuilder
      # Convenience wrapper around +new(...).build+.
      #
      # @param cocina_description the description object to walk
      # @return [String] all collected values joined by single spaces
      def self.build(cocina_description)
        new(cocina_description).build
      end

      # @param cocina_description the description object to walk
      def initialize(cocina_description)
        @cocina_description = cocina_description
      end

      # Resets the accumulator, walks the description depth-first and joins what was found.
      #
      # @return [String] all collected values joined by single spaces
      def build
        @text = []
        recurse(cocina_description)
        text.join(' ')
      end

      private

      attr_reader :cocina_description, :text

      # Attributes whose content is collected as text.
      # this originally had displayLabel, but Arcadia recommends against it
      TEXT_KEYS = %i[value].freeze

      # Attributes that hold nested nodes to descend into, visited in this order.
      RECURSE_KEYS = %i[
        structuredValue parallelValue groupedValue
        title contributor event form language note relatedResource subject
        name location
      ].freeze

      # Collects the text attributes of +desc+ first, then descends into each nested attribute.
      # +try+ is used so nodes that lack an attribute are simply skipped.
      def recurse(desc)
        TEXT_KEYS.each do |text_key|
          found = desc.try(text_key)
          next unless found.present?

          text << found
        end

        RECURSE_KEYS.each do |child_key|
          children = Array(desc.try(child_key))
          children.each { |child| recurse(child) }
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the author fields for a solr document
    class AuthorBuilder
      # @param cocina_contributors the contributors to draw names from; nil or a single
      #   value is coerced with Array()
      def initialize(cocina_contributors)
        @cocina_contributors = Array(cocina_contributors)
      end

      # Builds the name of the contributor whose status is 'primary', falling back to the
      # first contributor when none is marked primary.
      #
      # @return the result of NameBuilder.build_primary_name, or nil when there are no contributors
      def build_primary
        contributor = primary_cocina_contributor || cocina_contributors.first
        return unless contributor

        # The guard clause above already guarantees a contributor here.
        NameBuilder.build_primary_name(contributor.name)
      end

      # Builds names for every contributor; contributors whose +name+ is nil are skipped.
      #
      # @return the result of NameBuilder.build_all
      def build_all
        NameBuilder.build_all(cocina_contributors.filter_map(&:name))
      end

      private

      attr_reader :cocina_contributors

      # @return the first contributor whose status is 'primary', or nil
      def primary_cocina_contributor
        cocina_contributors.find { |cocina_contributor| cocina_contributor.status == 'primary' }
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Derives the rights description of a collection from its access view.
    class CollectionRightsDescriptionBuilder
      # Convenience wrapper around +new(...).build+.
      #
      # @param cocina the collection object; must respond to +access.view+
      # @return [String] 'world' or 'dark'
      def self.build(cocina)
        new(cocina).build
      end

      # @param cocina the collection object; must respond to +access.view+
      def initialize(cocina)
        @cocina = cocina
      end

      # @return [String] 'world' when the access view is 'world', 'dark' for anything else
      def build
        view = cocina.access.view
        return 'world' if view == 'world'

        'dark'
      end

      private

      attr_reader :cocina
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Selects the composite indexer matching a Cocina model's type and instantiates it
    # with the data the individual indexers need.
    class DocumentBuilder
      ADMIN_POLICY_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RoleMetadataIndexer,
        DorIndexing::Indexers::DefaultObjectRightsIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      COLLECTION_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RightsMetadataIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::ReleasableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      ITEM_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
        DorIndexing::Indexers::AdministrativeTagIndexer,
        DorIndexing::Indexers::DataIndexer,
        DorIndexing::Indexers::RightsMetadataIndexer,
        DorIndexing::Indexers::IdentityMetadataIndexer,
        DorIndexing::Indexers::DescriptiveMetadataIndexer,
        DorIndexing::Indexers::EmbargoMetadataIndexer,
        DorIndexing::Indexers::ContentMetadataIndexer,
        DorIndexing::Indexers::IdentifiableIndexer,
        DorIndexing::Indexers::CollectionTitleIndexer,
        DorIndexing::Indexers::ReleasableIndexer,
        DorIndexing::Indexers::WorkflowsIndexer
      )

      # Object type => indexer. Types not listed here fall back to ITEM_INDEXER.
      INDEXERS = {
        Cocina::Models::ObjectType.agreement => ITEM_INDEXER, # Agreement uses same indexer as item
        Cocina::Models::ObjectType.admin_policy => ADMIN_POLICY_INDEXER,
        Cocina::Models::ObjectType.collection => COLLECTION_INDEXER
      }.freeze

      # Cache of repository lookups keyed by the identifiers found in isMemberOf.
      # Being a class variable, it is shared by every builder instance.
      @@parent_collections = {} # rubocop:disable Style/ClassVars

      # Convenience wrapper around +new(...).for+.
      #
      # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
      # @param workflow_client handed through to the indexer
      # @param cocina_repository used to look up parent collections and administrative tags
      def self.for(model:, workflow_client:, cocina_repository:)
        new(model: model, workflow_client: workflow_client, cocina_repository: cocina_repository).for
      end

      # Empties the shared parent-collection cache.
      def self.reset_parent_collections
        @@parent_collections = {} # rubocop:disable Style/ClassVars
      end

      # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
      # @param workflow_client handed through to the indexer
      # @param cocina_repository used to look up parent collections and administrative tags
      def initialize(model:, workflow_client:, cocina_repository:)
        @model = model
        @workflow_client = workflow_client
        @cocina_repository = cocina_repository
      end

      # Instantiates the indexer registered for the model's type.
      #
      # @return whatever the selected composite indexer's +new+ returns
      def for
        indexer_for_type(model.type).new(
          id: id,
          cocina: model,
          parent_collections: parent_collections,
          administrative_tags: administrative_tags,
          workflow_client: workflow_client,
          cocina_repository: cocina_repository
        )
      end

      private

      attr_reader :model, :workflow_client, :cocina_repository

      def id
        model.externalIdentifier
      end

      def indexer_for_type(type)
        INDEXERS.fetch(type) { ITEM_INDEXER }
      end

      # @return [Array] the parent collections that could be found; always empty unless model.dro?
      def parent_collections
        return [] unless model.dro?

        member_of = Array(model.structural&.isMemberOf)
        member_of.filter_map { |rel_druid| cached_parent_collection(rel_druid) }
      end

      # Looks one parent collection up through the shared cache.
      #
      # @return the collection, or nil (after notifying Honeybadger) when the repository raises
      def cached_parent_collection(rel_druid)
        @@parent_collections[rel_druid] ||= cocina_repository.find(rel_druid)
      rescue DorIndexing::CocinaRepository::RepositoryError
        # This may happen if the referenced Collection does not exist (bad data)
        Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
        nil
      end

      # @return the administrative tags for this object, or [] when the repository raises
      def administrative_tags
        cocina_repository.administrative_tags(id)
      rescue DorIndexing::CocinaRepository::RepositoryError
        []
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the event date fields for a solr document
    class EventDateBuilder
      # Picks one date value from the event and its parallel events. Preference order:
      # a date of the requested type that EventSelector reports as primary, then any date
      # of the requested type, then a date with no type at all.
      #
      # @param [Cocina::Models::Event] event single selected event (may be nil)
      # @param date_type the type to look for; compared with == against each date's type
      # @return [String, nil] the date value for Solr
      def self.build(event, date_type)
        event_dates = Array(event&.date) + Array(event&.parallelEvent&.map(&:date))

        matching_date_value_with_status_primary(event_dates, date_type) ||
          matching_date_value(event_dates, date_type) ||
          untyped_date_value(event_dates)
      end

      # Only the first qualifying date is considered; if it carries no value the result is nil.
      #
      # @return [String, nil] date.value from a date of type of date_type and of status primary
      def self.matching_date_value_with_status_primary(event_dates, date_type)
        date = event_dates.flatten.compact.find do |candidate|
          candidate.type == date_type &&
            DorIndexing::Selectors::EventSelector.date_status_primary(candidate)
        end
        date_value(date)
      end
      private_class_method :matching_date_value_with_status_primary

      # Only the first qualifying date is considered; if it carries no value the result is nil.
      #
      # @return [String, nil] date.value from a date of type of date_type
      def self.matching_date_value(event_dates, date_type)
        date_value(event_dates.flatten.compact.find { |candidate| candidate.type == date_type })
      end
      private_class_method :matching_date_value

      # Only the first qualifying date is considered; if it carries no value the result is nil.
      #
      # @return [String, nil] date.value from a date without a type
      def self.untyped_date_value(event_dates)
        date_value(event_dates.flatten.compact.find { |candidate| !candidate.type.present? })
      end
      private_class_method :untyped_date_value

      # Reads the date's own value, else the first present value among its structured
      # values, else the first present value among its parallel values.
      #
      # @param [Cocina::Models::DescriptiveValue, nil] date a date object from an event
      # @return [String, nil] value from date object
      def self.date_value(date)
        return date.value if date&.value.present?

        # Structured values are listed first so they keep precedence over parallel values.
        nested_values = Array(date&.structuredValue) + Array(date&.parallelValue)
        nested_values.find { |nested| nested&.value.present? }&.value
      end
      private_class_method :date_value
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Chooses the place text to index for an event.
    class EventPlaceBuilder
      # Convenience wrapper around +new(...).build+.
      #
      # @param [Cocina::Models::Event] event
      # @return [String, nil] the place value for Solr, nil when there is no event or no usable location
      def self.build(event)
        new(event).build
      end

      # @param [Cocina::Models::Event] event
      def initialize(event)
        @event = event
      end

      # Prefers the location whose status is 'primary'; otherwise considers all locations.
      #
      # @return [String, nil]
      def build
        return unless event

        primary_location || location_from(flat_locations)
      end

      private

      attr_reader :event

      def primary_location
        primary = flat_locations.find { |location| location.status == 'primary' }
        location_from([primary].compact)
      end

      # Tries, in order: plain (non-marccountry) values, a marccountry text value,
      # and finally a marccountry code or URI resolved through DorIndexing::MarcCountry.
      def location_from(locations)
        return if locations.empty?

        value_locations_for(locations) ||
          marccountry_text_for(locations) ||
          marccountry_code_for(locations)
      end

      # Locations come from the parallel events when there are any, otherwise from the event itself.
      def event_locations
        return Array(event.location) unless event.parallelEvent.present?

        event.parallelEvent.flat_map { |parallel_event| Array(parallel_event.location) }
      end

      # Each location is replaced by its parallel values, else its structured values, else kept as is.
      def flat_locations
        @flat_locations ||= event_locations.flat_map do |location|
          location.parallelValue.presence || location.structuredValue.presence || location
        end
      end

      def marccountry_text_for(locations)
        with_text = locations.find { |location| marc_country?(location) && location.value }
        with_text&.value
      end

      def marccountry_code_for(locations)
        with_code = locations.find { |location| marc_country?(location) && location.code }
        from_code = DorIndexing::MarcCountry.from_code(with_code&.code)
        return from_code if from_code

        with_uri = locations.find do |location|
          location.uri&.start_with?(DorIndexing::MarcCountry::MARC_COUNTRY_URI)
        end
        DorIndexing::MarcCountry.from_uri(with_uri&.uri)
      end

      def value_locations_for(locations)
        plain_values = locations.filter_map do |location|
          location.value if location.value && !marc_country?(location)
        end
        plain_values.join(' : ').presence
      end

      def marc_country?(location)
        source = location.source
        source&.code == DorIndexing::MarcCountry::MARC_COUNTRY_CODE ||
          source&.uri == DorIndexing::MarcCountry::MARC_COUNTRY_URI
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the geographic fields for a solr document
    class GeographicBuilder
      # Convenience wrapper around +new(...).build+.
      #
      # @param [Array<Cocina::Models::Subject>] subjects
      # @return [Array<String>] the geographic values for Solr
      def self.build(subjects)
        new(subjects).build
      end

      # @param [Array<Cocina::Models::Subject>] subjects nil or a single value is coerced with Array()
      def initialize(subjects)
        @subjects = Array(subjects)
      end

      # @return [Array<String>] the geographic values for Solr
      def build
        extract_place_from_subjects(subjects)
      end

      # Collects places from subjects typed 'place', plus places nested inside untyped
      # subjects (through their parallel values, or else their structured values).
      #
      # @param local_subjects [Array] the subject nodes to inspect
      # @return [Array<String>] de-duplicated place strings
      # rubocop:disable Metrics/AbcSize
      def extract_place_from_subjects(local_subjects)
        (
          build_place_nodes(local_subjects.select { |node| node.type == 'place' }) +
          local_subjects.reject(&:type).flat_map do |subject|
            next extract_place_from_subjects(subject.parallelValue) if subject.parallelValue.present?

            build_place_nodes(Array(subject.structuredValue).select { |node| node.type == 'place' })
          end
        ).uniq
      end
      # rubocop:enable Metrics/AbcSize

      private

      attr_reader :subjects

      def build_place_nodes(nodes)
        Array(nodes).flat_map { |node| build_place(node) }
      end

      # Gathers every string a place node offers: its own value, the label for its code,
      # its structured values joined into one string, and the same for each parallel value.
      #
      # @param [Cocina::Models::DescriptiveValue] node
      # @return [Array<String>]
      def build_place(node)
        remove_trailing_punctuation(
          Array(node.value) +
          place_from_code(node) +
          build_hierarchical_subject(node) +
          Array(node.parallelValue).flat_map { |child| build_place(child) }
        )
      end

      # Resolves a 'marcgac' or 'marccountry' code through Marc::Vocab.
      #
      # @return [Array<String>] one label, or [] when there is no code/source, the source
      #   is not one of those two, or the code is unknown (KeyError)
      # rubocop:disable Metrics/MethodLength
      def place_from_code(node)
        return [] unless node.code && node.source

        code = node.code.gsub(/[^\w-]/, '') # remove any punctuation (except dash).
        case node.source.code
        when 'marcgac'
          [Marc::Vocab::GeographicArea.fetch(code)]
        when 'marccountry'
          [Marc::Vocab::Country.fetch(code)]
        else
          []
        end
      rescue KeyError
        # Per Arcadia, halt HB notification until after data clean-up.
        # Honeybadger.notify("[DATA ERROR] Unable to find \"#{code}\" in authority \"#{node.source.code}\"")
        []
      end
      # rubocop:enable Metrics/MethodLength

      # @return [Array<String>] the structured values joined with spaces, or [] when there are none
      def build_hierarchical_subject(node)
        Array(node.structuredValue&.map(&:value).presence&.join(' '))
      end

      # Strips spaces, commas, backslashes, slashes and semicolons from the end of each string.
      # Anchored with \Z (end of string, or just before a final newline) rather than $,
      # which would match at the first line break of a multi-line value.
      def remove_trailing_punctuation(strings)
        strings.map { |str| str.sub(%r{[ ,\\/;]+\Z}, '') }
      end
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds display strings from Cocina name values for a solr document
    class NameBuilder
      # Builds one string per name; names for which nothing can be built are dropped.
      #
      # @param cocina_contributors [Array<Array>] one list of name values per contributor
      # @return [Array<String>] names
      def self.build_all(cocina_contributors)
        flat_names = cocina_contributors.flat_map { |cocina_contributor| flat_names_for(cocina_contributor) }
        flat_names.filter_map { |name| build_name(name) }
      end

      # Chooses a single name: the one typed 'display', else the one with status 'primary',
      # else (with the :first strategy) the first name. When none of those applies, the
      # first name that builds to a non-nil string is used.
      #
      # @param names a name value or a list of them (coerced with Array())
      # @param [Symbol] strategy ":first" is the strategy for how to choose a name if primary and display name is not found
      # @return [String, nil] name
      def self.build_primary_name(names, strategy: :first)
        flat_names = flat_names_for(Array(names))
        name = display_name_for(flat_names) || primary_name_for(flat_names)
        name ||= flat_names.first if strategy == :first
        return build_name(name) if name

        flat_names.filter_map { |one| build_name(one) }.first
      end

      # Builds the string for one name value: the grouped value typed 'name' when grouped
      # values exist, an assembled string when structured values exist, else the plain value.
      #
      # @return [String, nil]
      # rubocop:disable Metrics/MethodLength
      # rubocop:disable Metrics/AbcSize
      def self.build_name(name)
        if name.groupedValue.present?
          name.groupedValue.find { |grouped_value| grouped_value.type == 'name' }&.value
        elsif name.structuredValue.present?
          name_part = joined_name_parts(name, 'name', '. ').presence
          surname = joined_name_parts(name, 'surname', ' ')
          forename = joined_name_parts(name, 'forename', ' ')
          terms_of_address = joined_name_parts(name, 'term of address', ', ')
          life_dates = joined_name_parts(name, 'life dates', ', ')
          activity_dates = joined_name_parts(name, 'activity dates', ', ')
          # 'name' parts win over "surname, forename"; the remaining parts are appended in order.
          joined_name = name_part || join_parts([surname, forename], ', ')
          joined_name = join_parts([joined_name, terms_of_address], ' ')
          joined_name = join_parts([joined_name, life_dates], ', ')
          join_parts([joined_name, activity_dates], ', ')
        else
          name.value
        end
      end
      # rubocop:enable Metrics/MethodLength
      # rubocop:enable Metrics/AbcSize

      # @return the first name whose type is 'display', or nil
      def self.display_name_for(names)
        names.find { |name| name.type == 'display' }
      end

      # @return the first name whose status is 'primary', or nil
      def self.primary_name_for(names)
        names.find { |name| name.status == 'primary' }
      end

      # Replaces each name that has parallel values with those values.
      def self.flat_names_for(names)
        names.flat_map { |name| name.parallelValue.presence || name }
      end

      # Joins the values of the structured parts having the given type.
      def self.joined_name_parts(name, type, joiner)
        join_parts(name.structuredValue.select { |structured_value| structured_value.type == type }.map(&:value), joiner)
      end

      # Joins the non-blank parts with the joiner.
      def self.join_parts(parts, joiner)
        parts.compact_blank.join(joiner)
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Helper methods for working with Orcid in Cocina
    class OrcidBuilder
      # NOTE: there is similar code in orcid_client which fetches
      # ORCIDs out of cocina. Consider consolidating at some point or keeping in sync.
      # see https://github.com/sul-dlss/orcid_client/blob/main/lib/sul_orcid_client/cocina_support.rb
      # and https://github.com/sul-dlss/dor_indexing_app/issues/1022

      # @param [Array<Cocina::Models::Contributor>] contributors
      # @return [Array<String>] the list of contributor ORCIDs to index into solr
      def self.build(contributors)
        new(contributors).build
      end

      # @param [Array<Cocina::Models::Contributor>] contributors nil or a single value is coerced with Array()
      def initialize(contributors)
        @contributors = Array(contributors)
      end

      # @return [Array<String>] one ORCID URI per cited contributor that has an ORCID identifier
      def build
        cited_contributors.filter_map { |contributor| orcidid(contributor) }
      end

      private

      attr_reader :contributors

      # @return [Array<Cocina::Models::Contributor>] the contributors who are listed as cited
      # Note that non-cited contributors are excluded.
      def cited_contributors
        contributors.select { |contributor| cited?(contributor) }
      end

      # @param [Cocina::Models::Contributor] contributor to check
      # @return [Boolean] true unless the contributor has a citation status of false
      def cited?(contributor)
        Array(contributor.note).none? { |note| note.type == 'citation status' && note.value == 'false' }
      end

      # @param [Cocina::Models::Contributor] contributor to check
      # @return [String, nil] orcid id including host if present
      def orcidid(contributor)
        identifier = Array(contributor.identifier).find { |id| id.type == 'ORCID' }
        return unless identifier

        # some records have the full ORCID URI in the data, just return it if so, e.g. druid:gf852zt8324
        return identifier.uri if identifier.uri

        value = identifier.value
        # Nothing to build from; also avoids emitting a bare "https://orcid.org/" for an empty value.
        return if value.blank?
        return value if value.start_with?('https://orcid.org/')

        # some records have just the ORCIDID without the URL prefix, add it if so, e.g. druid:tp865ng1792
        # The source itself may be absent, hence the safe navigation.
        source_uri = identifier.source&.uri
        base_uri = source_uri.blank? ? 'https://orcid.org/' : source_uri
        URI.join(base_uri, value).to_s
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the publisher name for a solr document
    class PublisherNameBuilder
      # @param events the events whose contributors are searched for publishers; nil is treated as no events
      # @return [String, nil] the publisher name(s), or nil when no contributor has a publisher role
      def self.build(events)
        roles = publisher_roles(events)

        publisher_names_for(roles)
      end

      # Despite the name, this returns contributors: those with at least one role whose
      # value is 'publisher' (case-insensitive).
      #
      # @param events the events to search; coerced with Array() like the sibling builders do
      # @return [Array] the publishing contributors, [] when there are none
      def self.publisher_roles(events)
        contributors = Array(events).flat_map(&:contributor).compact
        return [] if contributors.blank?

        contributors.select { |contributor| Array(contributor.role).any? { |role| role.value&.downcase == 'publisher' } }
      end

      # Returns the primary publisher if available.
      # Otherwise the names of all publishers, joined with ' : '.
      #
      # @param publisher_roles [Array] publishing contributors, as returned by .publisher_roles
      # @return [String, nil] nil when the list is blank
      def self.publisher_names_for(publisher_roles)
        return if publisher_roles.blank?

        primary_publisher = publisher_roles.find { |role| role.status == 'primary' }
        # Only the first name of the primary publisher is used.
        return contributor_name(primary_publisher).first if primary_publisher

        publisher_roles.flat_map { |contributor| contributor_name(contributor) }.join(' : ')
      end

      # @return [Array] one name string per name value of the contributor
      def self.contributor_name(contributor)
        contributor.name.flat_map { |name| flat_name(name) }
      end

      # Resolves one name value: its primary parallel value when there is one, all of its
      # parallel values joined with ' : ' when there are any, otherwise the value itself.
      def self.flat_name(value)
        parallel_values = value.parallelValue
        primary_name = parallel_values&.find { |parallel| parallel.status == 'primary' }
        return name_for(primary_name) if primary_name
        return parallel_name(parallel_values) if parallel_values.present?

        name_for(value)
      end

      # @return the structured values joined with '. ' when present, else the plain value
      def self.name_for(name)
        name.structuredValue.present? ? name.structuredValue.map(&:value).join('. ') : name.value
      end

      # @return [String] the names of all given values joined with ' : '
      def self.parallel_name(names)
        names.map { |single_name| name_for(single_name) }.join(' : ')
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
  module Builders
    # Builds the temporal fields for a solr document
    class TemporalBuilder
      # Convenience wrapper around +new(...).build+.
      #
      # @param [Array<Cocina::Models::Subject>] subjects
      # @return [Array<String>] the temporal values for Solr
      def self.build(subjects)
        new(subjects).build
      end

      # @param [Array<Cocina::Models::Subject>] subjects nil or a single value is coerced with Array()
      def initialize(subjects)
        @subjects = Array(subjects)
      end

      # @return [Array<String>] the temporal values for Solr
      def build
        extract_temporal_from_subjects(subjects)
      end

      # Collects values from subjects typed 'time', plus 'time' nodes nested inside untyped
      # subjects (through their parallel values, or else their structured values).
      #
      # @param local_subjects [Array] the subject nodes to inspect
      # @return [Array<String>] de-duplicated temporal strings
      # rubocop:disable Metrics/AbcSize
      def extract_temporal_from_subjects(local_subjects)
        (
          build_temporal_nodes(local_subjects.select { |node| node.type == 'time' }) +
          local_subjects.reject(&:type).flat_map do |subject|
            next extract_temporal_from_subjects(subject.parallelValue) if subject.parallelValue.present?

            build_temporal_nodes(Array(subject.structuredValue).select { |node| node.type == 'time' })
          end
        ).uniq
      end
      # rubocop:enable Metrics/AbcSize

      private

      attr_reader :subjects

      def build_temporal_nodes(nodes)
        Array(nodes).flat_map { |node| build_temporal(node) }
      end

      # Gathers the node's own value, the values of its structured parts, and the same
      # for each of its parallel values.
      #
      # @param [Cocina::Models::DescriptiveValue] node
      # @return [Array<String>]
      def build_temporal(node)
        remove_trailing_punctuation(
          Array(node.value) +
          # Structured parts without a value of their own are skipped; a nil here would
          # otherwise blow up in remove_trailing_punctuation.
          Array(node.structuredValue).filter_map(&:value) +
          Array(node.parallelValue).flat_map { |child| build_temporal(child) }
        )
      end

      # Strips spaces, commas, backslashes, slashes and semicolons from the end of each string.
      # Anchored with \Z (end of string, or just before a final newline) rather than $,
      # which would match at the first line break of a multi-line value.
      def remove_trailing_punctuation(strings)
        strings.map { |str| str.sub(%r{[ ,\\/;]+\Z}, '') }
      end
    end
  end
end
|