dor_indexing 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +19 -25
- data/dor_indexing.gemspec +2 -2
- data/lib/dor_indexing/builders/document_builder.rb +1 -1
- data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +16 -15
- data/lib/dor_indexing/indexers/basic_indexer.rb +2 -1
- data/lib/dor_indexing/indexers/collection_title_indexer.rb +9 -6
- data/lib/dor_indexing/indexers/composite_indexer.rb +1 -0
- data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +3 -4
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +21 -19
- data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +6 -22
- data/lib/dor_indexing/indexers/{content_metadata_indexer.rb → object_files_indexer.rb} +3 -3
- data/lib/dor_indexing/indexers/role_metadata_indexer.rb +1 -1
- data/lib/dor_indexing/indexers/workflow_indexer.rb +3 -3
- data/lib/dor_indexing/indexers/{process_indexer.rb → workflow_process_indexer.rb} +4 -4
- data/lib/dor_indexing/indexers/workflows_indexer.rb +2 -3
- data/lib/dor_indexing/selectors/event_selector.rb +1 -1
- data/lib/dor_indexing/version.rb +1 -1
- data/lib/dor_indexing.rb +0 -1
- metadata +19 -19
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 108a49c639925b8e1ba4e892d0ed3e4d903f7e741311f480ce85dd1be88d17c0
|
|
4
|
+
data.tar.gz: 5483c10c3b7fa972097e6b8f8cc889a6abc8a67e82bc864253ea003b97e6e578
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d2f7554c2a2d7dc5ab0eebe685b309f88706cef647ee0aeeeded3da2dc252016b7b43b4328303cfcc0da63c6c45d89ae1f883b6e120f982bcabf750c2a90040a
|
|
7
|
+
data.tar.gz: 5fdf32e3282b995903bc70892cc0baa9c8db8139668c40fbc92f1aff0ee0e40957c3242f07bffcd5d9512bf748697024c407f1708109f4a2837f638ea49012a3
|
data/Gemfile.lock
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
dor_indexing (1.3.1)
|
|
4
|
+
dor_indexing (1.4.0)
|
|
5
|
+
activesupport
|
|
5
6
|
cocina-models (~> 0.95.0)
|
|
6
7
|
dor-workflow-client (~> 7.0)
|
|
7
8
|
honeybadger
|
|
8
9
|
marc-vocab (~> 0.3.0)
|
|
9
|
-
solrizer
|
|
10
10
|
stanford-mods
|
|
11
11
|
zeitwerk
|
|
12
12
|
|
|
@@ -49,7 +49,7 @@ GEM
|
|
|
49
49
|
connection_pool (2.4.1)
|
|
50
50
|
deprecation (1.1.0)
|
|
51
51
|
activesupport
|
|
52
|
-
diff-lcs (1.5.
|
|
52
|
+
diff-lcs (1.5.1)
|
|
53
53
|
docile (1.4.0)
|
|
54
54
|
dor-workflow-client (7.0.2)
|
|
55
55
|
activesupport (>= 3.2.1, < 8)
|
|
@@ -100,7 +100,7 @@ GEM
|
|
|
100
100
|
multi_json
|
|
101
101
|
language_server-protocol (3.17.0.3)
|
|
102
102
|
marc-vocab (0.3.0)
|
|
103
|
-
minitest (5.
|
|
103
|
+
minitest (5.22.2)
|
|
104
104
|
mods (3.0.4)
|
|
105
105
|
edtf (~> 3.0)
|
|
106
106
|
iso-639
|
|
@@ -110,9 +110,9 @@ GEM
|
|
|
110
110
|
mutex_m (0.2.0)
|
|
111
111
|
net-http (0.4.1)
|
|
112
112
|
uri
|
|
113
|
-
nokogiri (1.16.
|
|
113
|
+
nokogiri (1.16.2-x86_64-darwin)
|
|
114
114
|
racc (~> 1.4)
|
|
115
|
-
nokogiri (1.16.
|
|
115
|
+
nokogiri (1.16.2-x86_64-linux)
|
|
116
116
|
racc (~> 1.4)
|
|
117
117
|
nom-xml (1.2.0)
|
|
118
118
|
i18n
|
|
@@ -132,19 +132,19 @@ GEM
|
|
|
132
132
|
rake (13.1.0)
|
|
133
133
|
regexp_parser (2.9.0)
|
|
134
134
|
rexml (3.2.6)
|
|
135
|
-
rspec (3.
|
|
136
|
-
rspec-core (~> 3.
|
|
137
|
-
rspec-expectations (~> 3.
|
|
138
|
-
rspec-mocks (~> 3.
|
|
139
|
-
rspec-core (3.
|
|
140
|
-
rspec-support (~> 3.
|
|
141
|
-
rspec-expectations (3.
|
|
135
|
+
rspec (3.13.0)
|
|
136
|
+
rspec-core (~> 3.13.0)
|
|
137
|
+
rspec-expectations (~> 3.13.0)
|
|
138
|
+
rspec-mocks (~> 3.13.0)
|
|
139
|
+
rspec-core (3.13.0)
|
|
140
|
+
rspec-support (~> 3.13.0)
|
|
141
|
+
rspec-expectations (3.13.0)
|
|
142
142
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
143
|
-
rspec-support (~> 3.
|
|
144
|
-
rspec-mocks (3.
|
|
143
|
+
rspec-support (~> 3.13.0)
|
|
144
|
+
rspec-mocks (3.13.0)
|
|
145
145
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
146
|
-
rspec-support (~> 3.
|
|
147
|
-
rspec-support (3.
|
|
146
|
+
rspec-support (~> 3.13.0)
|
|
147
|
+
rspec-support (3.13.0)
|
|
148
148
|
rss (0.3.0)
|
|
149
149
|
rexml
|
|
150
150
|
rubocop (1.60.2)
|
|
@@ -179,14 +179,10 @@ GEM
|
|
|
179
179
|
simplecov_json_formatter (~> 0.1)
|
|
180
180
|
simplecov-html (0.12.3)
|
|
181
181
|
simplecov_json_formatter (0.1.4)
|
|
182
|
-
solrizer (4.1.0)
|
|
183
|
-
activesupport
|
|
184
|
-
nokogiri
|
|
185
|
-
xml-simple
|
|
186
182
|
stanford-mods (3.3.9)
|
|
187
183
|
activesupport
|
|
188
184
|
mods (~> 3.0, >= 3.0.4)
|
|
189
|
-
super_diff (0.
|
|
185
|
+
super_diff (0.11.0)
|
|
190
186
|
attr_extras (>= 6.2.4)
|
|
191
187
|
diff-lcs
|
|
192
188
|
patience_diff
|
|
@@ -195,9 +191,7 @@ GEM
|
|
|
195
191
|
concurrent-ruby (~> 1.0)
|
|
196
192
|
unicode-display_width (2.5.0)
|
|
197
193
|
uri (0.13.0)
|
|
198
|
-
|
|
199
|
-
rexml
|
|
200
|
-
zeitwerk (2.6.12)
|
|
194
|
+
zeitwerk (2.6.13)
|
|
201
195
|
|
|
202
196
|
PLATFORMS
|
|
203
197
|
x86_64-darwin-21
|
data/dor_indexing.gemspec
CHANGED
|
@@ -31,11 +31,11 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
32
32
|
spec.require_paths = ['lib']
|
|
33
33
|
|
|
34
|
+
spec.add_dependency 'activesupport' # for blank? method
|
|
34
35
|
spec.add_dependency 'cocina-models', '~> 0.95.0'
|
|
35
36
|
spec.add_dependency 'dor-workflow-client', '~> 7.0'
|
|
36
37
|
spec.add_dependency 'honeybadger'
|
|
37
|
-
spec.add_dependency 'marc-vocab', '~> 0.3.0'
|
|
38
|
-
spec.add_dependency 'solrizer'
|
|
38
|
+
spec.add_dependency 'marc-vocab', '~> 0.3.0' # for marcgac and marccountry
|
|
39
39
|
spec.add_dependency 'stanford-mods'
|
|
40
40
|
spec.add_dependency 'zeitwerk'
|
|
41
41
|
end
|
|
@@ -33,7 +33,7 @@ class DorIndexing
|
|
|
33
33
|
DorIndexing::Indexers::IdentityMetadataIndexer,
|
|
34
34
|
DorIndexing::Indexers::DescriptiveMetadataIndexer,
|
|
35
35
|
DorIndexing::Indexers::EmbargoMetadataIndexer,
|
|
36
|
-
DorIndexing::Indexers::ContentMetadataIndexer,
|
|
36
|
+
DorIndexing::Indexers::ObjectFilesIndexer,
|
|
37
37
|
DorIndexing::Indexers::IdentifiableIndexer,
|
|
38
38
|
DorIndexing::Indexers::CollectionTitleIndexer,
|
|
39
39
|
DorIndexing::Indexers::ReleasableIndexer,
|
|
@@ -2,14 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Index administrative tags for an object
|
|
6
|
-
# NOTE: Most of this code was extracted from the dor-services gem:
|
|
7
|
-
# https://github.com/sul-dlss/dor-services/blob/v9.0.0/lib/dor/datastreams/identity_metadata_ds.rb#L196-L218
|
|
5
|
+
# Index administrative tags for an object
|
|
8
6
|
class AdministrativeTagIndexer
|
|
9
7
|
TAG_PART_DELIMITER = ' : '
|
|
10
8
|
SPECIAL_TAG_TYPES_TO_INDEX = ['Project', 'Registered By'].freeze
|
|
11
9
|
|
|
12
|
-
attr_reader :id
|
|
10
|
+
attr_reader :id, :administrative_tags
|
|
13
11
|
|
|
14
12
|
def initialize(id:, administrative_tags:, **)
|
|
15
13
|
@id = id
|
|
@@ -30,18 +28,23 @@ class DorIndexing
|
|
|
30
28
|
tag_prefix, rest = tag.split(TAG_PART_DELIMITER, 2)
|
|
31
29
|
prefix = tag_prefix.downcase.strip.gsub(/\s/, '_')
|
|
32
30
|
|
|
33
|
-
solr_doc['tag_ssim'] << tag # for
|
|
34
|
-
solr_doc['tag_text_unstemmed_im'] << tag # for search
|
|
31
|
+
solr_doc['tag_ssim'] << tag # for Argo display and fq
|
|
32
|
+
solr_doc['tag_text_unstemmed_im'] << tag # for Argo search
|
|
35
33
|
|
|
36
|
-
|
|
34
|
+
# exploded tags are for hierarchical facets in Argo
|
|
35
|
+
solr_doc['exploded_nonproject_tag_ssim'] += explode_tag_hierarchy(tag) unless prefix == 'project'
|
|
37
36
|
|
|
38
|
-
next if
|
|
37
|
+
next if rest.blank?
|
|
38
|
+
|
|
39
|
+
# Index specific tag types that are used in Argo:
|
|
40
|
+
# project tags for search results and registered by tags for reports ...
|
|
41
|
+
next unless SPECIAL_TAG_TYPES_TO_INDEX.include?(tag_prefix)
|
|
39
42
|
|
|
40
43
|
(solr_doc["#{prefix}_tag_ssim"] ||= []) << rest.strip
|
|
41
44
|
|
|
42
45
|
if prefix == 'project'
|
|
43
46
|
solr_doc['exploded_project_tag_ssim'] ||= []
|
|
44
|
-
solr_doc['exploded_project_tag_ssim'] +=
|
|
47
|
+
solr_doc['exploded_project_tag_ssim'] += explode_tag_hierarchy(rest.strip)
|
|
45
48
|
end
|
|
46
49
|
end
|
|
47
50
|
solr_doc
|
|
@@ -52,12 +55,10 @@ class DorIndexing
|
|
|
52
55
|
|
|
53
56
|
private
|
|
54
57
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
#
|
|
58
|
-
|
|
59
|
-
# that contains ["A", "A : B", "A : B : C"].
|
|
60
|
-
def exploded_tags_from(tag)
|
|
58
|
+
# index each possible path, inclusive of the full tag.
|
|
59
|
+
# e.g., for "A : B : C", return ["A", "A : B", "A : B : C"].
|
|
60
|
+
# this is for the blacklight-hierarchy plugin for faceting on each level of the hierarchy
|
|
61
|
+
def explode_tag_hierarchy(tag)
|
|
61
62
|
tag_parts = tag.split(TAG_PART_DELIMITER)
|
|
62
63
|
|
|
63
64
|
1.upto(tag_parts.count).map do |i|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Basic indexing for
|
|
5
|
+
# Basic indexing for any object
|
|
6
6
|
class BasicIndexer
|
|
7
7
|
attr_reader :cocina, :workflow_client
|
|
8
8
|
|
|
@@ -11,6 +11,7 @@ class DorIndexing
|
|
|
11
11
|
@workflow_client = workflow_client
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
+
# @return [Hash] the partial solr document for basic data
|
|
14
15
|
# rubocop:disable Metrics/AbcSize
|
|
15
16
|
# rubocop:disable Metrics/MethodLength
|
|
16
17
|
def to_solr
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Indexes
|
|
5
|
+
# Indexes collection titles for an object
|
|
6
6
|
class CollectionTitleIndexer
|
|
7
7
|
attr_reader :cocina, :parent_collections
|
|
8
8
|
|
|
@@ -11,14 +11,17 @@ class DorIndexing
|
|
|
11
11
|
@parent_collections = parent_collections
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
# @return [Hash] the partial solr document for
|
|
14
|
+
# @return [Hash] the partial solr document for collection title concerns
|
|
15
15
|
def to_solr
|
|
16
16
|
{}.tap do |solr_doc|
|
|
17
|
-
parent_collections.each do |
|
|
18
|
-
coll_title = Cocina::Models::Builders::TitleBuilder.build(
|
|
17
|
+
parent_collections.each do |collection_obj|
|
|
18
|
+
coll_title = Cocina::Models::Builders::TitleBuilder.build(collection_obj.description.title)
|
|
19
|
+
next if coll_title.blank?
|
|
19
20
|
|
|
20
|
-
|
|
21
|
-
|
|
21
|
+
solr_doc['collection_title_ssim'] ||= []
|
|
22
|
+
solr_doc['collection_title_ssim'] << coll_title
|
|
23
|
+
solr_doc['collection_title_tesim'] ||= []
|
|
24
|
+
solr_doc['collection_title_tesim'] << coll_title
|
|
22
25
|
end
|
|
23
26
|
end
|
|
24
27
|
end
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
+
# Allows DorIndexing::Builders::DocumentBuilder class (which builds the solr doc for an object) to be much more readable
|
|
5
6
|
# Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb
|
|
6
7
|
class CompositeIndexer
|
|
7
8
|
attr_reader :indexers
|
|
@@ -4,8 +4,8 @@ require 'stanford-mods'
|
|
|
4
4
|
|
|
5
5
|
class DorIndexing
|
|
6
6
|
module Indexers
|
|
7
|
-
# rubocop:disable Metrics/ClassLength
|
|
8
7
|
# Indexes the descriptive metadata
|
|
8
|
+
# rubocop:disable Metrics/ClassLength
|
|
9
9
|
class DescriptiveMetadataIndexer
|
|
10
10
|
attr_reader :cocina, :stanford_mods_record
|
|
11
11
|
|
|
@@ -29,7 +29,8 @@ class DorIndexing
|
|
|
29
29
|
|
|
30
30
|
# contributor
|
|
31
31
|
'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
|
|
32
|
-
'sw_author_tesim' => author_primary, # used for author display in Argo
|
|
32
|
+
'sw_author_tesim' => author_primary, # DEPRECATED - used for author display in Argo
|
|
33
|
+
'author_display_ss' => author_primary, # used for author display in Argo
|
|
33
34
|
'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
|
|
34
35
|
'contributor_orcids_ssim' => orcids,
|
|
35
36
|
|
|
@@ -43,8 +44,6 @@ class DorIndexing
|
|
|
43
44
|
'originInfo_place_placeTerm_tesim' => event_place, # do we want this?
|
|
44
45
|
'sw_pub_date_facet_ssi' => stanford_mods_record.pub_year_int.to_s, # SW Date facet
|
|
45
46
|
|
|
46
|
-
'metadata_format_ssim' => 'mods', # no longer used? https://github.com/search?q=org%3Asul-dlss+metadata_format_ssim&type=code
|
|
47
|
-
|
|
48
47
|
# SW facets plus a friend facet
|
|
49
48
|
'sw_format_ssim' => sw_format, # SW Resource Type facet
|
|
50
49
|
'mods_typeOfResource_ssim' => resource_type, # MODS Resource Type facet
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Indexes the
|
|
5
|
+
# Indexes the druid, metadata sources, and the apo titles
|
|
6
6
|
class IdentifiableIndexer
|
|
7
7
|
attr_reader :cocina, :cocina_repository
|
|
8
8
|
|
|
@@ -13,23 +13,29 @@ class DorIndexing
|
|
|
13
13
|
@cocina_repository = cocina_repository
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
## Module-level
|
|
17
|
-
## used for caching
|
|
16
|
+
## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
|
|
17
|
+
## used for caching apo titles
|
|
18
18
|
@@apo_hash = {} # rubocop:disable Style/ClassVars
|
|
19
19
|
|
|
20
20
|
# @return [Hash] the partial solr document for identifiable concerns
|
|
21
|
-
def to_solr
|
|
21
|
+
def to_solr # rubocop:disable Metrics/AbcSize
|
|
22
22
|
{}.tap do |solr_doc|
|
|
23
23
|
add_apo_titles(solr_doc, cocina.administrative.hasAdminPolicy)
|
|
24
24
|
|
|
25
25
|
solr_doc['metadata_source_ssim'] = identity_metadata_sources unless cocina.is_a? Cocina::Models::AdminPolicyWithMetadata
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')]
|
|
26
|
+
solr_doc['druid_prefixed_ssi'] = cocina.externalIdentifier
|
|
27
|
+
solr_doc['druid_bare_ssi'] = cocina.externalIdentifier.delete_prefix('druid:')
|
|
28
|
+
solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')] # DEPRECATED
|
|
30
29
|
end
|
|
31
30
|
end
|
|
32
31
|
|
|
32
|
+
# Clears out the cache of apos. Used primarily in testing.
|
|
33
|
+
def self.reset_cache!
|
|
34
|
+
@@apo_hash = {} # rubocop:disable Style/ClassVars
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
private
|
|
38
|
+
|
|
33
39
|
# @return [Array<String>] calculated values for Solr index
|
|
34
40
|
def identity_metadata_sources
|
|
35
41
|
return ['DOR'] if !cocina.identification.respond_to?(:catalogLinks) || distinct_current_catalog_types.empty?
|
|
@@ -37,13 +43,6 @@ class DorIndexing
|
|
|
37
43
|
distinct_current_catalog_types.map(&:capitalize)
|
|
38
44
|
end
|
|
39
45
|
|
|
40
|
-
# Clears out the cache of items. Used primarily in testing.
|
|
41
|
-
def self.reset_cache!
|
|
42
|
-
@@apo_hash = {} # rubocop:disable Style/ClassVars
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
private
|
|
46
|
-
|
|
47
46
|
def distinct_current_catalog_types
|
|
48
47
|
# Filter out e.g. "previous symphony", "previous folio"
|
|
49
48
|
@distinct_current_catalog_types ||=
|
|
@@ -57,15 +56,18 @@ class DorIndexing
|
|
|
57
56
|
|
|
58
57
|
# @param [Hash] solr_doc
|
|
59
58
|
# @param [String] admin_policy_id
|
|
60
|
-
def add_apo_titles(solr_doc, admin_policy_id)
|
|
59
|
+
def add_apo_titles(solr_doc, admin_policy_id) # rubocop:disable Metrics/MethodLength
|
|
61
60
|
row = populate_cache(admin_policy_id)
|
|
62
61
|
title = row['related_obj_title']
|
|
63
62
|
if row['is_from_hydrus']
|
|
64
|
-
|
|
63
|
+
solr_doc['hydrus_apo_title_ssim'] ||= []
|
|
64
|
+
solr_doc['hydrus_apo_title_ssim'] << title
|
|
65
65
|
else
|
|
66
|
-
|
|
66
|
+
solr_doc['nonhydrus_apo_title_ssim'] ||= []
|
|
67
|
+
solr_doc['nonhydrus_apo_title_ssim'] << title
|
|
67
68
|
end
|
|
68
|
-
|
|
69
|
+
solr_doc['apo_title_ssim'] ||= []
|
|
70
|
+
solr_doc['apo_title_ssim'] << title
|
|
69
71
|
end
|
|
70
72
|
|
|
71
73
|
# populate cache if necessary
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Indexes the identity metadata
|
|
5
|
+
# Indexes the identity metadata from cocina.identification
|
|
6
6
|
class IdentityMetadataIndexer
|
|
7
7
|
attr_reader :cocina_object
|
|
8
8
|
|
|
@@ -11,24 +11,21 @@ class DorIndexing
|
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
# @return [Hash] the partial solr document for identityMetadata
|
|
14
|
-
# rubocop:disable Metrics/AbcSize
|
|
15
14
|
# rubocop:disable Metrics/MethodLength
|
|
16
15
|
def to_solr
|
|
17
|
-
return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.
|
|
16
|
+
return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.blank?
|
|
18
17
|
|
|
19
18
|
{
|
|
20
19
|
'objectType_ssim' => [object_type],
|
|
21
|
-
'
|
|
22
|
-
'
|
|
23
|
-
'identifier_tesim' => prefixed_identifiers,
|
|
20
|
+
'identifier_ssim' => prefixed_identifiers, # sourceid, barcode, folio_instance_hrid for display
|
|
21
|
+
'identifier_tesim' => prefixed_identifiers, # ditto ^^, for search, tokenized (can search prefix and value as separate tokens)
|
|
24
22
|
'barcode_id_ssim' => [barcode].compact,
|
|
25
|
-
'source_id_ssi' => source_id,
|
|
26
|
-
'source_id_text_nostem_i' => source_id,
|
|
23
|
+
'source_id_ssi' => source_id, # for search and display (reports, track_sheet)
|
|
24
|
+
'source_id_text_nostem_i' => source_id, # for search, tokenized per request from accessioneers
|
|
27
25
|
'folio_instance_hrid_ssim' => [folio_instance_hrid].compact,
|
|
28
26
|
'doi_ssim' => [doi].compact
|
|
29
27
|
}
|
|
30
28
|
end
|
|
31
|
-
# rubocop:enable Metrics/AbcSize
|
|
32
29
|
# rubocop:enable Metrics/MethodLength
|
|
33
30
|
|
|
34
31
|
private
|
|
@@ -37,10 +34,6 @@ class DorIndexing
|
|
|
37
34
|
@source_id ||= cocina_object.identification.sourceId
|
|
38
35
|
end
|
|
39
36
|
|
|
40
|
-
def source_id_value
|
|
41
|
-
@source_id_value ||= source_id ? source_id.split(':', 2)[1] : nil
|
|
42
|
-
end
|
|
43
|
-
|
|
44
37
|
def barcode
|
|
45
38
|
@barcode ||= object_type == 'collection' ? nil : cocina_object.identification.barcode
|
|
46
39
|
end
|
|
@@ -53,15 +46,6 @@ class DorIndexing
|
|
|
53
46
|
@folio_instance_hrid ||= Array(cocina_object.identification.catalogLinks).find { |link| link.catalog == 'folio' }&.catalogRecordId
|
|
54
47
|
end
|
|
55
48
|
|
|
56
|
-
def previous_folio_instance_hrids
|
|
57
|
-
@previous_folio_instance_hrids ||=
|
|
58
|
-
Array(cocina_object.identification.catalogLinks).filter_map { |link| link.catalogRecordId if link.catalog == 'previous folio' }
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def previous_ils_ids
|
|
62
|
-
@previous_ils_ids ||= previous_folio_instance_hrids
|
|
63
|
-
end
|
|
64
|
-
|
|
65
49
|
def object_type
|
|
66
50
|
case cocina_object
|
|
67
51
|
when Cocina::Models::AdminPolicyWithMetadata
|
|
@@ -2,15 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Indexes the
|
|
6
|
-
class ContentMetadataIndexer
|
|
5
|
+
# Indexes the information about files in the object
|
|
6
|
+
class ObjectFilesIndexer
|
|
7
7
|
attr_reader :cocina
|
|
8
8
|
|
|
9
9
|
def initialize(cocina:, **)
|
|
10
10
|
@cocina = cocina
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
# @return [Hash] the partial solr document for
|
|
13
|
+
# @return [Hash] the partial solr document for files in the object
|
|
14
14
|
def to_solr
|
|
15
15
|
{
|
|
16
16
|
'content_type_ssim' => type(cocina.type),
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Indexes the
|
|
5
|
+
# Indexes the object's state for each process in a single workflow
|
|
6
6
|
class WorkflowIndexer
|
|
7
7
|
# @param [Workflow::Response::Workflow] workflow the workflow document to index
|
|
8
8
|
def initialize(workflow:, workflow_client:)
|
|
@@ -10,14 +10,14 @@ class DorIndexing
|
|
|
10
10
|
@workflow_client = workflow_client
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
# @return [Hash] the partial solr document for the workflow
|
|
13
|
+
# @return [Hash] the partial solr document for all the workflow processes
|
|
14
14
|
def to_solr
|
|
15
15
|
WorkflowSolrDocument.new do |solr_doc|
|
|
16
16
|
solr_doc.name = workflow_name
|
|
17
17
|
|
|
18
18
|
errors = 0 # The error count is used by the Report class in Argo
|
|
19
19
|
processes.each do |process|
|
|
20
|
-
|
|
20
|
+
WorkflowProcessIndexer.new(solr_doc:, workflow_name:, process:).to_solr
|
|
21
21
|
errors += 1 if process.status == 'error'
|
|
22
22
|
end
|
|
23
23
|
solr_doc.status = [workflow_name, workflow_status, errors].join('|')
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
#
|
|
6
|
-
class ProcessIndexer
|
|
5
|
+
# Creates solr doc fields (and values) for a process for a workflow (which is for an object)
|
|
6
|
+
class WorkflowProcessIndexer
|
|
7
7
|
ERROR_OMISSION = '... (continued)'
|
|
8
8
|
private_constant :ERROR_OMISSION
|
|
9
9
|
|
|
@@ -13,14 +13,14 @@ class DorIndexing
|
|
|
13
13
|
|
|
14
14
|
# @param [WorkflowSolrDocument] solr_doc
|
|
15
15
|
# @param [String] workflow_name
|
|
16
|
-
# @param [Dor::Workflow::Response::Process] process
|
|
16
|
+
# @param [Dor::Workflow::Response::Process] process containing data for a process in a workflow for an object
|
|
17
17
|
def initialize(solr_doc:, workflow_name:, process:)
|
|
18
18
|
@solr_doc = solr_doc
|
|
19
19
|
@workflow_name = workflow_name
|
|
20
20
|
@process = process
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
-
# @return [Hash] the partial solr document for
|
|
23
|
+
# @return [Hash] the partial solr document for a single workflow process
|
|
24
24
|
# rubocop:disable Metrics/AbcSize
|
|
25
25
|
def to_solr
|
|
26
26
|
return unless status
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
class DorIndexing
|
|
4
4
|
module Indexers
|
|
5
|
-
# Indexes the
|
|
5
|
+
# Indexes the object's state in the most recent execution of every one of its workflows
|
|
6
6
|
class WorkflowsIndexer
|
|
7
7
|
attr_reader :id
|
|
8
8
|
|
|
@@ -11,7 +11,7 @@ class DorIndexing
|
|
|
11
11
|
@workflow_client = workflow_client
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
# @return [Hash] the partial solr document for
|
|
14
|
+
# @return [Hash] the partial solr document for workflows concerns
|
|
15
15
|
def to_solr
|
|
16
16
|
WorkflowSolrDocument.new do |combined_doc|
|
|
17
17
|
workflows.each do |wf|
|
|
@@ -30,7 +30,6 @@ class DorIndexing
|
|
|
30
30
|
all_workflows.workflows
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
-
# TODO: remove Dor::Workflow::Document
|
|
34
33
|
# @return [Workflow::Response::Workflows]
|
|
35
34
|
def all_workflows
|
|
36
35
|
@all_workflows ||= workflow_client.workflow_routes.all_workflows pid: id
|
data/lib/dor_indexing/version.rb
CHANGED
data/lib/dor_indexing.rb
CHANGED
metadata
CHANGED
|
@@ -1,15 +1,29 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: dor_indexing
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.1
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Justin Littman
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-
|
|
11
|
+
date: 2024-02-17 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: activesupport
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
13
27
|
- !ruby/object:Gem::Dependency
|
|
14
28
|
name: cocina-models
|
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -66,20 +80,6 @@ dependencies:
|
|
|
66
80
|
- - "~>"
|
|
67
81
|
- !ruby/object:Gem::Version
|
|
68
82
|
version: 0.3.0
|
|
69
|
-
- !ruby/object:Gem::Dependency
|
|
70
|
-
name: solrizer
|
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
|
72
|
-
requirements:
|
|
73
|
-
- - ">="
|
|
74
|
-
- !ruby/object:Gem::Version
|
|
75
|
-
version: '0'
|
|
76
|
-
type: :runtime
|
|
77
|
-
prerelease: false
|
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
-
requirements:
|
|
80
|
-
- - ">="
|
|
81
|
-
- !ruby/object:Gem::Version
|
|
82
|
-
version: '0'
|
|
83
83
|
- !ruby/object:Gem::Dependency
|
|
84
84
|
name: stanford-mods
|
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -140,17 +140,17 @@ files:
|
|
|
140
140
|
- lib/dor_indexing/indexers/basic_indexer.rb
|
|
141
141
|
- lib/dor_indexing/indexers/collection_title_indexer.rb
|
|
142
142
|
- lib/dor_indexing/indexers/composite_indexer.rb
|
|
143
|
-
- lib/dor_indexing/indexers/content_metadata_indexer.rb
|
|
144
143
|
- lib/dor_indexing/indexers/default_object_rights_indexer.rb
|
|
145
144
|
- lib/dor_indexing/indexers/descriptive_metadata_indexer.rb
|
|
146
145
|
- lib/dor_indexing/indexers/embargo_metadata_indexer.rb
|
|
147
146
|
- lib/dor_indexing/indexers/identifiable_indexer.rb
|
|
148
147
|
- lib/dor_indexing/indexers/identity_metadata_indexer.rb
|
|
149
|
-
- lib/dor_indexing/indexers/process_indexer.rb
|
|
148
|
+
- lib/dor_indexing/indexers/object_files_indexer.rb
|
|
150
149
|
- lib/dor_indexing/indexers/releasable_indexer.rb
|
|
151
150
|
- lib/dor_indexing/indexers/rights_metadata_indexer.rb
|
|
152
151
|
- lib/dor_indexing/indexers/role_metadata_indexer.rb
|
|
153
152
|
- lib/dor_indexing/indexers/workflow_indexer.rb
|
|
153
|
+
- lib/dor_indexing/indexers/workflow_process_indexer.rb
|
|
154
154
|
- lib/dor_indexing/indexers/workflows_indexer.rb
|
|
155
155
|
- lib/dor_indexing/marc_country.rb
|
|
156
156
|
- lib/dor_indexing/selectors/event_selector.rb
|
|
@@ -180,7 +180,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
180
180
|
- !ruby/object:Gem::Version
|
|
181
181
|
version: '0'
|
|
182
182
|
requirements: []
|
|
183
|
-
rubygems_version: 3.4.
|
|
183
|
+
rubygems_version: 3.4.13
|
|
184
184
|
signing_key:
|
|
185
185
|
specification_version: 4
|
|
186
186
|
summary: Library for creating Solr documents for SDR indexing.
|