dor_indexing 1.3.1 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +31 -39
- data/dor_indexing.gemspec +2 -2
- data/lib/dor_indexing/builders/document_builder.rb +1 -1
- data/lib/dor_indexing/indexers/administrative_tag_indexer.rb +16 -15
- data/lib/dor_indexing/indexers/basic_indexer.rb +2 -1
- data/lib/dor_indexing/indexers/collection_title_indexer.rb +9 -6
- data/lib/dor_indexing/indexers/composite_indexer.rb +1 -0
- data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +2 -5
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +19 -18
- data/lib/dor_indexing/indexers/identity_metadata_indexer.rb +6 -22
- data/lib/dor_indexing/indexers/{content_metadata_indexer.rb → object_files_indexer.rb} +3 -3
- data/lib/dor_indexing/indexers/role_metadata_indexer.rb +1 -1
- data/lib/dor_indexing/indexers/workflow_indexer.rb +3 -3
- data/lib/dor_indexing/indexers/{process_indexer.rb → workflow_process_indexer.rb} +4 -4
- data/lib/dor_indexing/indexers/workflows_indexer.rb +2 -3
- data/lib/dor_indexing/selectors/event_selector.rb +1 -1
- data/lib/dor_indexing/version.rb +1 -1
- data/lib/dor_indexing.rb +9 -5
- metadata +20 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c1e2280668e9085122b921fd927b75e9982fa5fda9067a9245899ef93db77ae
|
4
|
+
data.tar.gz: 3e8d89e77994cb62ffcc1c598e47716c3a9c9f8aa8e37f226159e8fef947082d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24e3bf95ad1c541d3b403b2233018ab1da89be80cdcd3fa9acb2793e2b57673c8364202a4a548f8fe1c9ff60444585b5cf2d020968cb5b50614ad80b118d7bae
|
7
|
+
data.tar.gz: 6c015737932b1f01819d89f5f91948ab43d09e1537d6bf41ba8df85778c9ed0f18dc85b408a7fdc6ceb48ce6cfc4f7127b74da0c415aac6819d7dd460cf8a4eb
|
data/Gemfile.lock
CHANGED
@@ -1,19 +1,19 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
dor_indexing (1.3.1)
|
5
|
-
|
4
|
+
dor_indexing (1.4.1)
|
5
|
+
activesupport
|
6
|
+
cocina-models (~> 0.95.1)
|
6
7
|
dor-workflow-client (~> 7.0)
|
7
8
|
honeybadger
|
8
9
|
marc-vocab (~> 0.3.0)
|
9
|
-
solrizer
|
10
10
|
stanford-mods
|
11
11
|
zeitwerk
|
12
12
|
|
13
13
|
GEM
|
14
14
|
remote: https://rubygems.org/
|
15
15
|
specs:
|
16
|
-
activesupport (7.1.3)
|
16
|
+
activesupport (7.1.3.2)
|
17
17
|
base64
|
18
18
|
bigdecimal
|
19
19
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
@@ -28,7 +28,7 @@ GEM
|
|
28
28
|
base64 (0.2.0)
|
29
29
|
bigdecimal (3.1.6)
|
30
30
|
byebug (11.1.3)
|
31
|
-
cocina-models (0.95.
|
31
|
+
cocina-models (0.95.1)
|
32
32
|
activesupport
|
33
33
|
deprecation
|
34
34
|
dry-struct (~> 1.0)
|
@@ -49,7 +49,7 @@ GEM
|
|
49
49
|
connection_pool (2.4.1)
|
50
50
|
deprecation (1.1.0)
|
51
51
|
activesupport
|
52
|
-
diff-lcs (1.5.
|
52
|
+
diff-lcs (1.5.1)
|
53
53
|
docile (1.4.0)
|
54
54
|
dor-workflow-client (7.0.2)
|
55
55
|
activesupport (>= 3.2.1, < 8)
|
@@ -58,8 +58,7 @@ GEM
|
|
58
58
|
faraday-retry (~> 2.0)
|
59
59
|
nokogiri (~> 1.6)
|
60
60
|
zeitwerk (~> 2.1)
|
61
|
-
drb (2.2.
|
62
|
-
ruby2_keywords
|
61
|
+
drb (2.2.1)
|
63
62
|
dry-core (1.0.1)
|
64
63
|
concurrent-ruby (~> 1.0)
|
65
64
|
zeitwerk (~> 2.6)
|
@@ -90,8 +89,8 @@ GEM
|
|
90
89
|
net-http
|
91
90
|
faraday-retry (2.2.0)
|
92
91
|
faraday (~> 2.0)
|
93
|
-
honeybadger (5.
|
94
|
-
i18n (1.14.
|
92
|
+
honeybadger (5.6.0)
|
93
|
+
i18n (1.14.4)
|
95
94
|
concurrent-ruby (~> 1.0)
|
96
95
|
ice_nine (0.11.2)
|
97
96
|
iso-639 (0.3.6)
|
@@ -100,7 +99,7 @@ GEM
|
|
100
99
|
multi_json
|
101
100
|
language_server-protocol (3.17.0.3)
|
102
101
|
marc-vocab (0.3.0)
|
103
|
-
minitest (5.
|
102
|
+
minitest (5.22.2)
|
104
103
|
mods (3.0.4)
|
105
104
|
edtf (~> 3.0)
|
106
105
|
iso-639
|
@@ -110,9 +109,9 @@ GEM
|
|
110
109
|
mutex_m (0.2.0)
|
111
110
|
net-http (0.4.1)
|
112
111
|
uri
|
113
|
-
nokogiri (1.16.
|
112
|
+
nokogiri (1.16.2-x86_64-darwin)
|
114
113
|
racc (~> 1.4)
|
115
|
-
nokogiri (1.16.
|
114
|
+
nokogiri (1.16.2-x86_64-linux)
|
116
115
|
racc (~> 1.4)
|
117
116
|
nom-xml (1.2.0)
|
118
117
|
i18n
|
@@ -132,22 +131,22 @@ GEM
|
|
132
131
|
rake (13.1.0)
|
133
132
|
regexp_parser (2.9.0)
|
134
133
|
rexml (3.2.6)
|
135
|
-
rspec (3.
|
136
|
-
rspec-core (~> 3.
|
137
|
-
rspec-expectations (~> 3.
|
138
|
-
rspec-mocks (~> 3.
|
139
|
-
rspec-core (3.
|
140
|
-
rspec-support (~> 3.
|
141
|
-
rspec-expectations (3.
|
134
|
+
rspec (3.13.0)
|
135
|
+
rspec-core (~> 3.13.0)
|
136
|
+
rspec-expectations (~> 3.13.0)
|
137
|
+
rspec-mocks (~> 3.13.0)
|
138
|
+
rspec-core (3.13.0)
|
139
|
+
rspec-support (~> 3.13.0)
|
140
|
+
rspec-expectations (3.13.0)
|
142
141
|
diff-lcs (>= 1.2.0, < 2.0)
|
143
|
-
rspec-support (~> 3.
|
144
|
-
rspec-mocks (3.
|
142
|
+
rspec-support (~> 3.13.0)
|
143
|
+
rspec-mocks (3.13.0)
|
145
144
|
diff-lcs (>= 1.2.0, < 2.0)
|
146
|
-
rspec-support (~> 3.
|
147
|
-
rspec-support (3.
|
145
|
+
rspec-support (~> 3.13.0)
|
146
|
+
rspec-support (3.13.1)
|
148
147
|
rss (0.3.0)
|
149
148
|
rexml
|
150
|
-
rubocop (1.
|
149
|
+
rubocop (1.62.0)
|
151
150
|
json (~> 2.3)
|
152
151
|
language_server-protocol (>= 3.17.0)
|
153
152
|
parallel (~> 1.10)
|
@@ -155,11 +154,11 @@ GEM
|
|
155
154
|
rainbow (>= 2.2.2, < 4.0)
|
156
155
|
regexp_parser (>= 1.8, < 3.0)
|
157
156
|
rexml (>= 3.2.5, < 4.0)
|
158
|
-
rubocop-ast (>= 1.
|
157
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
159
158
|
ruby-progressbar (~> 1.7)
|
160
159
|
unicode-display_width (>= 2.4.0, < 3.0)
|
161
|
-
rubocop-ast (1.
|
162
|
-
parser (>= 3.
|
160
|
+
rubocop-ast (1.31.2)
|
161
|
+
parser (>= 3.3.0.4)
|
163
162
|
rubocop-capybara (2.20.0)
|
164
163
|
rubocop (~> 1.41)
|
165
164
|
rubocop-factory_bot (2.25.1)
|
@@ -167,37 +166,30 @@ GEM
|
|
167
166
|
rubocop-performance (1.20.2)
|
168
167
|
rubocop (>= 1.48.1, < 2.0)
|
169
168
|
rubocop-ast (>= 1.30.0, < 2.0)
|
170
|
-
rubocop-rspec (2.
|
169
|
+
rubocop-rspec (2.27.1)
|
171
170
|
rubocop (~> 1.40)
|
172
171
|
rubocop-capybara (~> 2.17)
|
173
172
|
rubocop-factory_bot (~> 2.22)
|
174
173
|
ruby-progressbar (1.13.0)
|
175
|
-
ruby2_keywords (0.0.5)
|
176
174
|
simplecov (0.22.0)
|
177
175
|
docile (~> 1.1)
|
178
176
|
simplecov-html (~> 0.11)
|
179
177
|
simplecov_json_formatter (~> 0.1)
|
180
178
|
simplecov-html (0.12.3)
|
181
179
|
simplecov_json_formatter (0.1.4)
|
182
|
-
solrizer (4.1.0)
|
183
|
-
activesupport
|
184
|
-
nokogiri
|
185
|
-
xml-simple
|
186
180
|
stanford-mods (3.3.9)
|
187
181
|
activesupport
|
188
182
|
mods (~> 3.0, >= 3.0.4)
|
189
|
-
super_diff (0.
|
183
|
+
super_diff (0.11.0)
|
190
184
|
attr_extras (>= 6.2.4)
|
191
185
|
diff-lcs
|
192
186
|
patience_diff
|
193
|
-
thor (1.3.
|
187
|
+
thor (1.3.1)
|
194
188
|
tzinfo (2.0.6)
|
195
189
|
concurrent-ruby (~> 1.0)
|
196
190
|
unicode-display_width (2.5.0)
|
197
191
|
uri (0.13.0)
|
198
|
-
|
199
|
-
rexml
|
200
|
-
zeitwerk (2.6.12)
|
192
|
+
zeitwerk (2.6.13)
|
201
193
|
|
202
194
|
PLATFORMS
|
203
195
|
x86_64-darwin-21
|
data/dor_indexing.gemspec
CHANGED
@@ -31,11 +31,11 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
32
32
|
spec.require_paths = ['lib']
|
33
33
|
|
34
|
-
spec.add_dependency '
|
34
|
+
spec.add_dependency 'activesupport'
|
35
|
+
spec.add_dependency 'cocina-models', '~> 0.95.1'
|
35
36
|
spec.add_dependency 'dor-workflow-client', '~> 7.0'
|
36
37
|
spec.add_dependency 'honeybadger'
|
37
38
|
spec.add_dependency 'marc-vocab', '~> 0.3.0'
|
38
|
-
spec.add_dependency 'solrizer'
|
39
39
|
spec.add_dependency 'stanford-mods'
|
40
40
|
spec.add_dependency 'zeitwerk'
|
41
41
|
end
|
@@ -33,7 +33,7 @@ class DorIndexing
|
|
33
33
|
DorIndexing::Indexers::IdentityMetadataIndexer,
|
34
34
|
DorIndexing::Indexers::DescriptiveMetadataIndexer,
|
35
35
|
DorIndexing::Indexers::EmbargoMetadataIndexer,
|
36
|
-
DorIndexing::Indexers::ContentMetadataIndexer,
|
36
|
+
DorIndexing::Indexers::ObjectFilesIndexer,
|
37
37
|
DorIndexing::Indexers::IdentifiableIndexer,
|
38
38
|
DorIndexing::Indexers::CollectionTitleIndexer,
|
39
39
|
DorIndexing::Indexers::ReleasableIndexer,
|
@@ -2,14 +2,12 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Index administrative tags for an object
|
6
|
-
# NOTE: Most of this code was extracted from the dor-services gem:
|
7
|
-
# https://github.com/sul-dlss/dor-services/blob/v9.0.0/lib/dor/datastreams/identity_metadata_ds.rb#L196-L218
|
5
|
+
# Index administrative tags for an object
|
8
6
|
class AdministrativeTagIndexer
|
9
7
|
TAG_PART_DELIMITER = ' : '
|
10
8
|
SPECIAL_TAG_TYPES_TO_INDEX = ['Project', 'Registered By'].freeze
|
11
9
|
|
12
|
-
attr_reader :id
|
10
|
+
attr_reader :id, :administrative_tags
|
13
11
|
|
14
12
|
def initialize(id:, administrative_tags:, **)
|
15
13
|
@id = id
|
@@ -30,18 +28,23 @@ class DorIndexing
|
|
30
28
|
tag_prefix, rest = tag.split(TAG_PART_DELIMITER, 2)
|
31
29
|
prefix = tag_prefix.downcase.strip.gsub(/\s/, '_')
|
32
30
|
|
33
|
-
solr_doc['tag_ssim'] << tag # for
|
34
|
-
solr_doc['tag_text_unstemmed_im'] << tag # for search
|
31
|
+
solr_doc['tag_ssim'] << tag # for Argo display and fq
|
32
|
+
solr_doc['tag_text_unstemmed_im'] << tag # for Argo search
|
35
33
|
|
36
|
-
|
34
|
+
# exploded tags are for hierarchical facets in Argo
|
35
|
+
solr_doc['exploded_nonproject_tag_ssim'] += explode_tag_hierarchy(tag) unless prefix == 'project'
|
37
36
|
|
38
|
-
next if
|
37
|
+
next if rest.blank?
|
38
|
+
|
39
|
+
# Index specific tag types that are used in Argo:
|
40
|
+
# project tags for search results and registered by tags for reports ...
|
41
|
+
next unless SPECIAL_TAG_TYPES_TO_INDEX.include?(tag_prefix)
|
39
42
|
|
40
43
|
(solr_doc["#{prefix}_tag_ssim"] ||= []) << rest.strip
|
41
44
|
|
42
45
|
if prefix == 'project'
|
43
46
|
solr_doc['exploded_project_tag_ssim'] ||= []
|
44
|
-
solr_doc['exploded_project_tag_ssim'] +=
|
47
|
+
solr_doc['exploded_project_tag_ssim'] += explode_tag_hierarchy(rest.strip)
|
45
48
|
end
|
46
49
|
end
|
47
50
|
solr_doc
|
@@ -52,12 +55,10 @@ class DorIndexing
|
|
52
55
|
|
53
56
|
private
|
54
57
|
|
55
|
-
|
56
|
-
|
57
|
-
#
|
58
|
-
|
59
|
-
# that contains ["A", "A : B", "A : B : C"].
|
60
|
-
def exploded_tags_from(tag)
|
58
|
+
# index each possible path, inclusive of the full tag.
|
59
|
+
# e.g., for "A : B : C", return ["A", "A : B", "A : B : C"].
|
60
|
+
# this is for the blacklight-hierarchy plugin for faceting on each level of the hierarchy
|
61
|
+
def explode_tag_hierarchy(tag)
|
61
62
|
tag_parts = tag.split(TAG_PART_DELIMITER)
|
62
63
|
|
63
64
|
1.upto(tag_parts.count).map do |i|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Basic indexing for
|
5
|
+
# Basic indexing for any object
|
6
6
|
class BasicIndexer
|
7
7
|
attr_reader :cocina, :workflow_client
|
8
8
|
|
@@ -11,6 +11,7 @@ class DorIndexing
|
|
11
11
|
@workflow_client = workflow_client
|
12
12
|
end
|
13
13
|
|
14
|
+
# @return [Hash] the partial solr document for basic data
|
14
15
|
# rubocop:disable Metrics/AbcSize
|
15
16
|
# rubocop:disable Metrics/MethodLength
|
16
17
|
def to_solr
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Indexes
|
5
|
+
# Indexes collection titles for an object
|
6
6
|
class CollectionTitleIndexer
|
7
7
|
attr_reader :cocina, :parent_collections
|
8
8
|
|
@@ -11,14 +11,17 @@ class DorIndexing
|
|
11
11
|
@parent_collections = parent_collections
|
12
12
|
end
|
13
13
|
|
14
|
-
# @return [Hash] the partial solr document for
|
14
|
+
# @return [Hash] the partial solr document for collection title concerns
|
15
15
|
def to_solr
|
16
16
|
{}.tap do |solr_doc|
|
17
|
-
parent_collections.each do |
|
18
|
-
coll_title = Cocina::Models::Builders::TitleBuilder.build(
|
17
|
+
parent_collections.each do |collection_obj|
|
18
|
+
coll_title = Cocina::Models::Builders::TitleBuilder.build(collection_obj.description.title)
|
19
|
+
next if coll_title.blank?
|
19
20
|
|
20
|
-
|
21
|
-
|
21
|
+
solr_doc['collection_title_ssim'] ||= []
|
22
|
+
solr_doc['collection_title_ssim'] << coll_title
|
23
|
+
solr_doc['collection_title_tesim'] ||= []
|
24
|
+
solr_doc['collection_title_tesim'] << coll_title
|
22
25
|
end
|
23
26
|
end
|
24
27
|
end
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
+
# Allows DorIndexing::Builders::DocumentBuilder class (which builds the solr doc for an object) to be much more readable
|
5
6
|
# Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb
|
6
7
|
class CompositeIndexer
|
7
8
|
attr_reader :indexers
|
@@ -4,8 +4,8 @@ require 'stanford-mods'
|
|
4
4
|
|
5
5
|
class DorIndexing
|
6
6
|
module Indexers
|
7
|
-
# rubocop:disable Metrics/ClassLength
|
8
7
|
# Indexes the descriptive metadata
|
8
|
+
# rubocop:disable Metrics/ClassLength
|
9
9
|
class DescriptiveMetadataIndexer
|
10
10
|
attr_reader :cocina, :stanford_mods_record
|
11
11
|
|
@@ -25,11 +25,10 @@ class DorIndexing
|
|
25
25
|
'full_title_tenim' => full_title, # for searching; 1 more field type is copyField in solr schema.xml
|
26
26
|
'additional_titles_tenim' => additional_titles, # for searching; 1 more field type is copyField in solr schema.xml
|
27
27
|
'display_title_ss' => display_title, # for display in Argo
|
28
|
-
'sw_display_title_tesim' => display_title, # for display in Argo DEPRECATED in favor of display_title_ss
|
29
28
|
|
30
29
|
# contributor
|
31
30
|
'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
|
32
|
-
'
|
31
|
+
'author_display_ss' => author_primary, # used for author display in Argo
|
33
32
|
'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
|
34
33
|
'contributor_orcids_ssim' => orcids,
|
35
34
|
|
@@ -43,8 +42,6 @@ class DorIndexing
|
|
43
42
|
'originInfo_place_placeTerm_tesim' => event_place, # do we want this?
|
44
43
|
'sw_pub_date_facet_ssi' => stanford_mods_record.pub_year_int.to_s, # SW Date facet
|
45
44
|
|
46
|
-
'metadata_format_ssim' => 'mods', # no longer used? https://github.com/search?q=org%3Asul-dlss+metadata_format_ssim&type=code
|
47
|
-
|
48
45
|
# SW facets plus a friend facet
|
49
46
|
'sw_format_ssim' => sw_format, # SW Resource Type facet
|
50
47
|
'mods_typeOfResource_ssim' => resource_type, # MODS Resource Type facet
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Indexes the
|
5
|
+
# Indexes the druid, metadata sources, and the apo titles
|
6
6
|
class IdentifiableIndexer
|
7
7
|
attr_reader :cocina, :cocina_repository
|
8
8
|
|
@@ -13,8 +13,8 @@ class DorIndexing
|
|
13
13
|
@cocina_repository = cocina_repository
|
14
14
|
end
|
15
15
|
|
16
|
-
## Module-level
|
17
|
-
## used for caching
|
16
|
+
## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
|
17
|
+
## used for caching apo titles
|
18
18
|
@@apo_hash = {} # rubocop:disable Style/ClassVars
|
19
19
|
|
20
20
|
# @return [Hash] the partial solr document for identifiable concerns
|
@@ -23,13 +23,18 @@ class DorIndexing
|
|
23
23
|
add_apo_titles(solr_doc, cocina.administrative.hasAdminPolicy)
|
24
24
|
|
25
25
|
solr_doc['metadata_source_ssim'] = identity_metadata_sources unless cocina.is_a? Cocina::Models::AdminPolicyWithMetadata
|
26
|
-
|
27
|
-
|
28
|
-
# It is used to search based on druid.
|
29
|
-
solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')]
|
26
|
+
solr_doc['druid_prefixed_ssi'] = cocina.externalIdentifier
|
27
|
+
solr_doc['druid_bare_ssi'] = cocina.externalIdentifier.delete_prefix('druid:')
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
31
|
+
# Clears out the cache of apos. Used primarily in testing.
|
32
|
+
def self.reset_cache!
|
33
|
+
@@apo_hash = {} # rubocop:disable Style/ClassVars
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
33
38
|
# @return [Array<String>] calculated values for Solr index
|
34
39
|
def identity_metadata_sources
|
35
40
|
return ['DOR'] if !cocina.identification.respond_to?(:catalogLinks) || distinct_current_catalog_types.empty?
|
@@ -37,13 +42,6 @@ class DorIndexing
|
|
37
42
|
distinct_current_catalog_types.map(&:capitalize)
|
38
43
|
end
|
39
44
|
|
40
|
-
# Clears out the cache of items. Used primarily in testing.
|
41
|
-
def self.reset_cache!
|
42
|
-
@@apo_hash = {} # rubocop:disable Style/ClassVars
|
43
|
-
end
|
44
|
-
|
45
|
-
private
|
46
|
-
|
47
45
|
def distinct_current_catalog_types
|
48
46
|
# Filter out e.g. "previous symphony", "previous folio"
|
49
47
|
@distinct_current_catalog_types ||=
|
@@ -57,15 +55,18 @@ class DorIndexing
|
|
57
55
|
|
58
56
|
# @param [Hash] solr_doc
|
59
57
|
# @param [String] admin_policy_id
|
60
|
-
def add_apo_titles(solr_doc, admin_policy_id)
|
58
|
+
def add_apo_titles(solr_doc, admin_policy_id) # rubocop:disable Metrics/MethodLength
|
61
59
|
row = populate_cache(admin_policy_id)
|
62
60
|
title = row['related_obj_title']
|
63
61
|
if row['is_from_hydrus']
|
64
|
-
|
62
|
+
solr_doc['hydrus_apo_title_ssim'] ||= []
|
63
|
+
solr_doc['hydrus_apo_title_ssim'] << title
|
65
64
|
else
|
66
|
-
|
65
|
+
solr_doc['nonhydrus_apo_title_ssim'] ||= []
|
66
|
+
solr_doc['nonhydrus_apo_title_ssim'] << title
|
67
67
|
end
|
68
|
-
|
68
|
+
solr_doc['apo_title_ssim'] ||= []
|
69
|
+
solr_doc['apo_title_ssim'] << title
|
69
70
|
end
|
70
71
|
|
71
72
|
# populate cache if necessary
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Indexes the identity metadata
|
5
|
+
# Indexes the identity metadata from cocina.identification
|
6
6
|
class IdentityMetadataIndexer
|
7
7
|
attr_reader :cocina_object
|
8
8
|
|
@@ -11,24 +11,21 @@ class DorIndexing
|
|
11
11
|
end
|
12
12
|
|
13
13
|
# @return [Hash] the partial solr document for identityMetadata
|
14
|
-
# rubocop:disable Metrics/AbcSize
|
15
14
|
# rubocop:disable Metrics/MethodLength
|
16
15
|
def to_solr
|
17
|
-
return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.
|
16
|
+
return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.blank?
|
18
17
|
|
19
18
|
{
|
20
19
|
'objectType_ssim' => [object_type],
|
21
|
-
'
|
22
|
-
'
|
23
|
-
'identifier_tesim' => prefixed_identifiers,
|
20
|
+
'identifier_ssim' => prefixed_identifiers, # sourceid, barcode, folio_instance_hrid for display
|
21
|
+
'identifier_tesim' => prefixed_identifiers, # ditto ^^, for search, tokenized (can search prefix and value as separate tokens)
|
24
22
|
'barcode_id_ssim' => [barcode].compact,
|
25
|
-
'source_id_ssi' => source_id,
|
26
|
-
'source_id_text_nostem_i' => source_id,
|
23
|
+
'source_id_ssi' => source_id, # for search and display (reports, track_sheet)
|
24
|
+
'source_id_text_nostem_i' => source_id, # for search, tokenized per request from accessioneers
|
27
25
|
'folio_instance_hrid_ssim' => [folio_instance_hrid].compact,
|
28
26
|
'doi_ssim' => [doi].compact
|
29
27
|
}
|
30
28
|
end
|
31
|
-
# rubocop:enable Metrics/AbcSize
|
32
29
|
# rubocop:enable Metrics/MethodLength
|
33
30
|
|
34
31
|
private
|
@@ -37,10 +34,6 @@ class DorIndexing
|
|
37
34
|
@source_id ||= cocina_object.identification.sourceId
|
38
35
|
end
|
39
36
|
|
40
|
-
def source_id_value
|
41
|
-
@source_id_value ||= source_id ? source_id.split(':', 2)[1] : nil
|
42
|
-
end
|
43
|
-
|
44
37
|
def barcode
|
45
38
|
@barcode ||= object_type == 'collection' ? nil : cocina_object.identification.barcode
|
46
39
|
end
|
@@ -53,15 +46,6 @@ class DorIndexing
|
|
53
46
|
@folio_instance_hrid ||= Array(cocina_object.identification.catalogLinks).find { |link| link.catalog == 'folio' }&.catalogRecordId
|
54
47
|
end
|
55
48
|
|
56
|
-
def previous_folio_instance_hrids
|
57
|
-
@previous_folio_instance_hrids ||=
|
58
|
-
Array(cocina_object.identification.catalogLinks).filter_map { |link| link.catalogRecordId if link.catalog == 'previous folio' }
|
59
|
-
end
|
60
|
-
|
61
|
-
def previous_ils_ids
|
62
|
-
@previous_ils_ids ||= previous_folio_instance_hrids
|
63
|
-
end
|
64
|
-
|
65
49
|
def object_type
|
66
50
|
case cocina_object
|
67
51
|
when Cocina::Models::AdminPolicyWithMetadata
|
@@ -2,15 +2,15 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Indexes the
|
6
|
-
class ContentMetadataIndexer
|
5
|
+
# Indexes the information about files in the object
|
6
|
+
class ObjectFilesIndexer
|
7
7
|
attr_reader :cocina
|
8
8
|
|
9
9
|
def initialize(cocina:, **)
|
10
10
|
@cocina = cocina
|
11
11
|
end
|
12
12
|
|
13
|
-
# @return [Hash] the partial solr document for
|
13
|
+
# @return [Hash] the partial solr document for files in the object
|
14
14
|
def to_solr
|
15
15
|
{
|
16
16
|
'content_type_ssim' => type(cocina.type),
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Indexes the
|
5
|
+
# Indexes the object's state for each process in a single workflow
|
6
6
|
class WorkflowIndexer
|
7
7
|
# @param [Workflow::Response::Workflow] workflow the workflow document to index
|
8
8
|
def initialize(workflow:, workflow_client:)
|
@@ -10,14 +10,14 @@ class DorIndexing
|
|
10
10
|
@workflow_client = workflow_client
|
11
11
|
end
|
12
12
|
|
13
|
-
# @return [Hash] the partial solr document for the workflow
|
13
|
+
# @return [Hash] the partial solr document for all the workflow processes
|
14
14
|
def to_solr
|
15
15
|
WorkflowSolrDocument.new do |solr_doc|
|
16
16
|
solr_doc.name = workflow_name
|
17
17
|
|
18
18
|
errors = 0 # The error count is used by the Report class in Argo
|
19
19
|
processes.each do |process|
|
20
|
-
|
20
|
+
WorkflowProcessIndexer.new(solr_doc:, workflow_name:, process:).to_solr
|
21
21
|
errors += 1 if process.status == 'error'
|
22
22
|
end
|
23
23
|
solr_doc.status = [workflow_name, workflow_status, errors].join('|')
|
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
#
|
6
|
-
class ProcessIndexer
|
5
|
+
# Creates solr doc fields (and values) for a process for a workflow (which is for an object)
|
6
|
+
class WorkflowProcessIndexer
|
7
7
|
ERROR_OMISSION = '... (continued)'
|
8
8
|
private_constant :ERROR_OMISSION
|
9
9
|
|
@@ -13,14 +13,14 @@ class DorIndexing
|
|
13
13
|
|
14
14
|
# @param [WorkflowSolrDocument] solr_doc
|
15
15
|
# @param [String] workflow_name
|
16
|
-
# @param [Dor::Workflow::Response::Process] process
|
16
|
+
# @param [Dor::Workflow::Response::Process] process containing data for a process in a workflow for an object
|
17
17
|
def initialize(solr_doc:, workflow_name:, process:)
|
18
18
|
@solr_doc = solr_doc
|
19
19
|
@workflow_name = workflow_name
|
20
20
|
@process = process
|
21
21
|
end
|
22
22
|
|
23
|
-
# @return [Hash] the partial solr document for
|
23
|
+
# @return [Hash] the partial solr document for a single workflow process
|
24
24
|
# rubocop:disable Metrics/AbcSize
|
25
25
|
def to_solr
|
26
26
|
return unless status
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
class DorIndexing
|
4
4
|
module Indexers
|
5
|
-
# Indexes the
|
5
|
+
# Indexes the object's state in the most recent execution of every one of its workflows
|
6
6
|
class WorkflowsIndexer
|
7
7
|
attr_reader :id
|
8
8
|
|
@@ -11,7 +11,7 @@ class DorIndexing
|
|
11
11
|
@workflow_client = workflow_client
|
12
12
|
end
|
13
13
|
|
14
|
-
# @return [Hash] the partial solr document for
|
14
|
+
# @return [Hash] the partial solr document for workflows concerns
|
15
15
|
def to_solr
|
16
16
|
WorkflowSolrDocument.new do |combined_doc|
|
17
17
|
workflows.each do |wf|
|
@@ -30,7 +30,6 @@ class DorIndexing
|
|
30
30
|
all_workflows.workflows
|
31
31
|
end
|
32
32
|
|
33
|
-
# TODO: remove Dor::Workflow::Document
|
34
33
|
# @return [Workflow::Response::Workflows]
|
35
34
|
def all_workflows
|
36
35
|
@all_workflows ||= workflow_client.workflow_routes.all_workflows pid: id
|
data/lib/dor_indexing/version.rb
CHANGED
data/lib/dor_indexing.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'zeitwerk'
|
4
|
-
require 'stanford-mods'
|
5
|
-
require 'cocina/models'
|
6
|
-
require 'solrizer'
|
7
|
-
require 'marc/vocab'
|
8
|
-
require 'honeybadger'
|
9
4
|
|
10
5
|
Zeitwerk::Loader.for_gem.setup
|
11
6
|
|
7
|
+
# Zeitwerk doesn't auto-load these dependencies
|
8
|
+
require 'active_support'
|
9
|
+
require 'active_support/core_ext/object/blank'
|
10
|
+
require 'active_support/core_ext/enumerable'
|
11
|
+
require 'active_support/core_ext/string'
|
12
|
+
require 'cocina/models'
|
13
|
+
require 'honeybadger'
|
14
|
+
require 'marc/vocab'
|
15
|
+
|
12
16
|
# Builds solr documents for indexing.
|
13
17
|
class DorIndexing
|
14
18
|
# @return [Hash] the solr document
|
metadata
CHANGED
@@ -1,29 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dor_indexing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.1
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Littman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-03-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: cocina-models
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
31
|
- - "~>"
|
18
32
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.95.
|
33
|
+
version: 0.95.1
|
20
34
|
type: :runtime
|
21
35
|
prerelease: false
|
22
36
|
version_requirements: !ruby/object:Gem::Requirement
|
23
37
|
requirements:
|
24
38
|
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.95.
|
40
|
+
version: 0.95.1
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: dor-workflow-client
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,20 +80,6 @@ dependencies:
|
|
66
80
|
- - "~>"
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: 0.3.0
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: solrizer
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: stanford-mods
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -140,17 +140,17 @@ files:
|
|
140
140
|
- lib/dor_indexing/indexers/basic_indexer.rb
|
141
141
|
- lib/dor_indexing/indexers/collection_title_indexer.rb
|
142
142
|
- lib/dor_indexing/indexers/composite_indexer.rb
|
143
|
-
- lib/dor_indexing/indexers/content_metadata_indexer.rb
|
144
143
|
- lib/dor_indexing/indexers/default_object_rights_indexer.rb
|
145
144
|
- lib/dor_indexing/indexers/descriptive_metadata_indexer.rb
|
146
145
|
- lib/dor_indexing/indexers/embargo_metadata_indexer.rb
|
147
146
|
- lib/dor_indexing/indexers/identifiable_indexer.rb
|
148
147
|
- lib/dor_indexing/indexers/identity_metadata_indexer.rb
|
149
|
-
- lib/dor_indexing/indexers/process_indexer.rb
|
148
|
+
- lib/dor_indexing/indexers/object_files_indexer.rb
|
150
149
|
- lib/dor_indexing/indexers/releasable_indexer.rb
|
151
150
|
- lib/dor_indexing/indexers/rights_metadata_indexer.rb
|
152
151
|
- lib/dor_indexing/indexers/role_metadata_indexer.rb
|
153
152
|
- lib/dor_indexing/indexers/workflow_indexer.rb
|
153
|
+
- lib/dor_indexing/indexers/workflow_process_indexer.rb
|
154
154
|
- lib/dor_indexing/indexers/workflows_indexer.rb
|
155
155
|
- lib/dor_indexing/marc_country.rb
|
156
156
|
- lib/dor_indexing/selectors/event_selector.rb
|