dor_indexing 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +26 -20
- data/README.md +15 -2
- data/dor_indexing.gemspec +4 -3
- data/lib/dor_indexing/builders/document_builder.rb +6 -4
- data/lib/dor_indexing/indexers/descriptive_metadata_indexer.rb +0 -2
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +1 -2
- data/lib/dor_indexing/indexers/releasable_indexer.rb +16 -11
- data/lib/dor_indexing/version.rb +1 -1
- data/lib/dor_indexing.rb +17 -6
- metadata +19 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae485a8233c79356bffbc42bef8d8e9f3ef1afbf3db962a2ba43b4ce522d7cd7
|
4
|
+
data.tar.gz: 1a05728e58b24d5f94164881a92326c9232402e241ce589dab14231e448ce60a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 24cfdc3e6be2af97c092e893825af54f8e3e3e47d65b9b955ea90c4ebda86072dbd87f32e2b3778e5fbee005be00c6877efac710e4d87b178f2521cee42f8b62
|
7
|
+
data.tar.gz: 7a1fc488f95678830e07fb919371e3ef8f5125d8bfcbf40d62109c51f87203fb6c424bef4da4fd8b7a9ec77a87bfee3f92274325482ab58d602599d069bbafe7
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
dor_indexing (1.
|
4
|
+
dor_indexing (1.5.0)
|
5
5
|
activesupport
|
6
|
-
cocina-models (~> 0.95.
|
6
|
+
cocina-models (~> 0.95.1)
|
7
|
+
dor-services-client (~> 14.0)
|
7
8
|
dor-workflow-client (~> 7.0)
|
8
9
|
honeybadger
|
9
10
|
marc-vocab (~> 0.3.0)
|
@@ -13,7 +14,7 @@ PATH
|
|
13
14
|
GEM
|
14
15
|
remote: https://rubygems.org/
|
15
16
|
specs:
|
16
|
-
activesupport (7.1.3)
|
17
|
+
activesupport (7.1.3.2)
|
17
18
|
base64
|
18
19
|
bigdecimal
|
19
20
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
@@ -26,9 +27,9 @@ GEM
|
|
26
27
|
ast (2.4.2)
|
27
28
|
attr_extras (7.1.0)
|
28
29
|
base64 (0.2.0)
|
29
|
-
bigdecimal (3.1.
|
30
|
+
bigdecimal (3.1.7)
|
30
31
|
byebug (11.1.3)
|
31
|
-
cocina-models (0.95.
|
32
|
+
cocina-models (0.95.1)
|
32
33
|
activesupport
|
33
34
|
deprecation
|
34
35
|
dry-struct (~> 1.0)
|
@@ -51,6 +52,13 @@ GEM
|
|
51
52
|
activesupport
|
52
53
|
diff-lcs (1.5.1)
|
53
54
|
docile (1.4.0)
|
55
|
+
dor-services-client (14.4.0)
|
56
|
+
activesupport (>= 4.2, < 8)
|
57
|
+
cocina-models (~> 0.95.1)
|
58
|
+
deprecation
|
59
|
+
faraday (~> 2.0)
|
60
|
+
faraday-retry
|
61
|
+
zeitwerk (~> 2.1)
|
54
62
|
dor-workflow-client (7.0.2)
|
55
63
|
activesupport (>= 3.2.1, < 8)
|
56
64
|
deprecation (>= 0.99.0)
|
@@ -58,8 +66,7 @@ GEM
|
|
58
66
|
faraday-retry (~> 2.0)
|
59
67
|
nokogiri (~> 1.6)
|
60
68
|
zeitwerk (~> 2.1)
|
61
|
-
drb (2.2.
|
62
|
-
ruby2_keywords
|
69
|
+
drb (2.2.1)
|
63
70
|
dry-core (1.0.1)
|
64
71
|
concurrent-ruby (~> 1.0)
|
65
72
|
zeitwerk (~> 2.6)
|
@@ -90,8 +97,8 @@ GEM
|
|
90
97
|
net-http
|
91
98
|
faraday-retry (2.2.0)
|
92
99
|
faraday (~> 2.0)
|
93
|
-
honeybadger (5.
|
94
|
-
i18n (1.14.
|
100
|
+
honeybadger (5.8.0)
|
101
|
+
i18n (1.14.4)
|
95
102
|
concurrent-ruby (~> 1.0)
|
96
103
|
ice_nine (0.11.2)
|
97
104
|
iso-639 (0.3.6)
|
@@ -100,7 +107,7 @@ GEM
|
|
100
107
|
multi_json
|
101
108
|
language_server-protocol (3.17.0.3)
|
102
109
|
marc-vocab (0.3.0)
|
103
|
-
minitest (5.22.
|
110
|
+
minitest (5.22.3)
|
104
111
|
mods (3.0.4)
|
105
112
|
edtf (~> 3.0)
|
106
113
|
iso-639
|
@@ -110,9 +117,9 @@ GEM
|
|
110
117
|
mutex_m (0.2.0)
|
111
118
|
net-http (0.4.1)
|
112
119
|
uri
|
113
|
-
nokogiri (1.16.
|
120
|
+
nokogiri (1.16.3-x86_64-darwin)
|
114
121
|
racc (~> 1.4)
|
115
|
-
nokogiri (1.16.
|
122
|
+
nokogiri (1.16.3-x86_64-linux)
|
116
123
|
racc (~> 1.4)
|
117
124
|
nom-xml (1.2.0)
|
118
125
|
i18n
|
@@ -144,10 +151,10 @@ GEM
|
|
144
151
|
rspec-mocks (3.13.0)
|
145
152
|
diff-lcs (>= 1.2.0, < 2.0)
|
146
153
|
rspec-support (~> 3.13.0)
|
147
|
-
rspec-support (3.13.
|
154
|
+
rspec-support (3.13.1)
|
148
155
|
rss (0.3.0)
|
149
156
|
rexml
|
150
|
-
rubocop (1.
|
157
|
+
rubocop (1.62.1)
|
151
158
|
json (~> 2.3)
|
152
159
|
language_server-protocol (>= 3.17.0)
|
153
160
|
parallel (~> 1.10)
|
@@ -155,11 +162,11 @@ GEM
|
|
155
162
|
rainbow (>= 2.2.2, < 4.0)
|
156
163
|
regexp_parser (>= 1.8, < 3.0)
|
157
164
|
rexml (>= 3.2.5, < 4.0)
|
158
|
-
rubocop-ast (>= 1.
|
165
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
159
166
|
ruby-progressbar (~> 1.7)
|
160
167
|
unicode-display_width (>= 2.4.0, < 3.0)
|
161
|
-
rubocop-ast (1.
|
162
|
-
parser (>= 3.
|
168
|
+
rubocop-ast (1.31.2)
|
169
|
+
parser (>= 3.3.0.4)
|
163
170
|
rubocop-capybara (2.20.0)
|
164
171
|
rubocop (~> 1.41)
|
165
172
|
rubocop-factory_bot (2.25.1)
|
@@ -167,12 +174,11 @@ GEM
|
|
167
174
|
rubocop-performance (1.20.2)
|
168
175
|
rubocop (>= 1.48.1, < 2.0)
|
169
176
|
rubocop-ast (>= 1.30.0, < 2.0)
|
170
|
-
rubocop-rspec (2.
|
177
|
+
rubocop-rspec (2.27.1)
|
171
178
|
rubocop (~> 1.40)
|
172
179
|
rubocop-capybara (~> 2.17)
|
173
180
|
rubocop-factory_bot (~> 2.22)
|
174
181
|
ruby-progressbar (1.13.0)
|
175
|
-
ruby2_keywords (0.0.5)
|
176
182
|
simplecov (0.22.0)
|
177
183
|
docile (~> 1.1)
|
178
184
|
simplecov-html (~> 0.11)
|
@@ -186,7 +192,7 @@ GEM
|
|
186
192
|
attr_extras (>= 6.2.4)
|
187
193
|
diff-lcs
|
188
194
|
patience_diff
|
189
|
-
thor (1.3.
|
195
|
+
thor (1.3.1)
|
190
196
|
tzinfo (2.0.6)
|
191
197
|
concurrent-ruby (~> 1.0)
|
192
198
|
unicode-display_width (2.5.0)
|
data/README.md
CHANGED
@@ -26,12 +26,25 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
26
26
|
|
27
27
|
## Usage
|
28
28
|
|
29
|
-
DorIndexing that a configured Workflow Client and a Cocina Repository be injected.
|
29
|
+
DorIndexing requires that a configured Workflow Client, DOR Services Client, and a Cocina Repository be injected.
|
30
30
|
|
31
31
|
The Cocina Repository provides methods for finding Cocina objects and administrative tags. One possible implementation of a Cocina Repository would be to use DOR Services Client.
|
32
32
|
|
33
33
|
```ruby
|
34
34
|
require 'dor_indexing'
|
35
35
|
|
36
|
-
doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
|
36
|
+
doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
|
37
37
|
```
|
38
|
+
|
39
|
+
## Testing
|
40
|
+
|
41
|
+
### Integration Testing with Solr
|
42
|
+
|
43
|
+
We build and update the Solr index via dor-indexing-app and dor-services-app, both of which use this gem for indexing logic.
|
44
|
+
|
45
|
+
Argo is the Blacklight app that uses the Solr index extensively, and it already has the Docker containers to create new test objects in dor-services-app and index them (via dor-indexing-app to Solr). Because Argo is built on top of the Solr index, it is also a good place to check results.
|
46
|
+
|
47
|
+
To ensure our indexing behavior produces the desired results, it was easiest to put
|
48
|
+
the full stack integration tests in the argo repository -- they can be found in
|
49
|
+
https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
|
50
|
+
|
data/dor_indexing.gemspec
CHANGED
@@ -31,11 +31,12 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
32
32
|
spec.require_paths = ['lib']
|
33
33
|
|
34
|
-
spec.add_dependency 'activesupport'
|
35
|
-
spec.add_dependency 'cocina-models', '~> 0.95.
|
34
|
+
spec.add_dependency 'activesupport'
|
35
|
+
spec.add_dependency 'cocina-models', '~> 0.95.1'
|
36
|
+
spec.add_dependency 'dor-services-client', '~> 14.0'
|
36
37
|
spec.add_dependency 'dor-workflow-client', '~> 7.0'
|
37
38
|
spec.add_dependency 'honeybadger'
|
38
|
-
spec.add_dependency 'marc-vocab', '~> 0.3.0'
|
39
|
+
spec.add_dependency 'marc-vocab', '~> 0.3.0'
|
39
40
|
spec.add_dependency 'stanford-mods'
|
40
41
|
spec.add_dependency 'zeitwerk'
|
41
42
|
end
|
@@ -48,17 +48,18 @@ class DorIndexing
|
|
48
48
|
|
49
49
|
@@parent_collections = {} # rubocop:disable Style/ClassVars
|
50
50
|
|
51
|
-
def self.for(model:, workflow_client:, cocina_repository:)
|
52
|
-
new(model:, workflow_client:, cocina_repository:).for
|
51
|
+
def self.for(model:, workflow_client:, dor_services_client:, cocina_repository:)
|
52
|
+
new(model:, workflow_client:, dor_services_client:, cocina_repository:).for
|
53
53
|
end
|
54
54
|
|
55
55
|
def self.reset_parent_collections
|
56
56
|
@@parent_collections = {} # rubocop:disable Style/ClassVars
|
57
57
|
end
|
58
58
|
|
59
|
-
def initialize(model:, workflow_client:, cocina_repository:)
|
59
|
+
def initialize(model:, workflow_client:, dor_services_client:, cocina_repository:)
|
60
60
|
@model = model
|
61
61
|
@workflow_client = workflow_client
|
62
|
+
@dor_services_client = dor_services_client
|
62
63
|
@cocina_repository = cocina_repository
|
63
64
|
end
|
64
65
|
|
@@ -69,12 +70,13 @@ class DorIndexing
|
|
69
70
|
parent_collections:,
|
70
71
|
administrative_tags:,
|
71
72
|
workflow_client:,
|
73
|
+
dor_services_client:,
|
72
74
|
cocina_repository:)
|
73
75
|
end
|
74
76
|
|
75
77
|
private
|
76
78
|
|
77
|
-
attr_reader :model, :workflow_client, :cocina_repository
|
79
|
+
attr_reader :model, :workflow_client, :dor_services_client, :cocina_repository
|
78
80
|
|
79
81
|
def id
|
80
82
|
model.externalIdentifier
|
@@ -25,11 +25,9 @@ class DorIndexing
|
|
25
25
|
'full_title_tenim' => full_title, # for searching; 1 more field type is copyField in solr schema.xml
|
26
26
|
'additional_titles_tenim' => additional_titles, # for searching; 1 more field type is copyField in solr schema.xml
|
27
27
|
'display_title_ss' => display_title, # for display in Argo
|
28
|
-
'sw_display_title_tesim' => display_title, # for display in Argo DEPRECATED in favor of display_title_ss
|
29
28
|
|
30
29
|
# contributor
|
31
30
|
'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
|
32
|
-
'sw_author_tesim' => author_primary, # DEPRECATED - used for author display in Argo
|
33
31
|
'author_display_ss' => author_primary, # used for author display in Argo
|
34
32
|
'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
|
35
33
|
'contributor_orcids_ssim' => orcids,
|
@@ -18,14 +18,13 @@ class DorIndexing
|
|
18
18
|
@@apo_hash = {} # rubocop:disable Style/ClassVars
|
19
19
|
|
20
20
|
# @return [Hash] the partial solr document for identifiable concerns
|
21
|
-
def to_solr
|
21
|
+
def to_solr
|
22
22
|
{}.tap do |solr_doc|
|
23
23
|
add_apo_titles(solr_doc, cocina.administrative.hasAdminPolicy)
|
24
24
|
|
25
25
|
solr_doc['metadata_source_ssim'] = identity_metadata_sources unless cocina.is_a? Cocina::Models::AdminPolicyWithMetadata
|
26
26
|
solr_doc['druid_prefixed_ssi'] = cocina.externalIdentifier
|
27
27
|
solr_doc['druid_bare_ssi'] = cocina.externalIdentifier.delete_prefix('druid:')
|
28
|
-
solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')] # DEPRECATED
|
29
28
|
end
|
30
29
|
end
|
31
30
|
|
@@ -4,11 +4,12 @@ class DorIndexing
|
|
4
4
|
module Indexers
|
5
5
|
# Indexes the object's release tags
|
6
6
|
class ReleasableIndexer
|
7
|
-
attr_reader :cocina, :parent_collections
|
7
|
+
attr_reader :cocina, :parent_collections, :dor_services_client
|
8
8
|
|
9
|
-
def initialize(cocina:, parent_collections:, **)
|
9
|
+
def initialize(cocina:, parent_collections:, dor_services_client:, **)
|
10
10
|
@cocina = cocina
|
11
11
|
@parent_collections = parent_collections
|
12
|
+
@dor_services_client = dor_services_client
|
12
13
|
end
|
13
14
|
|
14
15
|
# @return [Hash] the partial solr document for releasable concerns
|
@@ -40,8 +41,11 @@ class DorIndexing
|
|
40
41
|
|
41
42
|
def tags_from_collection
|
42
43
|
parent_collections.each_with_object({}) do |collection, result|
|
43
|
-
|
44
|
-
|
44
|
+
collection_object_client = dor_services_client.object(collection.externalIdentifier)
|
45
|
+
collection_object_client
|
46
|
+
.release_tags
|
47
|
+
.list
|
48
|
+
.select { |tag| tag.what == 'self' }
|
45
49
|
.group_by(&:to).map do |project, releases_for_project|
|
46
50
|
result[project] = releases_for_project.max_by(&:date)
|
47
51
|
end
|
@@ -49,13 +53,14 @@ class DorIndexing
|
|
49
53
|
end
|
50
54
|
|
51
55
|
def tags_from_item
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
56
|
+
object_client = dor_services_client.object(cocina.externalIdentifier)
|
57
|
+
object_client
|
58
|
+
.release_tags
|
59
|
+
.list
|
60
|
+
.select { |tag| tag.what == 'self' }
|
61
|
+
.group_by(&:to).transform_values do |releases_for_project|
|
62
|
+
releases_for_project.max_by(&:date)
|
63
|
+
end
|
59
64
|
end
|
60
65
|
end
|
61
66
|
end
|
data/lib/dor_indexing/version.rb
CHANGED
data/lib/dor_indexing.rb
CHANGED
@@ -1,18 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'zeitwerk'
|
4
|
-
require 'stanford-mods'
|
5
|
-
require 'cocina/models'
|
6
|
-
require 'marc/vocab'
|
7
|
-
require 'honeybadger'
|
8
4
|
|
9
5
|
Zeitwerk::Loader.for_gem.setup
|
10
6
|
|
7
|
+
# Zeitwerk doesn't auto-load these dependencies
|
8
|
+
require 'active_support'
|
9
|
+
require 'active_support/core_ext/object/blank'
|
10
|
+
require 'active_support/core_ext/enumerable'
|
11
|
+
require 'active_support/core_ext/string'
|
12
|
+
require 'cocina/models'
|
13
|
+
require 'dor/services/client'
|
14
|
+
require 'honeybadger'
|
15
|
+
require 'marc/vocab'
|
16
|
+
|
11
17
|
# Builds solr documents for indexing.
|
12
18
|
class DorIndexing
|
13
19
|
# @return [Hash] the solr document
|
14
|
-
def self.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
|
20
|
+
def self.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
|
15
21
|
Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
|
16
|
-
DorIndexing::Builders::DocumentBuilder.for(
|
22
|
+
DorIndexing::Builders::DocumentBuilder.for(
|
23
|
+
model: cocina_with_metadata,
|
24
|
+
workflow_client:,
|
25
|
+
dor_services_client:,
|
26
|
+
cocina_repository:
|
27
|
+
).to_solr
|
17
28
|
end
|
18
29
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dor_indexing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Littman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-03-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -30,14 +30,28 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.95.
|
33
|
+
version: 0.95.1
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.95.
|
40
|
+
version: 0.95.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: dor-services-client
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '14.0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '14.0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: dor-workflow-client
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -180,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
180
194
|
- !ruby/object:Gem::Version
|
181
195
|
version: '0'
|
182
196
|
requirements: []
|
183
|
-
rubygems_version: 3.4.
|
197
|
+
rubygems_version: 3.4.18
|
184
198
|
signing_key:
|
185
199
|
specification_version: 4
|
186
200
|
summary: Library for creating Solr documents for SDR indexing.
|