dor_indexing 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 108a49c639925b8e1ba4e892d0ed3e4d903f7e741311f480ce85dd1be88d17c0
4
- data.tar.gz: 5483c10c3b7fa972097e6b8f8cc889a6abc8a67e82bc864253ea003b97e6e578
3
+ metadata.gz: ae485a8233c79356bffbc42bef8d8e9f3ef1afbf3db962a2ba43b4ce522d7cd7
4
+ data.tar.gz: 1a05728e58b24d5f94164881a92326c9232402e241ce589dab14231e448ce60a
5
5
  SHA512:
6
- metadata.gz: d2f7554c2a2d7dc5ab0eebe685b309f88706cef647ee0aeeeded3da2dc252016b7b43b4328303cfcc0da63c6c45d89ae1f883b6e120f982bcabf750c2a90040a
7
- data.tar.gz: 5fdf32e3282b995903bc70892cc0baa9c8db8139668c40fbc92f1aff0ee0e40957c3242f07bffcd5d9512bf748697024c407f1708109f4a2837f638ea49012a3
6
+ metadata.gz: 24cfdc3e6be2af97c092e893825af54f8e3e3e47d65b9b955ea90c4ebda86072dbd87f32e2b3778e5fbee005be00c6877efac710e4d87b178f2521cee42f8b62
7
+ data.tar.gz: 7a1fc488f95678830e07fb919371e3ef8f5125d8bfcbf40d62109c51f87203fb6c424bef4da4fd8b7a9ec77a87bfee3f92274325482ab58d602599d069bbafe7
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dor_indexing (1.4.0)
4
+ dor_indexing (1.5.0)
5
5
  activesupport
6
- cocina-models (~> 0.95.0)
6
+ cocina-models (~> 0.95.1)
7
+ dor-services-client (~> 14.0)
7
8
  dor-workflow-client (~> 7.0)
8
9
  honeybadger
9
10
  marc-vocab (~> 0.3.0)
@@ -13,7 +14,7 @@ PATH
13
14
  GEM
14
15
  remote: https://rubygems.org/
15
16
  specs:
16
- activesupport (7.1.3)
17
+ activesupport (7.1.3.2)
17
18
  base64
18
19
  bigdecimal
19
20
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -26,9 +27,9 @@ GEM
26
27
  ast (2.4.2)
27
28
  attr_extras (7.1.0)
28
29
  base64 (0.2.0)
29
- bigdecimal (3.1.6)
30
+ bigdecimal (3.1.7)
30
31
  byebug (11.1.3)
31
- cocina-models (0.95.0)
32
+ cocina-models (0.95.1)
32
33
  activesupport
33
34
  deprecation
34
35
  dry-struct (~> 1.0)
@@ -51,6 +52,13 @@ GEM
51
52
  activesupport
52
53
  diff-lcs (1.5.1)
53
54
  docile (1.4.0)
55
+ dor-services-client (14.4.0)
56
+ activesupport (>= 4.2, < 8)
57
+ cocina-models (~> 0.95.1)
58
+ deprecation
59
+ faraday (~> 2.0)
60
+ faraday-retry
61
+ zeitwerk (~> 2.1)
54
62
  dor-workflow-client (7.0.2)
55
63
  activesupport (>= 3.2.1, < 8)
56
64
  deprecation (>= 0.99.0)
@@ -58,8 +66,7 @@ GEM
58
66
  faraday-retry (~> 2.0)
59
67
  nokogiri (~> 1.6)
60
68
  zeitwerk (~> 2.1)
61
- drb (2.2.0)
62
- ruby2_keywords
69
+ drb (2.2.1)
63
70
  dry-core (1.0.1)
64
71
  concurrent-ruby (~> 1.0)
65
72
  zeitwerk (~> 2.6)
@@ -90,8 +97,8 @@ GEM
90
97
  net-http
91
98
  faraday-retry (2.2.0)
92
99
  faraday (~> 2.0)
93
- honeybadger (5.4.1)
94
- i18n (1.14.1)
100
+ honeybadger (5.8.0)
101
+ i18n (1.14.4)
95
102
  concurrent-ruby (~> 1.0)
96
103
  ice_nine (0.11.2)
97
104
  iso-639 (0.3.6)
@@ -100,7 +107,7 @@ GEM
100
107
  multi_json
101
108
  language_server-protocol (3.17.0.3)
102
109
  marc-vocab (0.3.0)
103
- minitest (5.22.2)
110
+ minitest (5.22.3)
104
111
  mods (3.0.4)
105
112
  edtf (~> 3.0)
106
113
  iso-639
@@ -110,9 +117,9 @@ GEM
110
117
  mutex_m (0.2.0)
111
118
  net-http (0.4.1)
112
119
  uri
113
- nokogiri (1.16.2-x86_64-darwin)
120
+ nokogiri (1.16.3-x86_64-darwin)
114
121
  racc (~> 1.4)
115
- nokogiri (1.16.2-x86_64-linux)
122
+ nokogiri (1.16.3-x86_64-linux)
116
123
  racc (~> 1.4)
117
124
  nom-xml (1.2.0)
118
125
  i18n
@@ -144,10 +151,10 @@ GEM
144
151
  rspec-mocks (3.13.0)
145
152
  diff-lcs (>= 1.2.0, < 2.0)
146
153
  rspec-support (~> 3.13.0)
147
- rspec-support (3.13.0)
154
+ rspec-support (3.13.1)
148
155
  rss (0.3.0)
149
156
  rexml
150
- rubocop (1.60.2)
157
+ rubocop (1.62.1)
151
158
  json (~> 2.3)
152
159
  language_server-protocol (>= 3.17.0)
153
160
  parallel (~> 1.10)
@@ -155,11 +162,11 @@ GEM
155
162
  rainbow (>= 2.2.2, < 4.0)
156
163
  regexp_parser (>= 1.8, < 3.0)
157
164
  rexml (>= 3.2.5, < 4.0)
158
- rubocop-ast (>= 1.30.0, < 2.0)
165
+ rubocop-ast (>= 1.31.1, < 2.0)
159
166
  ruby-progressbar (~> 1.7)
160
167
  unicode-display_width (>= 2.4.0, < 3.0)
161
- rubocop-ast (1.30.0)
162
- parser (>= 3.2.1.0)
168
+ rubocop-ast (1.31.2)
169
+ parser (>= 3.3.0.4)
163
170
  rubocop-capybara (2.20.0)
164
171
  rubocop (~> 1.41)
165
172
  rubocop-factory_bot (2.25.1)
@@ -167,12 +174,11 @@ GEM
167
174
  rubocop-performance (1.20.2)
168
175
  rubocop (>= 1.48.1, < 2.0)
169
176
  rubocop-ast (>= 1.30.0, < 2.0)
170
- rubocop-rspec (2.26.1)
177
+ rubocop-rspec (2.27.1)
171
178
  rubocop (~> 1.40)
172
179
  rubocop-capybara (~> 2.17)
173
180
  rubocop-factory_bot (~> 2.22)
174
181
  ruby-progressbar (1.13.0)
175
- ruby2_keywords (0.0.5)
176
182
  simplecov (0.22.0)
177
183
  docile (~> 1.1)
178
184
  simplecov-html (~> 0.11)
@@ -186,7 +192,7 @@ GEM
186
192
  attr_extras (>= 6.2.4)
187
193
  diff-lcs
188
194
  patience_diff
189
- thor (1.3.0)
195
+ thor (1.3.1)
190
196
  tzinfo (2.0.6)
191
197
  concurrent-ruby (~> 1.0)
192
198
  unicode-display_width (2.5.0)
data/README.md CHANGED
@@ -26,12 +26,25 @@ If bundler is not being used to manage dependencies, install the gem by executin
26
26
 
27
27
  ## Usage
28
28
 
29
- DorIndexing that a configured Workflow Client and a Cocina Repository be injected.
29
+ DorIndexing requires that a configured Workflow Client, DOR Services Client, and Cocina Repository be injected.
30
30
 
31
31
  The Cocina Repository provides methods for finding Cocina objects and administrative tags. One possible implementation of a Cocina Repository would be to use DOR Services Client.
32
32
 
33
33
  ```ruby
34
34
  require 'dor_indexing'
35
35
 
36
- doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
36
+ doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
37
37
  ```
38
+
39
+ ## Testing
40
+
41
+ ### Integration Testing with Solr
42
+
43
+ We build and update the Solr index via dor_indexing_app and dor-services-app, both of which use this gem for indexing logic.
44
+
45
+ Argo is the Blacklight app that uses the Solr index extensively, and it already has the Docker containers needed to create new test objects in dor-services-app and index them (via dor_indexing_app) to Solr. Because Argo is built on top of the Solr index, it is a good place to check results.
46
+
47
+ To ensure our indexing behavior produces the desired results, it was easiest to put
48
+ the full stack integration tests in the argo repository -- they can be found in
49
+ https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
50
+
data/dor_indexing.gemspec CHANGED
@@ -31,11 +31,12 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
- spec.add_dependency 'activesupport' # for blank? method
35
- spec.add_dependency 'cocina-models', '~> 0.95.0'
34
+ spec.add_dependency 'activesupport'
35
+ spec.add_dependency 'cocina-models', '~> 0.95.1'
36
+ spec.add_dependency 'dor-services-client', '~> 14.0'
36
37
  spec.add_dependency 'dor-workflow-client', '~> 7.0'
37
38
  spec.add_dependency 'honeybadger'
38
- spec.add_dependency 'marc-vocab', '~> 0.3.0' # for marcgac and marccountry
39
+ spec.add_dependency 'marc-vocab', '~> 0.3.0'
39
40
  spec.add_dependency 'stanford-mods'
40
41
  spec.add_dependency 'zeitwerk'
41
42
  end
@@ -48,17 +48,18 @@ class DorIndexing
48
48
 
49
49
  @@parent_collections = {} # rubocop:disable Style/ClassVars
50
50
 
51
- def self.for(model:, workflow_client:, cocina_repository:)
52
- new(model:, workflow_client:, cocina_repository:).for
51
+ def self.for(model:, workflow_client:, dor_services_client:, cocina_repository:)
52
+ new(model:, workflow_client:, dor_services_client:, cocina_repository:).for
53
53
  end
54
54
 
55
55
  def self.reset_parent_collections
56
56
  @@parent_collections = {} # rubocop:disable Style/ClassVars
57
57
  end
58
58
 
59
- def initialize(model:, workflow_client:, cocina_repository:)
59
+ def initialize(model:, workflow_client:, dor_services_client:, cocina_repository:)
60
60
  @model = model
61
61
  @workflow_client = workflow_client
62
+ @dor_services_client = dor_services_client
62
63
  @cocina_repository = cocina_repository
63
64
  end
64
65
 
@@ -69,12 +70,13 @@ class DorIndexing
69
70
  parent_collections:,
70
71
  administrative_tags:,
71
72
  workflow_client:,
73
+ dor_services_client:,
72
74
  cocina_repository:)
73
75
  end
74
76
 
75
77
  private
76
78
 
77
- attr_reader :model, :workflow_client, :cocina_repository
79
+ attr_reader :model, :workflow_client, :dor_services_client, :cocina_repository
78
80
 
79
81
  def id
80
82
  model.externalIdentifier
@@ -25,11 +25,9 @@ class DorIndexing
25
25
  'full_title_tenim' => full_title, # for searching; 1 more field type is copyField in solr schema.xml
26
26
  'additional_titles_tenim' => additional_titles, # for searching; 1 more field type is copyField in solr schema.xml
27
27
  'display_title_ss' => display_title, # for display in Argo
28
- 'sw_display_title_tesim' => display_title, # for display in Argo DEPRECATED in favor of display_title_ss
29
28
 
30
29
  # contributor
31
30
  'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
32
- 'sw_author_tesim' => author_primary, # DEPRECATED - used for author display in Argo
33
31
  'author_display_ss' => author_primary, # used for author display in Argo
34
32
  'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
35
33
  'contributor_orcids_ssim' => orcids,
@@ -18,14 +18,13 @@ class DorIndexing
18
18
  @@apo_hash = {} # rubocop:disable Style/ClassVars
19
19
 
20
20
  # @return [Hash] the partial solr document for identifiable concerns
21
- def to_solr # rubocop:disable Metrics/AbcSize
21
+ def to_solr
22
22
  {}.tap do |solr_doc|
23
23
  add_apo_titles(solr_doc, cocina.administrative.hasAdminPolicy)
24
24
 
25
25
  solr_doc['metadata_source_ssim'] = identity_metadata_sources unless cocina.is_a? Cocina::Models::AdminPolicyWithMetadata
26
26
  solr_doc['druid_prefixed_ssi'] = cocina.externalIdentifier
27
27
  solr_doc['druid_bare_ssi'] = cocina.externalIdentifier.delete_prefix('druid:')
28
- solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')] # DEPRECATED
29
28
  end
30
29
  end
31
30
 
@@ -4,11 +4,12 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the object's release tags
6
6
  class ReleasableIndexer
7
- attr_reader :cocina, :parent_collections
7
+ attr_reader :cocina, :parent_collections, :dor_services_client
8
8
 
9
- def initialize(cocina:, parent_collections:, **)
9
+ def initialize(cocina:, parent_collections:, dor_services_client:, **)
10
10
  @cocina = cocina
11
11
  @parent_collections = parent_collections
12
+ @dor_services_client = dor_services_client
12
13
  end
13
14
 
14
15
  # @return [Hash] the partial solr document for releasable concerns
@@ -40,8 +41,11 @@ class DorIndexing
40
41
 
41
42
  def tags_from_collection
42
43
  parent_collections.each_with_object({}) do |collection, result|
43
- Array(collection.administrative.releaseTags)
44
- .select { |tag| tag.what == 'collection' }
44
+ collection_object_client = dor_services_client.object(collection.externalIdentifier)
45
+ collection_object_client
46
+ .release_tags
47
+ .list
48
+ .select { |tag| tag.what == 'self' }
45
49
  .group_by(&:to).map do |project, releases_for_project|
46
50
  result[project] = releases_for_project.max_by(&:date)
47
51
  end
@@ -49,13 +53,14 @@ class DorIndexing
49
53
  end
50
54
 
51
55
  def tags_from_item
52
- released_for.group_by(&:to).transform_values do |releases_for_project|
53
- releases_for_project.max_by(&:date)
54
- end
55
- end
56
-
57
- def released_for
58
- Array(cocina.administrative.releaseTags)
56
+ object_client = dor_services_client.object(cocina.externalIdentifier)
57
+ object_client
58
+ .release_tags
59
+ .list
60
+ .select { |tag| tag.what == 'self' }
61
+ .group_by(&:to).transform_values do |releases_for_project|
62
+ releases_for_project.max_by(&:date)
63
+ end
59
64
  end
60
65
  end
61
66
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DorIndexing
4
- VERSION = '1.4.0'
4
+ VERSION = '1.5.0'
5
5
  end
data/lib/dor_indexing.rb CHANGED
@@ -1,18 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'zeitwerk'
4
- require 'stanford-mods'
5
- require 'cocina/models'
6
- require 'marc/vocab'
7
- require 'honeybadger'
8
4
 
9
5
  Zeitwerk::Loader.for_gem.setup
10
6
 
7
+ # Zeitwerk doesn't auto-load these dependencies
8
+ require 'active_support'
9
+ require 'active_support/core_ext/object/blank'
10
+ require 'active_support/core_ext/enumerable'
11
+ require 'active_support/core_ext/string'
12
+ require 'cocina/models'
13
+ require 'dor/services/client'
14
+ require 'honeybadger'
15
+ require 'marc/vocab'
16
+
11
17
  # Builds solr documents for indexing.
12
18
  class DorIndexing
13
19
  # @return [Hash] the solr document
14
- def self.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
20
+ def self.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
15
21
  Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
16
- DorIndexing::Builders::DocumentBuilder.for(model: cocina_with_metadata, workflow_client:, cocina_repository:).to_solr
22
+ DorIndexing::Builders::DocumentBuilder.for(
23
+ model: cocina_with_metadata,
24
+ workflow_client:,
25
+ dor_services_client:,
26
+ cocina_repository:
27
+ ).to_solr
17
28
  end
18
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dor_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Littman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-17 00:00:00.000000000 Z
11
+ date: 2024-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -30,14 +30,28 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.95.0
33
+ version: 0.95.1
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.95.0
40
+ version: 0.95.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: dor-services-client
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '14.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '14.0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: dor-workflow-client
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -180,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
194
  - !ruby/object:Gem::Version
181
195
  version: '0'
182
196
  requirements: []
183
- rubygems_version: 3.4.13
197
+ rubygems_version: 3.4.18
184
198
  signing_key:
185
199
  specification_version: 4
186
200
  summary: Library for creating Solr documents for SDR indexing.