dor_indexing 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 108a49c639925b8e1ba4e892d0ed3e4d903f7e741311f480ce85dd1be88d17c0
4
- data.tar.gz: 5483c10c3b7fa972097e6b8f8cc889a6abc8a67e82bc864253ea003b97e6e578
3
+ metadata.gz: ae485a8233c79356bffbc42bef8d8e9f3ef1afbf3db962a2ba43b4ce522d7cd7
4
+ data.tar.gz: 1a05728e58b24d5f94164881a92326c9232402e241ce589dab14231e448ce60a
5
5
  SHA512:
6
- metadata.gz: d2f7554c2a2d7dc5ab0eebe685b309f88706cef647ee0aeeeded3da2dc252016b7b43b4328303cfcc0da63c6c45d89ae1f883b6e120f982bcabf750c2a90040a
7
- data.tar.gz: 5fdf32e3282b995903bc70892cc0baa9c8db8139668c40fbc92f1aff0ee0e40957c3242f07bffcd5d9512bf748697024c407f1708109f4a2837f638ea49012a3
6
+ metadata.gz: 24cfdc3e6be2af97c092e893825af54f8e3e3e47d65b9b955ea90c4ebda86072dbd87f32e2b3778e5fbee005be00c6877efac710e4d87b178f2521cee42f8b62
7
+ data.tar.gz: 7a1fc488f95678830e07fb919371e3ef8f5125d8bfcbf40d62109c51f87203fb6c424bef4da4fd8b7a9ec77a87bfee3f92274325482ab58d602599d069bbafe7
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dor_indexing (1.4.0)
4
+ dor_indexing (1.5.0)
5
5
  activesupport
6
- cocina-models (~> 0.95.0)
6
+ cocina-models (~> 0.95.1)
7
+ dor-services-client (~> 14.0)
7
8
  dor-workflow-client (~> 7.0)
8
9
  honeybadger
9
10
  marc-vocab (~> 0.3.0)
@@ -13,7 +14,7 @@ PATH
13
14
  GEM
14
15
  remote: https://rubygems.org/
15
16
  specs:
16
- activesupport (7.1.3)
17
+ activesupport (7.1.3.2)
17
18
  base64
18
19
  bigdecimal
19
20
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -26,9 +27,9 @@ GEM
26
27
  ast (2.4.2)
27
28
  attr_extras (7.1.0)
28
29
  base64 (0.2.0)
29
- bigdecimal (3.1.6)
30
+ bigdecimal (3.1.7)
30
31
  byebug (11.1.3)
31
- cocina-models (0.95.0)
32
+ cocina-models (0.95.1)
32
33
  activesupport
33
34
  deprecation
34
35
  dry-struct (~> 1.0)
@@ -51,6 +52,13 @@ GEM
51
52
  activesupport
52
53
  diff-lcs (1.5.1)
53
54
  docile (1.4.0)
55
+ dor-services-client (14.4.0)
56
+ activesupport (>= 4.2, < 8)
57
+ cocina-models (~> 0.95.1)
58
+ deprecation
59
+ faraday (~> 2.0)
60
+ faraday-retry
61
+ zeitwerk (~> 2.1)
54
62
  dor-workflow-client (7.0.2)
55
63
  activesupport (>= 3.2.1, < 8)
56
64
  deprecation (>= 0.99.0)
@@ -58,8 +66,7 @@ GEM
58
66
  faraday-retry (~> 2.0)
59
67
  nokogiri (~> 1.6)
60
68
  zeitwerk (~> 2.1)
61
- drb (2.2.0)
62
- ruby2_keywords
69
+ drb (2.2.1)
63
70
  dry-core (1.0.1)
64
71
  concurrent-ruby (~> 1.0)
65
72
  zeitwerk (~> 2.6)
@@ -90,8 +97,8 @@ GEM
90
97
  net-http
91
98
  faraday-retry (2.2.0)
92
99
  faraday (~> 2.0)
93
- honeybadger (5.4.1)
94
- i18n (1.14.1)
100
+ honeybadger (5.8.0)
101
+ i18n (1.14.4)
95
102
  concurrent-ruby (~> 1.0)
96
103
  ice_nine (0.11.2)
97
104
  iso-639 (0.3.6)
@@ -100,7 +107,7 @@ GEM
100
107
  multi_json
101
108
  language_server-protocol (3.17.0.3)
102
109
  marc-vocab (0.3.0)
103
- minitest (5.22.2)
110
+ minitest (5.22.3)
104
111
  mods (3.0.4)
105
112
  edtf (~> 3.0)
106
113
  iso-639
@@ -110,9 +117,9 @@ GEM
110
117
  mutex_m (0.2.0)
111
118
  net-http (0.4.1)
112
119
  uri
113
- nokogiri (1.16.2-x86_64-darwin)
120
+ nokogiri (1.16.3-x86_64-darwin)
114
121
  racc (~> 1.4)
115
- nokogiri (1.16.2-x86_64-linux)
122
+ nokogiri (1.16.3-x86_64-linux)
116
123
  racc (~> 1.4)
117
124
  nom-xml (1.2.0)
118
125
  i18n
@@ -144,10 +151,10 @@ GEM
144
151
  rspec-mocks (3.13.0)
145
152
  diff-lcs (>= 1.2.0, < 2.0)
146
153
  rspec-support (~> 3.13.0)
147
- rspec-support (3.13.0)
154
+ rspec-support (3.13.1)
148
155
  rss (0.3.0)
149
156
  rexml
150
- rubocop (1.60.2)
157
+ rubocop (1.62.1)
151
158
  json (~> 2.3)
152
159
  language_server-protocol (>= 3.17.0)
153
160
  parallel (~> 1.10)
@@ -155,11 +162,11 @@ GEM
155
162
  rainbow (>= 2.2.2, < 4.0)
156
163
  regexp_parser (>= 1.8, < 3.0)
157
164
  rexml (>= 3.2.5, < 4.0)
158
- rubocop-ast (>= 1.30.0, < 2.0)
165
+ rubocop-ast (>= 1.31.1, < 2.0)
159
166
  ruby-progressbar (~> 1.7)
160
167
  unicode-display_width (>= 2.4.0, < 3.0)
161
- rubocop-ast (1.30.0)
162
- parser (>= 3.2.1.0)
168
+ rubocop-ast (1.31.2)
169
+ parser (>= 3.3.0.4)
163
170
  rubocop-capybara (2.20.0)
164
171
  rubocop (~> 1.41)
165
172
  rubocop-factory_bot (2.25.1)
@@ -167,12 +174,11 @@ GEM
167
174
  rubocop-performance (1.20.2)
168
175
  rubocop (>= 1.48.1, < 2.0)
169
176
  rubocop-ast (>= 1.30.0, < 2.0)
170
- rubocop-rspec (2.26.1)
177
+ rubocop-rspec (2.27.1)
171
178
  rubocop (~> 1.40)
172
179
  rubocop-capybara (~> 2.17)
173
180
  rubocop-factory_bot (~> 2.22)
174
181
  ruby-progressbar (1.13.0)
175
- ruby2_keywords (0.0.5)
176
182
  simplecov (0.22.0)
177
183
  docile (~> 1.1)
178
184
  simplecov-html (~> 0.11)
@@ -186,7 +192,7 @@ GEM
186
192
  attr_extras (>= 6.2.4)
187
193
  diff-lcs
188
194
  patience_diff
189
- thor (1.3.0)
195
+ thor (1.3.1)
190
196
  tzinfo (2.0.6)
191
197
  concurrent-ruby (~> 1.0)
192
198
  unicode-display_width (2.5.0)
data/README.md CHANGED
@@ -26,12 +26,25 @@ If bundler is not being used to manage dependencies, install the gem by executin
26
26
 
27
27
  ## Usage
28
28
 
29
- DorIndexing that a configured Workflow Client and a Cocina Repository be injected.
29
+ DorIndexing requires that a configured Workflow Client, DOR Services Client, and Cocina Repository be injected.
30
30
 
31
31
  The Cocina Repository provides methods for finding Cocina objects and administrative tags. One possible implementation of a Cocina Repository would be to use DOR Services Client.
32
32
 
33
33
  ```ruby
34
34
  require 'dor_indexing'
35
35
 
36
- doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
36
+ doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
37
37
  ```
38
+
39
+ ## Testing
40
+
41
+ ### Integration Testing with Solr
42
+
43
+ We build and update the Solr index via dor-indexing-app and dor-services-app, both of which use this gem for indexing logic.
44
+
45
+ Argo is the Blacklight app that uses the Solr index extensively, and it already has the Docker containers to create new test objects in dor-services-app and index them (via dor-indexing-app to Solr). Because Argo is built on top of the Solr index, it is a good place to check results.
46
+
47
+ To ensure our indexing behavior produces the desired results, it was easiest to put
48
+ the full stack integration tests in the argo repository -- they can be found in
49
+ https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
50
+
data/dor_indexing.gemspec CHANGED
@@ -31,11 +31,12 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
- spec.add_dependency 'activesupport' # for blank? method
35
- spec.add_dependency 'cocina-models', '~> 0.95.0'
34
+ spec.add_dependency 'activesupport'
35
+ spec.add_dependency 'cocina-models', '~> 0.95.1'
36
+ spec.add_dependency 'dor-services-client', '~> 14.0'
36
37
  spec.add_dependency 'dor-workflow-client', '~> 7.0'
37
38
  spec.add_dependency 'honeybadger'
38
- spec.add_dependency 'marc-vocab', '~> 0.3.0' # for marcgac and marccountry
39
+ spec.add_dependency 'marc-vocab', '~> 0.3.0'
39
40
  spec.add_dependency 'stanford-mods'
40
41
  spec.add_dependency 'zeitwerk'
41
42
  end
@@ -48,17 +48,18 @@ class DorIndexing
48
48
 
49
49
  @@parent_collections = {} # rubocop:disable Style/ClassVars
50
50
 
51
- def self.for(model:, workflow_client:, cocina_repository:)
52
- new(model:, workflow_client:, cocina_repository:).for
51
+ def self.for(model:, workflow_client:, dor_services_client:, cocina_repository:)
52
+ new(model:, workflow_client:, dor_services_client:, cocina_repository:).for
53
53
  end
54
54
 
55
55
  def self.reset_parent_collections
56
56
  @@parent_collections = {} # rubocop:disable Style/ClassVars
57
57
  end
58
58
 
59
- def initialize(model:, workflow_client:, cocina_repository:)
59
+ def initialize(model:, workflow_client:, dor_services_client:, cocina_repository:)
60
60
  @model = model
61
61
  @workflow_client = workflow_client
62
+ @dor_services_client = dor_services_client
62
63
  @cocina_repository = cocina_repository
63
64
  end
64
65
 
@@ -69,12 +70,13 @@ class DorIndexing
69
70
  parent_collections:,
70
71
  administrative_tags:,
71
72
  workflow_client:,
73
+ dor_services_client:,
72
74
  cocina_repository:)
73
75
  end
74
76
 
75
77
  private
76
78
 
77
- attr_reader :model, :workflow_client, :cocina_repository
79
+ attr_reader :model, :workflow_client, :dor_services_client, :cocina_repository
78
80
 
79
81
  def id
80
82
  model.externalIdentifier
@@ -25,11 +25,9 @@ class DorIndexing
25
25
  'full_title_tenim' => full_title, # for searching; 1 more field type is copyField in solr schema.xml
26
26
  'additional_titles_tenim' => additional_titles, # for searching; 1 more field type is copyField in solr schema.xml
27
27
  'display_title_ss' => display_title, # for display in Argo
28
- 'sw_display_title_tesim' => display_title, # for display in Argo DEPRECATED in favor of display_title_ss
29
28
 
30
29
  # contributor
31
30
  'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
32
- 'sw_author_tesim' => author_primary, # DEPRECATED - used for author display in Argo
33
31
  'author_display_ss' => author_primary, # used for author display in Argo
34
32
  'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
35
33
  'contributor_orcids_ssim' => orcids,
@@ -18,14 +18,13 @@ class DorIndexing
18
18
  @@apo_hash = {} # rubocop:disable Style/ClassVars
19
19
 
20
20
  # @return [Hash] the partial solr document for identifiable concerns
21
- def to_solr # rubocop:disable Metrics/AbcSize
21
+ def to_solr
22
22
  {}.tap do |solr_doc|
23
23
  add_apo_titles(solr_doc, cocina.administrative.hasAdminPolicy)
24
24
 
25
25
  solr_doc['metadata_source_ssim'] = identity_metadata_sources unless cocina.is_a? Cocina::Models::AdminPolicyWithMetadata
26
26
  solr_doc['druid_prefixed_ssi'] = cocina.externalIdentifier
27
27
  solr_doc['druid_bare_ssi'] = cocina.externalIdentifier.delete_prefix('druid:')
28
- solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')] # DEPRECATED
29
28
  end
30
29
  end
31
30
 
@@ -4,11 +4,12 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the object's release tags
6
6
  class ReleasableIndexer
7
- attr_reader :cocina, :parent_collections
7
+ attr_reader :cocina, :parent_collections, :dor_services_client
8
8
 
9
- def initialize(cocina:, parent_collections:, **)
9
+ def initialize(cocina:, parent_collections:, dor_services_client:, **)
10
10
  @cocina = cocina
11
11
  @parent_collections = parent_collections
12
+ @dor_services_client = dor_services_client
12
13
  end
13
14
 
14
15
  # @return [Hash] the partial solr document for releasable concerns
@@ -40,8 +41,11 @@ class DorIndexing
40
41
 
41
42
  def tags_from_collection
42
43
  parent_collections.each_with_object({}) do |collection, result|
43
- Array(collection.administrative.releaseTags)
44
- .select { |tag| tag.what == 'collection' }
44
+ collection_object_client = dor_services_client.object(collection.externalIdentifier)
45
+ collection_object_client
46
+ .release_tags
47
+ .list
48
+ .select { |tag| tag.what == 'self' }
45
49
  .group_by(&:to).map do |project, releases_for_project|
46
50
  result[project] = releases_for_project.max_by(&:date)
47
51
  end
@@ -49,13 +53,14 @@ class DorIndexing
49
53
  end
50
54
 
51
55
  def tags_from_item
52
- released_for.group_by(&:to).transform_values do |releases_for_project|
53
- releases_for_project.max_by(&:date)
54
- end
55
- end
56
-
57
- def released_for
58
- Array(cocina.administrative.releaseTags)
56
+ object_client = dor_services_client.object(cocina.externalIdentifier)
57
+ object_client
58
+ .release_tags
59
+ .list
60
+ .select { |tag| tag.what == 'self' }
61
+ .group_by(&:to).transform_values do |releases_for_project|
62
+ releases_for_project.max_by(&:date)
63
+ end
59
64
  end
60
65
  end
61
66
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DorIndexing
4
- VERSION = '1.4.0'
4
+ VERSION = '1.5.0'
5
5
  end
data/lib/dor_indexing.rb CHANGED
@@ -1,18 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'zeitwerk'
4
- require 'stanford-mods'
5
- require 'cocina/models'
6
- require 'marc/vocab'
7
- require 'honeybadger'
8
4
 
9
5
  Zeitwerk::Loader.for_gem.setup
10
6
 
7
+ # Zeitwerk doesn't auto-load these dependencies
8
+ require 'active_support'
9
+ require 'active_support/core_ext/object/blank'
10
+ require 'active_support/core_ext/enumerable'
11
+ require 'active_support/core_ext/string'
12
+ require 'cocina/models'
13
+ require 'dor/services/client'
14
+ require 'honeybadger'
15
+ require 'marc/vocab'
16
+
11
17
  # Builds solr documents for indexing.
12
18
  class DorIndexing
13
19
  # @return [Hash] the solr document
14
- def self.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
20
+ def self.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
15
21
  Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
16
- DorIndexing::Builders::DocumentBuilder.for(model: cocina_with_metadata, workflow_client:, cocina_repository:).to_solr
22
+ DorIndexing::Builders::DocumentBuilder.for(
23
+ model: cocina_with_metadata,
24
+ workflow_client:,
25
+ dor_services_client:,
26
+ cocina_repository:
27
+ ).to_solr
17
28
  end
18
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dor_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Littman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-17 00:00:00.000000000 Z
11
+ date: 2024-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -30,14 +30,28 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.95.0
33
+ version: 0.95.1
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.95.0
40
+ version: 0.95.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: dor-services-client
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '14.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '14.0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: dor-workflow-client
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -180,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
194
  - !ruby/object:Gem::Version
181
195
  version: '0'
182
196
  requirements: []
183
- rubygems_version: 3.4.13
197
+ rubygems_version: 3.4.18
184
198
  signing_key:
185
199
  specification_version: 4
186
200
  summary: Library for creating Solr documents for SDR indexing.