dor_indexing 1.4.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c1e2280668e9085122b921fd927b75e9982fa5fda9067a9245899ef93db77ae
4
- data.tar.gz: 3e8d89e77994cb62ffcc1c598e47716c3a9c9f8aa8e37f226159e8fef947082d
3
+ metadata.gz: b76dd6ecf919e3653810f59c166d313f77eba4a3046bcd1598abd996bf57c74d
4
+ data.tar.gz: be615ccc690756cd7aff121e175d544caea9cf3b1dd70a6d03e0fd3c9ab14c43
5
5
  SHA512:
6
- metadata.gz: 24e3bf95ad1c541d3b403b2233018ab1da89be80cdcd3fa9acb2793e2b57673c8364202a4a548f8fe1c9ff60444585b5cf2d020968cb5b50614ad80b118d7bae
7
- data.tar.gz: 6c015737932b1f01819d89f5f91948ab43d09e1537d6bf41ba8df85778c9ed0f18dc85b408a7fdc6ceb48ce6cfc4f7127b74da0c415aac6819d7dd460cf8a4eb
6
+ metadata.gz: d04715c311bfd64ac62169e18d4fea188b6c2dc0e8263c38413e3eac3f985e901f3f5ba1181695f5e24ecbbdb713c937bd6c334c0e1f4ec622f293cea4a2195b
7
+ data.tar.gz: de51ceba624569579cbfe1c1f259ba3568158443ed849ed3d253a144ed6c25346d89916df530c7bc91891afe5f3de19e508cca9cc23818f9e61b262b7306a2a2
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dor_indexing (1.4.1)
4
+ dor_indexing (2.0.0)
5
5
  activesupport
6
6
  cocina-models (~> 0.95.1)
7
+ dor-services-client (~> 14.0)
7
8
  dor-workflow-client (~> 7.0)
8
9
  honeybadger
9
10
  marc-vocab (~> 0.3.0)
@@ -26,7 +27,7 @@ GEM
26
27
  ast (2.4.2)
27
28
  attr_extras (7.1.0)
28
29
  base64 (0.2.0)
29
- bigdecimal (3.1.6)
30
+ bigdecimal (3.1.7)
30
31
  byebug (11.1.3)
31
32
  cocina-models (0.95.1)
32
33
  activesupport
@@ -51,6 +52,13 @@ GEM
51
52
  activesupport
52
53
  diff-lcs (1.5.1)
53
54
  docile (1.4.0)
55
+ dor-services-client (14.4.0)
56
+ activesupport (>= 4.2, < 8)
57
+ cocina-models (~> 0.95.1)
58
+ deprecation
59
+ faraday (~> 2.0)
60
+ faraday-retry
61
+ zeitwerk (~> 2.1)
54
62
  dor-workflow-client (7.0.2)
55
63
  activesupport (>= 3.2.1, < 8)
56
64
  deprecation (>= 0.99.0)
@@ -89,7 +97,7 @@ GEM
89
97
  net-http
90
98
  faraday-retry (2.2.0)
91
99
  faraday (~> 2.0)
92
- honeybadger (5.6.0)
100
+ honeybadger (5.8.0)
93
101
  i18n (1.14.4)
94
102
  concurrent-ruby (~> 1.0)
95
103
  ice_nine (0.11.2)
@@ -99,7 +107,7 @@ GEM
99
107
  multi_json
100
108
  language_server-protocol (3.17.0.3)
101
109
  marc-vocab (0.3.0)
102
- minitest (5.22.2)
110
+ minitest (5.22.3)
103
111
  mods (3.0.4)
104
112
  edtf (~> 3.0)
105
113
  iso-639
@@ -109,9 +117,9 @@ GEM
109
117
  mutex_m (0.2.0)
110
118
  net-http (0.4.1)
111
119
  uri
112
- nokogiri (1.16.2-x86_64-darwin)
120
+ nokogiri (1.16.3-x86_64-darwin)
113
121
  racc (~> 1.4)
114
- nokogiri (1.16.2-x86_64-linux)
122
+ nokogiri (1.16.3-x86_64-linux)
115
123
  racc (~> 1.4)
116
124
  nom-xml (1.2.0)
117
125
  i18n
@@ -146,7 +154,7 @@ GEM
146
154
  rspec-support (3.13.1)
147
155
  rss (0.3.0)
148
156
  rexml
149
- rubocop (1.62.0)
157
+ rubocop (1.62.1)
150
158
  json (~> 2.3)
151
159
  language_server-protocol (>= 3.17.0)
152
160
  parallel (~> 1.10)
data/README.md CHANGED
@@ -26,12 +26,32 @@ If bundler is not being used to manage dependencies, install the gem by executin
26
26
 
27
27
  ## Usage
28
28
 
29
- DorIndexing that a configured Workflow Client and a Cocina Repository be injected.
29
+ DorIndexing requires interaction with the SDR workflow API and also needs the following:
30
30
 
31
- The Cocina Repository provides methods for finding Cocina objects and administrative tags. One possible implementation of a Cocina Repository would be to use DOR Services Client.
31
+ * a callable that takes a single argument (a druid) and returns the Cocina for the corresponding object
32
+ * a callable that takes a single argument (a druid) and returns the list of administrative tags for the corresponding object
33
+ * a callable that takes a single argument (a druid) and returns the list of release tags for the corresponding object
32
34
 
33
35
  ```ruby
34
36
  require 'dor_indexing'
35
37
 
36
- doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
38
+ doc = DorIndexing.build(
39
+ cocina_with_metadata:,
40
+ workflow_client:,
41
+ cocina_finder:,
42
+ administrative_tags_finder:,
43
+ release_tags_finder:
44
+ )
37
45
  ```
46
+
47
+ ## Testing
48
+
49
+ ### Integration Testing with Solr
50
+
51
+ We build and update the Solr index via dor-indexing-app and dor-services-app, both of which use this gem for indexing logic.
52
+
53
+ Argo is the Blacklight app that uses the Solr index extensively, and it already has the Docker containers to create new test objects in dor-services-app and index them (via dor-indexing-app to Solr). Because Argo is built on top of the Solr index, it is a good place to check results.
54
+
55
+ To ensure our indexing behavior produces the desired results, it was easiest to put
56
+ the full stack integration tests in the argo repository -- they can be found in
57
+ https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
data/dor_indexing.gemspec CHANGED
@@ -33,6 +33,7 @@ Gem::Specification.new do |spec|
33
33
 
34
34
  spec.add_dependency 'activesupport'
35
35
  spec.add_dependency 'cocina-models', '~> 0.95.1'
36
+ spec.add_dependency 'dor-services-client', '~> 14.0'
36
37
  spec.add_dependency 'dor-workflow-client', '~> 7.0'
37
38
  spec.add_dependency 'honeybadger'
38
39
  spec.add_dependency 'marc-vocab', '~> 0.3.0'
@@ -48,18 +48,20 @@ class DorIndexing
48
48
 
49
49
  @@parent_collections = {} # rubocop:disable Style/ClassVars
50
50
 
51
- def self.for(model:, workflow_client:, cocina_repository:)
52
- new(model:, workflow_client:, cocina_repository:).for
51
+ def self.for(...)
52
+ new(...).for
53
53
  end
54
54
 
55
55
  def self.reset_parent_collections
56
56
  @@parent_collections = {} # rubocop:disable Style/ClassVars
57
57
  end
58
58
 
59
- def initialize(model:, workflow_client:, cocina_repository:)
59
+ def initialize(model:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
60
60
  @model = model
61
61
  @workflow_client = workflow_client
62
- @cocina_repository = cocina_repository
62
+ @cocina_finder = cocina_finder
63
+ @administrative_tags_finder = administrative_tags_finder
64
+ @release_tags_finder = release_tags_finder
63
65
  end
64
66
 
65
67
  # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
@@ -69,12 +71,14 @@ class DorIndexing
69
71
  parent_collections:,
70
72
  administrative_tags:,
71
73
  workflow_client:,
72
- cocina_repository:)
74
+ cocina_finder:,
75
+ administrative_tags_finder:,
76
+ release_tags_finder:)
73
77
  end
74
78
 
75
79
  private
76
80
 
77
- attr_reader :model, :workflow_client, :cocina_repository
81
+ attr_reader :model, :workflow_client, :cocina_finder, :administrative_tags_finder, :release_tags_finder
78
82
 
79
83
  def id
80
84
  model.externalIdentifier
@@ -88,8 +92,8 @@ class DorIndexing
88
92
  return [] unless model.dro?
89
93
 
90
94
  Array(model.structural&.isMemberOf).filter_map do |rel_druid|
91
- @@parent_collections[rel_druid] ||= cocina_repository.find(rel_druid)
92
- rescue DorIndexing::CocinaRepository::RepositoryError
95
+ @@parent_collections[rel_druid] ||= cocina_finder.call(rel_druid)
96
+ rescue DorIndexing::RepositoryError
93
97
  Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
94
98
  # This may happen if the referenced Collection does not exist (bad data)
95
99
  nil
@@ -97,8 +101,8 @@ class DorIndexing
97
101
  end
98
102
 
99
103
  def administrative_tags
100
- cocina_repository.administrative_tags(id)
101
- rescue DorIndexing::CocinaRepository::RepositoryError
104
+ administrative_tags_finder.call(id)
105
+ rescue DorIndexing::RepositoryError
102
106
  []
103
107
  end
104
108
  end
@@ -4,13 +4,14 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the druid, metadata sources, and the apo titles
6
6
  class IdentifiableIndexer
7
- attr_reader :cocina, :cocina_repository
7
+ attr_reader :cocina, :cocina_finder, :administrative_tags_finder
8
8
 
9
9
  CURRENT_CATALOG_TYPE = 'folio'
10
10
 
11
- def initialize(cocina:, cocina_repository:, **)
11
+ def initialize(cocina:, cocina_finder:, administrative_tags_finder:, **)
12
12
  @cocina = cocina
13
- @cocina_repository = cocina_repository
13
+ @cocina_finder = cocina_finder
14
+ @administrative_tags_finder = administrative_tags_finder
14
15
  end
15
16
 
16
17
  ## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
@@ -72,13 +73,13 @@ class DorIndexing
72
73
  # populate cache if necessary
73
74
  def populate_cache(rel_druid)
74
75
  @@apo_hash[rel_druid] ||= begin
75
- related_obj = cocina_repository.find(rel_druid)
76
+ related_obj = cocina_finder.call(rel_druid)
76
77
  # APOs don't have projects, and since Hydrus is set to be retired, I don't want to
77
78
  # add the cocina property. Just check the tags service instead.
78
79
  is_from_hydrus = hydrus_tag?(rel_druid)
79
80
  title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
80
81
  { 'related_obj_title' => title, 'is_from_hydrus' => is_from_hydrus }
81
- rescue CocinaRepository::RepositoryError
82
+ rescue RepositoryError
82
83
  Honeybadger.notify("Bad association found on #{cocina.externalIdentifier}. #{rel_druid} could not be found")
83
84
  # This may happen if the given APO or Collection does not exist (bad data)
84
85
  { 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
@@ -86,7 +87,7 @@ class DorIndexing
86
87
  end
87
88
 
88
89
  def hydrus_tag?(id)
89
- cocina_repository.administrative_tags(id).include?('Project : Hydrus')
90
+ administrative_tags_finder.call(id).include?('Project : Hydrus')
90
91
  end
91
92
  end
92
93
  end
@@ -4,11 +4,12 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the object's release tags
6
6
  class ReleasableIndexer
7
- attr_reader :cocina, :parent_collections
7
+ attr_reader :cocina, :parent_collections, :release_tags_finder
8
8
 
9
- def initialize(cocina:, parent_collections:, **)
9
+ def initialize(cocina:, parent_collections:, release_tags_finder:, **)
10
10
  @cocina = cocina
11
11
  @parent_collections = parent_collections
12
+ @release_tags_finder = release_tags_finder
12
13
  end
13
14
 
14
15
  # @return [Hash] the partial solr document for releasable concerns
@@ -18,18 +19,27 @@ class DorIndexing
18
19
  {
19
20
  'released_to_ssim' => tags.map(&:to).uniq,
20
21
  'released_to_searchworks_dttsi' => searchworks_release_date,
21
- 'released_to_earthworks_dttsi' => earthworks_release_date
22
+ 'released_to_earthworks_dttsi' => earthworks_release_date,
23
+ 'released_to_purl_sitemap_dttsi' => purl_sitemap_release_date
22
24
  }.compact
23
25
  end
24
26
 
25
27
  private
26
28
 
29
+ def purl_sitemap_release_date
30
+ date_for_tag 'PURL sitemap'
31
+ end
32
+
27
33
  def earthworks_release_date
28
- tags.find { |tag| tag.to == 'Earthworks' }&.date&.utc&.iso8601
34
+ date_for_tag 'Earthworks'
29
35
  end
30
36
 
31
37
  def searchworks_release_date
32
- tags.find { |tag| tag.to == 'Searchworks' }&.date&.utc&.iso8601
38
+ date_for_tag 'Searchworks'
39
+ end
40
+
41
+ def date_for_tag(project)
42
+ tags.find { |tag| tag.to == project }&.date&.utc&.iso8601
33
43
  end
34
44
 
35
45
  # Item tags have precedence over collection tags, so if the collection is release=true
@@ -40,8 +50,9 @@ class DorIndexing
40
50
 
41
51
  def tags_from_collection
42
52
  parent_collections.each_with_object({}) do |collection, result|
43
- Array(collection.administrative.releaseTags)
44
- .select { |tag| tag.what == 'collection' }
53
+ release_tags_finder
54
+ .call(collection.externalIdentifier)
55
+ .select { |tag| tag.what == 'self' }
45
56
  .group_by(&:to).map do |project, releases_for_project|
46
57
  result[project] = releases_for_project.max_by(&:date)
47
58
  end
@@ -49,13 +60,12 @@ class DorIndexing
49
60
  end
50
61
 
51
62
  def tags_from_item
52
- released_for.group_by(&:to).transform_values do |releases_for_project|
53
- releases_for_project.max_by(&:date)
54
- end
55
- end
56
-
57
- def released_for
58
- Array(cocina.administrative.releaseTags)
63
+ release_tags_finder
64
+ .call(cocina.externalIdentifier)
65
+ .select { |tag| tag.what == 'self' }
66
+ .group_by(&:to).transform_values do |releases_for_project|
67
+ releases_for_project.max_by(&:date)
68
+ end
59
69
  end
60
70
  end
61
71
  end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ # Error raised retrieving Cocina objects, administrative tags, or release tags
5
+ # In DSA, the concrete implementation backs this with CocinaObjectStore.
6
+ # In DIA, the concrete implementation backs this with Dor Services Client.
7
+ class RepositoryError < StandardError; end
8
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DorIndexing
4
- VERSION = '1.4.1'
4
+ VERSION = '2.0.0'
5
5
  end
data/lib/dor_indexing.rb CHANGED
@@ -10,14 +10,21 @@ require 'active_support/core_ext/object/blank'
10
10
  require 'active_support/core_ext/enumerable'
11
11
  require 'active_support/core_ext/string'
12
12
  require 'cocina/models'
13
+ require 'dor/services/client'
13
14
  require 'honeybadger'
14
15
  require 'marc/vocab'
15
16
 
16
17
  # Builds solr documents for indexing.
17
18
  class DorIndexing
18
19
  # @return [Hash] the solr document
19
- def self.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
20
+ def self.build(cocina_with_metadata:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
20
21
  Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
21
- DorIndexing::Builders::DocumentBuilder.for(model: cocina_with_metadata, workflow_client:, cocina_repository:).to_solr
22
+ DorIndexing::Builders::DocumentBuilder.for(
23
+ model: cocina_with_metadata,
24
+ workflow_client:,
25
+ cocina_finder:,
26
+ administrative_tags_finder:,
27
+ release_tags_finder:
28
+ ).to_solr
22
29
  end
23
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dor_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Littman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-11 00:00:00.000000000 Z
11
+ date: 2024-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.95.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: dor-services-client
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '14.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '14.0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: dor-workflow-client
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -135,7 +149,6 @@ files:
135
149
  - lib/dor_indexing/builders/publisher_name_builder.rb
136
150
  - lib/dor_indexing/builders/temporal_builder.rb
137
151
  - lib/dor_indexing/builders/topic_builder.rb
138
- - lib/dor_indexing/cocina_repository.rb
139
152
  - lib/dor_indexing/indexers/administrative_tag_indexer.rb
140
153
  - lib/dor_indexing/indexers/basic_indexer.rb
141
154
  - lib/dor_indexing/indexers/collection_title_indexer.rb
@@ -153,6 +166,7 @@ files:
153
166
  - lib/dor_indexing/indexers/workflow_process_indexer.rb
154
167
  - lib/dor_indexing/indexers/workflows_indexer.rb
155
168
  - lib/dor_indexing/marc_country.rb
169
+ - lib/dor_indexing/repository_error.rb
156
170
  - lib/dor_indexing/selectors/event_selector.rb
157
171
  - lib/dor_indexing/selectors/pub_year_selector.rb
158
172
  - lib/dor_indexing/version.rb
@@ -180,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
194
  - !ruby/object:Gem::Version
181
195
  version: '0'
182
196
  requirements: []
183
- rubygems_version: 3.4.10
197
+ rubygems_version: 3.5.6
184
198
  signing_key:
185
199
  specification_version: 4
186
200
  summary: Library for creating Solr documents for SDR indexing.
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class DorIndexing
4
- # Interface for retrieving Cocina objects.
5
- # In DSA, the concrete implementation backs this with CocinaObjectStore.
6
- # In DIA, the concrete implementation backs this with Dor Services Client.
7
- class CocinaRepository
8
- class RepositoryError < StandardError; end
9
-
10
- # @param [String] druid
11
- # @return [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata]
12
- # @raise [RepositoryError] if the object is not found or other error occurs
13
- def find(druid)
14
- raise NotImplementedError
15
- end
16
-
17
- # @param [String] druid
18
- # @return [Array<String>] administrative tags
19
- # @raise [RepositoryError] if the object is not found or other error occurs
20
- def administrative_tags(druid)
21
- raise NotImplementedError
22
- end
23
- end
24
- end