dor_indexing 1.4.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c1e2280668e9085122b921fd927b75e9982fa5fda9067a9245899ef93db77ae
4
- data.tar.gz: 3e8d89e77994cb62ffcc1c598e47716c3a9c9f8aa8e37f226159e8fef947082d
3
+ metadata.gz: b76dd6ecf919e3653810f59c166d313f77eba4a3046bcd1598abd996bf57c74d
4
+ data.tar.gz: be615ccc690756cd7aff121e175d544caea9cf3b1dd70a6d03e0fd3c9ab14c43
5
5
  SHA512:
6
- metadata.gz: 24e3bf95ad1c541d3b403b2233018ab1da89be80cdcd3fa9acb2793e2b57673c8364202a4a548f8fe1c9ff60444585b5cf2d020968cb5b50614ad80b118d7bae
7
- data.tar.gz: 6c015737932b1f01819d89f5f91948ab43d09e1537d6bf41ba8df85778c9ed0f18dc85b408a7fdc6ceb48ce6cfc4f7127b74da0c415aac6819d7dd460cf8a4eb
6
+ metadata.gz: d04715c311bfd64ac62169e18d4fea188b6c2dc0e8263c38413e3eac3f985e901f3f5ba1181695f5e24ecbbdb713c937bd6c334c0e1f4ec622f293cea4a2195b
7
+ data.tar.gz: de51ceba624569579cbfe1c1f259ba3568158443ed849ed3d253a144ed6c25346d89916df530c7bc91891afe5f3de19e508cca9cc23818f9e61b262b7306a2a2
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dor_indexing (1.4.1)
4
+ dor_indexing (2.0.0)
5
5
  activesupport
6
6
  cocina-models (~> 0.95.1)
7
+ dor-services-client (~> 14.0)
7
8
  dor-workflow-client (~> 7.0)
8
9
  honeybadger
9
10
  marc-vocab (~> 0.3.0)
@@ -26,7 +27,7 @@ GEM
26
27
  ast (2.4.2)
27
28
  attr_extras (7.1.0)
28
29
  base64 (0.2.0)
29
- bigdecimal (3.1.6)
30
+ bigdecimal (3.1.7)
30
31
  byebug (11.1.3)
31
32
  cocina-models (0.95.1)
32
33
  activesupport
@@ -51,6 +52,13 @@ GEM
51
52
  activesupport
52
53
  diff-lcs (1.5.1)
53
54
  docile (1.4.0)
55
+ dor-services-client (14.4.0)
56
+ activesupport (>= 4.2, < 8)
57
+ cocina-models (~> 0.95.1)
58
+ deprecation
59
+ faraday (~> 2.0)
60
+ faraday-retry
61
+ zeitwerk (~> 2.1)
54
62
  dor-workflow-client (7.0.2)
55
63
  activesupport (>= 3.2.1, < 8)
56
64
  deprecation (>= 0.99.0)
@@ -89,7 +97,7 @@ GEM
89
97
  net-http
90
98
  faraday-retry (2.2.0)
91
99
  faraday (~> 2.0)
92
- honeybadger (5.6.0)
100
+ honeybadger (5.8.0)
93
101
  i18n (1.14.4)
94
102
  concurrent-ruby (~> 1.0)
95
103
  ice_nine (0.11.2)
@@ -99,7 +107,7 @@ GEM
99
107
  multi_json
100
108
  language_server-protocol (3.17.0.3)
101
109
  marc-vocab (0.3.0)
102
- minitest (5.22.2)
110
+ minitest (5.22.3)
103
111
  mods (3.0.4)
104
112
  edtf (~> 3.0)
105
113
  iso-639
@@ -109,9 +117,9 @@ GEM
109
117
  mutex_m (0.2.0)
110
118
  net-http (0.4.1)
111
119
  uri
112
- nokogiri (1.16.2-x86_64-darwin)
120
+ nokogiri (1.16.3-x86_64-darwin)
113
121
  racc (~> 1.4)
114
- nokogiri (1.16.2-x86_64-linux)
122
+ nokogiri (1.16.3-x86_64-linux)
115
123
  racc (~> 1.4)
116
124
  nom-xml (1.2.0)
117
125
  i18n
@@ -146,7 +154,7 @@ GEM
146
154
  rspec-support (3.13.1)
147
155
  rss (0.3.0)
148
156
  rexml
149
- rubocop (1.62.0)
157
+ rubocop (1.62.1)
150
158
  json (~> 2.3)
151
159
  language_server-protocol (>= 3.17.0)
152
160
  parallel (~> 1.10)
data/README.md CHANGED
@@ -26,12 +26,32 @@ If bundler is not being used to manage dependencies, install the gem by executin
26
26
 
27
27
  ## Usage
28
28
 
29
- DorIndexing that a configured Workflow Client and a Cocina Repository be injected.
29
+ DorIndexing requires interaction with the SDR workflow API and also needs the following:
30
30
 
31
- The Cocina Repository provides methods for finding Cocina objects and administrative tags. One possible implementation of a Cocina Repository would be to use DOR Services Client.
31
+ * a callable that takes a single argument (a druid) and returns the Cocina for the corresponding object
32
+ * a callable that takes a single argument (a druid) and returns the list of administrative tags for the corresponding object
33
+ * a callable that takes a single argument (a druid) and returns the list of release tags for the corresponding object
32
34
 
33
35
  ```ruby
34
36
  require 'dor_indexing'
35
37
 
36
- doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
38
+ doc = DorIndexing.build(
39
+ cocina_with_metadata:,
40
+ workflow_client:,
41
+ cocina_finder:,
42
+ administrative_tags_finder:,
43
+ release_tags_finder:
44
+ )
37
45
  ```
46
+
47
+ ## Testing
48
+
49
+ ### Integration Testing with Solr
50
+
51
+ We build and update the Solr index via dor-indexing-app and dor-services-app, both of which use this gem for indexing logic.
52
+
53
+ Argo is the Blacklight app that uses the Solr index extensively, and it already has the Docker containers to create new test objects in dor-services-app and index them (via dor-indexing-app to Solr). Because Argo is built on top of the Solr index, it is a good place to check results.
54
+
55
+ To ensure our indexing behavior produces the desired results, it was easiest to put
56
+ the full stack integration tests in the argo repository -- they can be found in
57
+ https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
data/dor_indexing.gemspec CHANGED
@@ -33,6 +33,7 @@ Gem::Specification.new do |spec|
33
33
 
34
34
  spec.add_dependency 'activesupport'
35
35
  spec.add_dependency 'cocina-models', '~> 0.95.1'
36
+ spec.add_dependency 'dor-services-client', '~> 14.0'
36
37
  spec.add_dependency 'dor-workflow-client', '~> 7.0'
37
38
  spec.add_dependency 'honeybadger'
38
39
  spec.add_dependency 'marc-vocab', '~> 0.3.0'
@@ -48,18 +48,20 @@ class DorIndexing
48
48
 
49
49
  @@parent_collections = {} # rubocop:disable Style/ClassVars
50
50
 
51
- def self.for(model:, workflow_client:, cocina_repository:)
52
- new(model:, workflow_client:, cocina_repository:).for
51
+ def self.for(...)
52
+ new(...).for
53
53
  end
54
54
 
55
55
  def self.reset_parent_collections
56
56
  @@parent_collections = {} # rubocop:disable Style/ClassVars
57
57
  end
58
58
 
59
- def initialize(model:, workflow_client:, cocina_repository:)
59
+ def initialize(model:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
60
60
  @model = model
61
61
  @workflow_client = workflow_client
62
- @cocina_repository = cocina_repository
62
+ @cocina_finder = cocina_finder
63
+ @administrative_tags_finder = administrative_tags_finder
64
+ @release_tags_finder = release_tags_finder
63
65
  end
64
66
 
65
67
  # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
@@ -69,12 +71,14 @@ class DorIndexing
69
71
  parent_collections:,
70
72
  administrative_tags:,
71
73
  workflow_client:,
72
- cocina_repository:)
74
+ cocina_finder:,
75
+ administrative_tags_finder:,
76
+ release_tags_finder:)
73
77
  end
74
78
 
75
79
  private
76
80
 
77
- attr_reader :model, :workflow_client, :cocina_repository
81
+ attr_reader :model, :workflow_client, :cocina_finder, :administrative_tags_finder, :release_tags_finder
78
82
 
79
83
  def id
80
84
  model.externalIdentifier
@@ -88,8 +92,8 @@ class DorIndexing
88
92
  return [] unless model.dro?
89
93
 
90
94
  Array(model.structural&.isMemberOf).filter_map do |rel_druid|
91
- @@parent_collections[rel_druid] ||= cocina_repository.find(rel_druid)
92
- rescue DorIndexing::CocinaRepository::RepositoryError
95
+ @@parent_collections[rel_druid] ||= cocina_finder.call(rel_druid)
96
+ rescue DorIndexing::RepositoryError
93
97
  Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
94
98
  # This may happen if the referenced Collection does not exist (bad data)
95
99
  nil
@@ -97,8 +101,8 @@ class DorIndexing
97
101
  end
98
102
 
99
103
  def administrative_tags
100
- cocina_repository.administrative_tags(id)
101
- rescue DorIndexing::CocinaRepository::RepositoryError
104
+ administrative_tags_finder.call(id)
105
+ rescue DorIndexing::RepositoryError
102
106
  []
103
107
  end
104
108
  end
@@ -4,13 +4,14 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the druid, metadata sources, and the apo titles
6
6
  class IdentifiableIndexer
7
- attr_reader :cocina, :cocina_repository
7
+ attr_reader :cocina, :cocina_finder, :administrative_tags_finder
8
8
 
9
9
  CURRENT_CATALOG_TYPE = 'folio'
10
10
 
11
- def initialize(cocina:, cocina_repository:, **)
11
+ def initialize(cocina:, cocina_finder:, administrative_tags_finder:, **)
12
12
  @cocina = cocina
13
- @cocina_repository = cocina_repository
13
+ @cocina_finder = cocina_finder
14
+ @administrative_tags_finder = administrative_tags_finder
14
15
  end
15
16
 
16
17
  ## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
@@ -72,13 +73,13 @@ class DorIndexing
72
73
  # populate cache if necessary
73
74
  def populate_cache(rel_druid)
74
75
  @@apo_hash[rel_druid] ||= begin
75
- related_obj = cocina_repository.find(rel_druid)
76
+ related_obj = cocina_finder.call(rel_druid)
76
77
  # APOs don't have projects, and since Hydrus is set to be retired, I don't want to
77
78
  # add the cocina property. Just check the tags service instead.
78
79
  is_from_hydrus = hydrus_tag?(rel_druid)
79
80
  title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
80
81
  { 'related_obj_title' => title, 'is_from_hydrus' => is_from_hydrus }
81
- rescue CocinaRepository::RepositoryError
82
+ rescue RepositoryError
82
83
  Honeybadger.notify("Bad association found on #{cocina.externalIdentifier}. #{rel_druid} could not be found")
83
84
  # This may happen if the given APO or Collection does not exist (bad data)
84
85
  { 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
@@ -86,7 +87,7 @@ class DorIndexing
86
87
  end
87
88
 
88
89
  def hydrus_tag?(id)
89
- cocina_repository.administrative_tags(id).include?('Project : Hydrus')
90
+ administrative_tags_finder.call(id).include?('Project : Hydrus')
90
91
  end
91
92
  end
92
93
  end
@@ -4,11 +4,12 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the object's release tags
6
6
  class ReleasableIndexer
7
- attr_reader :cocina, :parent_collections
7
+ attr_reader :cocina, :parent_collections, :release_tags_finder
8
8
 
9
- def initialize(cocina:, parent_collections:, **)
9
+ def initialize(cocina:, parent_collections:, release_tags_finder:, **)
10
10
  @cocina = cocina
11
11
  @parent_collections = parent_collections
12
+ @release_tags_finder = release_tags_finder
12
13
  end
13
14
 
14
15
  # @return [Hash] the partial solr document for releasable concerns
@@ -18,18 +19,27 @@ class DorIndexing
18
19
  {
19
20
  'released_to_ssim' => tags.map(&:to).uniq,
20
21
  'released_to_searchworks_dttsi' => searchworks_release_date,
21
- 'released_to_earthworks_dttsi' => earthworks_release_date
22
+ 'released_to_earthworks_dttsi' => earthworks_release_date,
23
+ 'released_to_purl_sitemap_dttsi' => purl_sitemap_release_date
22
24
  }.compact
23
25
  end
24
26
 
25
27
  private
26
28
 
29
+ def purl_sitemap_release_date
30
+ date_for_tag 'PURL sitemap'
31
+ end
32
+
27
33
  def earthworks_release_date
28
- tags.find { |tag| tag.to == 'Earthworks' }&.date&.utc&.iso8601
34
+ date_for_tag 'Earthworks'
29
35
  end
30
36
 
31
37
  def searchworks_release_date
32
- tags.find { |tag| tag.to == 'Searchworks' }&.date&.utc&.iso8601
38
+ date_for_tag 'Searchworks'
39
+ end
40
+
41
+ def date_for_tag(project)
42
+ tags.find { |tag| tag.to == project }&.date&.utc&.iso8601
33
43
  end
34
44
 
35
45
  # Item tags have precedence over collection tags, so if the collection is release=true
@@ -40,8 +50,9 @@ class DorIndexing
40
50
 
41
51
  def tags_from_collection
42
52
  parent_collections.each_with_object({}) do |collection, result|
43
- Array(collection.administrative.releaseTags)
44
- .select { |tag| tag.what == 'collection' }
53
+ release_tags_finder
54
+ .call(collection.externalIdentifier)
55
+ .select { |tag| tag.what == 'self' }
45
56
  .group_by(&:to).map do |project, releases_for_project|
46
57
  result[project] = releases_for_project.max_by(&:date)
47
58
  end
@@ -49,13 +60,12 @@ class DorIndexing
49
60
  end
50
61
 
51
62
  def tags_from_item
52
- released_for.group_by(&:to).transform_values do |releases_for_project|
53
- releases_for_project.max_by(&:date)
54
- end
55
- end
56
-
57
- def released_for
58
- Array(cocina.administrative.releaseTags)
63
+ release_tags_finder
64
+ .call(cocina.externalIdentifier)
65
+ .select { |tag| tag.what == 'self' }
66
+ .group_by(&:to).transform_values do |releases_for_project|
67
+ releases_for_project.max_by(&:date)
68
+ end
59
69
  end
60
70
  end
61
71
  end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ # Error raised when retrieving Cocina objects, administrative tags, or release tags.
5
+ # In DSA, the concrete implementation backs this with CocinaObjectStore.
6
+ # In DIA, the concrete implementation backs this with Dor Services Client.
7
+ class RepositoryError < StandardError; end
8
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DorIndexing
4
- VERSION = '1.4.1'
4
+ VERSION = '2.0.0'
5
5
  end
data/lib/dor_indexing.rb CHANGED
@@ -10,14 +10,21 @@ require 'active_support/core_ext/object/blank'
10
10
  require 'active_support/core_ext/enumerable'
11
11
  require 'active_support/core_ext/string'
12
12
  require 'cocina/models'
13
+ require 'dor/services/client'
13
14
  require 'honeybadger'
14
15
  require 'marc/vocab'
15
16
 
16
17
  # Builds solr documents for indexing.
17
18
  class DorIndexing
18
19
  # @return [Hash] the solr document
19
- def self.build(cocina_with_metadata:, workflow_client:, cocina_repository:)
20
+ def self.build(cocina_with_metadata:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
20
21
  Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
21
- DorIndexing::Builders::DocumentBuilder.for(model: cocina_with_metadata, workflow_client:, cocina_repository:).to_solr
22
+ DorIndexing::Builders::DocumentBuilder.for(
23
+ model: cocina_with_metadata,
24
+ workflow_client:,
25
+ cocina_finder:,
26
+ administrative_tags_finder:,
27
+ release_tags_finder:
28
+ ).to_solr
22
29
  end
23
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dor_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Littman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-11 00:00:00.000000000 Z
11
+ date: 2024-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.95.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: dor-services-client
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '14.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '14.0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: dor-workflow-client
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -135,7 +149,6 @@ files:
135
149
  - lib/dor_indexing/builders/publisher_name_builder.rb
136
150
  - lib/dor_indexing/builders/temporal_builder.rb
137
151
  - lib/dor_indexing/builders/topic_builder.rb
138
- - lib/dor_indexing/cocina_repository.rb
139
152
  - lib/dor_indexing/indexers/administrative_tag_indexer.rb
140
153
  - lib/dor_indexing/indexers/basic_indexer.rb
141
154
  - lib/dor_indexing/indexers/collection_title_indexer.rb
@@ -153,6 +166,7 @@ files:
153
166
  - lib/dor_indexing/indexers/workflow_process_indexer.rb
154
167
  - lib/dor_indexing/indexers/workflows_indexer.rb
155
168
  - lib/dor_indexing/marc_country.rb
169
+ - lib/dor_indexing/repository_error.rb
156
170
  - lib/dor_indexing/selectors/event_selector.rb
157
171
  - lib/dor_indexing/selectors/pub_year_selector.rb
158
172
  - lib/dor_indexing/version.rb
@@ -180,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
180
194
  - !ruby/object:Gem::Version
181
195
  version: '0'
182
196
  requirements: []
183
- rubygems_version: 3.4.10
197
+ rubygems_version: 3.5.6
184
198
  signing_key:
185
199
  specification_version: 4
186
200
  summary: Library for creating Solr documents for SDR indexing.
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class DorIndexing
4
- # Interface for retrieving Cocina objects.
5
- # In DSA, the concrete implementation backs this with CocinaObjectStore.
6
- # In DIA, the concrete implementation backs this with Dor Services Client.
7
- class CocinaRepository
8
- class RepositoryError < StandardError; end
9
-
10
- # @param [String] druid
11
- # @return [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata]
12
- # @raise [RepositoryError] if the object is not found or other error occurs
13
- def find(druid)
14
- raise NotImplementedError
15
- end
16
-
17
- # @param [String] druid
18
- # @return [Array<String>] administrative tags
19
- # @raise [RepositoryError] if the object is not found or other error occurs
20
- def administrative_tags(druid)
21
- raise NotImplementedError
22
- end
23
- end
24
- end