dor_indexing 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ae485a8233c79356bffbc42bef8d8e9f3ef1afbf3db962a2ba43b4ce522d7cd7
4
- data.tar.gz: 1a05728e58b24d5f94164881a92326c9232402e241ce589dab14231e448ce60a
3
+ metadata.gz: b76dd6ecf919e3653810f59c166d313f77eba4a3046bcd1598abd996bf57c74d
4
+ data.tar.gz: be615ccc690756cd7aff121e175d544caea9cf3b1dd70a6d03e0fd3c9ab14c43
5
5
  SHA512:
6
- metadata.gz: 24cfdc3e6be2af97c092e893825af54f8e3e3e47d65b9b955ea90c4ebda86072dbd87f32e2b3778e5fbee005be00c6877efac710e4d87b178f2521cee42f8b62
7
- data.tar.gz: 7a1fc488f95678830e07fb919371e3ef8f5125d8bfcbf40d62109c51f87203fb6c424bef4da4fd8b7a9ec77a87bfee3f92274325482ab58d602599d069bbafe7
6
+ metadata.gz: d04715c311bfd64ac62169e18d4fea188b6c2dc0e8263c38413e3eac3f985e901f3f5ba1181695f5e24ecbbdb713c937bd6c334c0e1f4ec622f293cea4a2195b
7
+ data.tar.gz: de51ceba624569579cbfe1c1f259ba3568158443ed849ed3d253a144ed6c25346d89916df530c7bc91891afe5f3de19e508cca9cc23818f9e61b262b7306a2a2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dor_indexing (1.5.0)
4
+ dor_indexing (2.0.0)
5
5
  activesupport
6
6
  cocina-models (~> 0.95.1)
7
7
  dor-services-client (~> 14.0)
data/README.md CHANGED
@@ -26,14 +26,22 @@ If bundler is not being used to manage dependencies, install the gem by executin
26
26
 
27
27
  ## Usage
28
28
 
29
- DorIndexing that a configured Workflow Client, DOR Services Client, and a Cocina Repository be injected.
29
+ DorIndexing requires interaction with the SDR workflow API and also needs the following:
30
30
 
31
- The Cocina Repository provides methods for finding Cocina objects and administrative tags. One possible implementation of a Cocina Repository would be to use DOR Services Client.
31
+ * a callable that takes a single argument (a druid) and returns the Cocina for the corresponding object
32
+ * a callable that takes a single argument (a druid) and returns the list of administrative tags for the corresponding object
33
+ * a callable that takes a single argument (a druid) and returns the list of release tags for the corresponding object
32
34
 
33
35
  ```ruby
34
36
  require 'dor_indexing'
35
37
 
36
- doc = DorIndexing.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
38
+ doc = DorIndexing.build(
39
+ cocina_with_metadata:,
40
+ workflow_client:,
41
+ cocina_finder:,
42
+ administrative_tags_finder:,
43
+ release_tags_finder:
44
+ )
37
45
  ```
38
46
 
39
47
  ## Testing
@@ -47,4 +55,3 @@ Argo is the blacklight app that uses the Solr index extensively, and it already
47
55
  To ensure our indexing behavior produces the desired results, it was easiest to put
48
56
  the full stack integration tests in the argo repository -- they can be found in
49
57
  https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
50
-
@@ -48,19 +48,20 @@ class DorIndexing
48
48
 
49
49
  @@parent_collections = {} # rubocop:disable Style/ClassVars
50
50
 
51
- def self.for(model:, workflow_client:, dor_services_client:, cocina_repository:)
52
- new(model:, workflow_client:, dor_services_client:, cocina_repository:).for
51
+ def self.for(...)
52
+ new(...).for
53
53
  end
54
54
 
55
55
  def self.reset_parent_collections
56
56
  @@parent_collections = {} # rubocop:disable Style/ClassVars
57
57
  end
58
58
 
59
- def initialize(model:, workflow_client:, dor_services_client:, cocina_repository:)
59
+ def initialize(model:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
60
60
  @model = model
61
61
  @workflow_client = workflow_client
62
- @dor_services_client = dor_services_client
63
- @cocina_repository = cocina_repository
62
+ @cocina_finder = cocina_finder
63
+ @administrative_tags_finder = administrative_tags_finder
64
+ @release_tags_finder = release_tags_finder
64
65
  end
65
66
 
66
67
  # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
@@ -70,13 +71,14 @@ class DorIndexing
70
71
  parent_collections:,
71
72
  administrative_tags:,
72
73
  workflow_client:,
73
- dor_services_client:,
74
- cocina_repository:)
74
+ cocina_finder:,
75
+ administrative_tags_finder:,
76
+ release_tags_finder:)
75
77
  end
76
78
 
77
79
  private
78
80
 
79
- attr_reader :model, :workflow_client, :dor_services_client, :cocina_repository
81
+ attr_reader :model, :workflow_client, :cocina_finder, :administrative_tags_finder, :release_tags_finder
80
82
 
81
83
  def id
82
84
  model.externalIdentifier
@@ -90,8 +92,8 @@ class DorIndexing
90
92
  return [] unless model.dro?
91
93
 
92
94
  Array(model.structural&.isMemberOf).filter_map do |rel_druid|
93
- @@parent_collections[rel_druid] ||= cocina_repository.find(rel_druid)
94
- rescue DorIndexing::CocinaRepository::RepositoryError
95
+ @@parent_collections[rel_druid] ||= cocina_finder.call(rel_druid)
96
+ rescue DorIndexing::RepositoryError
95
97
  Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
96
98
  # This may happen if the referenced Collection does not exist (bad data)
97
99
  nil
@@ -99,8 +101,8 @@ class DorIndexing
99
101
  end
100
102
 
101
103
  def administrative_tags
102
- cocina_repository.administrative_tags(id)
103
- rescue DorIndexing::CocinaRepository::RepositoryError
104
+ administrative_tags_finder.call(id)
105
+ rescue DorIndexing::RepositoryError
104
106
  []
105
107
  end
106
108
  end
@@ -4,13 +4,14 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the druid, metadata sources, and the apo titles
6
6
  class IdentifiableIndexer
7
- attr_reader :cocina, :cocina_repository
7
+ attr_reader :cocina, :cocina_finder, :administrative_tags_finder
8
8
 
9
9
  CURRENT_CATALOG_TYPE = 'folio'
10
10
 
11
- def initialize(cocina:, cocina_repository:, **)
11
+ def initialize(cocina:, cocina_finder:, administrative_tags_finder:, **)
12
12
  @cocina = cocina
13
- @cocina_repository = cocina_repository
13
+ @cocina_finder = cocina_finder
14
+ @administrative_tags_finder = administrative_tags_finder
14
15
  end
15
16
 
16
17
  ## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
@@ -72,13 +73,13 @@ class DorIndexing
72
73
  # populate cache if necessary
73
74
  def populate_cache(rel_druid)
74
75
  @@apo_hash[rel_druid] ||= begin
75
- related_obj = cocina_repository.find(rel_druid)
76
+ related_obj = cocina_finder.call(rel_druid)
76
77
  # APOs don't have projects, and since Hydrus is set to be retired, I don't want to
77
78
  # add the cocina property. Just check the tags service instead.
78
79
  is_from_hydrus = hydrus_tag?(rel_druid)
79
80
  title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
80
81
  { 'related_obj_title' => title, 'is_from_hydrus' => is_from_hydrus }
81
- rescue CocinaRepository::RepositoryError
82
+ rescue RepositoryError
82
83
  Honeybadger.notify("Bad association found on #{cocina.externalIdentifier}. #{rel_druid} could not be found")
83
84
  # This may happen if the given APO or Collection does not exist (bad data)
84
85
  { 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
@@ -86,7 +87,7 @@ class DorIndexing
86
87
  end
87
88
 
88
89
  def hydrus_tag?(id)
89
- cocina_repository.administrative_tags(id).include?('Project : Hydrus')
90
+ administrative_tags_finder.call(id).include?('Project : Hydrus')
90
91
  end
91
92
  end
92
93
  end
@@ -4,12 +4,12 @@ class DorIndexing
4
4
  module Indexers
5
5
  # Indexes the object's release tags
6
6
  class ReleasableIndexer
7
- attr_reader :cocina, :parent_collections, :dor_services_client
7
+ attr_reader :cocina, :parent_collections, :release_tags_finder
8
8
 
9
- def initialize(cocina:, parent_collections:, dor_services_client:, **)
9
+ def initialize(cocina:, parent_collections:, release_tags_finder:, **)
10
10
  @cocina = cocina
11
11
  @parent_collections = parent_collections
12
- @dor_services_client = dor_services_client
12
+ @release_tags_finder = release_tags_finder
13
13
  end
14
14
 
15
15
  # @return [Hash] the partial solr document for releasable concerns
@@ -19,18 +19,27 @@ class DorIndexing
19
19
  {
20
20
  'released_to_ssim' => tags.map(&:to).uniq,
21
21
  'released_to_searchworks_dttsi' => searchworks_release_date,
22
- 'released_to_earthworks_dttsi' => earthworks_release_date
22
+ 'released_to_earthworks_dttsi' => earthworks_release_date,
23
+ 'released_to_purl_sitemap_dttsi' => purl_sitemap_release_date
23
24
  }.compact
24
25
  end
25
26
 
26
27
  private
27
28
 
29
+ def purl_sitemap_release_date
30
+ date_for_tag 'PURL sitemap'
31
+ end
32
+
28
33
  def earthworks_release_date
29
- tags.find { |tag| tag.to == 'Earthworks' }&.date&.utc&.iso8601
34
+ date_for_tag 'Earthworks'
30
35
  end
31
36
 
32
37
  def searchworks_release_date
33
- tags.find { |tag| tag.to == 'Searchworks' }&.date&.utc&.iso8601
38
+ date_for_tag 'Searchworks'
39
+ end
40
+
41
+ def date_for_tag(project)
42
+ tags.find { |tag| tag.to == project }&.date&.utc&.iso8601
34
43
  end
35
44
 
36
45
  # Item tags have precedence over collection tags, so if the collection is release=true
@@ -41,10 +50,8 @@ class DorIndexing
41
50
 
42
51
  def tags_from_collection
43
52
  parent_collections.each_with_object({}) do |collection, result|
44
- collection_object_client = dor_services_client.object(collection.externalIdentifier)
45
- collection_object_client
46
- .release_tags
47
- .list
53
+ release_tags_finder
54
+ .call(collection.externalIdentifier)
48
55
  .select { |tag| tag.what == 'self' }
49
56
  .group_by(&:to).map do |project, releases_for_project|
50
57
  result[project] = releases_for_project.max_by(&:date)
@@ -53,10 +60,8 @@ class DorIndexing
53
60
  end
54
61
 
55
62
  def tags_from_item
56
- object_client = dor_services_client.object(cocina.externalIdentifier)
57
- object_client
58
- .release_tags
59
- .list
63
+ release_tags_finder
64
+ .call(cocina.externalIdentifier)
60
65
  .select { |tag| tag.what == 'self' }
61
66
  .group_by(&:to).transform_values do |releases_for_project|
62
67
  releases_for_project.max_by(&:date)
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ class DorIndexing
4
+ # Error raised when retrieving Cocina objects, administrative tags, or release tags.
5
+ # In DSA, the concrete implementation backs this with CocinaObjectStore.
6
+ # In DIA, the concrete implementation backs this with Dor Services Client.
7
+ class RepositoryError < StandardError; end
8
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DorIndexing
4
- VERSION = '1.5.0'
4
+ VERSION = '2.0.0'
5
5
  end
data/lib/dor_indexing.rb CHANGED
@@ -17,13 +17,14 @@ require 'marc/vocab'
17
17
  # Builds solr documents for indexing.
18
18
  class DorIndexing
19
19
  # @return [Hash] the solr document
20
- def self.build(cocina_with_metadata:, workflow_client:, dor_services_client:, cocina_repository:)
20
+ def self.build(cocina_with_metadata:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
21
21
  Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
22
22
  DorIndexing::Builders::DocumentBuilder.for(
23
23
  model: cocina_with_metadata,
24
24
  workflow_client:,
25
- dor_services_client:,
26
- cocina_repository:
25
+ cocina_finder:,
26
+ administrative_tags_finder:,
27
+ release_tags_finder:
27
28
  ).to_solr
28
29
  end
29
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dor_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Littman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-27 00:00:00.000000000 Z
11
+ date: 2024-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -149,7 +149,6 @@ files:
149
149
  - lib/dor_indexing/builders/publisher_name_builder.rb
150
150
  - lib/dor_indexing/builders/temporal_builder.rb
151
151
  - lib/dor_indexing/builders/topic_builder.rb
152
- - lib/dor_indexing/cocina_repository.rb
153
152
  - lib/dor_indexing/indexers/administrative_tag_indexer.rb
154
153
  - lib/dor_indexing/indexers/basic_indexer.rb
155
154
  - lib/dor_indexing/indexers/collection_title_indexer.rb
@@ -167,6 +166,7 @@ files:
167
166
  - lib/dor_indexing/indexers/workflow_process_indexer.rb
168
167
  - lib/dor_indexing/indexers/workflows_indexer.rb
169
168
  - lib/dor_indexing/marc_country.rb
169
+ - lib/dor_indexing/repository_error.rb
170
170
  - lib/dor_indexing/selectors/event_selector.rb
171
171
  - lib/dor_indexing/selectors/pub_year_selector.rb
172
172
  - lib/dor_indexing/version.rb
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
194
194
  - !ruby/object:Gem::Version
195
195
  version: '0'
196
196
  requirements: []
197
- rubygems_version: 3.4.18
197
+ rubygems_version: 3.5.6
198
198
  signing_key:
199
199
  specification_version: 4
200
200
  summary: Library for creating Solr documents for SDR indexing.
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- class DorIndexing
4
- # Interface for retrieving Cocina objects.
5
- # In DSA, the concrete implementation backs this with CocinaObjectStore.
6
- # In DIA, the concrete implementation backs this with Dor Services Client.
7
- class CocinaRepository
8
- class RepositoryError < StandardError; end
9
-
10
- # @param [String] druid
11
- # @return [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata]
12
- # @raise [RepositoryError] if the object is not found or other error occurs
13
- def find(druid)
14
- raise NotImplementedError
15
- end
16
-
17
- # @param [String] druid
18
- # @return [Array<String>] administrative tags
19
- # @raise [RepositoryError] if the object is not found or other error occurs
20
- def administrative_tags(druid)
21
- raise NotImplementedError
22
- end
23
- end
24
- end