dor_indexing 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +11 -4
- data/lib/dor_indexing/builders/document_builder.rb +14 -12
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +7 -6
- data/lib/dor_indexing/indexers/releasable_indexer.rb +19 -14
- data/lib/dor_indexing/repository_error.rb +8 -0
- data/lib/dor_indexing/version.rb +1 -1
- data/lib/dor_indexing.rb +4 -3
- metadata +4 -4
- data/lib/dor_indexing/cocina_repository.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b76dd6ecf919e3653810f59c166d313f77eba4a3046bcd1598abd996bf57c74d
|
4
|
+
data.tar.gz: be615ccc690756cd7aff121e175d544caea9cf3b1dd70a6d03e0fd3c9ab14c43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d04715c311bfd64ac62169e18d4fea188b6c2dc0e8263c38413e3eac3f985e901f3f5ba1181695f5e24ecbbdb713c937bd6c334c0e1f4ec622f293cea4a2195b
|
7
|
+
data.tar.gz: de51ceba624569579cbfe1c1f259ba3568158443ed849ed3d253a144ed6c25346d89916df530c7bc91891afe5f3de19e508cca9cc23818f9e61b262b7306a2a2
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -26,14 +26,22 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
26
26
|
|
27
27
|
## Usage
|
28
28
|
|
29
|
-
DorIndexing
|
29
|
+
DorIndexing requires interaction with the SDR workflow API and also needs the following:
|
30
30
|
|
31
|
-
|
31
|
+
* a callable that takes a single argument (a druid) and returns the Cocina for the corresponding object
|
32
|
+
* a callable that takes a single argument (a druid) and returns the list of administrative tags for the corresponding object
|
33
|
+
* a callable that takes a single argument (a druid) and returns the list of release tags for the corresponding object
|
32
34
|
|
33
35
|
```ruby
|
34
36
|
require 'dor_indexing'
|
35
37
|
|
36
|
-
doc = DorIndexing.build(
|
38
|
+
doc = DorIndexing.build(
|
39
|
+
cocina_with_metadata:,
|
40
|
+
workflow_client:,
|
41
|
+
cocina_finder:,
|
42
|
+
administrative_tags_finder:,
|
43
|
+
release_tags_finder:
|
44
|
+
)
|
37
45
|
```
|
38
46
|
|
39
47
|
## Testing
|
@@ -47,4 +55,3 @@ Argo is the blacklight app that uses the Solr index extensively, and it already
|
|
47
55
|
To ensure our indexing behavior produces the desired results, it was easiest to put
|
48
56
|
the full stack integration tests in the argo repository -- they can be found in
|
49
57
|
https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
|
50
|
-
|
@@ -48,19 +48,20 @@ class DorIndexing
|
|
48
48
|
|
49
49
|
@@parent_collections = {} # rubocop:disable Style/ClassVars
|
50
50
|
|
51
|
-
def self.for(
|
52
|
-
new(
|
51
|
+
def self.for(...)
|
52
|
+
new(...).for
|
53
53
|
end
|
54
54
|
|
55
55
|
def self.reset_parent_collections
|
56
56
|
@@parent_collections = {} # rubocop:disable Style/ClassVars
|
57
57
|
end
|
58
58
|
|
59
|
-
def initialize(model:, workflow_client:,
|
59
|
+
def initialize(model:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
|
60
60
|
@model = model
|
61
61
|
@workflow_client = workflow_client
|
62
|
-
@
|
63
|
-
@
|
62
|
+
@cocina_finder = cocina_finder
|
63
|
+
@administrative_tags_finder = administrative_tags_finder
|
64
|
+
@release_tags_finder = release_tags_finder
|
64
65
|
end
|
65
66
|
|
66
67
|
# @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
|
@@ -70,13 +71,14 @@ class DorIndexing
|
|
70
71
|
parent_collections:,
|
71
72
|
administrative_tags:,
|
72
73
|
workflow_client:,
|
73
|
-
|
74
|
-
|
74
|
+
cocina_finder:,
|
75
|
+
administrative_tags_finder:,
|
76
|
+
release_tags_finder:)
|
75
77
|
end
|
76
78
|
|
77
79
|
private
|
78
80
|
|
79
|
-
attr_reader :model, :workflow_client, :
|
81
|
+
attr_reader :model, :workflow_client, :cocina_finder, :administrative_tags_finder, :release_tags_finder
|
80
82
|
|
81
83
|
def id
|
82
84
|
model.externalIdentifier
|
@@ -90,8 +92,8 @@ class DorIndexing
|
|
90
92
|
return [] unless model.dro?
|
91
93
|
|
92
94
|
Array(model.structural&.isMemberOf).filter_map do |rel_druid|
|
93
|
-
@@parent_collections[rel_druid] ||=
|
94
|
-
rescue DorIndexing::
|
95
|
+
@@parent_collections[rel_druid] ||= cocina_finder.call(rel_druid)
|
96
|
+
rescue DorIndexing::RepositoryError
|
95
97
|
Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
|
96
98
|
# This may happen if the referenced Collection does not exist (bad data)
|
97
99
|
nil
|
@@ -99,8 +101,8 @@ class DorIndexing
|
|
99
101
|
end
|
100
102
|
|
101
103
|
def administrative_tags
|
102
|
-
|
103
|
-
rescue DorIndexing::
|
104
|
+
administrative_tags_finder.call(id)
|
105
|
+
rescue DorIndexing::RepositoryError
|
104
106
|
[]
|
105
107
|
end
|
106
108
|
end
|
@@ -4,13 +4,14 @@ class DorIndexing
|
|
4
4
|
module Indexers
|
5
5
|
# Indexes the druid, metadata sources, and the apo titles
|
6
6
|
class IdentifiableIndexer
|
7
|
-
attr_reader :cocina, :
|
7
|
+
attr_reader :cocina, :cocina_finder, :administrative_tags_finder
|
8
8
|
|
9
9
|
CURRENT_CATALOG_TYPE = 'folio'
|
10
10
|
|
11
|
-
def initialize(cocina:,
|
11
|
+
def initialize(cocina:, cocina_finder:, administrative_tags_finder:, **)
|
12
12
|
@cocina = cocina
|
13
|
-
@
|
13
|
+
@cocina_finder = cocina_finder
|
14
|
+
@administrative_tags_finder = administrative_tags_finder
|
14
15
|
end
|
15
16
|
|
16
17
|
## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
|
@@ -72,13 +73,13 @@ class DorIndexing
|
|
72
73
|
# populate cache if necessary
|
73
74
|
def populate_cache(rel_druid)
|
74
75
|
@@apo_hash[rel_druid] ||= begin
|
75
|
-
related_obj =
|
76
|
+
related_obj = cocina_finder.call(rel_druid)
|
76
77
|
# APOs don't have projects, and since Hydrus is set to be retired, I don't want to
|
77
78
|
# add the cocina property. Just check the tags service instead.
|
78
79
|
is_from_hydrus = hydrus_tag?(rel_druid)
|
79
80
|
title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
|
80
81
|
{ 'related_obj_title' => title, 'is_from_hydrus' => is_from_hydrus }
|
81
|
-
rescue
|
82
|
+
rescue RepositoryError
|
82
83
|
Honeybadger.notify("Bad association found on #{cocina.externalIdentifier}. #{rel_druid} could not be found")
|
83
84
|
# This may happen if the given APO or Collection does not exist (bad data)
|
84
85
|
{ 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
|
@@ -86,7 +87,7 @@ class DorIndexing
|
|
86
87
|
end
|
87
88
|
|
88
89
|
def hydrus_tag?(id)
|
89
|
-
|
90
|
+
administrative_tags_finder.call(id).include?('Project : Hydrus')
|
90
91
|
end
|
91
92
|
end
|
92
93
|
end
|
@@ -4,12 +4,12 @@ class DorIndexing
|
|
4
4
|
module Indexers
|
5
5
|
# Indexes the object's release tags
|
6
6
|
class ReleasableIndexer
|
7
|
-
attr_reader :cocina, :parent_collections, :
|
7
|
+
attr_reader :cocina, :parent_collections, :release_tags_finder
|
8
8
|
|
9
|
-
def initialize(cocina:, parent_collections:,
|
9
|
+
def initialize(cocina:, parent_collections:, release_tags_finder:, **)
|
10
10
|
@cocina = cocina
|
11
11
|
@parent_collections = parent_collections
|
12
|
-
@
|
12
|
+
@release_tags_finder = release_tags_finder
|
13
13
|
end
|
14
14
|
|
15
15
|
# @return [Hash] the partial solr document for releasable concerns
|
@@ -19,18 +19,27 @@ class DorIndexing
|
|
19
19
|
{
|
20
20
|
'released_to_ssim' => tags.map(&:to).uniq,
|
21
21
|
'released_to_searchworks_dttsi' => searchworks_release_date,
|
22
|
-
'released_to_earthworks_dttsi' => earthworks_release_date
|
22
|
+
'released_to_earthworks_dttsi' => earthworks_release_date,
|
23
|
+
'released_to_purl_sitemap_dttsi' => purl_sitemap_release_date
|
23
24
|
}.compact
|
24
25
|
end
|
25
26
|
|
26
27
|
private
|
27
28
|
|
29
|
+
def purl_sitemap_release_date
|
30
|
+
date_for_tag 'PURL sitemap'
|
31
|
+
end
|
32
|
+
|
28
33
|
def earthworks_release_date
|
29
|
-
|
34
|
+
date_for_tag 'Earthworks'
|
30
35
|
end
|
31
36
|
|
32
37
|
def searchworks_release_date
|
33
|
-
|
38
|
+
date_for_tag 'Searchworks'
|
39
|
+
end
|
40
|
+
|
41
|
+
def date_for_tag(project)
|
42
|
+
tags.find { |tag| tag.to == project }&.date&.utc&.iso8601
|
34
43
|
end
|
35
44
|
|
36
45
|
# Item tags have precedence over collection tags, so if the collection is release=true
|
@@ -41,10 +50,8 @@ class DorIndexing
|
|
41
50
|
|
42
51
|
def tags_from_collection
|
43
52
|
parent_collections.each_with_object({}) do |collection, result|
|
44
|
-
|
45
|
-
|
46
|
-
.release_tags
|
47
|
-
.list
|
53
|
+
release_tags_finder
|
54
|
+
.call(collection.externalIdentifier)
|
48
55
|
.select { |tag| tag.what == 'self' }
|
49
56
|
.group_by(&:to).map do |project, releases_for_project|
|
50
57
|
result[project] = releases_for_project.max_by(&:date)
|
@@ -53,10 +60,8 @@ class DorIndexing
|
|
53
60
|
end
|
54
61
|
|
55
62
|
def tags_from_item
|
56
|
-
|
57
|
-
|
58
|
-
.release_tags
|
59
|
-
.list
|
63
|
+
release_tags_finder
|
64
|
+
.call(cocina.externalIdentifier)
|
60
65
|
.select { |tag| tag.what == 'self' }
|
61
66
|
.group_by(&:to).transform_values do |releases_for_project|
|
62
67
|
releases_for_project.max_by(&:date)
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
|
4
|
+
# Error raised retrieving Cocina objects, administrative tags, or release tags
|
5
|
+
# In DSA, the concrete implementation backs this with CocinaObjectStore.
|
6
|
+
# In DIA, the concrete implementation backs this with Dor Services Client.
|
7
|
+
class RepositoryError < StandardError; end
|
8
|
+
end
|
data/lib/dor_indexing/version.rb
CHANGED
data/lib/dor_indexing.rb
CHANGED
@@ -17,13 +17,14 @@ require 'marc/vocab'
|
|
17
17
|
# Builds solr documents for indexing.
|
18
18
|
class DorIndexing
|
19
19
|
# @return [Hash] the solr document
|
20
|
-
def self.build(cocina_with_metadata:, workflow_client:,
|
20
|
+
def self.build(cocina_with_metadata:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
|
21
21
|
Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
|
22
22
|
DorIndexing::Builders::DocumentBuilder.for(
|
23
23
|
model: cocina_with_metadata,
|
24
24
|
workflow_client:,
|
25
|
-
|
26
|
-
|
25
|
+
cocina_finder:,
|
26
|
+
administrative_tags_finder:,
|
27
|
+
release_tags_finder:
|
27
28
|
).to_solr
|
28
29
|
end
|
29
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dor_indexing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Littman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -149,7 +149,6 @@ files:
|
|
149
149
|
- lib/dor_indexing/builders/publisher_name_builder.rb
|
150
150
|
- lib/dor_indexing/builders/temporal_builder.rb
|
151
151
|
- lib/dor_indexing/builders/topic_builder.rb
|
152
|
-
- lib/dor_indexing/cocina_repository.rb
|
153
152
|
- lib/dor_indexing/indexers/administrative_tag_indexer.rb
|
154
153
|
- lib/dor_indexing/indexers/basic_indexer.rb
|
155
154
|
- lib/dor_indexing/indexers/collection_title_indexer.rb
|
@@ -167,6 +166,7 @@ files:
|
|
167
166
|
- lib/dor_indexing/indexers/workflow_process_indexer.rb
|
168
167
|
- lib/dor_indexing/indexers/workflows_indexer.rb
|
169
168
|
- lib/dor_indexing/marc_country.rb
|
169
|
+
- lib/dor_indexing/repository_error.rb
|
170
170
|
- lib/dor_indexing/selectors/event_selector.rb
|
171
171
|
- lib/dor_indexing/selectors/pub_year_selector.rb
|
172
172
|
- lib/dor_indexing/version.rb
|
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
194
194
|
- !ruby/object:Gem::Version
|
195
195
|
version: '0'
|
196
196
|
requirements: []
|
197
|
-
rubygems_version: 3.
|
197
|
+
rubygems_version: 3.5.6
|
198
198
|
signing_key:
|
199
199
|
specification_version: 4
|
200
200
|
summary: Library for creating Solr documents for SDR indexing.
|
@@ -1,24 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class DorIndexing
|
4
|
-
# Interface for retrieving Cocina objects.
|
5
|
-
# In DSA, the concrete implementation backs this with CocinaObjectStore.
|
6
|
-
# In DIA, the concrete implementation backs this with Dor Services Client.
|
7
|
-
class CocinaRepository
|
8
|
-
class RepositoryError < StandardError; end
|
9
|
-
|
10
|
-
# @param [String] druid
|
11
|
-
# @return [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata]
|
12
|
-
# @raise [RepositoryError] if the object is not found or other error occurs
|
13
|
-
def find(druid)
|
14
|
-
raise NotImplementedError
|
15
|
-
end
|
16
|
-
|
17
|
-
# @param [String] druid
|
18
|
-
# @return [Array<String>] administrative tags
|
19
|
-
# @raise [RepositoryError] if the object is not found or other error occurs
|
20
|
-
def administrative_tags(druid)
|
21
|
-
raise NotImplementedError
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|