dor_indexing 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +11 -4
- data/lib/dor_indexing/builders/document_builder.rb +14 -12
- data/lib/dor_indexing/indexers/identifiable_indexer.rb +7 -6
- data/lib/dor_indexing/indexers/releasable_indexer.rb +19 -14
- data/lib/dor_indexing/repository_error.rb +8 -0
- data/lib/dor_indexing/version.rb +1 -1
- data/lib/dor_indexing.rb +4 -3
- metadata +4 -4
- data/lib/dor_indexing/cocina_repository.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b76dd6ecf919e3653810f59c166d313f77eba4a3046bcd1598abd996bf57c74d
|
4
|
+
data.tar.gz: be615ccc690756cd7aff121e175d544caea9cf3b1dd70a6d03e0fd3c9ab14c43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d04715c311bfd64ac62169e18d4fea188b6c2dc0e8263c38413e3eac3f985e901f3f5ba1181695f5e24ecbbdb713c937bd6c334c0e1f4ec622f293cea4a2195b
|
7
|
+
data.tar.gz: de51ceba624569579cbfe1c1f259ba3568158443ed849ed3d253a144ed6c25346d89916df530c7bc91891afe5f3de19e508cca9cc23818f9e61b262b7306a2a2
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -26,14 +26,22 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
26
26
|
|
27
27
|
## Usage
|
28
28
|
|
29
|
-
DorIndexing
|
29
|
+
DorIndexing requires interaction with the SDR workflow API and also needs the following:
|
30
30
|
|
31
|
-
|
31
|
+
* a callable that takes a single argument (a druid) and returns the Cocina for the corresponding object
|
32
|
+
* a callable that takes a single argument (a druid) and returns the list of administrative tags for the corresponding object
|
33
|
+
* a callable that takes a single argument (a druid) and returns the list of release tags for the corresponding object
|
32
34
|
|
33
35
|
```ruby
|
34
36
|
require 'dor_indexing'
|
35
37
|
|
36
|
-
doc = DorIndexing.build(
|
38
|
+
doc = DorIndexing.build(
|
39
|
+
cocina_with_metadata:,
|
40
|
+
workflow_client:,
|
41
|
+
cocina_finder:,
|
42
|
+
administrative_tags_finder:,
|
43
|
+
release_tags_finder:
|
44
|
+
)
|
37
45
|
```
|
38
46
|
|
39
47
|
## Testing
|
@@ -47,4 +55,3 @@ Argo is the blacklight app that uses the Solr index extensively, and it already
|
|
47
55
|
To ensure our indexing behavior produces the desired results, it was easiest to put
|
48
56
|
the full stack integration tests in the argo repository -- they can be found in
|
49
57
|
https://github.com/sul-dlss/argo/tree/main/spec/features/indexing_xxx_spec.rb
|
50
|
-
|
@@ -48,19 +48,20 @@ class DorIndexing
|
|
48
48
|
|
49
49
|
@@parent_collections = {} # rubocop:disable Style/ClassVars
|
50
50
|
|
51
|
-
def self.for(
|
52
|
-
new(
|
51
|
+
def self.for(...)
|
52
|
+
new(...).for
|
53
53
|
end
|
54
54
|
|
55
55
|
def self.reset_parent_collections
|
56
56
|
@@parent_collections = {} # rubocop:disable Style/ClassVars
|
57
57
|
end
|
58
58
|
|
59
|
-
def initialize(model:, workflow_client:,
|
59
|
+
def initialize(model:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
|
60
60
|
@model = model
|
61
61
|
@workflow_client = workflow_client
|
62
|
-
@
|
63
|
-
@
|
62
|
+
@cocina_finder = cocina_finder
|
63
|
+
@administrative_tags_finder = administrative_tags_finder
|
64
|
+
@release_tags_finder = release_tags_finder
|
64
65
|
end
|
65
66
|
|
66
67
|
# @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata] model
|
@@ -70,13 +71,14 @@ class DorIndexing
|
|
70
71
|
parent_collections:,
|
71
72
|
administrative_tags:,
|
72
73
|
workflow_client:,
|
73
|
-
|
74
|
-
|
74
|
+
cocina_finder:,
|
75
|
+
administrative_tags_finder:,
|
76
|
+
release_tags_finder:)
|
75
77
|
end
|
76
78
|
|
77
79
|
private
|
78
80
|
|
79
|
-
attr_reader :model, :workflow_client, :
|
81
|
+
attr_reader :model, :workflow_client, :cocina_finder, :administrative_tags_finder, :release_tags_finder
|
80
82
|
|
81
83
|
def id
|
82
84
|
model.externalIdentifier
|
@@ -90,8 +92,8 @@ class DorIndexing
|
|
90
92
|
return [] unless model.dro?
|
91
93
|
|
92
94
|
Array(model.structural&.isMemberOf).filter_map do |rel_druid|
|
93
|
-
@@parent_collections[rel_druid] ||=
|
94
|
-
rescue DorIndexing::
|
95
|
+
@@parent_collections[rel_druid] ||= cocina_finder.call(rel_druid)
|
96
|
+
rescue DorIndexing::RepositoryError
|
95
97
|
Honeybadger.notify("Bad association found on #{model.externalIdentifier}. #{rel_druid} could not be found")
|
96
98
|
# This may happen if the referenced Collection does not exist (bad data)
|
97
99
|
nil
|
@@ -99,8 +101,8 @@ class DorIndexing
|
|
99
101
|
end
|
100
102
|
|
101
103
|
def administrative_tags
|
102
|
-
|
103
|
-
rescue DorIndexing::
|
104
|
+
administrative_tags_finder.call(id)
|
105
|
+
rescue DorIndexing::RepositoryError
|
104
106
|
[]
|
105
107
|
end
|
106
108
|
end
|
@@ -4,13 +4,14 @@ class DorIndexing
|
|
4
4
|
module Indexers
|
5
5
|
# Indexes the druid, metadata sources, and the apo titles
|
6
6
|
class IdentifiableIndexer
|
7
|
-
attr_reader :cocina, :
|
7
|
+
attr_reader :cocina, :cocina_finder, :administrative_tags_finder
|
8
8
|
|
9
9
|
CURRENT_CATALOG_TYPE = 'folio'
|
10
10
|
|
11
|
-
def initialize(cocina:,
|
11
|
+
def initialize(cocina:, cocina_finder:, administrative_tags_finder:, **)
|
12
12
|
@cocina = cocina
|
13
|
-
@
|
13
|
+
@cocina_finder = cocina_finder
|
14
|
+
@administrative_tags_finder = administrative_tags_finder
|
14
15
|
end
|
15
16
|
|
16
17
|
## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
|
@@ -72,13 +73,13 @@ class DorIndexing
|
|
72
73
|
# populate cache if necessary
|
73
74
|
def populate_cache(rel_druid)
|
74
75
|
@@apo_hash[rel_druid] ||= begin
|
75
|
-
related_obj =
|
76
|
+
related_obj = cocina_finder.call(rel_druid)
|
76
77
|
# APOs don't have projects, and since Hydrus is set to be retired, I don't want to
|
77
78
|
# add the cocina property. Just check the tags service instead.
|
78
79
|
is_from_hydrus = hydrus_tag?(rel_druid)
|
79
80
|
title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
|
80
81
|
{ 'related_obj_title' => title, 'is_from_hydrus' => is_from_hydrus }
|
81
|
-
rescue
|
82
|
+
rescue RepositoryError
|
82
83
|
Honeybadger.notify("Bad association found on #{cocina.externalIdentifier}. #{rel_druid} could not be found")
|
83
84
|
# This may happen if the given APO or Collection does not exist (bad data)
|
84
85
|
{ 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
|
@@ -86,7 +87,7 @@ class DorIndexing
|
|
86
87
|
end
|
87
88
|
|
88
89
|
def hydrus_tag?(id)
|
89
|
-
|
90
|
+
administrative_tags_finder.call(id).include?('Project : Hydrus')
|
90
91
|
end
|
91
92
|
end
|
92
93
|
end
|
@@ -4,12 +4,12 @@ class DorIndexing
|
|
4
4
|
module Indexers
|
5
5
|
# Indexes the object's release tags
|
6
6
|
class ReleasableIndexer
|
7
|
-
attr_reader :cocina, :parent_collections, :
|
7
|
+
attr_reader :cocina, :parent_collections, :release_tags_finder
|
8
8
|
|
9
|
-
def initialize(cocina:, parent_collections:,
|
9
|
+
def initialize(cocina:, parent_collections:, release_tags_finder:, **)
|
10
10
|
@cocina = cocina
|
11
11
|
@parent_collections = parent_collections
|
12
|
-
@
|
12
|
+
@release_tags_finder = release_tags_finder
|
13
13
|
end
|
14
14
|
|
15
15
|
# @return [Hash] the partial solr document for releasable concerns
|
@@ -19,18 +19,27 @@ class DorIndexing
|
|
19
19
|
{
|
20
20
|
'released_to_ssim' => tags.map(&:to).uniq,
|
21
21
|
'released_to_searchworks_dttsi' => searchworks_release_date,
|
22
|
-
'released_to_earthworks_dttsi' => earthworks_release_date
|
22
|
+
'released_to_earthworks_dttsi' => earthworks_release_date,
|
23
|
+
'released_to_purl_sitemap_dttsi' => purl_sitemap_release_date
|
23
24
|
}.compact
|
24
25
|
end
|
25
26
|
|
26
27
|
private
|
27
28
|
|
29
|
+
def purl_sitemap_release_date
|
30
|
+
date_for_tag 'PURL sitemap'
|
31
|
+
end
|
32
|
+
|
28
33
|
def earthworks_release_date
|
29
|
-
|
34
|
+
date_for_tag 'Earthworks'
|
30
35
|
end
|
31
36
|
|
32
37
|
def searchworks_release_date
|
33
|
-
|
38
|
+
date_for_tag 'Searchworks'
|
39
|
+
end
|
40
|
+
|
41
|
+
def date_for_tag(project)
|
42
|
+
tags.find { |tag| tag.to == project }&.date&.utc&.iso8601
|
34
43
|
end
|
35
44
|
|
36
45
|
# Item tags have precedence over collection tags, so if the collection is release=true
|
@@ -41,10 +50,8 @@ class DorIndexing
|
|
41
50
|
|
42
51
|
def tags_from_collection
|
43
52
|
parent_collections.each_with_object({}) do |collection, result|
|
44
|
-
|
45
|
-
|
46
|
-
.release_tags
|
47
|
-
.list
|
53
|
+
release_tags_finder
|
54
|
+
.call(collection.externalIdentifier)
|
48
55
|
.select { |tag| tag.what == 'self' }
|
49
56
|
.group_by(&:to).map do |project, releases_for_project|
|
50
57
|
result[project] = releases_for_project.max_by(&:date)
|
@@ -53,10 +60,8 @@ class DorIndexing
|
|
53
60
|
end
|
54
61
|
|
55
62
|
def tags_from_item
|
56
|
-
|
57
|
-
|
58
|
-
.release_tags
|
59
|
-
.list
|
63
|
+
release_tags_finder
|
64
|
+
.call(cocina.externalIdentifier)
|
60
65
|
.select { |tag| tag.what == 'self' }
|
61
66
|
.group_by(&:to).transform_values do |releases_for_project|
|
62
67
|
releases_for_project.max_by(&:date)
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class DorIndexing
|
4
|
+
# Error raised retrieving Cocina objects, administrative tags, or release tags
|
5
|
+
# In DSA, the concrete implementation backs this with CocinaObjectStore.
|
6
|
+
# In DIA, the concrete implementation backs this with Dor Services Client.
|
7
|
+
class RepositoryError < StandardError; end
|
8
|
+
end
|
data/lib/dor_indexing/version.rb
CHANGED
data/lib/dor_indexing.rb
CHANGED
@@ -17,13 +17,14 @@ require 'marc/vocab'
|
|
17
17
|
# Builds solr documents for indexing.
|
18
18
|
class DorIndexing
|
19
19
|
# @return [Hash] the solr document
|
20
|
-
def self.build(cocina_with_metadata:, workflow_client:,
|
20
|
+
def self.build(cocina_with_metadata:, workflow_client:, cocina_finder:, administrative_tags_finder:, release_tags_finder:)
|
21
21
|
Honeybadger.context({ identifier: cocina_with_metadata.externalIdentifier })
|
22
22
|
DorIndexing::Builders::DocumentBuilder.for(
|
23
23
|
model: cocina_with_metadata,
|
24
24
|
workflow_client:,
|
25
|
-
|
26
|
-
|
25
|
+
cocina_finder:,
|
26
|
+
administrative_tags_finder:,
|
27
|
+
release_tags_finder:
|
27
28
|
).to_solr
|
28
29
|
end
|
29
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dor_indexing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Littman
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -149,7 +149,6 @@ files:
|
|
149
149
|
- lib/dor_indexing/builders/publisher_name_builder.rb
|
150
150
|
- lib/dor_indexing/builders/temporal_builder.rb
|
151
151
|
- lib/dor_indexing/builders/topic_builder.rb
|
152
|
-
- lib/dor_indexing/cocina_repository.rb
|
153
152
|
- lib/dor_indexing/indexers/administrative_tag_indexer.rb
|
154
153
|
- lib/dor_indexing/indexers/basic_indexer.rb
|
155
154
|
- lib/dor_indexing/indexers/collection_title_indexer.rb
|
@@ -167,6 +166,7 @@ files:
|
|
167
166
|
- lib/dor_indexing/indexers/workflow_process_indexer.rb
|
168
167
|
- lib/dor_indexing/indexers/workflows_indexer.rb
|
169
168
|
- lib/dor_indexing/marc_country.rb
|
169
|
+
- lib/dor_indexing/repository_error.rb
|
170
170
|
- lib/dor_indexing/selectors/event_selector.rb
|
171
171
|
- lib/dor_indexing/selectors/pub_year_selector.rb
|
172
172
|
- lib/dor_indexing/version.rb
|
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
194
194
|
- !ruby/object:Gem::Version
|
195
195
|
version: '0'
|
196
196
|
requirements: []
|
197
|
-
rubygems_version: 3.
|
197
|
+
rubygems_version: 3.5.6
|
198
198
|
signing_key:
|
199
199
|
specification_version: 4
|
200
200
|
summary: Library for creating Solr documents for SDR indexing.
|
@@ -1,24 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
class DorIndexing
|
4
|
-
# Interface for retrieving Cocina objects.
|
5
|
-
# In DSA, the concrete implementation backs this with CocinaObjectStore.
|
6
|
-
# In DIA, the concrete implementation backs this with Dor Services Client.
|
7
|
-
class CocinaRepository
|
8
|
-
class RepositoryError < StandardError; end
|
9
|
-
|
10
|
-
# @param [String] druid
|
11
|
-
# @return [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Models::AdminPolicyWithMetadata]
|
12
|
-
# @raise [RepositoryError] if the object is not found or other error occurs
|
13
|
-
def find(druid)
|
14
|
-
raise NotImplementedError
|
15
|
-
end
|
16
|
-
|
17
|
-
# @param [String] druid
|
18
|
-
# @return [Array<String>] administrative tags
|
19
|
-
# @raise [RepositoryError] if the object is not found or other error occurs
|
20
|
-
def administrative_tags(druid)
|
21
|
-
raise NotImplementedError
|
22
|
-
end
|
23
|
-
end
|
24
|
-
end
|