dor_indexing 1.3.1 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 83583cc273179a722bf069396bfab8ee5e67da1cc3562c1e2de31b0c987c6bda
4
- data.tar.gz: f823aa9a76740bf9b2ea785aaab80aa69958e16b8b7ad27b92869996de188066
3
+ metadata.gz: 9c1e2280668e9085122b921fd927b75e9982fa5fda9067a9245899ef93db77ae
4
+ data.tar.gz: 3e8d89e77994cb62ffcc1c598e47716c3a9c9f8aa8e37f226159e8fef947082d
5
5
  SHA512:
6
- metadata.gz: 047fb2b8f9050f083201622629419c82a203cb65867f8f1fce35026b50ceaebbe341ab5d2182c0e89e74f43a671ba42a00080e0aab2906d7c4a8f6477e7ebe60
7
- data.tar.gz: 2c908c0b6f3d4c1f4380f4e34a502eaa7508aa0037954cd32863982fe33892e9759d8db7f6e3b06ac70c4acedb3c5be728b3defd265e7242cf8566a9e75add86
6
+ metadata.gz: 24e3bf95ad1c541d3b403b2233018ab1da89be80cdcd3fa9acb2793e2b57673c8364202a4a548f8fe1c9ff60444585b5cf2d020968cb5b50614ad80b118d7bae
7
+ data.tar.gz: 6c015737932b1f01819d89f5f91948ab43d09e1537d6bf41ba8df85778c9ed0f18dc85b408a7fdc6ceb48ce6cfc4f7127b74da0c415aac6819d7dd460cf8a4eb
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dor_indexing (1.3.1)
5
- cocina-models (~> 0.95.0)
4
+ dor_indexing (1.4.1)
5
+ activesupport
6
+ cocina-models (~> 0.95.1)
6
7
  dor-workflow-client (~> 7.0)
7
8
  honeybadger
8
9
  marc-vocab (~> 0.3.0)
9
- solrizer
10
10
  stanford-mods
11
11
  zeitwerk
12
12
 
13
13
  GEM
14
14
  remote: https://rubygems.org/
15
15
  specs:
16
- activesupport (7.1.3)
16
+ activesupport (7.1.3.2)
17
17
  base64
18
18
  bigdecimal
19
19
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -28,7 +28,7 @@ GEM
28
28
  base64 (0.2.0)
29
29
  bigdecimal (3.1.6)
30
30
  byebug (11.1.3)
31
- cocina-models (0.95.0)
31
+ cocina-models (0.95.1)
32
32
  activesupport
33
33
  deprecation
34
34
  dry-struct (~> 1.0)
@@ -49,7 +49,7 @@ GEM
49
49
  connection_pool (2.4.1)
50
50
  deprecation (1.1.0)
51
51
  activesupport
52
- diff-lcs (1.5.0)
52
+ diff-lcs (1.5.1)
53
53
  docile (1.4.0)
54
54
  dor-workflow-client (7.0.2)
55
55
  activesupport (>= 3.2.1, < 8)
@@ -58,8 +58,7 @@ GEM
58
58
  faraday-retry (~> 2.0)
59
59
  nokogiri (~> 1.6)
60
60
  zeitwerk (~> 2.1)
61
- drb (2.2.0)
62
- ruby2_keywords
61
+ drb (2.2.1)
63
62
  dry-core (1.0.1)
64
63
  concurrent-ruby (~> 1.0)
65
64
  zeitwerk (~> 2.6)
@@ -90,8 +89,8 @@ GEM
90
89
  net-http
91
90
  faraday-retry (2.2.0)
92
91
  faraday (~> 2.0)
93
- honeybadger (5.4.1)
94
- i18n (1.14.1)
92
+ honeybadger (5.6.0)
93
+ i18n (1.14.4)
95
94
  concurrent-ruby (~> 1.0)
96
95
  ice_nine (0.11.2)
97
96
  iso-639 (0.3.6)
@@ -100,7 +99,7 @@ GEM
100
99
  multi_json
101
100
  language_server-protocol (3.17.0.3)
102
101
  marc-vocab (0.3.0)
103
- minitest (5.21.2)
102
+ minitest (5.22.2)
104
103
  mods (3.0.4)
105
104
  edtf (~> 3.0)
106
105
  iso-639
@@ -110,9 +109,9 @@ GEM
110
109
  mutex_m (0.2.0)
111
110
  net-http (0.4.1)
112
111
  uri
113
- nokogiri (1.16.0-x86_64-darwin)
112
+ nokogiri (1.16.2-x86_64-darwin)
114
113
  racc (~> 1.4)
115
- nokogiri (1.16.0-x86_64-linux)
114
+ nokogiri (1.16.2-x86_64-linux)
116
115
  racc (~> 1.4)
117
116
  nom-xml (1.2.0)
118
117
  i18n
@@ -132,22 +131,22 @@ GEM
132
131
  rake (13.1.0)
133
132
  regexp_parser (2.9.0)
134
133
  rexml (3.2.6)
135
- rspec (3.12.0)
136
- rspec-core (~> 3.12.0)
137
- rspec-expectations (~> 3.12.0)
138
- rspec-mocks (~> 3.12.0)
139
- rspec-core (3.12.2)
140
- rspec-support (~> 3.12.0)
141
- rspec-expectations (3.12.3)
134
+ rspec (3.13.0)
135
+ rspec-core (~> 3.13.0)
136
+ rspec-expectations (~> 3.13.0)
137
+ rspec-mocks (~> 3.13.0)
138
+ rspec-core (3.13.0)
139
+ rspec-support (~> 3.13.0)
140
+ rspec-expectations (3.13.0)
142
141
  diff-lcs (>= 1.2.0, < 2.0)
143
- rspec-support (~> 3.12.0)
144
- rspec-mocks (3.12.6)
142
+ rspec-support (~> 3.13.0)
143
+ rspec-mocks (3.13.0)
145
144
  diff-lcs (>= 1.2.0, < 2.0)
146
- rspec-support (~> 3.12.0)
147
- rspec-support (3.12.1)
145
+ rspec-support (~> 3.13.0)
146
+ rspec-support (3.13.1)
148
147
  rss (0.3.0)
149
148
  rexml
150
- rubocop (1.60.2)
149
+ rubocop (1.62.0)
151
150
  json (~> 2.3)
152
151
  language_server-protocol (>= 3.17.0)
153
152
  parallel (~> 1.10)
@@ -155,11 +154,11 @@ GEM
155
154
  rainbow (>= 2.2.2, < 4.0)
156
155
  regexp_parser (>= 1.8, < 3.0)
157
156
  rexml (>= 3.2.5, < 4.0)
158
- rubocop-ast (>= 1.30.0, < 2.0)
157
+ rubocop-ast (>= 1.31.1, < 2.0)
159
158
  ruby-progressbar (~> 1.7)
160
159
  unicode-display_width (>= 2.4.0, < 3.0)
161
- rubocop-ast (1.30.0)
162
- parser (>= 3.2.1.0)
160
+ rubocop-ast (1.31.2)
161
+ parser (>= 3.3.0.4)
163
162
  rubocop-capybara (2.20.0)
164
163
  rubocop (~> 1.41)
165
164
  rubocop-factory_bot (2.25.1)
@@ -167,37 +166,30 @@ GEM
167
166
  rubocop-performance (1.20.2)
168
167
  rubocop (>= 1.48.1, < 2.0)
169
168
  rubocop-ast (>= 1.30.0, < 2.0)
170
- rubocop-rspec (2.26.1)
169
+ rubocop-rspec (2.27.1)
171
170
  rubocop (~> 1.40)
172
171
  rubocop-capybara (~> 2.17)
173
172
  rubocop-factory_bot (~> 2.22)
174
173
  ruby-progressbar (1.13.0)
175
- ruby2_keywords (0.0.5)
176
174
  simplecov (0.22.0)
177
175
  docile (~> 1.1)
178
176
  simplecov-html (~> 0.11)
179
177
  simplecov_json_formatter (~> 0.1)
180
178
  simplecov-html (0.12.3)
181
179
  simplecov_json_formatter (0.1.4)
182
- solrizer (4.1.0)
183
- activesupport
184
- nokogiri
185
- xml-simple
186
180
  stanford-mods (3.3.9)
187
181
  activesupport
188
182
  mods (~> 3.0, >= 3.0.4)
189
- super_diff (0.10.0)
183
+ super_diff (0.11.0)
190
184
  attr_extras (>= 6.2.4)
191
185
  diff-lcs
192
186
  patience_diff
193
- thor (1.3.0)
187
+ thor (1.3.1)
194
188
  tzinfo (2.0.6)
195
189
  concurrent-ruby (~> 1.0)
196
190
  unicode-display_width (2.5.0)
197
191
  uri (0.13.0)
198
- xml-simple (1.1.9)
199
- rexml
200
- zeitwerk (2.6.12)
192
+ zeitwerk (2.6.13)
201
193
 
202
194
  PLATFORMS
203
195
  x86_64-darwin-21
data/dor_indexing.gemspec CHANGED
@@ -31,11 +31,11 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
- spec.add_dependency 'cocina-models', '~> 0.95.0'
34
+ spec.add_dependency 'activesupport'
35
+ spec.add_dependency 'cocina-models', '~> 0.95.1'
35
36
  spec.add_dependency 'dor-workflow-client', '~> 7.0'
36
37
  spec.add_dependency 'honeybadger'
37
38
  spec.add_dependency 'marc-vocab', '~> 0.3.0'
38
- spec.add_dependency 'solrizer'
39
39
  spec.add_dependency 'stanford-mods'
40
40
  spec.add_dependency 'zeitwerk'
41
41
  end
@@ -33,7 +33,7 @@ class DorIndexing
33
33
  DorIndexing::Indexers::IdentityMetadataIndexer,
34
34
  DorIndexing::Indexers::DescriptiveMetadataIndexer,
35
35
  DorIndexing::Indexers::EmbargoMetadataIndexer,
36
- DorIndexing::Indexers::ContentMetadataIndexer,
36
+ DorIndexing::Indexers::ObjectFilesIndexer,
37
37
  DorIndexing::Indexers::IdentifiableIndexer,
38
38
  DorIndexing::Indexers::CollectionTitleIndexer,
39
39
  DorIndexing::Indexers::ReleasableIndexer,
@@ -2,14 +2,12 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Index administrative tags for an object.
6
- # NOTE: Most of this code was extracted from the dor-services gem:
7
- # https://github.com/sul-dlss/dor-services/blob/v9.0.0/lib/dor/datastreams/identity_metadata_ds.rb#L196-L218
5
+ # Index administrative tags for an object
8
6
  class AdministrativeTagIndexer
9
7
  TAG_PART_DELIMITER = ' : '
10
8
  SPECIAL_TAG_TYPES_TO_INDEX = ['Project', 'Registered By'].freeze
11
9
 
12
- attr_reader :id
10
+ attr_reader :id, :administrative_tags
13
11
 
14
12
  def initialize(id:, administrative_tags:, **)
15
13
  @id = id
@@ -30,18 +28,23 @@ class DorIndexing
30
28
  tag_prefix, rest = tag.split(TAG_PART_DELIMITER, 2)
31
29
  prefix = tag_prefix.downcase.strip.gsub(/\s/, '_')
32
30
 
33
- solr_doc['tag_ssim'] << tag # for facet and display
34
- solr_doc['tag_text_unstemmed_im'] << tag # for search
31
+ solr_doc['tag_ssim'] << tag # for Argo display and fq
32
+ solr_doc['tag_text_unstemmed_im'] << tag # for Argo search
35
33
 
36
- solr_doc['exploded_nonproject_tag_ssim'] += exploded_tags_from(tag) unless prefix == 'project'
34
+ # exploded tags are for hierarchical facets in Argo
35
+ solr_doc['exploded_nonproject_tag_ssim'] += explode_tag_hierarchy(tag) unless prefix == 'project'
37
36
 
38
- next if SPECIAL_TAG_TYPES_TO_INDEX.exclude?(tag_prefix) || rest.nil?
37
+ next if rest.blank?
38
+
39
+ # Index specific tag types that are used in Argo:
40
+ # project tags for search results and registered by tags for reports ...
41
+ next unless SPECIAL_TAG_TYPES_TO_INDEX.include?(tag_prefix)
39
42
 
40
43
  (solr_doc["#{prefix}_tag_ssim"] ||= []) << rest.strip
41
44
 
42
45
  if prefix == 'project'
43
46
  solr_doc['exploded_project_tag_ssim'] ||= []
44
- solr_doc['exploded_project_tag_ssim'] += exploded_tags_from(rest.strip)
47
+ solr_doc['exploded_project_tag_ssim'] += explode_tag_hierarchy(rest.strip)
45
48
  end
46
49
  end
47
50
  solr_doc
@@ -52,12 +55,10 @@ class DorIndexing
52
55
 
53
56
  private
54
57
 
55
- attr_reader :administrative_tags
56
-
57
- # solrize each possible prefix for the tag, inclusive of the full tag.
58
- # e.g., for a tag such as "A : B : C", this will solrize to an _ssim field
59
- # that contains ["A", "A : B", "A : B : C"].
60
- def exploded_tags_from(tag)
58
+ # index each possible path, inclusive of the full tag.
59
+ # e.g., for "A : B : C", return ["A", "A : B", "A : B : C"].
60
+ # this is for the blacklight-hierarchy plugin for faceting on each level of the hierarchy
61
+ def explode_tag_hierarchy(tag)
61
62
  tag_parts = tag.split(TAG_PART_DELIMITER)
62
63
 
63
64
  1.upto(tag_parts.count).map do |i|
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Basic indexing for all objects
5
+ # Basic indexing for any object
6
6
  class BasicIndexer
7
7
  attr_reader :cocina, :workflow_client
8
8
 
@@ -11,6 +11,7 @@ class DorIndexing
11
11
  @workflow_client = workflow_client
12
12
  end
13
13
 
14
+ # @return [Hash] the partial solr document for basic data
14
15
  # rubocop:disable Metrics/AbcSize
15
16
  # rubocop:disable Metrics/MethodLength
16
17
  def to_solr
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the collection title
5
+ # Indexes collection titles for an object
6
6
  class CollectionTitleIndexer
7
7
  attr_reader :cocina, :parent_collections
8
8
 
@@ -11,14 +11,17 @@ class DorIndexing
11
11
  @parent_collections = parent_collections
12
12
  end
13
13
 
14
- # @return [Hash] the partial solr document for identifiable concerns
14
+ # @return [Hash] the partial solr document for collection title concerns
15
15
  def to_solr
16
16
  {}.tap do |solr_doc|
17
- parent_collections.each do |related_obj|
18
- coll_title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
17
+ parent_collections.each do |collection_obj|
18
+ coll_title = Cocina::Models::Builders::TitleBuilder.build(collection_obj.description.title)
19
+ next if coll_title.blank?
19
20
 
20
- # create/append collection_title_tesim and collection_title_ssim
21
- ::Solrizer.insert_field(solr_doc, 'collection_title', coll_title, :stored_searchable, :symbol)
21
+ solr_doc['collection_title_ssim'] ||= []
22
+ solr_doc['collection_title_ssim'] << coll_title
23
+ solr_doc['collection_title_tesim'] ||= []
24
+ solr_doc['collection_title_tesim'] << coll_title
22
25
  end
23
26
  end
24
27
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
+ # Allows DorIndexing::Builders::DocumentBuilder class (which builds the solr doc for an object) to be much more readable
5
6
  # Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb
6
7
  class CompositeIndexer
7
8
  attr_reader :indexers
@@ -4,8 +4,8 @@ require 'stanford-mods'
4
4
 
5
5
  class DorIndexing
6
6
  module Indexers
7
- # rubocop:disable Metrics/ClassLength
8
7
  # Indexes the descriptive metadata
8
+ # rubocop:disable Metrics/ClassLength
9
9
  class DescriptiveMetadataIndexer
10
10
  attr_reader :cocina, :stanford_mods_record
11
11
 
@@ -25,11 +25,10 @@ class DorIndexing
25
25
  'full_title_tenim' => full_title, # for searching; 1 more field type is copyField in solr schema.xml
26
26
  'additional_titles_tenim' => additional_titles, # for searching; 1 more field type is copyField in solr schema.xml
27
27
  'display_title_ss' => display_title, # for display in Argo
28
- 'sw_display_title_tesim' => display_title, # for display in Argo DEPRECATED in favor of display_title_ss
29
28
 
30
29
  # contributor
31
30
  'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
32
- 'sw_author_tesim' => author_primary, # used for author display in Argo
31
+ 'author_display_ss' => author_primary, # used for author display in Argo
33
32
  'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
34
33
  'contributor_orcids_ssim' => orcids,
35
34
 
@@ -43,8 +42,6 @@ class DorIndexing
43
42
  'originInfo_place_placeTerm_tesim' => event_place, # do we want this?
44
43
  'sw_pub_date_facet_ssi' => stanford_mods_record.pub_year_int.to_s, # SW Date facet
45
44
 
46
- 'metadata_format_ssim' => 'mods', # no longer used? https://github.com/search?q=org%3Asul-dlss+metadata_format_ssim&type=code
47
-
48
45
  # SW facets plus a friend facet
49
46
  'sw_format_ssim' => sw_format, # SW Resource Type facet
50
47
  'mods_typeOfResource_ssim' => resource_type, # MODS Resource Type facet
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the identifiable concerns
5
+ # Indexes the druid, metadata sources, and the apo titles
6
6
  class IdentifiableIndexer
7
7
  attr_reader :cocina, :cocina_repository
8
8
 
@@ -13,8 +13,8 @@ class DorIndexing
13
13
  @cocina_repository = cocina_repository
14
14
  end
15
15
 
16
- ## Module-level variables, shared between ALL mixin includers (and ALL *their* includers/extenders)!
17
- ## used for caching found values
16
+ ## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
17
+ ## used for caching apo titles
18
18
  @@apo_hash = {} # rubocop:disable Style/ClassVars
19
19
 
20
20
  # @return [Hash] the partial solr document for identifiable concerns
@@ -23,13 +23,18 @@ class DorIndexing
23
23
  add_apo_titles(solr_doc, cocina.administrative.hasAdminPolicy)
24
24
 
25
25
  solr_doc['metadata_source_ssim'] = identity_metadata_sources unless cocina.is_a? Cocina::Models::AdminPolicyWithMetadata
26
- # This used to be added to the index by https://github.com/sul-dlss/dor-services/commit/11b80d249d19326ef591411ffeb634900e75c2c3
27
- # and was called dc_identifier_druid_tesim
28
- # It is used to search based on druid.
29
- solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')]
26
+ solr_doc['druid_prefixed_ssi'] = cocina.externalIdentifier
27
+ solr_doc['druid_bare_ssi'] = cocina.externalIdentifier.delete_prefix('druid:')
30
28
  end
31
29
  end
32
30
 
31
+ # Clears out the cache of apos. Used primarily in testing.
32
+ def self.reset_cache!
33
+ @@apo_hash = {} # rubocop:disable Style/ClassVars
34
+ end
35
+
36
+ private
37
+
33
38
  # @return [Array<String>] calculated values for Solr index
34
39
  def identity_metadata_sources
35
40
  return ['DOR'] if !cocina.identification.respond_to?(:catalogLinks) || distinct_current_catalog_types.empty?
@@ -37,13 +42,6 @@ class DorIndexing
37
42
  distinct_current_catalog_types.map(&:capitalize)
38
43
  end
39
44
 
40
- # Clears out the cache of items. Used primarily in testing.
41
- def self.reset_cache!
42
- @@apo_hash = {} # rubocop:disable Style/ClassVars
43
- end
44
-
45
- private
46
-
47
45
  def distinct_current_catalog_types
48
46
  # Filter out e.g. "previous symphony", "previous folio"
49
47
  @distinct_current_catalog_types ||=
@@ -57,15 +55,18 @@ class DorIndexing
57
55
 
58
56
  # @param [Hash] solr_doc
59
57
  # @param [String] admin_policy_id
60
- def add_apo_titles(solr_doc, admin_policy_id)
58
+ def add_apo_titles(solr_doc, admin_policy_id) # rubocop:disable Metrics/MethodLength
61
59
  row = populate_cache(admin_policy_id)
62
60
  title = row['related_obj_title']
63
61
  if row['is_from_hydrus']
64
- ::Solrizer.insert_field(solr_doc, 'hydrus_apo_title', title, :symbol)
62
+ solr_doc['hydrus_apo_title_ssim'] ||= []
63
+ solr_doc['hydrus_apo_title_ssim'] << title
65
64
  else
66
- ::Solrizer.insert_field(solr_doc, 'nonhydrus_apo_title', title, :symbol)
65
+ solr_doc['nonhydrus_apo_title_ssim'] ||= []
66
+ solr_doc['nonhydrus_apo_title_ssim'] << title
67
67
  end
68
- ::Solrizer.insert_field(solr_doc, 'apo_title', title, :symbol)
68
+ solr_doc['apo_title_ssim'] ||= []
69
+ solr_doc['apo_title_ssim'] << title
69
70
  end
70
71
 
71
72
  # populate cache if necessary
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the identity metadata
5
+ # Indexes the identity metadata from cocina.identification
6
6
  class IdentityMetadataIndexer
7
7
  attr_reader :cocina_object
8
8
 
@@ -11,24 +11,21 @@ class DorIndexing
11
11
  end
12
12
 
13
13
  # @return [Hash] the partial solr document for identityMetadata
14
- # rubocop:disable Metrics/AbcSize
15
14
  # rubocop:disable Metrics/MethodLength
16
15
  def to_solr
17
- return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.nil?
16
+ return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.blank?
18
17
 
19
18
  {
20
19
  'objectType_ssim' => [object_type],
21
- 'dor_id_tesim' => [source_id_value, barcode, folio_instance_hrid, previous_ils_ids].flatten.compact,
22
- 'identifier_ssim' => prefixed_identifiers,
23
- 'identifier_tesim' => prefixed_identifiers,
20
+ 'identifier_ssim' => prefixed_identifiers, # sourceid, barcode, folio_instance_hrid for display
21
+ 'identifier_tesim' => prefixed_identifiers, # ditto ^^, for search, tokenized (can search prefix and value as separate tokens)
24
22
  'barcode_id_ssim' => [barcode].compact,
25
- 'source_id_ssi' => source_id,
26
- 'source_id_text_nostem_i' => source_id,
23
+ 'source_id_ssi' => source_id, # for search and display (reports, track_sheet)
24
+ 'source_id_text_nostem_i' => source_id, # for search, tokenized per request from accessioneers
27
25
  'folio_instance_hrid_ssim' => [folio_instance_hrid].compact,
28
26
  'doi_ssim' => [doi].compact
29
27
  }
30
28
  end
31
- # rubocop:enable Metrics/AbcSize
32
29
  # rubocop:enable Metrics/MethodLength
33
30
 
34
31
  private
@@ -37,10 +34,6 @@ class DorIndexing
37
34
  @source_id ||= cocina_object.identification.sourceId
38
35
  end
39
36
 
40
- def source_id_value
41
- @source_id_value ||= source_id ? source_id.split(':', 2)[1] : nil
42
- end
43
-
44
37
  def barcode
45
38
  @barcode ||= object_type == 'collection' ? nil : cocina_object.identification.barcode
46
39
  end
@@ -53,15 +46,6 @@ class DorIndexing
53
46
  @folio_instance_hrid ||= Array(cocina_object.identification.catalogLinks).find { |link| link.catalog == 'folio' }&.catalogRecordId
54
47
  end
55
48
 
56
- def previous_folio_instance_hrids
57
- @previous_folio_instance_hrids ||=
58
- Array(cocina_object.identification.catalogLinks).filter_map { |link| link.catalogRecordId if link.catalog == 'previous folio' }
59
- end
60
-
61
- def previous_ils_ids
62
- @previous_ils_ids ||= previous_folio_instance_hrids
63
- end
64
-
65
49
  def object_type
66
50
  case cocina_object
67
51
  when Cocina::Models::AdminPolicyWithMetadata
@@ -2,15 +2,15 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the content metadata
6
- class ContentMetadataIndexer
5
+ # Indexes the information about files in the object
6
+ class ObjectFilesIndexer
7
7
  attr_reader :cocina
8
8
 
9
9
  def initialize(cocina:, **)
10
10
  @cocina = cocina
11
11
  end
12
12
 
13
- # @return [Hash] the partial solr document for contentMetadata
13
+ # @return [Hash] the partial solr document for files in the object
14
14
  def to_solr
15
15
  {
16
16
  'content_type_ssim' => type(cocina.type),
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the role metadata
5
+ # Indexes the administrative role metadata
6
6
  class RoleMetadataIndexer
7
7
  attr_reader :cocina
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the objects position in workflows
5
+ # Indexes the object's state for each process in a single workflow
6
6
  class WorkflowIndexer
7
7
  # @param [Workflow::Response::Workflow] workflow the workflow document to index
8
8
  def initialize(workflow:, workflow_client:)
@@ -10,14 +10,14 @@ class DorIndexing
10
10
  @workflow_client = workflow_client
11
11
  end
12
12
 
13
- # @return [Hash] the partial solr document for the workflow document
13
+ # @return [Hash] the partial solr document for all the workflow processes
14
14
  def to_solr
15
15
  WorkflowSolrDocument.new do |solr_doc|
16
16
  solr_doc.name = workflow_name
17
17
 
18
18
  errors = 0 # The error count is used by the Report class in Argo
19
19
  processes.each do |process|
20
- ProcessIndexer.new(solr_doc:, workflow_name:, process:).to_solr
20
+ WorkflowProcessIndexer.new(solr_doc:, workflow_name:, process:).to_solr
21
21
  errors += 1 if process.status == 'error'
22
22
  end
23
23
  solr_doc.status = [workflow_name, workflow_status, errors].join('|')
@@ -2,8 +2,8 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the process for a workflow
6
- class ProcessIndexer
5
+ # Creates solr doc fields (and values) for a process for a workflow (which is for an object)
6
+ class WorkflowProcessIndexer
7
7
  ERROR_OMISSION = '... (continued)'
8
8
  private_constant :ERROR_OMISSION
9
9
 
@@ -13,14 +13,14 @@ class DorIndexing
13
13
 
14
14
  # @param [WorkflowSolrDocument] solr_doc
15
15
  # @param [String] workflow_name
16
- # @param [Dor::Workflow::Response::Process] process
16
+ # @param [Dor::Workflow::Response::Process] process containing data for a process in a workflow for an object
17
17
  def initialize(solr_doc:, workflow_name:, process:)
18
18
  @solr_doc = solr_doc
19
19
  @workflow_name = workflow_name
20
20
  @process = process
21
21
  end
22
22
 
23
- # @return [Hash] the partial solr document for the workflow document
23
+ # @return [Hash] the partial solr document for a single workflow process
24
24
  # rubocop:disable Metrics/AbcSize
25
25
  def to_solr
26
26
  return unless status
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the objects position in workflows
5
+ # Indexes the object's state in the most recent execution of every one of its workflows
6
6
  class WorkflowsIndexer
7
7
  attr_reader :id
8
8
 
@@ -11,7 +11,7 @@ class DorIndexing
11
11
  @workflow_client = workflow_client
12
12
  end
13
13
 
14
- # @return [Hash] the partial solr document for workflow concerns
14
+ # @return [Hash] the partial solr document for workflows concerns
15
15
  def to_solr
16
16
  WorkflowSolrDocument.new do |combined_doc|
17
17
  workflows.each do |wf|
@@ -30,7 +30,6 @@ class DorIndexing
30
30
  all_workflows.workflows
31
31
  end
32
32
 
33
- # TODO: remove Dor::Workflow::Document
34
33
  # @return [Workflow::Response::Workflows]
35
34
  def all_workflows
36
35
  @all_workflows ||= workflow_client.workflow_routes.all_workflows pid: id
@@ -61,7 +61,7 @@ class DorIndexing
61
61
 
62
62
  event_dates = Array(event.date) + Array(event.parallelEvent&.map(&:date))
63
63
  event_dates.flatten.compact.find do |date|
64
- date_type(date).nil?
64
+ date_type(date).blank?
65
65
  end
66
66
  end
67
67
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DorIndexing
4
- VERSION = '1.3.1'
4
+ VERSION = '1.4.1'
5
5
  end
data/lib/dor_indexing.rb CHANGED
@@ -1,14 +1,18 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'zeitwerk'
4
- require 'stanford-mods'
5
- require 'cocina/models'
6
- require 'solrizer'
7
- require 'marc/vocab'
8
- require 'honeybadger'
9
4
 
10
5
  Zeitwerk::Loader.for_gem.setup
11
6
 
7
+ # Zeitwerk doesn't auto-load these dependencies
8
+ require 'active_support'
9
+ require 'active_support/core_ext/object/blank'
10
+ require 'active_support/core_ext/enumerable'
11
+ require 'active_support/core_ext/string'
12
+ require 'cocina/models'
13
+ require 'honeybadger'
14
+ require 'marc/vocab'
15
+
12
16
  # Builds solr documents for indexing.
13
17
  class DorIndexing
14
18
  # @return [Hash] the solr document
metadata CHANGED
@@ -1,29 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dor_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Littman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-31 00:00:00.000000000 Z
11
+ date: 2024-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: cocina-models
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
31
  - - "~>"
18
32
  - !ruby/object:Gem::Version
19
- version: 0.95.0
33
+ version: 0.95.1
20
34
  type: :runtime
21
35
  prerelease: false
22
36
  version_requirements: !ruby/object:Gem::Requirement
23
37
  requirements:
24
38
  - - "~>"
25
39
  - !ruby/object:Gem::Version
26
- version: 0.95.0
40
+ version: 0.95.1
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: dor-workflow-client
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -66,20 +80,6 @@ dependencies:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
82
  version: 0.3.0
69
- - !ruby/object:Gem::Dependency
70
- name: solrizer
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: stanford-mods
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -140,17 +140,17 @@ files:
140
140
  - lib/dor_indexing/indexers/basic_indexer.rb
141
141
  - lib/dor_indexing/indexers/collection_title_indexer.rb
142
142
  - lib/dor_indexing/indexers/composite_indexer.rb
143
- - lib/dor_indexing/indexers/content_metadata_indexer.rb
144
143
  - lib/dor_indexing/indexers/default_object_rights_indexer.rb
145
144
  - lib/dor_indexing/indexers/descriptive_metadata_indexer.rb
146
145
  - lib/dor_indexing/indexers/embargo_metadata_indexer.rb
147
146
  - lib/dor_indexing/indexers/identifiable_indexer.rb
148
147
  - lib/dor_indexing/indexers/identity_metadata_indexer.rb
149
- - lib/dor_indexing/indexers/process_indexer.rb
148
+ - lib/dor_indexing/indexers/object_files_indexer.rb
150
149
  - lib/dor_indexing/indexers/releasable_indexer.rb
151
150
  - lib/dor_indexing/indexers/rights_metadata_indexer.rb
152
151
  - lib/dor_indexing/indexers/role_metadata_indexer.rb
153
152
  - lib/dor_indexing/indexers/workflow_indexer.rb
153
+ - lib/dor_indexing/indexers/workflow_process_indexer.rb
154
154
  - lib/dor_indexing/indexers/workflows_indexer.rb
155
155
  - lib/dor_indexing/marc_country.rb
156
156
  - lib/dor_indexing/selectors/event_selector.rb