dor_indexing 1.2.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c31e983b699f6064d4662b48a5d9f0b7f36a83b050ff451147017d78dc5fedb4
4
- data.tar.gz: 5ab52d4782ef1073218ee6e34007a11f4dcea877ca118695623dcb1379e7236c
3
+ metadata.gz: 108a49c639925b8e1ba4e892d0ed3e4d903f7e741311f480ce85dd1be88d17c0
4
+ data.tar.gz: 5483c10c3b7fa972097e6b8f8cc889a6abc8a67e82bc864253ea003b97e6e578
5
5
  SHA512:
6
- metadata.gz: 1a85020056ebd55920cd84563c6194afa592cda2fda02d73d460c0c988a286013e8f4a159e7f69a25863b63b9be089387bf9637c150eaf00703a90ba925c34e8
7
- data.tar.gz: 00252b4b17aefd3d3bff766713c0f167d5aecdf8444b702d44db0f73fc0d27cb7360801008e93720a7cd11e3559477e7d2e5b64ce1a65649e4eacc97aaed705e
6
+ metadata.gz: d2f7554c2a2d7dc5ab0eebe685b309f88706cef647ee0aeeeded3da2dc252016b7b43b4328303cfcc0da63c6c45d89ae1f883b6e120f982bcabf750c2a90040a
7
+ data.tar.gz: 5fdf32e3282b995903bc70892cc0baa9c8db8139668c40fbc92f1aff0ee0e40957c3242f07bffcd5d9512bf748697024c407f1708109f4a2837f638ea49012a3
data/Gemfile.lock CHANGED
@@ -1,12 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dor_indexing (1.2.1)
5
- cocina-models (~> 0.94.2)
4
+ dor_indexing (1.4.0)
5
+ activesupport
6
+ cocina-models (~> 0.95.0)
6
7
  dor-workflow-client (~> 7.0)
7
8
  honeybadger
8
9
  marc-vocab (~> 0.3.0)
9
- solrizer
10
10
  stanford-mods
11
11
  zeitwerk
12
12
 
@@ -28,7 +28,7 @@ GEM
28
28
  base64 (0.2.0)
29
29
  bigdecimal (3.1.6)
30
30
  byebug (11.1.3)
31
- cocina-models (0.94.2)
31
+ cocina-models (0.95.0)
32
32
  activesupport
33
33
  deprecation
34
34
  dry-struct (~> 1.0)
@@ -49,7 +49,7 @@ GEM
49
49
  connection_pool (2.4.1)
50
50
  deprecation (1.1.0)
51
51
  activesupport
52
- diff-lcs (1.5.0)
52
+ diff-lcs (1.5.1)
53
53
  docile (1.4.0)
54
54
  dor-workflow-client (7.0.2)
55
55
  activesupport (>= 3.2.1, < 8)
@@ -100,7 +100,7 @@ GEM
100
100
  multi_json
101
101
  language_server-protocol (3.17.0.3)
102
102
  marc-vocab (0.3.0)
103
- minitest (5.21.2)
103
+ minitest (5.22.2)
104
104
  mods (3.0.4)
105
105
  edtf (~> 3.0)
106
106
  iso-639
@@ -110,9 +110,9 @@ GEM
110
110
  mutex_m (0.2.0)
111
111
  net-http (0.4.1)
112
112
  uri
113
- nokogiri (1.16.0-x86_64-darwin)
113
+ nokogiri (1.16.2-x86_64-darwin)
114
114
  racc (~> 1.4)
115
- nokogiri (1.16.0-x86_64-linux)
115
+ nokogiri (1.16.2-x86_64-linux)
116
116
  racc (~> 1.4)
117
117
  nom-xml (1.2.0)
118
118
  i18n
@@ -132,22 +132,22 @@ GEM
132
132
  rake (13.1.0)
133
133
  regexp_parser (2.9.0)
134
134
  rexml (3.2.6)
135
- rspec (3.12.0)
136
- rspec-core (~> 3.12.0)
137
- rspec-expectations (~> 3.12.0)
138
- rspec-mocks (~> 3.12.0)
139
- rspec-core (3.12.2)
140
- rspec-support (~> 3.12.0)
141
- rspec-expectations (3.12.3)
135
+ rspec (3.13.0)
136
+ rspec-core (~> 3.13.0)
137
+ rspec-expectations (~> 3.13.0)
138
+ rspec-mocks (~> 3.13.0)
139
+ rspec-core (3.13.0)
140
+ rspec-support (~> 3.13.0)
141
+ rspec-expectations (3.13.0)
142
142
  diff-lcs (>= 1.2.0, < 2.0)
143
- rspec-support (~> 3.12.0)
144
- rspec-mocks (3.12.6)
143
+ rspec-support (~> 3.13.0)
144
+ rspec-mocks (3.13.0)
145
145
  diff-lcs (>= 1.2.0, < 2.0)
146
- rspec-support (~> 3.12.0)
147
- rspec-support (3.12.1)
146
+ rspec-support (~> 3.13.0)
147
+ rspec-support (3.13.0)
148
148
  rss (0.3.0)
149
149
  rexml
150
- rubocop (1.60.1)
150
+ rubocop (1.60.2)
151
151
  json (~> 2.3)
152
152
  language_server-protocol (>= 3.17.0)
153
153
  parallel (~> 1.10)
@@ -179,14 +179,10 @@ GEM
179
179
  simplecov_json_formatter (~> 0.1)
180
180
  simplecov-html (0.12.3)
181
181
  simplecov_json_formatter (0.1.4)
182
- solrizer (4.1.0)
183
- activesupport
184
- nokogiri
185
- xml-simple
186
182
  stanford-mods (3.3.9)
187
183
  activesupport
188
184
  mods (~> 3.0, >= 3.0.4)
189
- super_diff (0.10.0)
185
+ super_diff (0.11.0)
190
186
  attr_extras (>= 6.2.4)
191
187
  diff-lcs
192
188
  patience_diff
@@ -195,9 +191,7 @@ GEM
195
191
  concurrent-ruby (~> 1.0)
196
192
  unicode-display_width (2.5.0)
197
193
  uri (0.13.0)
198
- xml-simple (1.1.9)
199
- rexml
200
- zeitwerk (2.6.12)
194
+ zeitwerk (2.6.13)
201
195
 
202
196
  PLATFORMS
203
197
  x86_64-darwin-21
data/dor_indexing.gemspec CHANGED
@@ -31,11 +31,11 @@ Gem::Specification.new do |spec|
31
31
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
32
32
  spec.require_paths = ['lib']
33
33
 
34
- spec.add_dependency 'cocina-models', '~> 0.94.2'
34
+ spec.add_dependency 'activesupport' # for blank? method
35
+ spec.add_dependency 'cocina-models', '~> 0.95.0'
35
36
  spec.add_dependency 'dor-workflow-client', '~> 7.0'
36
37
  spec.add_dependency 'honeybadger'
37
- spec.add_dependency 'marc-vocab', '~> 0.3.0'
38
- spec.add_dependency 'solrizer'
38
+ spec.add_dependency 'marc-vocab', '~> 0.3.0' # for marcgac and marccountry
39
39
  spec.add_dependency 'stanford-mods'
40
40
  spec.add_dependency 'zeitwerk'
41
41
  end
@@ -6,7 +6,7 @@ class DorIndexing
6
6
  class DocumentBuilder
7
7
  ADMIN_POLICY_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
8
8
  DorIndexing::Indexers::AdministrativeTagIndexer,
9
- DorIndexing::Indexers::DataIndexer,
9
+ DorIndexing::Indexers::BasicIndexer,
10
10
  DorIndexing::Indexers::RoleMetadataIndexer,
11
11
  DorIndexing::Indexers::DefaultObjectRightsIndexer,
12
12
  DorIndexing::Indexers::IdentityMetadataIndexer,
@@ -17,7 +17,7 @@ class DorIndexing
17
17
 
18
18
  COLLECTION_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
19
19
  DorIndexing::Indexers::AdministrativeTagIndexer,
20
- DorIndexing::Indexers::DataIndexer,
20
+ DorIndexing::Indexers::BasicIndexer,
21
21
  DorIndexing::Indexers::RightsMetadataIndexer,
22
22
  DorIndexing::Indexers::IdentityMetadataIndexer,
23
23
  DorIndexing::Indexers::DescriptiveMetadataIndexer,
@@ -28,12 +28,12 @@ class DorIndexing
28
28
 
29
29
  ITEM_INDEXER = DorIndexing::Indexers::CompositeIndexer.new(
30
30
  DorIndexing::Indexers::AdministrativeTagIndexer,
31
- DorIndexing::Indexers::DataIndexer,
31
+ DorIndexing::Indexers::BasicIndexer,
32
32
  DorIndexing::Indexers::RightsMetadataIndexer,
33
33
  DorIndexing::Indexers::IdentityMetadataIndexer,
34
34
  DorIndexing::Indexers::DescriptiveMetadataIndexer,
35
35
  DorIndexing::Indexers::EmbargoMetadataIndexer,
36
- DorIndexing::Indexers::ContentMetadataIndexer,
36
+ DorIndexing::Indexers::ObjectFilesIndexer,
37
37
  DorIndexing::Indexers::IdentifiableIndexer,
38
38
  DorIndexing::Indexers::CollectionTitleIndexer,
39
39
  DorIndexing::Indexers::ReleasableIndexer,
@@ -2,7 +2,8 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Builders
5
- # Builds the author fields for a solr document
5
+ # class methods return the name values to go in Solr document fields
6
+ # used for both contributors and for topics
6
7
  class NameBuilder
7
8
  # @param [Symbol] strategy ":first" is the strategy for how to choose a name if primary and display name is not found
8
9
  # @return [Array<String>] names
@@ -2,14 +2,12 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Index administrative tags for an object.
6
- # NOTE: Most of this code was extracted from the dor-services gem:
7
- # https://github.com/sul-dlss/dor-services/blob/v9.0.0/lib/dor/datastreams/identity_metadata_ds.rb#L196-L218
5
+ # Index administrative tags for an object
8
6
  class AdministrativeTagIndexer
9
7
  TAG_PART_DELIMITER = ' : '
10
8
  SPECIAL_TAG_TYPES_TO_INDEX = ['Project', 'Registered By'].freeze
11
9
 
12
- attr_reader :id
10
+ attr_reader :id, :administrative_tags
13
11
 
14
12
  def initialize(id:, administrative_tags:, **)
15
13
  @id = id
@@ -30,18 +28,23 @@ class DorIndexing
30
28
  tag_prefix, rest = tag.split(TAG_PART_DELIMITER, 2)
31
29
  prefix = tag_prefix.downcase.strip.gsub(/\s/, '_')
32
30
 
33
- solr_doc['tag_ssim'] << tag # for facet and display
34
- solr_doc['tag_text_unstemmed_im'] << tag # for search
31
+ solr_doc['tag_ssim'] << tag # for Argo display and fq
32
+ solr_doc['tag_text_unstemmed_im'] << tag # for Argo search
35
33
 
36
- solr_doc['exploded_nonproject_tag_ssim'] += exploded_tags_from(tag) unless prefix == 'project'
34
+ # exploded tags are for hierarchical facets in Argo
35
+ solr_doc['exploded_nonproject_tag_ssim'] += explode_tag_hierarchy(tag) unless prefix == 'project'
37
36
 
38
- next if SPECIAL_TAG_TYPES_TO_INDEX.exclude?(tag_prefix) || rest.nil?
37
+ next if rest.blank?
38
+
39
+ # Index specific tag types that are used in Argo:
40
+ # project tags for search results and registered by tags for reports ...
41
+ next unless SPECIAL_TAG_TYPES_TO_INDEX.include?(tag_prefix)
39
42
 
40
43
  (solr_doc["#{prefix}_tag_ssim"] ||= []) << rest.strip
41
44
 
42
45
  if prefix == 'project'
43
46
  solr_doc['exploded_project_tag_ssim'] ||= []
44
- solr_doc['exploded_project_tag_ssim'] += exploded_tags_from(rest.strip)
47
+ solr_doc['exploded_project_tag_ssim'] += explode_tag_hierarchy(rest.strip)
45
48
  end
46
49
  end
47
50
  solr_doc
@@ -52,12 +55,10 @@ class DorIndexing
52
55
 
53
56
  private
54
57
 
55
- attr_reader :administrative_tags
56
-
57
- # solrize each possible prefix for the tag, inclusive of the full tag.
58
- # e.g., for a tag such as "A : B : C", this will solrize to an _ssim field
59
- # that contains ["A", "A : B", "A : B : C"].
60
- def exploded_tags_from(tag)
58
+ # index each possible path, inclusive of the full tag.
59
+ # e.g., for "A : B : C", return ["A", "A : B", "A : B : C"].
60
+ # this is for the blacklight-hierarchy plugin for faceting on each level of the hierarchy
61
+ def explode_tag_hierarchy(tag)
61
62
  tag_parts = tag.split(TAG_PART_DELIMITER)
62
63
 
63
64
  1.upto(tag_parts.count).map do |i|
@@ -2,8 +2,8 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexing provided by ActiveFedora
6
- class DataIndexer
5
+ # Basic indexing for any object
6
+ class BasicIndexer
7
7
  attr_reader :cocina, :workflow_client
8
8
 
9
9
  def initialize(cocina:, workflow_client:, **)
@@ -11,6 +11,7 @@ class DorIndexing
11
11
  @workflow_client = workflow_client
12
12
  end
13
13
 
14
+ # @return [Hash] the partial solr document for basic data
14
15
  # rubocop:disable Metrics/AbcSize
15
16
  # rubocop:disable Metrics/MethodLength
16
17
  def to_solr
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the collection title
5
+ # Indexes collection titles for an object
6
6
  class CollectionTitleIndexer
7
7
  attr_reader :cocina, :parent_collections
8
8
 
@@ -11,14 +11,17 @@ class DorIndexing
11
11
  @parent_collections = parent_collections
12
12
  end
13
13
 
14
- # @return [Hash] the partial solr document for identifiable concerns
14
+ # @return [Hash] the partial solr document for collection title concerns
15
15
  def to_solr
16
16
  {}.tap do |solr_doc|
17
- parent_collections.each do |related_obj|
18
- coll_title = Cocina::Models::Builders::TitleBuilder.build(related_obj.description.title)
17
+ parent_collections.each do |collection_obj|
18
+ coll_title = Cocina::Models::Builders::TitleBuilder.build(collection_obj.description.title)
19
+ next if coll_title.blank?
19
20
 
20
- # create/append collection_title_tesim and collection_title_ssim
21
- ::Solrizer.insert_field(solr_doc, 'collection_title', coll_title, :stored_searchable, :symbol)
21
+ solr_doc['collection_title_ssim'] ||= []
22
+ solr_doc['collection_title_ssim'] << coll_title
23
+ solr_doc['collection_title_tesim'] ||= []
24
+ solr_doc['collection_title_tesim'] << coll_title
22
25
  end
23
26
  end
24
27
  end
@@ -2,6 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
+ # Allows DorIndexing::Builders::DocumentBuilder class (which builds the solr doc for an object) to be much more readable
5
6
  # Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb
6
7
  class CompositeIndexer
7
8
  attr_reader :indexers
@@ -4,8 +4,8 @@ require 'stanford-mods'
4
4
 
5
5
  class DorIndexing
6
6
  module Indexers
7
- # rubocop:disable Metrics/ClassLength
8
7
  # Indexes the descriptive metadata
8
+ # rubocop:disable Metrics/ClassLength
9
9
  class DescriptiveMetadataIndexer
10
10
  attr_reader :cocina, :stanford_mods_record
11
11
 
@@ -29,7 +29,8 @@ class DorIndexing
29
29
 
30
30
  # contributor
31
31
  'author_text_nostem_im' => author_primary, # primary author tokenized but not stemmed
32
- 'sw_author_tesim' => author_primary, # used for author display in Argo
32
+ 'sw_author_tesim' => author_primary, # DEPRECATED - used for author display in Argo
33
+ 'author_display_ss' => author_primary, # used for author display in Argo
33
34
  'contributor_text_nostem_im' => author_all, # author names should be tokenized but not stemmed
34
35
  'contributor_orcids_ssim' => orcids,
35
36
 
@@ -43,8 +44,6 @@ class DorIndexing
43
44
  'originInfo_place_placeTerm_tesim' => event_place, # do we want this?
44
45
  'sw_pub_date_facet_ssi' => stanford_mods_record.pub_year_int.to_s, # SW Date facet
45
46
 
46
- 'metadata_format_ssim' => 'mods', # no longer used? https://github.com/search?q=org%3Asul-dlss+metadata_format_ssim&type=code
47
-
48
47
  # SW facets plus a friend facet
49
48
  'sw_format_ssim' => sw_format, # SW Resource Type facet
50
49
  'mods_typeOfResource_ssim' => resource_type, # MODS Resource Type facet
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the identifiable concerns
5
+ # Indexes the druid, metadata sources, and the apo titles
6
6
  class IdentifiableIndexer
7
7
  attr_reader :cocina, :cocina_repository
8
8
 
@@ -13,23 +13,29 @@ class DorIndexing
13
13
  @cocina_repository = cocina_repository
14
14
  end
15
15
 
16
- ## Module-level variables, shared between ALL mixin includers (and ALL *their* includers/extenders)!
17
- ## used for caching found values
16
+ ## Module-level variable, shared between ALL mixin includers (and ALL *their* includers/extenders)!
17
+ ## used for caching apo titles
18
18
  @@apo_hash = {} # rubocop:disable Style/ClassVars
19
19
 
20
20
  # @return [Hash] the partial solr document for identifiable concerns
21
- def to_solr
21
+ def to_solr # rubocop:disable Metrics/AbcSize
22
22
  {}.tap do |solr_doc|
23
23
  add_apo_titles(solr_doc, cocina.administrative.hasAdminPolicy)
24
24
 
25
25
  solr_doc['metadata_source_ssim'] = identity_metadata_sources unless cocina.is_a? Cocina::Models::AdminPolicyWithMetadata
26
- # This used to be added to the index by https://github.com/sul-dlss/dor-services/commit/11b80d249d19326ef591411ffeb634900e75c2c3
27
- # and was called dc_identifier_druid_tesim
28
- # It is used to search based on druid.
29
- solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')]
26
+ solr_doc['druid_prefixed_ssi'] = cocina.externalIdentifier
27
+ solr_doc['druid_bare_ssi'] = cocina.externalIdentifier.delete_prefix('druid:')
28
+ solr_doc['objectId_tesim'] = [cocina.externalIdentifier, cocina.externalIdentifier.delete_prefix('druid:')] # DEPRECATED
30
29
  end
31
30
  end
32
31
 
32
+ # Clears out the cache of apos. Used primarily in testing.
33
+ def self.reset_cache!
34
+ @@apo_hash = {} # rubocop:disable Style/ClassVars
35
+ end
36
+
37
+ private
38
+
33
39
  # @return [Array<String>] calculated values for Solr index
34
40
  def identity_metadata_sources
35
41
  return ['DOR'] if !cocina.identification.respond_to?(:catalogLinks) || distinct_current_catalog_types.empty?
@@ -37,13 +43,6 @@ class DorIndexing
37
43
  distinct_current_catalog_types.map(&:capitalize)
38
44
  end
39
45
 
40
- # Clears out the cache of items. Used primarily in testing.
41
- def self.reset_cache!
42
- @@apo_hash = {} # rubocop:disable Style/ClassVars
43
- end
44
-
45
- private
46
-
47
46
  def distinct_current_catalog_types
48
47
  # Filter out e.g. "previous symphony", "previous folio"
49
48
  @distinct_current_catalog_types ||=
@@ -57,15 +56,18 @@ class DorIndexing
57
56
 
58
57
  # @param [Hash] solr_doc
59
58
  # @param [String] admin_policy_id
60
- def add_apo_titles(solr_doc, admin_policy_id)
59
+ def add_apo_titles(solr_doc, admin_policy_id) # rubocop:disable Metrics/MethodLength
61
60
  row = populate_cache(admin_policy_id)
62
61
  title = row['related_obj_title']
63
62
  if row['is_from_hydrus']
64
- ::Solrizer.insert_field(solr_doc, 'hydrus_apo_title', title, :symbol)
63
+ solr_doc['hydrus_apo_title_ssim'] ||= []
64
+ solr_doc['hydrus_apo_title_ssim'] << title
65
65
  else
66
- ::Solrizer.insert_field(solr_doc, 'nonhydrus_apo_title', title, :symbol)
66
+ solr_doc['nonhydrus_apo_title_ssim'] ||= []
67
+ solr_doc['nonhydrus_apo_title_ssim'] << title
67
68
  end
68
- ::Solrizer.insert_field(solr_doc, 'apo_title', title, :symbol)
69
+ solr_doc['apo_title_ssim'] ||= []
70
+ solr_doc['apo_title_ssim'] << title
69
71
  end
70
72
 
71
73
  # populate cache if necessary
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the identity metadata
5
+ # Indexes the identity metadata from cocina.identification
6
6
  class IdentityMetadataIndexer
7
7
  attr_reader :cocina_object
8
8
 
@@ -11,24 +11,21 @@ class DorIndexing
11
11
  end
12
12
 
13
13
  # @return [Hash] the partial solr document for identityMetadata
14
- # rubocop:disable Metrics/AbcSize
15
14
  # rubocop:disable Metrics/MethodLength
16
15
  def to_solr
17
- return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.nil?
16
+ return { 'objectType_ssim' => [object_type] } if object_type == 'adminPolicy' || cocina_object.identification.blank?
18
17
 
19
18
  {
20
19
  'objectType_ssim' => [object_type],
21
- 'dor_id_tesim' => [source_id_value, barcode, folio_instance_hrid, previous_ils_ids].flatten.compact,
22
- 'identifier_ssim' => prefixed_identifiers,
23
- 'identifier_tesim' => prefixed_identifiers,
20
+ 'identifier_ssim' => prefixed_identifiers, # sourceid, barcode, folio_instance_hrid for display
21
+ 'identifier_tesim' => prefixed_identifiers, # ditto ^^, for search, tokenized (can search prefix and value as separate tokens)
24
22
  'barcode_id_ssim' => [barcode].compact,
25
- 'source_id_ssi' => source_id,
26
- 'source_id_text_nostem_i' => source_id,
23
+ 'source_id_ssi' => source_id, # for search and display (reports, track_sheet)
24
+ 'source_id_text_nostem_i' => source_id, # for search, tokenized per request from accessioneers
27
25
  'folio_instance_hrid_ssim' => [folio_instance_hrid].compact,
28
26
  'doi_ssim' => [doi].compact
29
27
  }
30
28
  end
31
- # rubocop:enable Metrics/AbcSize
32
29
  # rubocop:enable Metrics/MethodLength
33
30
 
34
31
  private
@@ -37,10 +34,6 @@ class DorIndexing
37
34
  @source_id ||= cocina_object.identification.sourceId
38
35
  end
39
36
 
40
- def source_id_value
41
- @source_id_value ||= source_id ? source_id.split(':', 2)[1] : nil
42
- end
43
-
44
37
  def barcode
45
38
  @barcode ||= object_type == 'collection' ? nil : cocina_object.identification.barcode
46
39
  end
@@ -53,15 +46,6 @@ class DorIndexing
53
46
  @folio_instance_hrid ||= Array(cocina_object.identification.catalogLinks).find { |link| link.catalog == 'folio' }&.catalogRecordId
54
47
  end
55
48
 
56
- def previous_folio_instance_hrids
57
- @previous_folio_instance_hrids ||=
58
- Array(cocina_object.identification.catalogLinks).filter_map { |link| link.catalogRecordId if link.catalog == 'previous folio' }
59
- end
60
-
61
- def previous_ils_ids
62
- @previous_ils_ids ||= previous_folio_instance_hrids
63
- end
64
-
65
49
  def object_type
66
50
  case cocina_object
67
51
  when Cocina::Models::AdminPolicyWithMetadata
@@ -2,15 +2,15 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the content metadata
6
- class ContentMetadataIndexer
5
+ # Indexes the information about files in the object
6
+ class ObjectFilesIndexer
7
7
  attr_reader :cocina
8
8
 
9
9
  def initialize(cocina:, **)
10
10
  @cocina = cocina
11
11
  end
12
12
 
13
- # @return [Hash] the partial solr document for contentMetadata
13
+ # @return [Hash] the partial solr document for files in the object
14
14
  def to_solr
15
15
  {
16
16
  'content_type_ssim' => type(cocina.type),
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the role metadata
5
+ # Indexes the administrative role metadata
6
6
  class RoleMetadataIndexer
7
7
  attr_reader :cocina
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the objects position in workflows
5
+ # Indexes the object's state for each process in a single workflow
6
6
  class WorkflowIndexer
7
7
  # @param [Workflow::Response::Workflow] workflow the workflow document to index
8
8
  def initialize(workflow:, workflow_client:)
@@ -10,14 +10,14 @@ class DorIndexing
10
10
  @workflow_client = workflow_client
11
11
  end
12
12
 
13
- # @return [Hash] the partial solr document for the workflow document
13
+ # @return [Hash] the partial solr document for all the workflow processes
14
14
  def to_solr
15
15
  WorkflowSolrDocument.new do |solr_doc|
16
16
  solr_doc.name = workflow_name
17
17
 
18
18
  errors = 0 # The error count is used by the Report class in Argo
19
19
  processes.each do |process|
20
- ProcessIndexer.new(solr_doc:, workflow_name:, process:).to_solr
20
+ WorkflowProcessIndexer.new(solr_doc:, workflow_name:, process:).to_solr
21
21
  errors += 1 if process.status == 'error'
22
22
  end
23
23
  solr_doc.status = [workflow_name, workflow_status, errors].join('|')
@@ -2,8 +2,8 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the process for a workflow
6
- class ProcessIndexer
5
+ # Creates solr doc fields (and values) for a process for a workflow (which is for an object)
6
+ class WorkflowProcessIndexer
7
7
  ERROR_OMISSION = '... (continued)'
8
8
  private_constant :ERROR_OMISSION
9
9
 
@@ -13,14 +13,14 @@ class DorIndexing
13
13
 
14
14
  # @param [WorkflowSolrDocument] solr_doc
15
15
  # @param [String] workflow_name
16
- # @param [Dor::Workflow::Response::Process] process
16
+ # @param [Dor::Workflow::Response::Process] process containing data for a process in a workflow for an object
17
17
  def initialize(solr_doc:, workflow_name:, process:)
18
18
  @solr_doc = solr_doc
19
19
  @workflow_name = workflow_name
20
20
  @process = process
21
21
  end
22
22
 
23
- # @return [Hash] the partial solr document for the workflow document
23
+ # @return [Hash] the partial solr document for a single workflow process
24
24
  # rubocop:disable Metrics/AbcSize
25
25
  def to_solr
26
26
  return unless status
@@ -2,7 +2,7 @@
2
2
 
3
3
  class DorIndexing
4
4
  module Indexers
5
- # Indexes the objects position in workflows
5
+ # Indexes the object's state in the most recent execution of every one of its workflows
6
6
  class WorkflowsIndexer
7
7
  attr_reader :id
8
8
 
@@ -11,7 +11,7 @@ class DorIndexing
11
11
  @workflow_client = workflow_client
12
12
  end
13
13
 
14
- # @return [Hash] the partial solr document for workflow concerns
14
+ # @return [Hash] the partial solr document for workflows concerns
15
15
  def to_solr
16
16
  WorkflowSolrDocument.new do |combined_doc|
17
17
  workflows.each do |wf|
@@ -30,7 +30,6 @@ class DorIndexing
30
30
  all_workflows.workflows
31
31
  end
32
32
 
33
- # TODO: remove Dor::Workflow::Document
34
33
  # @return [Workflow::Response::Workflows]
35
34
  def all_workflows
36
35
  @all_workflows ||= workflow_client.workflow_routes.all_workflows pid: id
@@ -61,7 +61,7 @@ class DorIndexing
61
61
 
62
62
  event_dates = Array(event.date) + Array(event.parallelEvent&.map(&:date))
63
63
  event_dates.flatten.compact.find do |date|
64
- date_type(date).nil?
64
+ date_type(date).blank?
65
65
  end
66
66
  end
67
67
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DorIndexing
4
- VERSION = '1.2.1'
4
+ VERSION = '1.4.0'
5
5
  end
data/lib/dor_indexing.rb CHANGED
@@ -3,7 +3,6 @@
3
3
  require 'zeitwerk'
4
4
  require 'stanford-mods'
5
5
  require 'cocina/models'
6
- require 'solrizer'
7
6
  require 'marc/vocab'
8
7
  require 'honeybadger'
9
8
 
metadata CHANGED
@@ -1,29 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dor_indexing
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Littman
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-28 00:00:00.000000000 Z
11
+ date: 2024-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: cocina-models
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
31
  - - "~>"
18
32
  - !ruby/object:Gem::Version
19
- version: 0.94.2
33
+ version: 0.95.0
20
34
  type: :runtime
21
35
  prerelease: false
22
36
  version_requirements: !ruby/object:Gem::Requirement
23
37
  requirements:
24
38
  - - "~>"
25
39
  - !ruby/object:Gem::Version
26
- version: 0.94.2
40
+ version: 0.95.0
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: dor-workflow-client
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -66,20 +80,6 @@ dependencies:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
82
  version: 0.3.0
69
- - !ruby/object:Gem::Dependency
70
- name: solrizer
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :runtime
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: stanford-mods
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -137,20 +137,20 @@ files:
137
137
  - lib/dor_indexing/builders/topic_builder.rb
138
138
  - lib/dor_indexing/cocina_repository.rb
139
139
  - lib/dor_indexing/indexers/administrative_tag_indexer.rb
140
+ - lib/dor_indexing/indexers/basic_indexer.rb
140
141
  - lib/dor_indexing/indexers/collection_title_indexer.rb
141
142
  - lib/dor_indexing/indexers/composite_indexer.rb
142
- - lib/dor_indexing/indexers/content_metadata_indexer.rb
143
- - lib/dor_indexing/indexers/data_indexer.rb
144
143
  - lib/dor_indexing/indexers/default_object_rights_indexer.rb
145
144
  - lib/dor_indexing/indexers/descriptive_metadata_indexer.rb
146
145
  - lib/dor_indexing/indexers/embargo_metadata_indexer.rb
147
146
  - lib/dor_indexing/indexers/identifiable_indexer.rb
148
147
  - lib/dor_indexing/indexers/identity_metadata_indexer.rb
149
- - lib/dor_indexing/indexers/process_indexer.rb
148
+ - lib/dor_indexing/indexers/object_files_indexer.rb
150
149
  - lib/dor_indexing/indexers/releasable_indexer.rb
151
150
  - lib/dor_indexing/indexers/rights_metadata_indexer.rb
152
151
  - lib/dor_indexing/indexers/role_metadata_indexer.rb
153
152
  - lib/dor_indexing/indexers/workflow_indexer.rb
153
+ - lib/dor_indexing/indexers/workflow_process_indexer.rb
154
154
  - lib/dor_indexing/indexers/workflows_indexer.rb
155
155
  - lib/dor_indexing/marc_country.rb
156
156
  - lib/dor_indexing/selectors/event_selector.rb