dor-services 8.6.0 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. checksums.yaml +4 -4
  2. data/config/config_defaults.yml +0 -1
  3. data/config/dev_console_env.rb.example +0 -1
  4. data/lib/dor-services.rb +0 -34
  5. data/lib/dor/models/abstract.rb +1 -47
  6. data/lib/dor/models/admin_policy_object.rb +0 -9
  7. data/lib/dor/models/collection.rb +0 -9
  8. data/lib/dor/models/etd.rb +0 -6
  9. data/lib/dor/models/item.rb +0 -9
  10. data/lib/dor/models/part.rb +0 -2
  11. data/lib/dor/models/set.rb +0 -8
  12. data/lib/dor/services/search_service.rb +1 -0
  13. data/lib/dor/static_config.rb +0 -8
  14. data/lib/dor/static_config/stacks_config.rb +0 -15
  15. data/lib/dor/version.rb +1 -1
  16. metadata +2 -55
  17. data/lib/dor/datastreams/simple_dublin_core_ds.rb +0 -59
  18. data/lib/dor/datastreams/workflow_definition_ds.rb +0 -71
  19. data/lib/dor/datastreams/workflow_ds.rb +0 -20
  20. data/lib/dor/indexers/composite_indexer.rb +0 -27
  21. data/lib/dor/indexers/data_indexer.rb +0 -24
  22. data/lib/dor/indexers/describable_indexer.rb +0 -60
  23. data/lib/dor/indexers/editable_indexer.rb +0 -25
  24. data/lib/dor/indexers/identifiable_indexer.rb +0 -102
  25. data/lib/dor/indexers/process_indexer.rb +0 -58
  26. data/lib/dor/indexers/processable_indexer.rb +0 -99
  27. data/lib/dor/indexers/releasable_indexer.rb +0 -33
  28. data/lib/dor/indexers/workflow_indexer.rb +0 -47
  29. data/lib/dor/indexers/workflows_indexer.rb +0 -34
  30. data/lib/dor/models/workflow_object.rb +0 -28
  31. data/lib/dor/models/workflow_solr_document.rb +0 -93
  32. data/lib/dor/release_tags.rb +0 -13
  33. data/lib/dor/release_tags/identity_metadata.rb +0 -145
  34. data/lib/dor/release_tags/purl.rb +0 -51
  35. data/lib/dor/release_tags/purl_client.rb +0 -44
  36. data/lib/dor/services/release_tag_service.rb +0 -40
  37. data/lib/dor/services/state_service.rb +0 -34
  38. data/lib/dor/services/status_service.rb +0 -125
  39. data/lib/dor/static_config/workflow_config.rb +0 -51
  40. data/lib/dor/workflow/document.rb +0 -72
  41. data/lib/dor/workflow/process.rb +0 -157
File 17: data/lib/dor/datastreams/simple_dublin_core_ds.rb (deleted, +0 -59)
@@ -1,59 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- class SimpleDublinCoreDs < ActiveFedora::OmDatastream
5
- set_terminology do |t|
6
- t.root(
7
- path: 'dc',
8
- xmlns: 'http://www.openarchives.org/OAI/2.0/oai_dc/',
9
- schema: 'http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd',
10
- namespace_prefix: 'oai_dc',
11
- index_as: [:not_searchable]
12
- )
13
- t.title(index_as: %i[stored_sortable stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
14
- t.creator(index_as: %i[stored_sortable stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
15
- t.identifier(index_as: %i[symbol stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
16
- end
17
-
18
- def self.xml_template
19
- builder = Nokogiri::XML::Builder.new do |xml|
20
- xml['oai_dc'].dc(
21
- 'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
22
- 'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
23
- 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
24
- 'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
25
- )
26
- end
27
-
28
- builder.doc
29
- end
30
-
31
- def to_solr(solr_doc = {}, *args)
32
- # There are a whole bunch of namespace-related things that can go
33
- # wrong with this terminology. Until it's fixed in OM, ignore them all.
34
-
35
- doc = super solr_doc, *args
36
-
37
- add_solr_value(doc, 'dc_title', title.first, :string, %i[stored_sortable stored_searchable])
38
- add_solr_value(doc, 'dc_creator', creator.first, :string, %i[stored_sortable stored_searchable])
39
-
40
- identifiers = {}
41
-
42
- identifier.each { |i| ns, val = i.split(':'); identifiers[ns] ||= val }
43
-
44
- identifiers.each do |ns, val|
45
- add_solr_value(doc, "dc_identifier_#{ns}", val, :string, %i[stored_sortable stored_searchable])
46
- end
47
-
48
- doc
49
- rescue Exception => e
50
- warn "ERROR in SimpleDublinCoreDs to_solr()! #{e}"
51
- solr_doc
52
- end
53
-
54
- # maintain AF < 8 indexing behavior
55
- def prefix
56
- ''
57
- end
58
- end
59
- end
File 18: data/lib/dor/datastreams/workflow_definition_ds.rb (deleted, +0 -71)
@@ -1,71 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # @deprecated
5
- class WorkflowDefinitionDs < ActiveFedora::OmDatastream
6
- include SolrDocHelper
7
-
8
- set_terminology do |t|
9
- t.root(path: 'workflow-def', index_as: [:not_searchable])
10
- t.process(index_as: [:not_searchable])
11
- end
12
-
13
- define_template :process do |builder, workflow, attrs|
14
- prereqs = attrs.delete('prerequisite')
15
- prereqs = prereqs.split(/\s*,\s*/) if prereqs.is_a?(String)
16
- attrs.keys.each { |k| attrs[k.to_s.dasherize.to_sym] = attrs.delete(k) }
17
- builder.process(attrs) do |node|
18
- Array(prereqs).each do |prereq|
19
- (repo, wf, prereq_name) = prereq.split(/:/)
20
- if prereq_name.nil?
21
- prereq_name = repo
22
- repo = nil
23
- end
24
- if repo == workflow.repo && wf = workflow.name
25
- repo = nil
26
- wf = nil
27
- end
28
- attrs = repo.nil? && wf.nil? ? {} : { repository: repo, workflow: wf }
29
- node.prereq(attrs) { node.text prereq_name }
30
- end
31
- end
32
- end
33
-
34
- def self.xml_template
35
- Nokogiri::XML('<workflow-def/>')
36
- end
37
-
38
- def add_process(attributes)
39
- ng_xml_will_change!
40
- add_child_node(ng_xml.at_xpath('/workflow-def'), :process, self, attributes)
41
- end
42
-
43
- def processes
44
- ng_xml.xpath('/workflow-def/process').collect do |node|
45
- Workflow::Process.new(repo, name, node)
46
- end.sort { |a, b| (a.sequence || 0) <=> (b.sequence || 0) }
47
- end
48
-
49
- def name
50
- ng_xml.at_xpath('/workflow-def/@id').to_s
51
- end
52
-
53
- def repo
54
- ng_xml.at_xpath('/workflow-def/@repository').to_s
55
- end
56
-
57
- def to_solr(solr_doc = {}, *args)
58
- solr_doc = super(solr_doc, *args)
59
- add_solr_value(solr_doc, 'workflow_name', name, :symbol, [:symbol])
60
- processes.each do |p|
61
- add_solr_value(solr_doc, 'process', "#{p.name}|#{p.label}", :symbol, [:displayable])
62
- end
63
- solr_doc
64
- end
65
-
66
- # maintain AF < 8 indexing behavior
67
- def prefix
68
- ''
69
- end
70
- end
71
- end
File 19: data/lib/dor/datastreams/workflow_ds.rb (deleted, +0 -20)
@@ -1,20 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Represents the datastream that just holds the location of the workflow service
5
- class WorkflowDs < ActiveFedora::Datastream
6
- before_save :build_location
7
-
8
- # Called before saving, but after a pid has been assigned
9
- def build_location
10
- return unless new?
11
-
12
- self.dsLocation = File.join(Dor::Config.workflow.url, "dor/objects/#{pid}/workflows")
13
- end
14
-
15
- # Called by rubydora. This lets us customize the mime-type
16
- def self.default_attributes
17
- super.merge(mimeType: 'application/xml')
18
- end
19
- end
20
- end
File 20: data/lib/dor/indexers/composite_indexer.rb (deleted, +0 -27)
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- class CompositeIndexer
5
- attr_reader :indexers
6
- def initialize(*indexers)
7
- @indexers = indexers
8
- end
9
-
10
- def new(resource:)
11
- Instance.new(indexers, resource: resource)
12
- end
13
-
14
- class Instance
15
- attr_reader :indexers, :resource
16
- def initialize(indexers, resource:)
17
- @resource = resource
18
- @indexers = indexers.map { |i| i.new(resource: resource) }
19
- end
20
-
21
- # @return [Hash] the merged solr document for all the sub-indexers
22
- def to_solr
23
- indexers.map(&:to_solr).inject({}, &:merge)
24
- end
25
- end
26
- end
27
- end
File 21: data/lib/dor/indexers/data_indexer.rb (deleted, +0 -24)
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Indexing provided by ActiveFedora
5
- class DataIndexer
6
- include ActiveFedora::Indexing
7
-
8
- attr_reader :resource
9
- def initialize(resource:)
10
- @resource = resource
11
- end
12
-
13
- # we need to override this until https://github.com/samvera/active_fedora/pull/1371
14
- # has been released
15
- def to_solr(solr_doc = {}, opts = {})
16
- super.tap do |doc|
17
- doc['active_fedora_model_ssi'] = has_model
18
- end
19
- end
20
-
21
- delegate :create_date, :modified_date, :state, :pid, :inner_object,
22
- :datastreams, :relationships, :has_model, to: :resource
23
- end
24
- end
File 22: data/lib/dor/indexers/describable_indexer.rb (deleted, +0 -60)
@@ -1,60 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- class DescribableIndexer
5
- attr_reader :resource
6
- def initialize(resource:)
7
- @resource = resource
8
- end
9
-
10
- # @return [Hash] the partial solr document for describable concerns
11
- def to_solr
12
- add_metadata_format_to_solr_doc.merge(add_mods_to_solr_doc)
13
- end
14
-
15
- def add_metadata_format_to_solr_doc
16
- { 'metadata_format_ssim' => 'mods' }
17
- end
18
-
19
- def add_mods_to_solr_doc
20
- solr_doc = {}
21
- mods_sources = {
22
- sw_title_display: %w(sw_display_title_tesim),
23
- main_author_w_date: %w(sw_author_ssim sw_author_tesim),
24
- sw_sort_author: %w(sw_author_sort_ssi),
25
- sw_language_facet: %w(sw_language_ssim sw_language_tesim),
26
- sw_genre: %w(sw_genre_ssim sw_genre_tesim),
27
- format_main: %w(sw_format_ssim sw_format_tesim),
28
- topic_facet: %w(sw_topic_ssim sw_topic_tesim),
29
- era_facet: %w(sw_subject_temporal_ssim sw_subject_temporal_tesim),
30
- geographic_facet: %w(sw_subject_geographic_ssim sw_subject_geographic_tesim),
31
- %i[term_values typeOfResource] => %w(mods_typeOfResource_ssim mods_typeOfResource_tesim),
32
- pub_year_sort_str: %w(sw_pub_date_sort_ssi),
33
- pub_year_int: %w(sw_pub_date_sort_isi),
34
- pub_year_display_str: %w(sw_pub_date_facet_ssi)
35
- }
36
-
37
- mods_sources.each_pair do |meth, solr_keys|
38
- vals = meth.is_a?(Array) ? resource.stanford_mods.send(meth.shift, *meth) : resource.stanford_mods.send(meth)
39
-
40
- next if vals.nil? || (vals.respond_to?(:empty?) && vals.empty?)
41
-
42
- solr_keys.each do |key|
43
- solr_doc[key] ||= []
44
- solr_doc[key].push(*vals)
45
- end
46
- # asterisk to avoid multi-dimensional array: push values, not the array
47
- end
48
-
49
- # convert multivalued fields to single value
50
- %w(sw_pub_date_sort_ssi sw_pub_date_sort_isi sw_pub_date_facet_ssi).each do |key|
51
- solr_doc[key] = solr_doc[key].first unless solr_doc[key].nil?
52
- end
53
- # some fields get explicit "(none)" placeholder values, mostly for faceting
54
- %w(sw_language_tesim sw_genre_tesim sw_format_tesim).each do |key|
55
- solr_doc[key] = ['(none)'] if solr_doc[key].nil? || solr_doc[key].empty?
56
- end
57
- solr_doc
58
- end
59
- end
60
- end
File 23: data/lib/dor/indexers/editable_indexer.rb (deleted, +0 -25)
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- class EditableIndexer
5
- include SolrDocHelper
6
-
7
- attr_reader :resource
8
- def initialize(resource:)
9
- @resource = resource
10
- end
11
-
12
- def to_solr
13
- {}.tap do |solr_doc|
14
- add_solr_value(solr_doc, 'default_rights', default_rights_for_indexing, :string, [:symbol])
15
- add_solr_value(solr_doc, 'agreement', resource.agreement, :string, [:symbol]) if resource.agreement_object
16
- add_solr_value(solr_doc, 'default_use_license_machine', resource.use_license, :string, [:stored_sortable])
17
- end
18
- end
19
-
20
- # @return [String] A description of the rights defined in the default object rights datastream. Can be 'Stanford', 'World', 'Dark' or 'None'
21
- def default_rights_for_indexing
22
- RightsMetadataDS::RIGHTS_TYPE_CODES.fetch(resource.default_rights, 'Unrecognized default rights value')
23
- end
24
- end
25
- end
File 24: data/lib/dor/indexers/identifiable_indexer.rb (deleted, +0 -102)
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- class IdentifiableIndexer
5
- include SolrDocHelper
6
-
7
- INDEX_VERSION_FIELD = 'dor_services_version_ssi'
8
- NS_HASH = { 'hydra' => 'http://projecthydra.org/ns/relations#',
9
- 'fedora' => 'info:fedora/fedora-system:def/relations-external#',
10
- 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' }.freeze
11
-
12
- attr_reader :resource
13
- def initialize(resource:)
14
- @resource = resource
15
- end
16
-
17
- ## Module-level variables, shared between ALL mixin includers (and ALL *their* includers/extenders)!
18
- ## used for caching found values
19
- @@collection_hash = {}
20
- @@apo_hash = {}
21
-
22
- # @return [Hash] the partial solr document for identifiable concerns
23
- def to_solr
24
- solr_doc = {}
25
- solr_doc[INDEX_VERSION_FIELD] = Dor::VERSION
26
- solr_doc['indexed_at_dtsi'] = Time.now.utc.xmlschema
27
- resource.datastreams.values.each do |ds|
28
- # This is used to draw the table of datastreams in Argo
29
- add_solr_value(solr_doc, 'ds_specs', ds.datastream_spec_string, :string, [:symbol]) unless ds.new?
30
- end
31
-
32
- add_solr_value(solr_doc, 'title_sort', resource.label, :string, [:stored_sortable])
33
-
34
- rels_doc = Nokogiri::XML(resource.datastreams['RELS-EXT'].content)
35
- apos = rels_doc.search('//rdf:RDF/rdf:Description/hydra:isGovernedBy', NS_HASH)
36
- collections = rels_doc.search('//rdf:RDF/rdf:Description/fedora:isMemberOfCollection', NS_HASH)
37
- solrize_related_obj_titles(solr_doc, apos, @@apo_hash, 'apo_title', 'nonhydrus_apo_title', 'hydrus_apo_title')
38
- solrize_related_obj_titles(solr_doc, collections, @@collection_hash, 'collection_title', 'nonhydrus_collection_title', 'hydrus_collection_title')
39
- solr_doc['public_dc_relation_tesim'] ||= solr_doc['collection_title_tesim'] if solr_doc['collection_title_tesim']
40
- solr_doc['metadata_source_ssi'] = identity_metadata_source
41
- solr_doc
42
- end
43
-
44
- # @return [String] calculated value for Solr index
45
- def identity_metadata_source
46
- if resource.identityMetadata.otherId('catkey').first ||
47
- resource.identityMetadata.otherId('barcode').first
48
- 'Symphony'
49
- else
50
- 'DOR'
51
- end
52
- end
53
-
54
- # Clears out the cache of items. Used primarily in testing.
55
- def self.reset_cache!
56
- @@collection_hash = {}
57
- @@apo_hash = {}
58
- end
59
-
60
- private
61
-
62
- def solrize_related_obj_titles(solr_doc, relationships, title_hash, union_field_name, nonhydrus_field_name, hydrus_field_name)
63
- # TODO: if you wanted to get a little fancier, you could also solrize a 2 level hierarchy and display using hierarchial facets, like
64
- # ["SOURCE", "SOURCE : TITLE"] (e.g. ["Hydrus", "Hydrus : Special Collections"], see (exploded) tags in IdentityMetadataDS#to_solr).
65
- title_type = :symbol # we'll get an _ssim because of the type
66
- title_attrs = [:stored_searchable] # we'll also get a _tesim from this attr
67
- relationships.each do |rel_node|
68
- rel_druid = rel_node['rdf:resource']
69
- next unless rel_druid # TODO: warning here would also be useful
70
-
71
- rel_druid = rel_druid.gsub('info:fedora/', '')
72
-
73
- # populate cache if necessary
74
- unless title_hash.key?(rel_druid)
75
- begin
76
- related_obj = Dor.find(rel_druid)
77
- related_obj_title = related_obj_display_title(related_obj, rel_druid)
78
- is_from_hydrus = (related_obj&.tags&.include?('Project : Hydrus'))
79
- title_hash[rel_druid] = { 'related_obj_title' => related_obj_title, 'is_from_hydrus' => is_from_hydrus }
80
- rescue ActiveFedora::ObjectNotFoundError
81
- # This may happen if the given APO or Collection does not exist (bad data)
82
- title_hash[rel_druid] = { 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
83
- end
84
- end
85
-
86
- # cache should definitely be populated, so just use that to write solr field
87
- if title_hash[rel_druid]['is_from_hydrus']
88
- add_solr_value(solr_doc, hydrus_field_name, title_hash[rel_druid]['related_obj_title'], title_type, title_attrs)
89
- else
90
- add_solr_value(solr_doc, nonhydrus_field_name, title_hash[rel_druid]['related_obj_title'], title_type, title_attrs)
91
- end
92
- add_solr_value(solr_doc, union_field_name, title_hash[rel_druid]['related_obj_title'], title_type, title_attrs)
93
- end
94
- end
95
-
96
- def related_obj_display_title(related_obj, default_title)
97
- return default_title unless related_obj
98
-
99
- related_obj.full_title || default_title
100
- end
101
- end
102
- end
File 25: data/lib/dor/indexers/process_indexer.rb (deleted, +0 -58)
@@ -1,58 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Indexes the process for a workflow
5
- class ProcessIndexer
6
- ERROR_OMISSION = '... (continued)'
7
- private_constant :ERROR_OMISSION
8
-
9
- # see https://lucene.apache.org/core/7_3_1/core/org/apache/lucene/util/BytesRefHash.MaxBytesLengthExceededException.html
10
- MAX_ERROR_LENGTH = 32_768 - 2 - ERROR_OMISSION.length
11
- private_constant :MAX_ERROR_LENGTH
12
-
13
- # @param [WorkflowSolrDocument] solr_doc
14
- # @param [String] workflow_name
15
- # @param [Dor::Workflow::Response::Process] process
16
- def initialize(solr_doc:, workflow_name:, process:)
17
- @solr_doc = solr_doc
18
- @workflow_name = workflow_name
19
- @process = process
20
- end
21
-
22
- # @return [Hash] the partial solr document for the workflow document
23
- def to_solr
24
- return unless status
25
-
26
- # add a record of the robot having operated on this item, so we can track robot activity
27
- solr_doc.add_process_time(workflow_name, name, Time.parse(process.datetime)) if has_time?
28
-
29
- index_error_message
30
-
31
- # workflow name, process status then process name
32
- solr_doc.add_wsp("#{workflow_name}:#{status}", "#{workflow_name}:#{status}:#{name}")
33
-
34
- # workflow name, process name then process status
35
- solr_doc.add_wps("#{workflow_name}:#{name}", "#{workflow_name}:#{name}:#{status}")
36
-
37
- # process status, workflowname then process name
38
- solr_doc.add_swp(process.status.to_s, "#{status}:#{workflow_name}", "#{status}:#{workflow_name}:#{name}")
39
- end
40
-
41
- private
42
-
43
- attr_reader :process, :workflow_name, :solr_doc
44
- delegate :status, :name, :state, :error_message, :datetime, to: :process
45
-
46
- def has_time?
47
- datetime && (status == 'completed' || status == 'error')
48
- end
49
-
50
- # index the error message without the druid so we hopefully get some overlap
51
- # truncate to avoid org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException
52
- def index_error_message
53
- return unless error_message
54
-
55
- solr_doc.error = "#{workflow_name}:#{name}:#{error_message}".truncate(MAX_ERROR_LENGTH, omission: ERROR_OMISSION)
56
- end
57
- end
58
- end