dor-services 8.6.0 → 9.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/config/config_defaults.yml +0 -1
  3. data/config/dev_console_env.rb.example +0 -1
  4. data/lib/dor-services.rb +0 -34
  5. data/lib/dor/models/abstract.rb +1 -47
  6. data/lib/dor/models/admin_policy_object.rb +0 -9
  7. data/lib/dor/models/collection.rb +0 -9
  8. data/lib/dor/models/etd.rb +0 -6
  9. data/lib/dor/models/item.rb +0 -9
  10. data/lib/dor/models/part.rb +0 -2
  11. data/lib/dor/models/set.rb +0 -8
  12. data/lib/dor/services/search_service.rb +1 -0
  13. data/lib/dor/static_config.rb +0 -8
  14. data/lib/dor/static_config/stacks_config.rb +0 -15
  15. data/lib/dor/version.rb +1 -1
  16. metadata +2 -55
  17. data/lib/dor/datastreams/simple_dublin_core_ds.rb +0 -59
  18. data/lib/dor/datastreams/workflow_definition_ds.rb +0 -71
  19. data/lib/dor/datastreams/workflow_ds.rb +0 -20
  20. data/lib/dor/indexers/composite_indexer.rb +0 -27
  21. data/lib/dor/indexers/data_indexer.rb +0 -24
  22. data/lib/dor/indexers/describable_indexer.rb +0 -60
  23. data/lib/dor/indexers/editable_indexer.rb +0 -25
  24. data/lib/dor/indexers/identifiable_indexer.rb +0 -102
  25. data/lib/dor/indexers/process_indexer.rb +0 -58
  26. data/lib/dor/indexers/processable_indexer.rb +0 -99
  27. data/lib/dor/indexers/releasable_indexer.rb +0 -33
  28. data/lib/dor/indexers/workflow_indexer.rb +0 -47
  29. data/lib/dor/indexers/workflows_indexer.rb +0 -34
  30. data/lib/dor/models/workflow_object.rb +0 -28
  31. data/lib/dor/models/workflow_solr_document.rb +0 -93
  32. data/lib/dor/release_tags.rb +0 -13
  33. data/lib/dor/release_tags/identity_metadata.rb +0 -145
  34. data/lib/dor/release_tags/purl.rb +0 -51
  35. data/lib/dor/release_tags/purl_client.rb +0 -44
  36. data/lib/dor/services/release_tag_service.rb +0 -40
  37. data/lib/dor/services/state_service.rb +0 -34
  38. data/lib/dor/services/status_service.rb +0 -125
  39. data/lib/dor/static_config/workflow_config.rb +0 -51
  40. data/lib/dor/workflow/document.rb +0 -72
  41. data/lib/dor/workflow/process.rb +0 -157
@@ -1,59 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# OM datastream over a simple OAI Dublin Core (`oai_dc:dc`) document.
# Declares title, creator and identifier terms and adds `dc_*` fields to the
# Solr document produced by the superclass.
class SimpleDublinCoreDs < ActiveFedora::OmDatastream
  set_terminology do |t|
    t.root(
      path: 'dc',
      xmlns: 'http://www.openarchives.org/OAI/2.0/oai_dc/',
      schema: 'http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd',
      namespace_prefix: 'oai_dc',
      index_as: [:not_searchable]
    )
    t.title(index_as: %i[stored_sortable stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
    t.creator(index_as: %i[stored_sortable stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
    t.identifier(index_as: %i[symbol stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
  end

  # @return [Nokogiri::XML::Document] an empty `oai_dc:dc` root element with
  #   the oai_dc, dc and xsi namespaces declared
  def self.xml_template
    builder = Nokogiri::XML::Builder.new do |xml|
      xml['oai_dc'].dc(
        'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
      )
    end

    builder.doc
  end

  # Adds dc_title, dc_creator and one dc_identifier_<prefix> field per
  # identifier prefix to the Solr document built by the superclass.
  #
  # @param solr_doc [Hash] document to add to
  # @param args extra arguments forwarded unchanged to the superclass
  # @return [Hash] the enriched document, or the +solr_doc+ argument as passed
  #   in when indexing fails
  def to_solr(solr_doc = {}, *args)
    # There are a whole bunch of namespace-related things that can go
    # wrong with this terminology. Until it's fixed in OM, ignore them all.

    doc = super solr_doc, *args

    add_solr_value(doc, 'dc_title', title.first, :string, %i[stored_sortable stored_searchable])
    add_solr_value(doc, 'dc_creator', creator.first, :string, %i[stored_sortable stored_searchable])

    # Identifiers look like "<prefix>:<value>"; keep the first value seen per prefix.
    identifiers = {}
    identifier.each do |id|
      ns, val = id.split(':')
      identifiers[ns] ||= val
    end

    identifiers.each do |ns, val|
      add_solr_value(doc, "dc_identifier_#{ns}", val, :string, %i[stored_sortable stored_searchable])
    end

    doc
  rescue StandardError => e
    # Best effort: indexing problems are reported and swallowed. StandardError
    # (not Exception) so interrupts, exit requests and out-of-memory errors
    # still propagate.
    warn "ERROR in SimpleDublinCoreDs to_solr()! #{e}"
    solr_doc
  end

  # maintain AF < 8 indexing behavior
  def prefix
    ''
  end
end
59
- end
@@ -1,71 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Datastream over a `<workflow-def>` XML document: exposes its id and
# repository attributes and its `<process>` children, and indexes them.
#
# @deprecated
class WorkflowDefinitionDs < ActiveFedora::OmDatastream
  include SolrDocHelper

  set_terminology do |t|
    t.root(path: 'workflow-def', index_as: [:not_searchable])
    t.process(index_as: [:not_searchable])
  end

  # Builds one <process> element. `attrs` becomes the element's attributes
  # (keys dasherized); the 'prerequisite' entry, a comma separated String or
  # an Array, becomes nested <prereq> elements instead.
  # Note: `attrs` is modified in place.
  define_template :process do |builder, workflow, attrs|
    prereqs = attrs.delete('prerequisite')
    prereqs = prereqs.split(/\s*,\s*/) if prereqs.is_a?(String)
    attrs.keys.each { |k| attrs[k.to_s.dasherize.to_sym] = attrs.delete(k) }
    builder.process(attrs) do |node|
      Array(prereqs).each do |prereq|
        repo, wf, prereq_name = prereq.split(/:/)
        if prereq_name.nil?
          prereq_name = repo
          repo = nil
        end
        # Drop the qualifiers only when the prerequisite lives in this very
        # repository AND workflow (comparison, not assignment).
        if repo == workflow.repo && wf == workflow.name
          repo = nil
          wf = nil
        end
        prereq_attrs = repo.nil? && wf.nil? ? {} : { repository: repo, workflow: wf }
        node.prereq(prereq_attrs) { node.text prereq_name }
      end
    end
  end

  # @return [Nokogiri::XML::Document] an empty <workflow-def/> document
  def self.xml_template
    Nokogiri::XML('<workflow-def/>')
  end

  # Appends a <process> element built by the :process template.
  # @param attributes [Hash] attributes for the new process
  def add_process(attributes)
    ng_xml_will_change!
    add_child_node(ng_xml.at_xpath('/workflow-def'), :process, self, attributes)
  end

  # @return [Array<Workflow::Process>] one entry per <process> element,
  #   ordered by sequence (a missing sequence counts as 0)
  def processes
    ng_xml.xpath('/workflow-def/process').map do |node|
      Workflow::Process.new(repo, name, node)
    end.sort { |a, b| (a.sequence || 0) <=> (b.sequence || 0) }
  end

  # @return [String] the id attribute of <workflow-def> ('' when absent)
  def name
    ng_xml.at_xpath('/workflow-def/@id').to_s
  end

  # @return [String] the repository attribute of <workflow-def> ('' when absent)
  def repo
    ng_xml.at_xpath('/workflow-def/@repository').to_s
  end

  # Adds the workflow name and a "name|label" entry per process to the
  # Solr document built by the superclass.
  # @return [Hash] the enriched solr document
  def to_solr(solr_doc = {}, *args)
    solr_doc = super(solr_doc, *args)
    add_solr_value(solr_doc, 'workflow_name', name, :symbol, [:symbol])
    processes.each do |p|
      add_solr_value(solr_doc, 'process', "#{p.name}|#{p.label}", :symbol, [:displayable])
    end
    solr_doc
  end

  # maintain AF < 8 indexing behavior
  def prefix
    ''
  end
end
71
- end
@@ -1,20 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Datastream whose only job is to point at the workflow service location
# for the owning object.
class WorkflowDs < ActiveFedora::Datastream
  before_save :build_location

  # before_save hook (runs once a pid has been assigned): sets dsLocation
  # to the object's workflows URL, but only for a datastream that is still new.
  def build_location
    self.dsLocation = File.join(Dor::Config.workflow.url, "dor/objects/#{pid}/workflows") if new?
  end

  class << self
    # Called by rubydora. Overrides the default mime-type with XML.
    def default_attributes
      inherited_defaults = super
      inherited_defaults.merge(mimeType: 'application/xml')
    end
  end
end
20
- end
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Bundles several indexer classes so they can be used as if they were one.
# The object itself answers `new(resource:)`, mimicking an indexer class.
class CompositeIndexer
  attr_reader :indexers

  # @param indexers [Array<Class>] indexer classes accepting `new(resource:)`
  def initialize(*indexers)
    @indexers = indexers
  end

  # @return [Instance] a composite bound to the given resource
  def new(resource:)
    Instance.new(indexers, resource: resource)
  end

  # One composite indexer bound to a single resource.
  class Instance
    attr_reader :indexers, :resource

    # Instantiates every indexer class for the resource.
    def initialize(indexers, resource:)
      @resource = resource
      @indexers = indexers.map { |indexer_class| indexer_class.new(resource: resource) }
    end

    # @return [Hash] the merged solr document for all the sub-indexers;
    #   on key clashes the later indexer wins
    def to_solr
      partial_docs = indexers.map { |indexer| indexer.to_solr }
      partial_docs.reduce({}) { |merged, partial| merged.merge(partial) }
    end
  end
end
27
- end
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Indexing provided by ActiveFedora, applied to a wrapped resource.
class DataIndexer
  include ActiveFedora::Indexing

  # The readers used here are forwarded to the wrapped resource.
  delegate :create_date, :modified_date, :state, :pid, :inner_object,
           :datastreams, :relationships, :has_model, to: :resource

  attr_reader :resource

  def initialize(resource:)
    @resource = resource
  end

  # Builds the ActiveFedora solr document and sets the model field on it.
  # We need to override this until
  # https://github.com/samvera/active_fedora/pull/1371 has been released.
  def to_solr(solr_doc = {}, opts = {})
    doc = super
    doc['active_fedora_model_ssi'] = has_model
    doc
  end
end
24
- end
@@ -1,60 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Builds the descriptive-metadata portion of a solr document from the
# resource's `stanford_mods` object.
class DescribableIndexer
  # Source on stanford_mods => solr fields that receive its values.
  # A key is either a method name or an array of [method name, *arguments].
  MODS_SOURCES = {
    sw_title_display: %w[sw_display_title_tesim],
    main_author_w_date: %w[sw_author_ssim sw_author_tesim],
    sw_sort_author: %w[sw_author_sort_ssi],
    sw_language_facet: %w[sw_language_ssim sw_language_tesim],
    sw_genre: %w[sw_genre_ssim sw_genre_tesim],
    format_main: %w[sw_format_ssim sw_format_tesim],
    topic_facet: %w[sw_topic_ssim sw_topic_tesim],
    era_facet: %w[sw_subject_temporal_ssim sw_subject_temporal_tesim],
    geographic_facet: %w[sw_subject_geographic_ssim sw_subject_geographic_tesim],
    %i[term_values typeOfResource].freeze => %w[mods_typeOfResource_ssim mods_typeOfResource_tesim],
    pub_year_sort_str: %w[sw_pub_date_sort_ssi],
    pub_year_int: %w[sw_pub_date_sort_isi],
    pub_year_display_str: %w[sw_pub_date_facet_ssi]
  }.freeze
  private_constant :MODS_SOURCES

  # Fields collapsed from an array to their first value.
  SINGLE_VALUED_FIELDS = %w[sw_pub_date_sort_ssi sw_pub_date_sort_isi sw_pub_date_facet_ssi].freeze
  private_constant :SINGLE_VALUED_FIELDS

  # Fields that get an explicit "(none)" placeholder, mostly for faceting.
  PLACEHOLDER_FIELDS = %w[sw_language_tesim sw_genre_tesim sw_format_tesim].freeze
  private_constant :PLACEHOLDER_FIELDS

  attr_reader :resource

  # @param resource an object responding to `stanford_mods`
  def initialize(resource:)
    @resource = resource
  end

  # @return [Hash] the partial solr document for describable concerns
  def to_solr
    add_metadata_format_to_solr_doc.merge(add_mods_to_solr_doc)
  end

  # @return [Hash] the constant metadata format field
  def add_metadata_format_to_solr_doc
    { 'metadata_format_ssim' => 'mods' }
  end

  # @return [Hash] solr fields derived from stanford_mods; nil or empty
  #   source values are skipped
  def add_mods_to_solr_doc
    solr_doc = {}
    mods = resource.stanford_mods

    MODS_SOURCES.each_pair do |source, solr_keys|
      # Destructure instead of shift: the mapping (and its keys) stay untouched.
      meth, *args = Array(source)
      vals = mods.send(meth, *args)

      next if vals.nil? || (vals.respond_to?(:empty?) && vals.empty?)

      solr_keys.each do |key|
        # splat to avoid a multi-dimensional array: push values, not the array
        (solr_doc[key] ||= []).push(*vals)
      end
    end

    # convert multivalued fields to single value
    SINGLE_VALUED_FIELDS.each do |key|
      solr_doc[key] = solr_doc[key].first unless solr_doc[key].nil?
    end

    PLACEHOLDER_FIELDS.each do |key|
      solr_doc[key] = ['(none)'] if solr_doc[key].nil? || solr_doc[key].empty?
    end

    solr_doc
  end
end
60
- end
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Indexes the default rights, agreement and default use license of a resource.
class EditableIndexer
  include SolrDocHelper

  attr_reader :resource

  def initialize(resource:)
    @resource = resource
  end

  # @return [Hash] the partial solr document for editable concerns; the
  #   agreement is only indexed when the resource has an agreement object
  def to_solr
    solr_doc = {}
    add_solr_value(solr_doc, 'default_rights', default_rights_for_indexing, :string, [:symbol])
    if resource.agreement_object
      add_solr_value(solr_doc, 'agreement', resource.agreement, :string, [:symbol])
    end
    add_solr_value(solr_doc, 'default_use_license_machine', resource.use_license, :string, [:stored_sortable])
    solr_doc
  end

  # Looks up the resource's default rights in RightsMetadataDS::RIGHTS_TYPE_CODES.
  # @return [String] the matching description, or
  #   'Unrecognized default rights value' when the code is not in the table
  def default_rights_for_indexing
    rights_code = resource.default_rights
    RightsMetadataDS::RIGHTS_TYPE_CODES.fetch(rights_code, 'Unrecognized default rights value')
  end
end
25
- end
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Indexes identity-related fields: indexing version and time, datastream
# specs, sort title, governing APO / collection titles and metadata source.
class IdentifiableIndexer
  include SolrDocHelper

  INDEX_VERSION_FIELD = 'dor_services_version_ssi'
  NS_HASH = { 'hydra' => 'http://projecthydra.org/ns/relations#',
              'fedora' => 'info:fedora/fedora-system:def/relations-external#',
              'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' }.freeze

  ## Class variables, shared by every instance of this indexer.
  ## Used for caching the titles found for related objects, keyed by druid.
  @@collection_hash = {}
  @@apo_hash = {}

  attr_reader :resource

  def initialize(resource:)
    @resource = resource
  end

  # @return [Hash] the partial solr document for identifiable concerns
  def to_solr
    solr_doc = {
      INDEX_VERSION_FIELD => Dor::VERSION,
      'indexed_at_dtsi' => Time.now.utc.xmlschema
    }

    # This is used to draw the table of datastreams in Argo
    resource.datastreams.values.each do |ds|
      next if ds.new?

      add_solr_value(solr_doc, 'ds_specs', ds.datastream_spec_string, :string, [:symbol])
    end

    add_solr_value(solr_doc, 'title_sort', resource.label, :string, [:stored_sortable])

    rels_ext = Nokogiri::XML(resource.datastreams['RELS-EXT'].content)
    governing_apos = rels_ext.search('//rdf:RDF/rdf:Description/hydra:isGovernedBy', NS_HASH)
    member_collections = rels_ext.search('//rdf:RDF/rdf:Description/fedora:isMemberOfCollection', NS_HASH)
    solrize_related_obj_titles(solr_doc, governing_apos, @@apo_hash, 'apo_title', 'nonhydrus_apo_title', 'hydrus_apo_title')
    solrize_related_obj_titles(solr_doc, member_collections, @@collection_hash, 'collection_title', 'nonhydrus_collection_title', 'hydrus_collection_title')

    collection_titles = solr_doc['collection_title_tesim']
    solr_doc['public_dc_relation_tesim'] ||= collection_titles if collection_titles
    solr_doc['metadata_source_ssi'] = identity_metadata_source
    solr_doc
  end

  # @return [String] 'Symphony' when identityMetadata carries a catkey or a
  #   barcode, 'DOR' otherwise
  def identity_metadata_source
    from_symphony = %w[catkey barcode].any? { |id_type| resource.identityMetadata.otherId(id_type).first }
    from_symphony ? 'Symphony' : 'DOR'
  end

  # Clears out the cache of items. Used primarily in testing.
  def self.reset_cache!
    @@collection_hash = {}
    @@apo_hash = {}
  end

  private

  # Writes the titles of the related objects (APOs or collections) into
  # solr_doc: always into the union field, and into either the hydrus or
  # the non-hydrus field. title_hash is the cache, filled on first lookup.
  def solrize_related_obj_titles(solr_doc, relationships, title_hash, union_field_name, nonhydrus_field_name, hydrus_field_name)
    # TODO: if you wanted to get a little fancier, you could also solrize a 2 level hierarchy and display using hierarchial facets, like
    # ["SOURCE", "SOURCE : TITLE"] (e.g. ["Hydrus", "Hydrus : Special Collections"], see (exploded) tags in IdentityMetadataDS#to_solr).
    title_type = :symbol # we'll get an _ssim because of the type
    title_attrs = [:stored_searchable] # we'll also get a _tesim from this attr
    relationships.each do |rel_node|
      resource_uri = rel_node['rdf:resource']
      next unless resource_uri # TODO: warning here would also be useful

      rel_druid = resource_uri.gsub('info:fedora/', '')

      # populate cache if necessary
      title_hash[rel_druid] = related_obj_info(rel_druid) unless title_hash.key?(rel_druid)

      # cache is populated at this point, so just use it to write the solr fields
      cached = title_hash[rel_druid]
      source_field_name = cached['is_from_hydrus'] ? hydrus_field_name : nonhydrus_field_name
      add_solr_value(solr_doc, source_field_name, cached['related_obj_title'], title_type, title_attrs)
      add_solr_value(solr_doc, union_field_name, cached['related_obj_title'], title_type, title_attrs)
    end
  end

  # Looks up the related object and returns its cache entry.
  # @return [Hash] 'related_obj_title' and 'is_from_hydrus' for the druid
  def related_obj_info(rel_druid)
    related_obj = Dor.find(rel_druid)
    {
      'related_obj_title' => related_obj_display_title(related_obj, rel_druid),
      'is_from_hydrus' => related_obj&.tags&.include?('Project : Hydrus')
    }
  rescue ActiveFedora::ObjectNotFoundError
    # This may happen if the given APO or Collection does not exist (bad data)
    { 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
  end

  # @return the related object's full title, or default_title when there is
  #   no object or it has no full title
  def related_obj_display_title(related_obj, default_title)
    related_obj ? (related_obj.full_title || default_title) : default_title
  end
end
102
- end
@@ -1,58 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
# Indexes a single process of a workflow into a workflow solr document.
class ProcessIndexer
  ERROR_OMISSION = '... (continued)'
  private_constant :ERROR_OMISSION

  # see https://lucene.apache.org/core/7_3_1/core/org/apache/lucene/util/BytesRefHash.MaxBytesLengthExceededException.html
  MAX_ERROR_LENGTH = 32_768 - 2 - ERROR_OMISSION.length
  private_constant :MAX_ERROR_LENGTH

  # @param [WorkflowSolrDocument] solr_doc
  # @param [String] workflow_name
  # @param [Dor::Workflow::Response::Process] process
  def initialize(solr_doc:, workflow_name:, process:)
    @process = process
    @workflow_name = workflow_name
    @solr_doc = solr_doc
  end

  # Records the process on the solr document passed to the constructor.
  # Does nothing when the process has no status.
  # @return nil when there is no status, otherwise whatever the final
  #   solr_doc.add_swp call returns
  def to_solr
    return unless status

    record_process_time
    index_error_message

    # workflow name, process status then process name
    solr_doc.add_wsp("#{workflow_name}:#{status}", "#{workflow_name}:#{status}:#{name}")

    # workflow name, process name then process status
    solr_doc.add_wps("#{workflow_name}:#{name}", "#{workflow_name}:#{name}:#{status}")

    # process status, workflowname then process name
    solr_doc.add_swp(process.status.to_s, "#{status}:#{workflow_name}", "#{status}:#{workflow_name}:#{name}")
  end

  private

  attr_reader :process, :workflow_name, :solr_doc
  delegate :status, :name, :state, :error_message, :datetime, to: :process

  # add a record of the robot having operated on this item, so we can track robot activity
  def record_process_time
    return unless timestamped?

    solr_doc.add_process_time(workflow_name, name, Time.parse(process.datetime))
  end

  # True-ish only for a process that has a datetime and is completed or in error.
  def timestamped?
    datetime && %w[completed error].include?(status)
  end

  # index the error message without the druid so we hopefully get some overlap
  # truncate to avoid org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException
  def index_error_message
    return unless error_message

    full_message = "#{workflow_name}:#{name}:#{error_message}"
    solr_doc.error = full_message.truncate(MAX_ERROR_LENGTH, omission: ERROR_OMISSION)
  end
end
58
- end