RubyGems - dor-services - Versions diffs - 8.6.0 → 9.0.0 - Mend

dor-services 8.6.0 → 9.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

checksums.yaml +4 -4
data/config/config_defaults.yml +0 -1
data/config/dev_console_env.rb.example +0 -1
data/lib/dor-services.rb +0 -34
data/lib/dor/models/abstract.rb +1 -47
data/lib/dor/models/admin_policy_object.rb +0 -9
data/lib/dor/models/collection.rb +0 -9
data/lib/dor/models/etd.rb +0 -6
data/lib/dor/models/item.rb +0 -9
data/lib/dor/models/part.rb +0 -2
data/lib/dor/models/set.rb +0 -8
data/lib/dor/services/search_service.rb +1 -0
data/lib/dor/static_config.rb +0 -8
data/lib/dor/static_config/stacks_config.rb +0 -15
data/lib/dor/version.rb +1 -1
metadata +2 -55
data/lib/dor/datastreams/simple_dublin_core_ds.rb +0 -59
data/lib/dor/datastreams/workflow_definition_ds.rb +0 -71
data/lib/dor/datastreams/workflow_ds.rb +0 -20
data/lib/dor/indexers/composite_indexer.rb +0 -27
data/lib/dor/indexers/data_indexer.rb +0 -24
data/lib/dor/indexers/describable_indexer.rb +0 -60
data/lib/dor/indexers/editable_indexer.rb +0 -25
data/lib/dor/indexers/identifiable_indexer.rb +0 -102
data/lib/dor/indexers/process_indexer.rb +0 -58
data/lib/dor/indexers/processable_indexer.rb +0 -99
data/lib/dor/indexers/releasable_indexer.rb +0 -33
data/lib/dor/indexers/workflow_indexer.rb +0 -47
data/lib/dor/indexers/workflows_indexer.rb +0 -34
data/lib/dor/models/workflow_object.rb +0 -28
data/lib/dor/models/workflow_solr_document.rb +0 -93
data/lib/dor/release_tags.rb +0 -13
data/lib/dor/release_tags/identity_metadata.rb +0 -145
data/lib/dor/release_tags/purl.rb +0 -51
data/lib/dor/release_tags/purl_client.rb +0 -44
data/lib/dor/services/release_tag_service.rb +0 -40
data/lib/dor/services/state_service.rb +0 -34
data/lib/dor/services/status_service.rb +0 -125
data/lib/dor/static_config/workflow_config.rb +0 -51
data/lib/dor/workflow/document.rb +0 -72
data/lib/dor/workflow/process.rb +0 -157

data/lib/dor/datastreams/simple_dublin_core_ds.rb DELETED

@@ -1,59 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  class SimpleDublinCoreDs < ActiveFedora::OmDatastream
-    set_terminology do |t|
-      t.root(
-        path: 'dc',
-        xmlns: 'http://www.openarchives.org/OAI/2.0/oai_dc/',
-        schema: 'http://cosimo.stanford.edu/standards/oai_dc/v2/oai_dc.xsd',
-        namespace_prefix: 'oai_dc',
-        index_as: [:not_searchable]
-      )
-      t.title(index_as: %i[stored_sortable stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
-      t.creator(index_as: %i[stored_sortable stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
-      t.identifier(index_as: %i[symbol stored_searchable], xmlns: 'http://purl.org/dc/elements/1.1/', namespace_prefix: 'dc')
-    end
-    def self.xml_template
-      builder = Nokogiri::XML::Builder.new do |xml|
-        xml['oai_dc'].dc(
-          'xmlns:oai_dc' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
-          'xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
-          'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
-          'xsi:schemaLocation' => 'http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd'
-        )
-      end
-      builder.doc
-    end
-    def to_solr(solr_doc = {}, *args)
-      # There are a whole bunch of namespace-related things that can go
-      # wrong with this terminology. Until it's fixed in OM, ignore them all.
-      doc = super solr_doc, *args
-      add_solr_value(doc, 'dc_title',   title.first,   :string, %i[stored_sortable stored_searchable])
-      add_solr_value(doc, 'dc_creator', creator.first, :string, %i[stored_sortable stored_searchable])
-      identifiers = {}
-      identifier.each { |i| ns, val = i.split(':'); identifiers[ns] ||= val }
-      identifiers.each do |ns, val|
-        add_solr_value(doc, "dc_identifier_#{ns}", val, :string, %i[stored_sortable stored_searchable])
-      end
-      doc
-    rescue Exception => e
-      warn "ERROR in SimpleDublinCoreDs to_solr()! #{e}"
-      solr_doc
-    end
-    # maintain AF < 8 indexing behavior
-    def prefix
-      ''
-    end
-  end
-end

data/lib/dor/datastreams/workflow_definition_ds.rb DELETED

@@ -1,71 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  # @deprecated
-  class WorkflowDefinitionDs < ActiveFedora::OmDatastream
-    include SolrDocHelper
-    set_terminology do |t|
-      t.root(path: 'workflow-def', index_as: [:not_searchable])
-      t.process(index_as: [:not_searchable])
-    end
-    define_template :process do |builder, workflow, attrs|
-      prereqs = attrs.delete('prerequisite')
-      prereqs = prereqs.split(/\s*,\s*/) if prereqs.is_a?(String)
-      attrs.keys.each { |k| attrs[k.to_s.dasherize.to_sym] = attrs.delete(k) }
-      builder.process(attrs) do |node|
-        Array(prereqs).each do |prereq|
-          (repo, wf, prereq_name) = prereq.split(/:/)
-          if prereq_name.nil?
-            prereq_name = repo
-            repo = nil
-          end
-          if repo == workflow.repo && wf = workflow.name
-            repo = nil
-            wf = nil
-          end
-          attrs = repo.nil? && wf.nil? ? {} : { repository: repo, workflow: wf }
-          node.prereq(attrs) { node.text prereq_name }
-        end
-      end
-    end
-    def self.xml_template
-      Nokogiri::XML('<workflow-def/>')
-    end
-    def add_process(attributes)
-      ng_xml_will_change!
-      add_child_node(ng_xml.at_xpath('/workflow-def'), :process, self, attributes)
-    end
-    def processes
-      ng_xml.xpath('/workflow-def/process').collect do |node|
-        Workflow::Process.new(repo, name, node)
-      end.sort { |a, b| (a.sequence || 0) <=> (b.sequence || 0) }
-    end
-    def name
-      ng_xml.at_xpath('/workflow-def/@id').to_s
-    end
-    def repo
-      ng_xml.at_xpath('/workflow-def/@repository').to_s
-    end
-    def to_solr(solr_doc = {}, *args)
-      solr_doc = super(solr_doc, *args)
-      add_solr_value(solr_doc, 'workflow_name', name, :symbol, [:symbol])
-      processes.each do |p|
-        add_solr_value(solr_doc, 'process', "#{p.name}|#{p.label}", :symbol, [:displayable])
-      end
-      solr_doc
-    end
-    # maintain AF < 8 indexing behavior
-    def prefix
-      ''
-    end
-  end
-end

data/lib/dor/datastreams/workflow_ds.rb DELETED

@@ -1,20 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  # Represents the datastream that just holds the location of the workflow service
-  class WorkflowDs < ActiveFedora::Datastream
-    before_save :build_location
-    # Called before saving, but after a pid has been assigned
-    def build_location
-      return unless new?
-      self.dsLocation = File.join(Dor::Config.workflow.url, "dor/objects/#{pid}/workflows")
-    end
-    # Called by rubydora. This lets us customize the mime-type
-    def self.default_attributes
-      super.merge(mimeType: 'application/xml')
-    end
-  end
-end

data/lib/dor/indexers/composite_indexer.rb DELETED

@@ -1,27 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  class CompositeIndexer
-    attr_reader :indexers
-    def initialize(*indexers)
-      @indexers = indexers
-    end
-    def new(resource:)
-      Instance.new(indexers, resource: resource)
-    end
-    class Instance
-      attr_reader :indexers, :resource
-      def initialize(indexers, resource:)
-        @resource = resource
-        @indexers = indexers.map { |i| i.new(resource: resource) }
-      end
-      # @return [Hash] the merged solr document for all the sub-indexers
-      def to_solr
-        indexers.map(&:to_solr).inject({}, &:merge)
-      end
-    end
-  end
-end

data/lib/dor/indexers/data_indexer.rb DELETED

@@ -1,24 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  # Indexing provided by ActiveFedora
-  class DataIndexer
-    include ActiveFedora::Indexing
-    attr_reader :resource
-    def initialize(resource:)
-      @resource = resource
-    end
-    # we need to override this until https://github.com/samvera/active_fedora/pull/1371
-    # has been released
-    def to_solr(solr_doc = {}, opts = {})
-      super.tap do |doc|
-        doc['active_fedora_model_ssi'] = has_model
-      end
-    end
-    delegate :create_date, :modified_date, :state, :pid, :inner_object,
-             :datastreams, :relationships, :has_model, to: :resource
-  end
-end

data/lib/dor/indexers/describable_indexer.rb DELETED

@@ -1,60 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  class DescribableIndexer
-    attr_reader :resource
-    def initialize(resource:)
-      @resource = resource
-    end
-    # @return [Hash] the partial solr document for describable concerns
-    def to_solr
-      add_metadata_format_to_solr_doc.merge(add_mods_to_solr_doc)
-    end
-    def add_metadata_format_to_solr_doc
-      { 'metadata_format_ssim' => 'mods' }
-    end
-    def add_mods_to_solr_doc
-      solr_doc = {}
-      mods_sources = {
-        sw_title_display: %w(sw_display_title_tesim),
-        main_author_w_date: %w(sw_author_ssim sw_author_tesim),
-        sw_sort_author: %w(sw_author_sort_ssi),
-        sw_language_facet: %w(sw_language_ssim sw_language_tesim),
-        sw_genre: %w(sw_genre_ssim sw_genre_tesim),
-        format_main: %w(sw_format_ssim sw_format_tesim),
-        topic_facet: %w(sw_topic_ssim sw_topic_tesim),
-        era_facet: %w(sw_subject_temporal_ssim sw_subject_temporal_tesim),
-        geographic_facet: %w(sw_subject_geographic_ssim sw_subject_geographic_tesim),
-        %i[term_values typeOfResource] => %w(mods_typeOfResource_ssim mods_typeOfResource_tesim),
-        pub_year_sort_str: %w(sw_pub_date_sort_ssi),
-        pub_year_int: %w(sw_pub_date_sort_isi),
-        pub_year_display_str: %w(sw_pub_date_facet_ssi)
-      }
-      mods_sources.each_pair do |meth, solr_keys|
-        vals = meth.is_a?(Array) ? resource.stanford_mods.send(meth.shift, *meth) : resource.stanford_mods.send(meth)
-        next if vals.nil? || (vals.respond_to?(:empty?) && vals.empty?)
-        solr_keys.each do |key|
-          solr_doc[key] ||= []
-          solr_doc[key].push(*vals)
-        end
-        # asterisk to avoid multi-dimensional array: push values, not the array
-      end
-      # convert multivalued fields to single value
-      %w(sw_pub_date_sort_ssi sw_pub_date_sort_isi sw_pub_date_facet_ssi).each do |key|
-        solr_doc[key] = solr_doc[key].first unless solr_doc[key].nil?
-      end
-      # some fields get explicit "(none)" placeholder values, mostly for faceting
-      %w(sw_language_tesim sw_genre_tesim sw_format_tesim).each do |key|
-        solr_doc[key] = ['(none)'] if solr_doc[key].nil? || solr_doc[key].empty?
-      end
-      solr_doc
-    end
-  end
-end

data/lib/dor/indexers/editable_indexer.rb DELETED

@@ -1,25 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  class EditableIndexer
-    include SolrDocHelper
-    attr_reader :resource
-    def initialize(resource:)
-      @resource = resource
-    end
-    def to_solr
-      {}.tap do |solr_doc|
-        add_solr_value(solr_doc, 'default_rights', default_rights_for_indexing, :string, [:symbol])
-        add_solr_value(solr_doc, 'agreement', resource.agreement, :string, [:symbol]) if resource.agreement_object
-        add_solr_value(solr_doc, 'default_use_license_machine', resource.use_license, :string, [:stored_sortable])
-      end
-    end
-    # @return [String] A description of the rights defined in the default object rights datastream. Can be 'Stanford', 'World', 'Dark' or 'None'
-    def default_rights_for_indexing
-      RightsMetadataDS::RIGHTS_TYPE_CODES.fetch(resource.default_rights, 'Unrecognized default rights value')
-    end
-  end
-end

data/lib/dor/indexers/identifiable_indexer.rb DELETED

@@ -1,102 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  class IdentifiableIndexer
-    include SolrDocHelper
-    INDEX_VERSION_FIELD = 'dor_services_version_ssi'
-    NS_HASH = { 'hydra' => 'http://projecthydra.org/ns/relations#',
-                'fedora' => 'info:fedora/fedora-system:def/relations-external#',
-                'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' }.freeze
-    attr_reader :resource
-    def initialize(resource:)
-      @resource = resource
-    end
-    ## Module-level variables, shared between ALL mixin includers (and ALL *their* includers/extenders)!
-    ## used for caching found values
-    @@collection_hash = {}
-    @@apo_hash = {}
-    # @return [Hash] the partial solr document for identifiable concerns
-    def to_solr
-      solr_doc = {}
-      solr_doc[INDEX_VERSION_FIELD] = Dor::VERSION
-      solr_doc['indexed_at_dtsi'] = Time.now.utc.xmlschema
-      resource.datastreams.values.each do |ds|
-        # This is used to draw the table of datastreams in Argo
-        add_solr_value(solr_doc, 'ds_specs', ds.datastream_spec_string, :string, [:symbol]) unless ds.new?
-      end
-      add_solr_value(solr_doc, 'title_sort', resource.label, :string, [:stored_sortable])
-      rels_doc = Nokogiri::XML(resource.datastreams['RELS-EXT'].content)
-      apos = rels_doc.search('//rdf:RDF/rdf:Description/hydra:isGovernedBy', NS_HASH)
-      collections = rels_doc.search('//rdf:RDF/rdf:Description/fedora:isMemberOfCollection', NS_HASH)
-      solrize_related_obj_titles(solr_doc, apos, @@apo_hash, 'apo_title', 'nonhydrus_apo_title', 'hydrus_apo_title')
-      solrize_related_obj_titles(solr_doc, collections, @@collection_hash, 'collection_title', 'nonhydrus_collection_title', 'hydrus_collection_title')
-      solr_doc['public_dc_relation_tesim'] ||= solr_doc['collection_title_tesim'] if solr_doc['collection_title_tesim']
-      solr_doc['metadata_source_ssi'] = identity_metadata_source
-      solr_doc
-    end
-    # @return [String] calculated value for Solr index
-    def identity_metadata_source
-      if resource.identityMetadata.otherId('catkey').first ||
-         resource.identityMetadata.otherId('barcode').first
-        'Symphony'
-      else
-        'DOR'
-      end
-    end
-    # Clears out the cache of items. Used primarily in testing.
-    def self.reset_cache!
-      @@collection_hash = {}
-      @@apo_hash = {}
-    end
-    private
-    def solrize_related_obj_titles(solr_doc, relationships, title_hash, union_field_name, nonhydrus_field_name, hydrus_field_name)
-      # TODO: if you wanted to get a little fancier, you could also solrize a 2 level hierarchy and display using hierarchial facets, like
-      # ["SOURCE", "SOURCE : TITLE"] (e.g. ["Hydrus", "Hydrus : Special Collections"], see (exploded) tags in IdentityMetadataDS#to_solr).
-      title_type = :symbol # we'll get an _ssim because of the type
-      title_attrs = [:stored_searchable] # we'll also get a _tesim from this attr
-      relationships.each do |rel_node|
-        rel_druid = rel_node['rdf:resource']
-        next unless rel_druid # TODO: warning here would also be useful
-        rel_druid = rel_druid.gsub('info:fedora/', '')
-        # populate cache if necessary
-        unless title_hash.key?(rel_druid)
-          begin
-            related_obj = Dor.find(rel_druid)
-            related_obj_title = related_obj_display_title(related_obj, rel_druid)
-            is_from_hydrus = (related_obj&.tags&.include?('Project : Hydrus'))
-            title_hash[rel_druid] = { 'related_obj_title' => related_obj_title, 'is_from_hydrus' => is_from_hydrus }
-          rescue ActiveFedora::ObjectNotFoundError
-            # This may happen if the given APO or Collection does not exist (bad data)
-            title_hash[rel_druid] = { 'related_obj_title' => rel_druid, 'is_from_hydrus' => false }
-          end
-        end
-        # cache should definitely be populated, so just use that to write solr field
-        if title_hash[rel_druid]['is_from_hydrus']
-          add_solr_value(solr_doc, hydrus_field_name, title_hash[rel_druid]['related_obj_title'], title_type, title_attrs)
-        else
-          add_solr_value(solr_doc, nonhydrus_field_name, title_hash[rel_druid]['related_obj_title'], title_type, title_attrs)
-        end
-        add_solr_value(solr_doc, union_field_name, title_hash[rel_druid]['related_obj_title'], title_type, title_attrs)
-      end
-    end
-    def related_obj_display_title(related_obj, default_title)
-      return default_title unless related_obj
-      related_obj.full_title || default_title
-    end
-  end
-end

data/lib/dor/indexers/process_indexer.rb DELETED

@@ -1,58 +0,0 @@
-# frozen_string_literal: true
-module Dor
-  # Indexes the process for a workflow
-  class ProcessIndexer
-    ERROR_OMISSION = '... (continued)'
-    private_constant :ERROR_OMISSION
-    # see https://lucene.apache.org/core/7_3_1/core/org/apache/lucene/util/BytesRefHash.MaxBytesLengthExceededException.html
-    MAX_ERROR_LENGTH = 32_768 - 2 - ERROR_OMISSION.length
-    private_constant :MAX_ERROR_LENGTH
-    # @param [WorkflowSolrDocument] solr_doc
-    # @param [String] workflow_name
-    # @param [Dor::Workflow::Response::Process] process
-    def initialize(solr_doc:, workflow_name:, process:)
-      @solr_doc = solr_doc
-      @workflow_name = workflow_name
-      @process = process
-    end
-    # @return [Hash] the partial solr document for the workflow document
-    def to_solr
-      return unless status
-      # add a record of the robot having operated on this item, so we can track robot activity
-      solr_doc.add_process_time(workflow_name, name, Time.parse(process.datetime)) if has_time?
-      index_error_message
-      # workflow name, process status then process name
-      solr_doc.add_wsp("#{workflow_name}:#{status}", "#{workflow_name}:#{status}:#{name}")
-      # workflow name, process name then process status
-      solr_doc.add_wps("#{workflow_name}:#{name}", "#{workflow_name}:#{name}:#{status}")
-      # process status, workflowname then process name
-      solr_doc.add_swp(process.status.to_s, "#{status}:#{workflow_name}", "#{status}:#{workflow_name}:#{name}")
-    end
-    private
-    attr_reader :process, :workflow_name, :solr_doc
-    delegate :status, :name, :state, :error_message, :datetime, to: :process
-    def has_time?
-      datetime && (status == 'completed' || status == 'error')
-    end
-    # index the error message without the druid so we hopefully get some overlap
-    # truncate to avoid org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException
-    def index_error_message
-      return unless error_message
-      solr_doc.error = "#{workflow_name}:#{name}:#{error_message}".truncate(MAX_ERROR_LENGTH, omission: ERROR_OMISSION)
-    end
-  end
-end