RubyGems - curation_concerns-models - Versions diffs - 0.1.0 → 0.2.0 - Mend

curation_concerns-models 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99)

data/app/models/concerns/curation_concerns/generic_file_behavior.rb DELETED

@@ -1,44 +0,0 @@
-module CurationConcerns
-  module GenericFileBehavior
-    extend ActiveSupport::Concern
-    include Hydra::Works::GenericFileBehavior
-    include Hydra::Works::GenericFile::VirusCheck
-    include Hydra::WithDepositor
-    include CurationConcerns::Serializers
-    include CurationConcerns::Noid
-    include CurationConcerns::Permissions
-    include CurationConcerns::GenericFile::Export
-    include CurationConcerns::GenericFile::Characterization
-    include CurationConcerns::BasicMetadata
-    include CurationConcerns::GenericFile::Content
-    include CurationConcerns::GenericFile::FullTextIndexing
-    include CurationConcerns::GenericFile::Indexing
-    include CurationConcerns::GenericFile::BelongsToWorks
-    include Hydra::AccessControls::Embargoable
-    included do
-      attr_accessor :file
-      # make filename single-value (CurationConcerns::GenericFile::Characterization makes it multivalue)
-      def filename
-        self[:filename].first
-      end
-    end
-    def human_readable_type
-      self.class.to_s.demodulize.titleize
-    end
-    def representative
-      to_param
-    end
-    def to_solr(solr_doc = {})
-      super(solr_doc).tap do |solr_doc|
-        # Enables Riiif to not have to recalculate this each time.
-        solr_doc['height_isi'] = Integer(height.first) if height.present?
-        solr_doc['width_isi'] = Integer(width.first) if width.present?
-      end
-    end
-  end
-end

data/app/models/concerns/curation_concerns/with_basic_metadata.rb DELETED

@@ -1,98 +0,0 @@
-module CurationConcerns
-   # This is a direct copy of Sufia::GenericFile::Metadata with a few modifications:
-  # * title & description are single-value instead of multivalue
-  module DefaultMetadata
-    extend ActiveSupport::Concern
-    included do
-      property :label, predicate: ::RDF::DC.title, multiple: false
-      property :depositor, predicate: ::RDF::URI.new("http://id.loc.gov/vocabulary/relators/dpt"), multiple: false do |index|
-        index.as :symbol, :stored_searchable
-      end
-      property :relative_path, predicate: ::RDF::URI.new('http://scholarsphere.psu.edu/ns#relativePath'), multiple: false
-      property :import_url, predicate: ::RDF::URI.new('http://scholarsphere.psu.edu/ns#importUrl'), multiple: false do |index|
-        index.as :symbol
-      end
-      property :part_of, predicate: ::RDF::DC.isPartOf
-      property :resource_type, predicate: ::RDF::DC.type do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :title, predicate: ::RDF::DC.title, multiple:false do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :creator, predicate: ::RDF::DC.creator do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :contributor, predicate: ::RDF::DC.contributor do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :description, predicate: ::RDF::DC.description, multiple: false do |index|
-        index.type :text
-        index.as :stored_searchable
-      end
-      property :tag, predicate: ::RDF::DC.relation do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :rights, predicate: ::RDF::DC.rights do |index|
-        index.as :stored_searchable
-      end
-      property :publisher, predicate: ::RDF::DC.publisher do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :date_created, predicate: ::RDF::DC.created do |index|
-        index.as :stored_searchable
-      end
-      property :date_uploaded, predicate: ::RDF::DC.dateSubmitted, multiple: false do |index|
-        index.type :date
-        index.as :stored_sortable
-      end
-      property :date_modified, predicate: ::RDF::DC.modified, multiple: false do |index|
-        index.type :date
-        index.as :stored_sortable
-      end
-      property :subject, predicate: ::RDF::DC.subject do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :language, predicate: ::RDF::DC.language do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :identifier, predicate: ::RDF::DC.identifier do |index|
-        index.as :stored_searchable
-      end
-      property :based_near, predicate: ::RDF::FOAF.based_near do |index|
-        index.as :stored_searchable, :facetable
-      end
-      property :related_url, predicate: ::RDF::RDFS.seeAlso do |index|
-        index.as :stored_searchable
-      end
-      property :bibliographic_citation, predicate: ::RDF::DC.bibliographicCitation do |index|
-        index.as :stored_searchable
-      end
-      property :source, predicate: ::RDF::DC.source do |index|
-        index.as :stored_searchable
-      end
-      # TODO: Move this somewhere more appropriate
-      begin
-        LocalAuthority.register_vocabulary(self, "subject", "lc_subjects")
-        LocalAuthority.register_vocabulary(self, "language", "lexvo_languages")
-        LocalAuthority.register_vocabulary(self, "tag", "lc_genres")
-      rescue
-        puts "tables for vocabularies missing"
-      end
-    end
-    # Add a schema.org itemtype
-    def itemtype
-      # Look up the first non-empty resource type value in a hash from the config
-      CurationConcerns.config.resource_types_to_schema[resource_type.to_a.reject { |type| type.empty? }.first] || 'http://schema.org/CreativeWork'
-    rescue
-      'http://schema.org/CreativeWork'
-    end
-  end
-end

data/app/models/concerns/curation_concerns/with_generic_files.rb DELETED

@@ -1,29 +0,0 @@
-# Copied from Curate
-module CurationConcerns
-   module WithGenericFiles
-    extend ActiveSupport::Concern
-    included do
-      # The generic_files association and its accessor methods comes from Hydra::Works::AggregatesGenericFiles
-      before_destroy :before_destroy_cleanup_generic_files
-    end
-    # Stopgap unil ActiveFedora ContainerAssociation includes an *_ids accessor.
-    # At the moment, this is no more efficient than calling generic_files, but hopefully that will change in the future.
-    def generic_file_ids
-      generic_files.map { |generic_file| generic_file.id }
-    end
-    def before_destroy_cleanup_generic_files
-      generic_files.each(&:destroy)
-    end
-    def copy_visibility_to_files
-      generic_files.each do |gf|
-        gf.visibility = visibility
-        gf.save!
-      end
-    end
-  end
-end

data/app/models/datastreams/fits_datastream.rb DELETED

@@ -1,148 +0,0 @@
-class FitsDatastream < ActiveFedora::OmDatastream
-  include OM::XML::Document
-  set_terminology do |t|
-    t.root(path: "fits",
-           xmlns: "http://hul.harvard.edu/ois/xml/ns/fits/fits_output",
-           schema: "http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd")
-    t.identification {
-      t.identity {
-        t.format_label(path: {attribute: "format"})
-        t.mime_type(path: {attribute: "mimetype"})
-      }
-    }
-    t.fileinfo {
-      t.file_size(path: "size")
-      t.last_modified(path: "lastmodified")
-      t.filename(path: "filename")
-      t.original_checksum(path: "md5checksum")
-      t.rights_basis(path: "rightsBasis")
-      t.copyright_basis(path: "copyrightBasis")
-      t.copyright_note(path: "copyrightNote")
-    }
-    t.filestatus {
-      t.well_formed(path: "well-formed")
-      t.valid(path: "valid")
-      t.status_message(path: "message")
-    }
-    t.metadata {
-      t.document {
-        t.file_title(path: "title")
-        t.file_author(path: "author")
-        t.file_language(path: "language")
-        t.page_count(path: "pageCount")
-        t.word_count(path: "wordCount")
-        t.character_count(path: "characterCount")
-        t.paragraph_count(path: "paragraphCount")
-        t.line_count(path: "lineCount")
-        t.table_count(path: "tableCount")
-        t.graphics_count(path: "graphicsCount")
-      }
-      t.image {
-        t.byte_order(path: "byteOrder")
-        t.compression(path: "compressionScheme")
-        t.width(path: "imageWidth")
-        t.height(path: "imageHeight")
-        t.color_space(path: "colorSpace")
-        t.profile_name(path: "iccProfileName")
-        t.profile_version(path: "iccProfileVersion")
-        t.orientation(path: "orientation")
-        t.color_map(path: "colorMap")
-        t.image_producer(path: "imageProducer")
-        t.capture_device(path: "captureDevice")
-        t.scanning_software(path: "scanningSoftwareName")
-        t.exif_version(path: "exifVersion")
-        t.gps_timestamp(path: "gpsTimeStamp")
-        t.latitude(path: "gpsDestLatitude")
-        t.longitude(path: "gpsDestLongitude")
-      }
-      t.text {
-        t.character_set(path: "charset")
-        t.markup_basis(path: "markupBasis")
-        t.markup_language(path: "markupLanguage")
-      }
-      t.audio {
-        t.duration(path: "duration")
-        t.bit_depth(path: "bitDepth")
-        t.sample_rate(path: "sampleRate")
-        t.channels(path: "channels")
-        t.data_format(path: "dataFormatType")
-        t.offset(path: "offset")
-      }
-      t.video {
-        t.width(path: "imageWidth")
-        t.height(path: "imageHeight")
-        t.duration(path: "duration")
-        t.sample_rate(path: "sampleRate")
-        t.frame_rate(path: "frameRate")
-      }
-    }
-    t.format_label(proxy: [:identification, :identity, :format_label])
-    t.mime_type(proxy: [:identification, :identity, :mime_type])
-    t.file_size(proxy: [:fileinfo, :file_size])
-    t.last_modified(proxy: [:fileinfo, :last_modified])
-    t.filename(proxy: [:fileinfo, :filename])
-    t.original_checksum(proxy: [:fileinfo, :original_checksum])
-    t.rights_basis(proxy: [:fileinfo, :rights_basis])
-    t.copyright_basis(proxy: [:fileinfo, :copyright_basis])
-    t.copyright_note(proxy: [:fileinfo, :copyright_note])
-    t.well_formed(proxy: [:filestatus, :well_formed])
-    t.valid(proxy: [:filestatus, :valid])
-    t.status_message(proxy: [:filestatus, :status_message])
-    t.file_title(proxy: [:metadata, :document, :file_title])
-    t.file_author(proxy: [:metadata, :document, :file_author])
-    t.page_count(proxy: [:metadata, :document, :page_count])
-    t.file_language(proxy: [:metadata, :document, :file_language])
-    t.word_count(proxy: [:metadata, :document, :word_count])
-    t.character_count(proxy: [:metadata, :document, :character_count])
-    t.paragraph_count(proxy: [:metadata, :document, :paragraph_count])
-    t.line_count(proxy: [:metadata, :document, :line_count])
-    t.table_count(proxy: [:metadata, :document, :table_count])
-    t.graphics_count(proxy: [:metadata, :document, :graphics_count])
-    t.byte_order(proxy: [:metadata, :image, :byte_order])
-    t.compression(proxy: [:metadata, :image, :compression])
-    t.width(proxy: [:metadata, :image, :width])
-    t.video_width( proxy: [:metadata, :video, :width])
-    t.height(proxy: [:metadata, :image, :height])
-    t.video_height(proxy: [:metadata, :video, :height])
-    t.color_space(proxy: [:metadata, :image, :color_space])
-    t.profile_name(proxy: [:metadata, :image, :profile_name])
-    t.profile_version(proxy: [:metadata, :image, :profile_version])
-    t.orientation(proxy: [:metadata, :image, :orientation])
-    t.color_map(proxy: [:metadata, :image, :color_map])
-    t.image_producer(proxy: [:metadata, :image, :image_producer])
-    t.capture_device(proxy: [:metadata, :image, :capture_device])
-    t.scanning_software(proxy: [:metadata, :image, :scanning_software])
-    t.exif_version(proxy: [:metadata, :image, :exif_version])
-    t.gps_timestamp(proxy: [:metadata, :image, :gps_timestamp])
-    t.latitude(proxy: [:metadata, :image, :latitude])
-    t.longitude(proxy: [:metadata, :image, :longitude])
-    t.character_set(proxy: [:metadata, :text, :character_set])
-    t.markup_basis(proxy: [:metadata, :text, :markup_basis])
-    t.markup_language(proxy: [:metadata, :text, :markup_language])
-    t.duration(proxy: [:metadata, :audio, :duration])
-    t.video_duration(proxy: [:metadata, :video, :duration])
-    t.bit_depth(proxy: [:metadata, :audio, :bit_depth])
-    t.sample_rate(proxy: [:metadata, :audio, :sample_rate])
-    t.video_sample_rate(proxy: [:metadata, :video, :sample_rate])
-    t.channels(proxy: [:metadata, :audio, :channels])
-    t.data_format(proxy: [:metadata, :audio, :data_format])
-    t.offset(proxy: [:metadata, :audio, :offset])
-    t.frame_rate(proxy: [:metadata, :video, :frame_rate])
-  end
-  def self.xml_template
-    builder = Nokogiri::XML::Builder.new do |xml|
-      xml.fits(xmlns: 'http://hul.harvard.edu/ois/xml/ns/fits/fits_output',
-               'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
-               'xsi:schemaLocation' =>
-    "http://hul.harvard.edu/ois/xml/ns/fits/fits_output
-    http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd",
-               version: "0.6.0",
-               timestamp: "1/25/12 11:04 AM") {
-        xml.identification { xml.identity(toolname: 'FITS') }
-      }
-    end
-    builder.doc
-  end
-end

data/app/services/curation_concerns/characterization_service.rb DELETED

@@ -1,71 +0,0 @@
-module CurationConcerns
-  # Run FITS to gather technical metadata about the content and the full text.
-  # Store this extracted metadata in the characterization datastream.
-  class CharacterizationService
-    include Hydra::Derivatives::ExtractMetadata
-    delegate :mime_type, :uri, to: :@generic_file
-    attr_reader :generic_file
-    def self.run(generic_file)
-      new(generic_file).characterize
-    end
-    def initialize(generic_file)
-      @generic_file = generic_file
-    end
-    ## Extract the metadata from the content datastream and record it in the characterization datastream
-    def characterize
-      store_metadata(extract_metadata)
-      store_fulltext(extract_fulltext)
-      generic_file.filename = [generic_file.original_file.original_name]
-    end
-    protected
-      def store_fulltext(extracted_text)
-        if extracted_text.present?
-          extracted_text_file = generic_file.build_extracted_text
-          extracted_text_file.content = extracted_text
-        end
-      end
-      def extract_fulltext
-        FullTextExtractionService.run(generic_file)
-      end
-      def store_metadata(metadata)
-        generic_file.characterization.ng_xml = metadata if metadata.present?
-        append_metadata
-      end
-      def extract_metadata
-        return unless generic_file.original_file.has_content?
-        Hydra::FileCharacterization.characterize(generic_file.original_file.content, filename_for_characterization.join, :fits) do |config|
-          config[:fits] = Hydra::Derivatives.fits_path
-        end
-      end
-      # Populate GenericFile's properties with fields from FITS (e.g. Author from pdfs)
-      def append_metadata
-        terms = generic_file.characterization_terms
-        CurationConcerns.config.fits_to_desc_mapping.each_pair do |k, v|
-          if terms.has_key?(k)
-            # coerce to array to remove a conditional
-            terms[k] = [terms[k]] unless terms[k].is_a? Array
-            terms[k].each do |term_value|
-              proxy_term = generic_file.send(v)
-              if proxy_term.kind_of?(Array)
-                proxy_term << term_value unless proxy_term.include?(term_value)
-              else
-                # these are single-valued terms which cannot be appended to
-                generic_file.send("#{v}=", term_value)
-              end
-            end
-          end
-        end
-      end
-  end
-end

data/app/services/curation_concerns/full_text_extraction_service.rb DELETED

@@ -1,38 +0,0 @@
-module CurationConcerns
-  # Extract the full text from the content using Solr's extract handler
-  class FullTextExtractionService
-    def self.run(generic_file)
-      new(generic_file).extract
-    end
-    delegate :original_file, :logger, :mime_type, :id, to: :@generic_file
-    def initialize(generic_file)
-      @generic_file = generic_file
-    end
-    def extract
-      uri = URI("#{connection_url}/update/extract?extractOnly=true&wt=json&extractFormat=text")
-      req = Net::HTTP.new(uri.host, uri.port)
-      resp = req.post(uri.to_s, original_file.content, {
-          'Content-type' => "#{mime_type};charset=utf-8",
-          'Content-Length' => original_file.content.size.to_s
-        })
-      raise "URL '#{uri}' returned code #{resp.code}" unless resp.code == "200"
-      original_file.content.rewind if original_file.content.respond_to?(:rewind)
-      JSON.parse(resp.body)[''].rstrip
-    rescue => e
-      logger.error("Error extracting content from #{id}: #{e.inspect}")
-      return nil
-    end
-    def connection_url
-      case
-        when Blacklight.connection_config[:url] then Blacklight.connection_config[:url]
-        when Blacklight.connection_config["url"] then Blacklight.connection_config["url"]
-        when Blacklight.connection_config[:fulltext] then Blacklight.connection_config[:fulltext]["url"]
-        else Blacklight.connection_config[:default]["url"]
-      end
-    end
-  end
-end

data/app/services/curation_concerns/generic_file_indexing_service.rb DELETED

@@ -1,14 +0,0 @@
-module CurationConcerns
-  class GenericFileIndexingService < ActiveFedora::IndexingService
-    def generate_solr_document
-      super.tap do |solr_doc|
-        solr_doc[Solrizer.solr_name('label')] = object.label
-        solr_doc[Solrizer.solr_name('file_format')] = object.file_format
-        solr_doc[Solrizer.solr_name('file_format', :facetable)] = object.file_format
-        solr_doc[Solrizer.solr_name(:file_size, :symbol)] = object.file_size[0]
-        solr_doc['all_text_timv'] = object.full_text.content
-        solr_doc[Solrizer.solr_name('generic_work_ids', :symbol)] = object.generic_work_ids unless object.generic_work_ids.empty?
-      end
-    end
-  end
-end