RubyGems - rof - Versions diffs - 1.0.7 → 1.2.0 - Mend

rof 1.0.7 → 1.2.0

Files changed (90) hide show

checksums.yaml +4 -4
data/.travis.yml +9 -7
data/LICENSE +201 -16
data/Rakefile +46 -0
data/bin/csv_to_rof +1 -2
data/bin/fedora_to_rof +7 -1
data/bin/jsonld_to_rof +26 -0
data/bin/osf_to_rof +6 -2
data/bin/rof +5 -19
data/lib/rof.rb +2 -6
data/lib/rof/access.rb +1 -1
data/lib/rof/cli.rb +104 -67
data/lib/rof/compare_rof.rb +68 -39
data/lib/rof/filter.rb +21 -0
data/lib/rof/filters.rb +38 -0
data/lib/rof/filters/bendo.rb +15 -17
data/lib/rof/filters/date_stamp.rb +5 -4
data/lib/rof/filters/file_to_url.rb +5 -3
data/lib/rof/filters/label.rb +9 -7
data/lib/rof/filters/work.rb +7 -5
data/lib/rof/ingest.rb +5 -0
data/lib/rof/osf_context.rb +2 -2
data/lib/rof/rdf_context.rb +2 -0
data/lib/rof/translator.rb +18 -0
data/lib/rof/translators.rb +23 -0
data/lib/rof/{translate_csv.rb → translators/csv_to_rof.rb} +4 -3
data/lib/rof/translators/fedora_to_rof.rb +244 -0
data/lib/rof/translators/jsonld_to_rof.rb +112 -0
data/lib/rof/translators/jsonld_to_rof/accumulator.rb +175 -0
data/lib/rof/translators/jsonld_to_rof/predicate_handler.rb +223 -0
data/lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb +125 -0
data/lib/rof/translators/jsonld_to_rof/statement_handler.rb +91 -0
data/lib/rof/translators/osf_to_rof.rb +191 -0
data/lib/rof/utility.rb +44 -1
data/lib/rof/version.rb +1 -1
data/rof.gemspec +10 -2
data/spec/coverage_helper.rb +17 -0
data/spec/fixtures/for_utility_load_items_from_json_file/multiple_items.json +8 -0
data/spec/fixtures/for_utility_load_items_from_json_file/parse_error.json +3 -0
data/spec/fixtures/for_utility_load_items_from_json_file/single_item.json +3 -0
data/spec/fixtures/jsonld_to_rof/0g354f18610.jsonld +113 -0
data/spec/fixtures/jsonld_to_rof/0g354f18610.rof +96 -0
data/spec/fixtures/jsonld_to_rof/2j62s467216.jsonld +113 -0
data/spec/fixtures/jsonld_to_rof/2j62s467216.rof +93 -0
data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.jsonld +70 -0
data/spec/fixtures/jsonld_to_rof/2v23vt16z2z.rof +87 -0
data/spec/fixtures/jsonld_to_rof/cr56n01253w.jsonld +84 -0
data/spec/fixtures/jsonld_to_rof/cr56n01253w.rof +95 -0
data/spec/fixtures/jsonld_to_rof/h989r21069m.jsonld +84 -0
data/spec/fixtures/jsonld_to_rof/h989r21069m.rof +98 -0
data/spec/fixtures/jsonld_to_rof/js956d59913.jsonld +79 -0
data/spec/fixtures/jsonld_to_rof/js956d59913.rof +89 -0
data/spec/fixtures/jsonld_to_rof/m039k358q5c.jsonld +80 -0
data/spec/fixtures/jsonld_to_rof/m039k358q5c.rof +64 -0
data/spec/fixtures/jsonld_to_rof/nk322b9161g.jsonld +89 -0
data/spec/fixtures/jsonld_to_rof/nk322b9161g.rof +69 -0
data/spec/fixtures/jsonld_to_rof/p8418k7430d.jsonld +84 -0
data/spec/fixtures/jsonld_to_rof/p8418k7430d.rof +67 -0
data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.jsonld +98 -0
data/spec/fixtures/jsonld_to_rof/xg94hm53h0c.rof +110 -0
data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.jsonld +94 -0
data/spec/fixtures/jsonld_to_rof/zk51vd69n1r.rof +121 -0
data/spec/fixtures/osf/phz6b.tar.gz +0 -0
data/spec/lib/rof/access_spec.rb +30 -23
data/spec/lib/rof/cli_spec.rb +83 -60
data/spec/lib/rof/compare_rof_spec.rb +35 -24
data/spec/lib/rof/filter_spec.rb +10 -0
data/spec/lib/rof/filters/bendo_spec.rb +42 -0
data/spec/lib/rof/filters/date_stamp_spec.rb +9 -5
data/spec/lib/rof/filters/file_to_url_spec.rb +7 -3
data/spec/lib/rof/filters/label_spec.rb +121 -77
data/spec/lib/rof/filters/work_spec.rb +7 -4
data/spec/lib/rof/filters_spec.rb +14 -0
data/spec/lib/rof/translator_spec.rb +15 -0
data/spec/lib/rof/{translate_csv_spec.rb → translators/csv_to_rof_spec.rb} +14 -14
data/spec/lib/rof/translators/fedora_to_rof_spec.rb +64 -0
data/spec/lib/rof/translators/jsonld_to_rof/accumulator_spec.rb +121 -0
data/spec/lib/rof/translators/jsonld_to_rof/predicate_handler_spec.rb +73 -0
data/spec/lib/rof/translators/jsonld_to_rof/predicate_object_handler_spec.rb +48 -0
data/spec/lib/rof/translators/jsonld_to_rof/statement_handler_spec.rb +40 -0
data/spec/lib/rof/translators/jsonld_to_rof_spec.rb +120 -0
data/spec/lib/rof/{osf_to_rof_spec.rb → translators/osf_to_rof_spec.rb} +55 -25
data/spec/lib/rof/translators_spec.rb +14 -0
data/spec/lib/rof/utility_spec.rb +47 -1
data/spec/spec_helper.rb +1 -1
data/spec/support/an_rof_filter.rb +10 -0
metadata +186 -15
data/lib/rof/get_from_fedora.rb +0 -211
data/lib/rof/osf_to_rof.rb +0 -123
data/spec/lib/rof/get_from_fedora_spec.rb +0 -22

@@ -1,211 +0,0 @@
-require 'json'
-require 'rexml/document'
-require 'rdf/ntriples'
-require 'rdf/rdfxml'
-require 'rubydora'
-module ROF
-  class FedoraToRof
-    # connect to fedora and fetch object
-    # returns array of fedora attributes or nil
-    def self.GetFromFedora(pid, fedora, config)
-      @fedora_info = {}
-      # Try to connect to fedora, and search for the desired item
-      # If either of these actions fail, handle it, and exit.
-      begin
-        fedora = Rubydora.connect(fedora)
-        doc = fedora.find(pid)
-      rescue StandardError => e
-        puts "Error: #{e}"
-        exit 1
-      end
-      # set pid, type
-      @fedora_info['pid'] = pid
-      @fedora_info['type'] = 'fobject'
-      readFedora(doc, config)
-      @fedora_info
-    end
-    # Given a rubydora object, extract what we need
-    # to create our ROF object in an associative array
-    #
-    def self.readFedora(rdora_obj, config)
-      @fedora_info['af-model'] = setModel(rdora_obj)
-      # iterate through the data streams that are present.
-      # use reflection to call appropriate method for each
-      rdora_obj.datastreams.each do |dsname, ds|
-        next if dsname == 'DC'
-        method_key = dsname.sub('-', '')
-        if respond_to?(method_key)
-          send(method_key, ds, config)
-        else
-          # dump generic datastream
-          meta = create_meta(ds, config)
-          @fedora_info["#{dsname}-meta"] = meta unless meta.empty?
-          # if content is short < X bytes and valid utf-8, save as string
-          # if content is > X bytes or is not utf-8, save as file only if config option is given
-          content = ds.datastream_content
-          if content.length <= 1024 || config['inline']
-            # this downloads the contents of the datastream into memory
-            content_string = content.to_s.force_encoding('UTF-8')
-            if content_string.valid_encoding?
-              @fedora_info[dsname] = content_string
-              next # we're done! move on to next datastream
-            end
-            # not utf-8, so keep going and see if download option was given
-          end
-          next unless config['download']
-          # download option was given, so save this datastream as a file
-          fname = "#{@fedora_info['pid']}-#{dsname}"
-          abspath = File.join(config['download_path'], fname)
-          @fedora_info["#{dsname}-file"] = fname
-          if File.file?(config['download_path'])
-            puts "Error: --download directory #{config['download_path']} specified is an existing file."
-            exit 1
-          end
-          FileUtils.mkdir_p(config['download_path'])
-          File.open(abspath, 'w') do |f|
-            f.write(content)
-          end
-        end
-      end
-    end
-    def self.create_meta(ds, config)
-      result = {}
-      label = ds.profile['dsLabel']
-      result['label'] = label unless label.nil? || label == ''
-      result['mime-type'] = ds.profile['dsMIME'] if ds.profile['dsMIME'] != 'text/plain'
-      # TODO(dbrower): make sure this is working as intended
-      if %w(R E).include?(ds.profile['dsControlGroup'])
-        s = result['URL'] = ds.profile['dsLocation']
-        s = s.sub(config['bendo'], 'bendo:') if config['bendo']
-        result['URL'] = s
-      end
-      result
-    end
-    # set fedora_info['af-model']
-    #
-    def self.setModel(rdora_obj)
-      # only keep info:fedora/afmodel:XXXXX
-      models = rdora_obj.profile['objModels'].map do |model|
-        Regexp.last_match(1) if model =~ /^info:fedora\/afmodel:(.*)/
-      end.compact
-      models[0]
-    end
-    # The methods below are called if the like-named datastream exists in fedora
-    # set metadata
-    #
-    def self.descMetadata(ds, _config)
-      # descMetadata is encoded in ntriples, convert to JSON-LD using our special context
-      graph = RDF::Graph.new
-      data = ds.datastream_content
-      # force utf-8 encoding. fedora does not store the encoding, so it defaults to ASCII-8BIT
-      # see https://github.com/ruby-rdf/rdf/issues/142
-      data.force_encoding('utf-8')
-      graph.from_ntriples(data, format: :ntriples)
-      JSON::LD::API.fromRdf(graph) do |expanded|
-        result = JSON::LD::API.compact(expanded, RdfContext)
-        @fedora_info['metadata'] = result
-      end
-    end
-    # set rights
-    #
-    def self.rightsMetadata(ds, _config)
-      # rights is an XML document
-      # the access array may have read or edit elements
-      # each of these elements may contain group or person elements
-      xml_doc = REXML::Document.new(ds.datastream_content)
-      rights_array = {}
-      root = xml_doc.root
-      # check for optional embargo date - set if present
-      this_embargo = root.elements['embargo']
-      rights_array['embargo-date'] = this_embargo.elements['machine'].elements['date'][0] if has_embargo_date(this_embargo)
-      %w(read edit).each do |access|
-        this_access = root.elements["//access[@type=\'#{access}\']"]
-        next if this_access.nil?
-        unless this_access.elements['machine'].elements['group'].nil?
-          group_array = []
-          this_access.elements['machine'].elements['group'].each do |this_group|
-            group_array << this_group
-          end
-          rights_array["#{access}-groups"] = group_array
-        end
-        next if this_access.elements['machine'].elements['person'].nil?
-        person_array = []
-        this_access.elements['machine'].elements['person'].each do |this_person|
-          person_array << this_person
-        end
-        rights_array[access.to_s] = person_array
-      end
-      @fedora_info['rights'] = rights_array
-    end
-    # test for embargo xml cases
-    def self.has_embargo_date(embargo_xml)
-      return false if embargo_xml == '' || embargo_xml.nil?
-      return false unless embargo_xml.elements['machine'].has_elements? && embargo_xml.elements['machine'].elements['date'].has_text?
-      true
-    end
-    def self.RELSEXT(ds, _config)
-      # RELS-EXT is RDF-XML - parse it
-      ctx = ROF::RelsExtRefContext.dup
-      ctx.delete('@base') # @base causes problems when converting TO json-ld (it is = "info:/fedora") but info is not a namespace
-      graph = RDF::Graph.new
-      graph.from_rdfxml(ds.datastream_content)
-      result = nil
-      JSON::LD::API.fromRdf(graph) do |expanded|
-        result = JSON::LD::API.compact(expanded, ctx)
-      end
-      # now strip the info:fedora/ prefix from the URIs
-      strip_info_fedora(result)
-      # remove extra items
-      result.delete('hasModel')
-      @fedora_info['rels-ext'] = result
-    end
-    private
-    def self.strip_info_fedora(rels_ext)
-      rels_ext.each do |relation, targets|
-        next if relation == '@context'
-        if targets.is_a?(Hash)
-          strip_info_fedora(targets)
-          next
-        end
-        targets = [targets] if targets.is_a?(String)
-        targets.map! do |target|
-          if target.is_a?(Hash)
-            strip_info_fedora(target)
-          else
-            target.sub('info:fedora/', '')
-          end
-        end
-        # some single strings cannot be arrays in json-ld, so convert back
-        # this shouldn't cause any problems with items that began as arrays
-        targets = targets[0] if targets.length == 1
-        rels_ext[relation] = targets
-      end
-    end
-  end
-end

data/lib/rof/osf_to_rof.rb DELETED

@@ -1,123 +0,0 @@
-require 'json'
-require 'zlib'
-require 'rubygems/package'
-require 'rdf/turtle'
-require 'rof/osf_context'
-require 'rof/rdf_context'
-require 'rof/utility'
-module ROF
-  # Class for managing OSF Archive data transformations
-  # It is called after the get-from-osf task, and before the work-xlat task
-  class OsfToRof
-    # Convert Osf Archive tar.gz  to ROF
-    def self.osf_to_rof(config, osf_projects = nil)
-      @osf_map = ROF::OsfToNDMap
-      rof_array = []
-      return {} if osf_projects.nil?
-      this_project = osf_projects
-      ttl_data = ttl_from_targz(config, this_project,
-                                this_project['project_identifier'] + '.ttl')
-      rof_array[0] = build_archive_record(config, this_project, ttl_data)
-      rof_array
-    end
-    # reads a ttl file and makes it a JSON-LD file that we can parse
-    def self.fetch_from_ttl(ttl_file)
-      graph = RDF::Turtle::Reader.open(ttl_file,
-                                       prefixes:  ROF::OsfPrefixList.dup)
-      JSON::LD::API.fromRdf(graph)
-    end
-    # extracts given ttl file from JHU tar.gz package
-    # - assumed to live under data/obj/root
-    def self.ttl_from_targz(config, this_project, ttl_filename)
-      id =  this_project['project_identifier']
-      ttl_path = File.join(id,
-                           'data/obj/root',
-                           ttl_filename)
-      ROF::Utility.file_from_targz(File.join(config['package_dir'], id + '.tar.gz'),
-                                   ttl_path)
-      ttl_data = fetch_from_ttl(File.join(config['package_dir'], ttl_path))
-      # this is an array - the additional elements are the contributor(s)
-      ttl_data
-    end
-    # Maps RELS-EXT
-    def self.map_rels_ext(_ttl_data)
-      rels_ext = {}
-      rels_ext['@context'] = ROF::RelsExtRefContext.dup
-      rels_ext
-    end
-    # sets metadata
-    def self.map_metadata(config, project, ttl_data)
-      metadata = {}
-      metadata['@context'] = ROF::RdfContext.dup
-      # metadata derived from project ttl file
-      metadata['dc:created'] = Time.iso8601(ttl_data[0][@osf_map['dc:created']][0]['@value']).to_date.iso8601 + 'Z'
-      metadata['dc:title'] = ttl_data[0][@osf_map['dc:title']][0]['@value']
-      metadata['dc:description'] =
-        ttl_data[0][@osf_map['dc:description']][0]['@value']
-      metadata['dc:subject'] = map_subject(ttl_data[0])
-      # metadata derived from osf_projects data, passed from UI
-      metadata['dc:source'] = 'https://osf.io/' + project['project_identifier']
-      metadata['dc:creator#adminstrative_unit'] = project['administrative_unit']
-      metadata['dc:creator#affiliation'] = project['affiliation']
-      metadata['dc:creator'] = map_creator(config, project, ttl_data)
-      metadata
-    end
-    # Constructs OsfArchive Record from ttl_data, data from the UI form,
-    # and task config data
-    def self.build_archive_record(config, this_project, ttl_data)
-      this_rof = {}
-      this_rof['owner'] = this_project['owner']
-      this_rof['type'] = 'OsfArchive'
-      this_rof['rights'] = map_rights(ttl_data[0])
-      this_rof['rels-ext'] = map_rels_ext(ttl_data[0])
-      this_rof['metadata'] = map_metadata(config, this_project, ttl_data)
-      this_rof['files'] = [this_project['project_identifier'] + '.tar.gz']
-      this_rof
-    end
-    # sets subject
-    def self.map_subject(ttl_data)
-      if ttl_data.key?(@osf_map['dc:subject'])
-        return ttl_data[@osf_map['dc:subject']][0]['@value']
-      end
-      ''
-    end
-    # figures out the rights
-    def self.map_rights(ttl_data)
-      rights = {}
-      if ttl_data[@osf_map['isPublic']][0]['@value'] == 'true'
-        rights['read-groups'] = ['public']
-      end
-      rights
-    end
-    # sets the creator- needs to read another ttl for the User data
-    # only contributors with isBibliographic true are considered
-    def self.map_creator(config, project, ttl_data)
-      creator = []
-      ttl_data[0][@osf_map['hasContributor']].each do |contributor|
-        ttl_data.each do |item|
-          next unless item['@id'] == contributor['@id']
-          if item[@osf_map['isBibliographic']][0]['@value'] == 'true'
-            creator.push map_user_from_ttl(config, project,
-                                           item[@osf_map['hasUser']][0]['@id'])
-          end
-        end
-      end
-      creator
-    end
-    # read user ttl file, extract User's full name
-    def self.map_user_from_ttl(config, project, file_subpath)
-      ttl_data = ttl_from_targz(config, project, File.basename(file_subpath))
-      ttl_data[0][@osf_map['hasFullName']][0]['@value']
-    end
-  end
-end

data/spec/lib/rof/get_from_fedora_spec.rb DELETED

@@ -1,22 +0,0 @@
-require 'spec_helper'
-RSpec.describe ROF::FedoraToRof do
-  it 'handles embargo presence or absence' do
-    rights_tests = [
-      ['<embargo> <human/> <machine> <date>2017-08-01</date> </machine> </embargo>', true],
-      ['<embargo> <human/> <machine> <date></date> </machine> </embargo>', false],
-      ['<embargo> <human/> <machine/> </embargo>', false]
-    ]
-    begin
-      rights_tests.each do |this_test|
-        xml_doc = REXML::Document.new(this_test[0])
-        root = xml_doc.root
-        rights = ROF::FedoraToRof.has_embargo_date(root)
-        expect(rights).to eq(this_test[1])
-      end
-    end
-  end
-end