merritt-manifest 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +198 -0
- data/.idea/emacs.xml +6 -0
- data/.idea/encodings.xml +6 -0
- data/.idea/inspectionProfiles/Project_Default.xml +13 -0
- data/.idea/misc.xml +6 -0
- data/.idea/modules.xml +8 -0
- data/.idea/sbt.xml +6 -0
- data/.rubocop.yml +35 -0
- data/.ruby-version +1 -0
- data/.travis.yml +2 -0
- data/.yardopts +1 -0
- data/Gemfile +3 -0
- data/LICENSE.md +22 -0
- data/README.md +110 -0
- data/Rakefile +38 -0
- data/lib/merritt/manifest/data_one.rb +48 -0
- data/lib/merritt/manifest/fields.rb +119 -0
- data/lib/merritt/manifest/file.rb +33 -0
- data/lib/merritt/manifest/module_info.rb +12 -0
- data/lib/merritt/manifest/object.rb +32 -0
- data/lib/merritt/manifest.rb +128 -0
- data/lib/merritt/util.rb +19 -0
- data/lib/merritt.rb +4 -0
- data/merritt-manifest.gemspec +37 -0
- data/merritt-manifest.iml +37 -0
- data/spec/.rubocop.yml +10 -0
- data/spec/data/4blocks.checkm +8 -0
- data/spec/data/mrt-dataone-manifest.txt +32 -0
- data/spec/data/sampleBatchOfContainers.checkm +9 -0
- data/spec/data/sampleBatchOfFiles.checkm +9 -0
- data/spec/data/sampleBatchOfManifests.checkm +9 -0
- data/spec/spec_helper.rb +29 -0
- data/spec/unit/merritt/manifest/data_one_spec.rb +122 -0
- data/spec/unit/merritt/manifest/fields_spec.rb +55 -0
- data/spec/unit/merritt/manifest/object_spec.rb +69 -0
- data/spec/unit/merritt/manifest_spec.rb +134 -0
- metadata +203 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'merritt/manifest/fields'
|
2
|
+
|
3
|
+
module Merritt
  class Manifest
    # Marker mixin describing the readable attributes of a file-like object.
    # Every attribute is optional from the mixin's point of view: whether a
    # given one is meaningful depends on the manifest format being produced.
    module File
      # @return [nil, URI] the URL from which the file can be retrieved
      attr_reader :file_url

      # @return [nil, String] the name of the algorithm used to hash the file
      attr_reader :hash_algorithm

      # @return [nil, String] the hash (digest) value of the file
      attr_reader :hash_value

      # @return [nil, Integer] the size of the file, in bytes
      attr_reader :file_size

      # @return [nil, DateTime] when the file was last modified.
      #   Note that according to the
      #   [Merritt Ingest Service docs](https://confluence.ucop.edu/download/attachments/16744573/Merritt-ingest-service-latest.pdf),
      #   “modification time field SHOULD NOT be specified, and will be ignored if provided.”
      attr_reader :file_last_modified

      # @return [nil, String] the file name. If not present, will be
      #   inferred from {#file_url}.
      attr_reader :file_name

      # @return [nil, MIME::Type] the MIME type of the file
      attr_reader :mime_type
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Merritt
  class Manifest
    # Gem name, as published
    NAME = 'merritt-manifest'.freeze

    # Gem version string
    VERSION = '0.1.0'.freeze

    # Copyright notice attached to the gem
    COPYRIGHT = 'Copyright (c) 2017 The Regents of the University of California'.freeze
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Merritt
  class Manifest
    # A {Manifest} specialized for Merritt object submissions: the profile,
    # prefixes and fields are fixed, and entries are derived from file objects.
    class Object < Manifest

      # Creates a new {Manifest::Object}
      # @param files [Array<Manifest::File>] the data files to be converted to entries.
      #   (Note that these need not be actual {Manifest::File} objects so long as they
      #   respond to, at minimum, `#file_url`. The other {Manifest::File} fields are
      #   optional.)
      def initialize(files:)
        object_fields = Merritt::Manifest::Fields::Object
        super(
          profile: 'http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest',
          prefixes: object_fields.prefixes,
          fields: object_fields.fields,
          entries: to_entries(files)
        )
      end

      private

      # Builds one entry hash per file, keyed by field name, by asking each
      # object-manifest field for its value from that file.
      # @param files [Array] the file-like objects
      # @return [Array<Hash>] one hash per file, in the same order as `files`
      def to_entries(files)
        files.map do |file|
          pairs = Merritt::Manifest::Fields::Object.map do |field|
            [field.field_name, field.value_from(file)]
          end
          pairs.to_h
        end
      end
    end
  end
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
Dir.glob(File.expand_path('../manifest/*.rb', __FILE__)).sort.each(&method(:require))
|
2
|
+
|
3
|
+
module Merritt
  # A Merritt manifest file
  class Manifest

    # Base for all recognized profile URIs
    PROFILE_BASE_URI = 'http://uc3.cdlib.org/registry/ingest/manifest/'.freeze

    # Checkm 0.7 conformance level
    CHECKM_0_7 = 'checkm_0.7'.freeze

    # @return [String] the conformance level
    attr_reader :conformance

    # @return [URI] the profile URI
    attr_reader :profile

    # @return [Hash{Symbol => URI}] a map from namespace prefixes to their URIs
    attr_reader :prefixes

    # @return [Array<String>] the field names, in the form prefix:fieldname
    attr_reader :fields

    # @return [Array<Hash{String => Object}>] the entries
    attr_reader :entries

    # Creates a new manifest. Note that the prefix, field, and entry arrays are
    # copied on initialization, as are the individual entry hashes.
    #
    # @param conformance [String] the conformance level. Defaults to {CHECKM_0_7}.
    # @param profile [URI, String] the profile URI. Must begin with {PROFILE_BASE_URI}.
    # @param prefixes [Hash{String,Symbol => URI, String}] a map from namespace prefixes to their URIs
    # @param fields [Array<String>] a list of field names, in the form prefix:fieldname
    # @param entries [Array<Hash{String => Object}>] A list of entries, each of which is a hash keyed by
    #   a prefixed fieldname defined in `fields`. Nil values are allowed.
    # @raise [ArgumentError] if `profile` does not begin with {PROFILE_BASE_URI}
    # @raise [ArgumentError] if `fields` cannot be parsed as prefix:fieldname, or if one or more prefixes
    #   is not mapped to a URI in `prefixes`
    # @raise [ArgumentError] if `entries` contains a nil entry, or an entry with no value for
    #   any of `fields`
    # @raise [URI::InvalidURIError] if `profile` cannot be parsed as a URI
    def initialize(conformance: CHECKM_0_7, profile:, prefixes: {}, fields: [], entries: [])
      @conformance = conformance
      @profile = normalize_profile_uri(profile).freeze
      # Order matters below: field validation reads #prefixes, and entry
      # normalization reads #fields.
      @prefixes = normalize_prefixes(prefixes).freeze
      @fields = validate_fields(fields).freeze
      @entries = normalize_entries(entries).freeze
    end

    # Writes this manifest to the specified IO
    # @param io [IO] the IO to write to
    def write_to(io)
      write_sc(io, conformance)
      write_sc(io, 'profile', profile)
      prefixes.each { |prefix, url| write_sc(io, 'prefix', "#{prefix}:", url) }
      write_sc(io, 'fields', *fields)
      entries.each { |entry| io.puts(entry_line(entry)) }
      write_sc(io, 'eof')
    end

    # Writes this manifest as a string
    # @return [String] the manifest file contents as a string
    def write_to_string
      io = StringIO.new
      write_to(io)
      io.string
    end

    private

    # checkm column separator
    COLSEP = ' | '.freeze

    # Formats one entry as a manifest line: values in field order, joined by
    # {COLSEP}, with any trailing run of separator characters (from trailing
    # nil/empty values) removed.
    # @param entry [Hash{String => Object}] the entry
    # @return [String] the line, without a trailing newline
    def entry_line(entry)
      fields.map { |f| entry[f] }.join(COLSEP).sub(/[| ]+\z/, '')
    end

    # writes a checkm "structured comment"
    # @param io [IO] the IO to write to
    # @param comment [String] the comment
    # @param columns [nil, Array<String>] columns to follow the initial comment
    def write_sc(io, comment, *columns)
      io << '#%' << comment
      io << COLSEP << columns.join(COLSEP) unless columns.empty?
      io << "\n"
    end

    # Copies the entry list, normalizing each entry.
    # @param entries [Array<Hash>] the raw entries
    # @return [Array<Hash>] the normalized copies
    # @raise [ArgumentError] if any entry is nil
    def normalize_entries(entries)
      entries.each_with_index.map do |entry, i|
        raise ArgumentError, "Nil entry at index #{i}" unless entry
        normalize_entry(entry)
      end
    end

    # Copies an entry, keeping only keys listed in #fields whose values are
    # truthy.
    # @param entry [Hash] the raw entry
    # @return [Hash] the normalized copy
    # @raise [ArgumentError] if the entry has no value for any known field
    def normalize_entry(entry)
      normalized = {}
      fields.each do |f|
        next unless (value = entry[f])
        normalized[f] = value
      end
      raise ArgumentError, "No fields found in entry #{entry}" if normalized.empty?
      normalized
    end

    # Validates each field name, returning a new array.
    # @param fields [Array<String>] the field names
    # @return [Array<String>] a copy of `fields`
    # @raise [ArgumentError] if any field is invalid (see #validate_field)
    def validate_fields(fields)
      fields.map { |f| validate_field(f) }
    end

    # Checks that a field name has the form prefix:fieldname and that the
    # prefix is declared in #prefixes.
    # @param field [String] the field name
    # @return [String] the field name, unchanged
    # @raise [ArgumentError] if the field cannot be parsed, or its prefix is unknown
    def validate_field(field)
      prefix, fieldname = field.split(':')
      # Check the shape first, so a blank field name is reported as
      # unparseable rather than failing on a nil prefix.
      raise ArgumentError, "Field '#{field}' cannot be parsed as prefix:fieldname" unless prefix && fieldname
      raise ArgumentError, "Unknown prefix in field '#{field}': #{prefix}" unless prefixes.key?(prefix.to_sym)
      field
    end

    # Converts prefix keys to symbols and values to URIs.
    # @param prefixes [nil, Hash] the raw prefix map
    # @return [Hash{Symbol => URI}] a new hash; empty if `prefixes` is nil
    def normalize_prefixes(prefixes)
      return {} unless prefixes
      prefixes.map { |k, v| [k.to_sym, Util.to_uri(v)] }.to_h
    end

    # Converts the profile to a URI and verifies it is a recognized profile.
    # @param profile [nil, URI, String] the raw profile
    # @return [URI] a copy of the profile URI
    # @raise [ArgumentError] if `profile` is nil or does not begin with {PROFILE_BASE_URI}
    def normalize_profile_uri(profile)
      profile_uri = Util.to_uri(profile)
      raise ArgumentError, "Invalid profile: #{profile || 'nil'}" unless profile_uri &&
                                                                         profile_uri.to_s.start_with?(PROFILE_BASE_URI)
      profile_uri.clone # defensive copy
    end

  end
end
|
data/lib/merritt/util.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Merritt
  # Miscellaneous utility methods
  module Util
    # Coerces the argument to a URI.
    #
    # @param url [nil, String, URI, #to_s] the value to coerce. A URI is
    #   returned as-is. Anything else is converted to a string (via `to_s`
    #   unless it already responds to `strip`), stripped of surrounding
    #   whitespace, and parsed.
    # @return [nil, URI] `nil` if `url` is nil (or false), otherwise the URI.
    # @raise [URI::InvalidURIError] if the stripped text is not a valid URI
    def self.to_uri(url)
      case url
      when nil, false
        nil
      when URI
        url
      else
        text = url.respond_to?(:strip) ? url : url.to_s
        URI.parse(text.strip)
      end
    end
  end
end
|
data/lib/merritt.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
|
6
|
+
require 'merritt/manifest/module_info'
|
7
|
+
require 'uri'
|
8
|
+
|
9
|
+
Gem::Specification.new do |spec|
  spec.name = Merritt::Manifest::NAME
  spec.version = Merritt::Manifest::VERSION
  spec.authors = ['David Moles']
  spec.email = ['david.moles@ucop.edu']
  spec.summary = 'Merritt manifest generation library'
  spec.description = 'Merritt manifest generation library'
  spec.license = 'MIT'

  # Derive the homepage from the git remote. HTTP(S) remotes are parsed
  # directly; scp-style SSH remotes (git@host:owner/repo.git) are rewritten
  # to http://host/owner/repo.git first, keeping the full repository path.
  origin = `git config --get remote.origin.url`.chomp
  origin_uri = origin.start_with?('http') ? URI(origin) : URI(origin.sub(/\Agit@([^:]+):/, 'http://\1/'))
  spec.homepage = URI::HTTP.build(host: origin_uri.host, path: origin_uri.path.chomp('.git')).to_s

  # Package everything tracked by git; executables are the basenames of bin/ files.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }

  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'typesafe_enum', '~> 0.1.7'

  spec.add_development_dependency 'bundler', '~> 1.14'
  spec.add_development_dependency 'rake', '~> 12.0'
  spec.add_development_dependency 'rspec', '~> 3.5'
  spec.add_development_dependency 'simplecov', '~> 0.14'
  spec.add_development_dependency 'simplecov-console', '~> 0.4'
  spec.add_development_dependency 'rubocop', '~> 0.47'
  spec.add_development_dependency 'yard', '~> 0.9'
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<module type="RUBY_MODULE" version="4">
|
3
|
+
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
4
|
+
<exclude-output />
|
5
|
+
<content url="file://$MODULE_DIR$" />
|
6
|
+
<orderEntry type="jdk" jdkName="ruby-2.2.5-p319" jdkType="RUBY_SDK" />
|
7
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
8
|
+
<orderEntry type="library" scope="PROVIDED" name="ansi (v1.5.0, ruby-2.2.5-p319) [gem]" level="application" />
|
9
|
+
<orderEntry type="library" scope="PROVIDED" name="ast (v2.3.0, ruby-2.2.5-p319) [gem]" level="application" />
|
10
|
+
<orderEntry type="library" scope="PROVIDED" name="bundler (v1.14.6, ruby-2.2.5-p319) [gem]" level="application" />
|
11
|
+
<orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.3, ruby-2.2.5-p319) [gem]" level="application" />
|
12
|
+
<orderEntry type="library" scope="PROVIDED" name="docile (v1.1.5, ruby-2.2.5-p319) [gem]" level="application" />
|
13
|
+
<orderEntry type="library" scope="PROVIDED" name="hirb (v0.7.3, ruby-2.2.5-p319) [gem]" level="application" />
|
14
|
+
<orderEntry type="library" scope="PROVIDED" name="json (v2.0.4, ruby-2.2.5-p319) [gem]" level="application" />
|
15
|
+
<orderEntry type="library" scope="PROVIDED" name="parser (v2.4.0.0, ruby-2.2.5-p319) [gem]" level="application" />
|
16
|
+
<orderEntry type="library" scope="PROVIDED" name="powerpack (v0.1.1, ruby-2.2.5-p319) [gem]" level="application" />
|
17
|
+
<orderEntry type="library" scope="PROVIDED" name="rainbow (v2.2.1, ruby-2.2.5-p319) [gem]" level="application" />
|
18
|
+
<orderEntry type="library" scope="PROVIDED" name="rake (v12.0.0, ruby-2.2.5-p319) [gem]" level="application" />
|
19
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
|
20
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-core (v3.5.4, ruby-2.2.5-p319) [gem]" level="application" />
|
21
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
|
22
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
|
23
|
+
<orderEntry type="library" scope="PROVIDED" name="rspec-support (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
|
24
|
+
<orderEntry type="library" scope="PROVIDED" name="rubocop (v0.48.1, ruby-2.2.5-p319) [gem]" level="application" />
|
25
|
+
<orderEntry type="library" scope="PROVIDED" name="ruby-progressbar (v1.8.1, ruby-2.2.5-p319) [gem]" level="application" />
|
26
|
+
<orderEntry type="library" scope="PROVIDED" name="simplecov (v0.14.1, ruby-2.2.5-p319) [gem]" level="application" />
|
27
|
+
<orderEntry type="library" scope="PROVIDED" name="simplecov-console (v0.4.1, ruby-2.2.5-p319) [gem]" level="application" />
|
28
|
+
<orderEntry type="library" scope="PROVIDED" name="simplecov-html (v0.10.0, ruby-2.2.5-p319) [gem]" level="application" />
|
29
|
+
<orderEntry type="library" scope="PROVIDED" name="typesafe_enum (v0.1.7, ruby-2.2.5-p319) [gem]" level="application" />
|
30
|
+
<orderEntry type="library" scope="PROVIDED" name="unicode-display_width (v1.2.1, ruby-2.2.5-p319) [gem]" level="application" />
|
31
|
+
<orderEntry type="library" scope="PROVIDED" name="yard (v0.9.8, ruby-2.2.5-p319) [gem]" level="application" />
|
32
|
+
</component>
|
33
|
+
<component name="RModuleSettingsStorage">
|
34
|
+
<LOAD_PATH number="2" string0="$MODULE_DIR$/lib" string1="$MODULE_DIR$/spec" />
|
35
|
+
<I18N_FOLDERS number="0" />
|
36
|
+
</component>
|
37
|
+
</module>
|
data/spec/.rubocop.yml
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
#%checkm_0.7
|
2
|
+
#%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest
|
3
|
+
#%prefix | mrt: | http://merritt.cdlib.org/terms#
|
4
|
+
#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
|
5
|
+
#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType
|
6
|
+
http://merritt.cdlib.org/samples/4blocks.jpg | md5 | 0b21c6d48e815dd537d42dc1cfac0111 | | | 4blocks.jpg
|
7
|
+
http://merritt.cdlib.org/samples/4blocks.txt | md5 | ed04a855f89f31f8dc8e9bb946f5f159 | | | 4blocks.txt
|
8
|
+
#%eof
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#%dataonem_0.1
|
2
|
+
#%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-dataone-manifest
|
3
|
+
#%prefix | dom: | http://uc3.cdlib.org/ontology/dataonem
|
4
|
+
#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom
|
5
|
+
#%fields | dom:scienceMetadataFile | dom:scienceMetadataFormat | dom:scienceDataFile | mrt:mimeType
|
6
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | Laney_300394_Exempt_Determination_Letter.pdf | application/pdf
|
7
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | Laney_300394_Exempt_Determination_Letter.pdf | application/pdf
|
8
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | Laney_IRBProposal.docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
9
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | Laney_IRBProposal.docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
10
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | SensorSurvey_Printout.pdf | application/pdf
|
11
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | SensorSurvey_Printout.pdf | application/pdf
|
12
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | clean_survey_data_no_ids.csv | text/csv
|
13
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | clean_survey_data_no_ids.csv | text/csv
|
14
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | research_coords.csv | text/csv
|
15
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | research_coords.csv | text/csv
|
16
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q10/research_sites.R | text/plain
|
17
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q10/research_sites.R | text/plain
|
18
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q11-23/sensors_platforms.R | text/plain
|
19
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q11-23/sensors_platforms.R | text/plain
|
20
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q24/limitsToExpansion.R | text/plain
|
21
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q24/limitsToExpansion.R | text/plain
|
22
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q25-32/data_metadata_management.R | text/plain
|
23
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q25-32/data_metadata_management.R | text/plain
|
24
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q3-9/respondent_info.R | text/plain
|
25
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q3-9/respondent_info.R | text/plain
|
26
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q33-37/networking.R | text/plain
|
27
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q33-37/networking.R | text/plain
|
28
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q38-42/publications.R | text/plain
|
29
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q38-42/publications.R | text/plain
|
30
|
+
mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey_data_prep.R | text/plain
|
31
|
+
mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey_data_prep.R | text/plain
|
32
|
+
#%eof
|
@@ -0,0 +1,9 @@
|
|
1
|
+
#%checkm_0.7
|
2
|
+
#%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-container-batch-manifest
|
3
|
+
#%prefix | mrt: | http://merritt.cdlib.org/terms#
|
4
|
+
#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
|
5
|
+
#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
|
6
|
+
http://merritt.cdlib.org/samples/huskyChicken.zip | | | | | huskyChicken.zip | | | Tracy Seneca | Husky Chicken | 2009-09-01T00:00:00
|
7
|
+
http://merritt.cdlib.org/samples/outdoorStore.zip | | | | | outdoorStore.zip | | | | The Outdoor Store, Portland OR
|
8
|
+
http://merritt.cdlib.org/samples/souvenirs.zip | | | | | souvenirs.zip | | | Tracy Seneca | SouvenirsLast Summer
|
9
|
+
#%eof
|
@@ -0,0 +1,9 @@
|
|
1
|
+
#%checkm_0.7
|
2
|
+
#%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-single-file-batch-manifest
|
3
|
+
#%prefix | mrt: | http://merritt.cdlib.org/terms#
|
4
|
+
#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
|
5
|
+
#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
|
6
|
+
http://merritt.cdlib.org/samples/goldenDragon.jpg | md5 | aa59e145dfb2237ecd8cb5ce4f4953ea | | | goldenDragon.jpg | | | Tracy Seneca | Golden Dragon Restaurant Sign | 2009-09-01T00:00:00
|
7
|
+
http://merritt.cdlib.org/samples/tumbleBug.jpg | md5 | 61224ef4f2a8dad552ff57980ed0bf49 | | | tumbleBug.jpg | | | Tracy Seneca | Tumble Bug, Luna Park. Sydney
|
8
|
+
http://merritt.cdlib.org/samples/generalDrapery.jpg | | | | | generalDrapery.jpg | | | | General Drapery
|
9
|
+
#%eof
|
@@ -0,0 +1,9 @@
|
|
1
|
+
#%checkm_0.7
|
2
|
+
#%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-batch-manifest
|
3
|
+
#%prefix | mrt: | http://merritt.cdlib.org/terms#
|
4
|
+
#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
|
5
|
+
#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
|
6
|
+
http://merritt.cdlib.org/samples/manifests/bigHunt.checkm | | | | | bigHunt.checkm | | | Tracy Seneca | | 2009-09-01T00:00:00
|
7
|
+
http://merritt.cdlib.org/samples/manifests/4blocks.checkm | | | | | 4blocks.checkm | | | Tracy Seneca | Motel Capri - manifest style | 2009-09-01T00:00:00
|
8
|
+
http://merritt.cdlib.org/samples/manifests/call911.checkm | | | | | call911.checkm | | | Tracy Seneca | If This Bell Rings Call 911 - manifest style | 2009-09-01T00:00:00
|
9
|
+
#%eof
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# ------------------------------------------------------------
# SimpleCov setup (only when COVERAGE is set in the environment)

if ENV['COVERAGE']
  require 'simplecov'
  require 'simplecov-console'

  SimpleCov.minimum_coverage 100
  SimpleCov.start do
    add_filter '/spec/'
    # Report coverage both as HTML and on the console
    report_formats = [SimpleCov::Formatter::HTMLFormatter, SimpleCov::Formatter::Console]
    SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[*report_formats]
  end
end

# ------------------------------------------------------------
# RSpec configuration

RSpec.configure do |rspec|
  rspec.raise_errors_for_deprecations!
  rspec.mock_with :rspec
end

# ------------------------------------------------------------
# Code under test (loaded last, after SimpleCov has started)

require 'merritt'
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module Merritt
  describe Manifest::DataONE do
    attr_reader :files
    attr_reader :manifest

    # Asserts that manifest output matches the fixture in spec/data. On a
    # mismatch, both versions are first written to tmp/ (timestamped) so they
    # can be diffed by hand.
    # @param actual [String] the generated manifest content
    # @param path [String] the fixture file name under spec/data
    def expect_fixture_match(actual, path = 'mrt-dataone-manifest.txt')
      expected = File.read("spec/data/#{path}")
      if actual != expected
        now = Time.now.to_i
        FileUtils.mkdir('tmp') unless File.directory?('tmp')
        File.open("tmp/#{now}-expected-#{path}", 'w') { |f| f.write(expected) }
        File.open("tmp/#{now}-actual-#{path}", 'w') { |f| f.write(actual) }
      end
      expect(actual).to eq(expected)
    end

    before(:each) do
      @files = {
        'Laney_300394_Exempt_Determination_Letter.pdf' => 'application/pdf',
        'Laney_IRBProposal.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'SensorSurvey_Printout.pdf' => 'application/pdf',
        'clean_survey_data_no_ids.csv' => 'text/csv',
        'research_coords.csv' => 'text/csv',
        'survey/Q10/research_sites.R' => 'text/plain',
        'survey/Q11-23/sensors_platforms.R' => 'text/plain',
        'survey/Q24/limitsToExpansion.R' => 'text/plain',
        'survey/Q25-32/data_metadata_management.R' => 'text/plain',
        'survey/Q3-9/respondent_info.R' => 'text/plain',
        'survey/Q33-37/networking.R' => 'text/plain',
        'survey/Q38-42/publications.R' => 'text/plain',
        'survey_data_prep.R' => 'text/plain'
      }.map { |name, type| OpenStruct.new(name: name, type: type) }
      @manifest = Manifest::DataONE.new(files: files)
    end

    describe :conformance do
      it 'returns DataONE 0.1' do
        expect(manifest.conformance).to eq('dataonem_0.1')
      end
    end

    describe :profile do
      it 'returns the DataONE manifest profile' do
        expect(manifest.profile).to eq(URI('http://uc3.cdlib.org/registry/ingest/manifest/mrt-dataone-manifest'))
      end
    end

    describe :prefixes do
      attr_reader :prefixes
      before(:each) do
        @prefixes = manifest.prefixes
      end
      it 'includes :dom' do
        expect(prefixes[:dom]).to eq(URI('http://uc3.cdlib.org/ontology/dataonem'))
      end
      it 'includes :mrt' do
        expect(prefixes[:mrt]).to eq(URI('http://uc3.cdlib.org/ontology/mom'))
      end
    end

    describe :fields do
      it 'returns the expected fields' do
        expected = %w[dom:scienceMetadataFile dom:scienceMetadataFormat dom:scienceDataFile mrt:mimeType]
        expect(manifest.fields).to eq(expected)
      end
    end

    describe :entries do
      attr_reader :entries
      before(:each) do
        @entries = manifest.entries
      end

      it 'converts files to entries' do
        # Each file yields two consecutive entries: DataCite first, then OAI-DC
        expect(entries.size).to eq(2 * files.size)
        files.each_with_index do |file, index|
          dcs_index = 2 * index
          dcs_entry = entries[dcs_index]
          oai_entry = entries[1 + dcs_index]
          expect(dcs_entry['dom:scienceMetadataFile']).to eq('mrt-datacite.xml')
          expect(oai_entry['dom:scienceMetadataFile']).to eq('mrt-oaidc.xml')
          expect(dcs_entry['dom:scienceMetadataFormat']).to eq('http://datacite.org/schema/kernel-3.1')
          expect(oai_entry['dom:scienceMetadataFormat']).to eq('http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd')

          [dcs_entry, oai_entry].each do |entry|
            expect(entry['dom:scienceDataFile']).to eq(file.name)
            expect(entry['mrt:mimeType']).to eq(file.type)
          end
        end
      end
    end

    describe :write_to_string do
      it 'writes a DataONE manifest' do
        expect_fixture_match(manifest.write_to_string)
      end
    end

    describe :write_to_file do
      it 'writes to a file' do
        file = Tempfile.new('manifest.txt')
        begin
          manifest.write_to(file)
          file.close
          # Verify what actually landed on disk, not a fresh in-memory rendering
          expect_fixture_match(IO.read(file.path))
        ensure
          file.delete
        end
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'ostruct'
|
3
|
+
|
4
|
+
module Merritt
  describe Manifest::Fields::Object do
    describe :value_from do
      describe :FILE_URL do
        it 'returns a URI even if given a String' do
          url_str = 'http://example.org/example.txt'
          source = OpenStruct.new(file_url: url_str)
          result = Manifest::Fields::Object::FILE_URL.value_from(source)
          expect(result).to eq(URI(url_str))
        end

        it 'fails if not present' do
          source = OpenStruct.new
          expect { Manifest::Fields::Object::FILE_URL.value_from(source) }.to raise_error(ArgumentError)
        end
      end

      describe :FILE_SIZE do
        it 'returns nil for nil' do
          result = Manifest::Fields::Object::FILE_SIZE.value_from(OpenStruct.new)
          expect(result).to be_nil
        end
        it 'returns an integer even if given a String' do
          source = OpenStruct.new(file_size: '1234')
          result = Manifest::Fields::Object::FILE_SIZE.value_from(source)
          expect(result).to eq(1234)
        end
      end

      describe :FILE_NAME do
        it 'prefers the filename to the URI' do
          name = 'EXAMPLE.TXT'
          source = OpenStruct.new(file_url: URI('http://example.org/example.txt'), file_name: name)
          result = Manifest::Fields::Object::FILE_NAME.value_from(source)
          expect(result).to eq(name)
        end

        it 'extracts the filename from the URI if not present' do
          source = OpenStruct.new(file_url: URI('http://example.org/example.txt'))
          result = Manifest::Fields::Object::FILE_NAME.value_from(source)
          expect(result).to eq('example.txt')
        end
      end
    end
  end
end
|