merritt-manifest 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,33 @@
1
+ require 'merritt/manifest/fields'
2
+
3
module Merritt
  class Manifest
    # Mixin listing the read-only attributes a file-like object may expose
    # to a manifest. Nothing is mandatory here: whether a given attribute
    # matters depends on the manifest format being produced.
    module File
      # @return [nil, URI] the URL from which the file can be retrieved
      attr_reader :file_url

      # @return [nil, String] the algorithm used to hash the file
      attr_reader :hash_algorithm

      # @return [nil, String] the value of the hash
      attr_reader :hash_value

      # @return [nil, Integer] the size of the file, in bytes
      attr_reader :file_size

      # @return [nil, DateTime] when the file was last modified.
      #   Per the [Merritt Ingest Service docs](https://confluence.ucop.edu/download/attachments/16744573/Merritt-ingest-service-latest.pdf),
      #   “modification time field SHOULD NOT be specified, and will be ignored if provided.”
      attr_reader :file_last_modified

      # @return [nil, String] the name of the file. When absent, the name is
      #   inferred from {#file_url}.
      attr_reader :file_name

      # @return [nil, MIME::Type] the MIME type of the file
      attr_reader :mime_type
    end
  end
end
@@ -0,0 +1,12 @@
1
module Merritt
  class Manifest
    # Gem name, as used in the gemspec
    NAME = 'merritt-manifest'.freeze

    # Gem version, as used in the gemspec
    VERSION = '0.1.0'.freeze

    # Copyright notice covering this gem
    COPYRIGHT = 'Copyright (c) 2017 The Regents of the University of California'.freeze
  end
end
@@ -0,0 +1,32 @@
1
module Merritt
  class Manifest
    # A {Manifest} specialized for Merritt object submissions.
    class Object < Manifest

      # Builds a new {Manifest::Object} with one entry per file.
      #
      # @param files [Array<Manifest::File>] the data files to convert to entries.
      #   (These need not be actual {Manifest::File} objects, so long as they
      #   respond to, at minimum, `#file_url`. The other {Manifest::File}
      #   fields are optional.)
      def initialize(files:)
        object_fields = Merritt::Manifest::Fields::Object
        super(
          profile: 'http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest',
          prefixes: object_fields.prefixes,
          fields: object_fields.fields,
          entries: to_entries(files)
        )
      end

      private

      # Converts each file to an entry hash.
      # @param files [Array] the file-like objects
      # @return [Array<Hash>] one hash per file, in the same order
      def to_entries(files)
        files.map { |file| entry_for(file) }
      end

      # Asks every object-manifest field for its value from `file`.
      # @param file the file-like object
      # @return [Hash] field name => field value, one pair per field
      def entry_for(file)
        pairs = Merritt::Manifest::Fields::Object.map do |field|
          [field.field_name, field.value_from(file)]
        end
        pairs.to_h
      end
    end
  end
end
@@ -0,0 +1,128 @@
1
require 'stringio'
require 'uri'

Dir.glob(File.expand_path('../manifest/*.rb', __FILE__)).sort.each(&method(:require))
2
+
3
module Merritt
  # A Merritt manifest file: a conformance level, a profile URI, a set of
  # namespace prefixes, a list of prefixed field names, and one entry per
  # manifest line.
  class Manifest

    # Base for all recognized profile URIs
    PROFILE_BASE_URI = 'http://uc3.cdlib.org/registry/ingest/manifest/'.freeze

    # Checkm 0.7 conformance level
    CHECKM_0_7 = 'checkm_0.7'.freeze

    # checkm column separator
    COLSEP = ' | '.freeze

    # @return [String] the conformance level
    attr_reader :conformance

    # @return [URI] the profile URI
    attr_reader :profile

    # @return [Hash{Symbol => URI}] a map from namespace prefixes to their URIs
    attr_reader :prefixes

    # @return [Array<String>] the field names, in the form prefix:fieldname
    attr_reader :fields

    # @return [Array<Hash<String, Object>>] the entries
    attr_reader :entries

    # Creates a new manifest. Note that the prefix, field, and entry arrays are
    # copied on initialization, as are the individual entry hashes; the copies
    # are frozen.
    #
    # @param conformance [String] the conformance level. Defaults to {CHECKM_0_7}.
    # @param profile [URI, String] the profile URI. Must begin with {PROFILE_BASE_URI}.
    # @param prefixes [Hash{String,Symbol => URI, String}] a map from namespace prefixes to their URIs
    # @param fields [Array<String>] a list of field names, in the form prefix:fieldname
    # @param entries [Array<Hash<String, Object>>] a list of entries, each of which is a hash keyed by
    #   a prefixed fieldname defined in `fields`. Nil values are allowed.
    # @raise [ArgumentError] if `profile` is nil or does not begin with {PROFILE_BASE_URI}
    # @raise [ArgumentError] if `fields` cannot be parsed as prefix:fieldname, or if one or more prefixes
    #   is not mapped to a URI in `prefixes`
    # @raise [ArgumentError] if an entry is nil, or has no value for any of `fields`
    # @raise [URI::InvalidURIError] if `profile` cannot be parsed as a URI
    def initialize(conformance: CHECKM_0_7, profile:, prefixes: {}, fields: [], entries: [])
      @conformance = conformance
      @profile = normalize_profile_uri(profile).freeze
      # Order matters: field validation reads @prefixes, entry normalization reads @fields.
      @prefixes = normalize_prefixes(prefixes).freeze
      @fields = validate_fields(fields).freeze
      @entries = normalize_entries(entries).freeze
    end

    # Writes this manifest to the specified IO
    # @param io [IO] the IO to write to
    def write_to(io)
      write_sc(io, conformance)
      write_sc(io, 'profile', profile)
      prefixes.each { |prefix, url| write_sc(io, 'prefix', "#{prefix}:", url) }
      write_sc(io, 'fields', *fields)
      entries.each { |entry| io.puts(entry_line(entry)) }
      write_sc(io, 'eof')
    end

    # Writes this manifest as a string
    # @return [String] the manifest file contents as a string
    def write_to_string
      io = StringIO.new
      write_to(io)
      io.string
    end

    private

    # Formats one entry as a manifest line.
    # @param entry [Hash<String, Object>] the entry
    # @return [String] the values for each of {#fields}, in order, joined by
    #   {COLSEP}, with trailing separators (from trailing empty columns) removed
    def entry_line(entry)
      fields.map { |f| entry[f] }.join(COLSEP).sub(/[| ]+\z/, '')
    end

    # writes a checkm "structured comment"
    # @param io [IO] the IO to write to
    # @param comment [String] the comment
    # @param columns [nil, Array<String>] columns to follow the initial comment
    def write_sc(io, comment, *columns)
      io << '#%' << comment
      io << COLSEP << columns.join(COLSEP) unless columns.empty?
      io << "\n"
    end

    # Copies the entries, keeping only the values for known fields.
    # @param entries [Array<Hash>] the raw entries
    # @return [Array<Hash>] the normalized copies
    # @raise [ArgumentError] if any entry is nil
    def normalize_entries(entries)
      entries.each_with_index.map do |entry, i|
        raise ArgumentError, "Nil entry at index #{i}" unless entry
        normalize_entry(entry)
      end
    end

    # Copies the non-nil values of one entry for each of {#fields}.
    # @param entry [Hash] the raw entry
    # @return [Hash] a new hash containing only keys listed in {#fields}
    # @raise [ArgumentError] if the entry has no value for any field
    def normalize_entry(entry)
      normalized = {}
      fields.each do |f|
        next unless (value = entry[f])
        normalized[f] = value
      end
      raise ArgumentError, "No fields found in entry #{entry}" if normalized.empty?
      normalized
    end

    # @param fields [Array<String>] the field names to check
    # @return [Array<String>] a copy of `fields`, once every element is validated
    # @raise [ArgumentError] see {#validate_field}
    def validate_fields(fields)
      fields.map { |f| validate_field(f) }
    end

    # Checks that a field has the form prefix:fieldname and a known prefix.
    # @param field [String] the field name
    # @return [String] the field, unchanged
    # @raise [ArgumentError] if the field has no fieldname part, or if its
    #   prefix is not a key of {#prefixes}
    def validate_field(field)
      prefix, fieldname = field.split(':')
      # Shape first: this also guards against a nil prefix (empty field) below.
      raise ArgumentError, "Field '#{field}' cannot be parsed as prefix:fieldname" unless fieldname
      raise ArgumentError, "Unknown prefix in field '#{field}': #{prefix}" unless prefixes.key?(prefix.to_sym)
      field
    end

    # @param prefixes [nil, Hash] the raw prefix map
    # @return [Hash{Symbol => URI}] a new hash with symbol keys and URI values;
    #   empty if `prefixes` is nil
    def normalize_prefixes(prefixes)
      return {} unless prefixes
      prefixes.map { |k, v| [k.to_sym, Util.to_uri(v)] }.to_h
    end

    # @param profile [nil, URI, String] the raw profile
    # @return [URI] a copy of the profile as a URI
    # @raise [ArgumentError] if the profile is nil or does not start with {PROFILE_BASE_URI}
    def normalize_profile_uri(profile)
      profile_uri = Util.to_uri(profile)
      raise ArgumentError, "Invalid profile: #{profile || 'nil'}" unless profile_uri &&
                                                                         profile_uri.to_s.start_with?(PROFILE_BASE_URI)
      profile_uri.clone # defensive copy
    end

  end
end
@@ -0,0 +1,19 @@
1
module Merritt
  # Miscellaneous utility methods
  module Util
    # Coerces the argument to a URI.
    #
    # @param url [nil, String, URI] the value to coerce. A URI is returned
    #   as-is; anything else is parsed from its whitespace-stripped string
    #   form (`to_s` is used when the value does not itself respond to
    #   `strip`).
    # @return [nil, URI] `nil` if `url` is nil (or false), otherwise the URI.
    # @raise [URI::InvalidURIError] if the string form is not a valid URI
    def self.to_uri(url)
      return nil unless url
      return url if url.is_a?(URI)

      text = url.respond_to?(:strip) ? url : url.to_s
      URI.parse(text.strip)
    end
  end
end
data/lib/merritt.rb ADDED
@@ -0,0 +1,4 @@
1
# Top-level namespace for Merritt-related code. Loading this file requires
# every Ruby file in the sibling `merritt` directory, in sorted order.
module Merritt
  pattern = File.expand_path('../merritt/*.rb', __FILE__)
  Dir.glob(pattern).sort.each { |path| require path }
end
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+
6
+ require 'merritt/manifest/module_info'
7
+ require 'uri'
8
+
9
# Gem specification. Name and version come from Merritt::Manifest; the
# homepage is derived from the git `origin` remote and the file list from
# `git ls-files`, so this must be evaluated inside a git checkout.
Gem::Specification.new do |spec|
  spec.name = Merritt::Manifest::NAME
  spec.version = Merritt::Manifest::VERSION
  spec.authors = ['David Moles']
  spec.email = ['david.moles@ucop.edu']
  spec.summary = 'Merritt manifest generation library'
  spec.description = 'Merritt manifest generation library'
  spec.license = 'MIT'

  origin = `git config --get remote.origin.url`.chomp
  # scp-style remotes (git@host:owner/repo.git) are not parseable URIs, so
  # rewrite them as http://host/owner/repo.git. The whole path, including
  # the owner segment, is kept.
  http_origin = origin.start_with?('http') ? origin : origin.sub(%r{\A(?:ssh://)?git@([^:/]+)[:/]}, 'http://\1/')
  origin_uri = URI(http_origin)
  spec.homepage = URI::HTTP.build(host: origin_uri.host, path: origin_uri.path.chomp('.git')).to_s

  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }

  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'typesafe_enum', '~> 0.1.7'

  spec.add_development_dependency 'bundler', '~> 1.14'
  spec.add_development_dependency 'rake', '~> 12.0'
  spec.add_development_dependency 'rspec', '~> 3.5'
  spec.add_development_dependency 'simplecov', '~> 0.14'
  spec.add_development_dependency 'simplecov-console', '~> 0.4'
  spec.add_development_dependency 'rubocop', '~> 0.47'
  spec.add_development_dependency 'yard', '~> 0.9'
end
@@ -0,0 +1,37 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="RUBY_MODULE" version="4">
3
+ <component name="NewModuleRootManager" inherit-compiler-output="true">
4
+ <exclude-output />
5
+ <content url="file://$MODULE_DIR$" />
6
+ <orderEntry type="jdk" jdkName="ruby-2.2.5-p319" jdkType="RUBY_SDK" />
7
+ <orderEntry type="sourceFolder" forTests="false" />
8
+ <orderEntry type="library" scope="PROVIDED" name="ansi (v1.5.0, ruby-2.2.5-p319) [gem]" level="application" />
9
+ <orderEntry type="library" scope="PROVIDED" name="ast (v2.3.0, ruby-2.2.5-p319) [gem]" level="application" />
10
+ <orderEntry type="library" scope="PROVIDED" name="bundler (v1.14.6, ruby-2.2.5-p319) [gem]" level="application" />
11
+ <orderEntry type="library" scope="PROVIDED" name="diff-lcs (v1.3, ruby-2.2.5-p319) [gem]" level="application" />
12
+ <orderEntry type="library" scope="PROVIDED" name="docile (v1.1.5, ruby-2.2.5-p319) [gem]" level="application" />
13
+ <orderEntry type="library" scope="PROVIDED" name="hirb (v0.7.3, ruby-2.2.5-p319) [gem]" level="application" />
14
+ <orderEntry type="library" scope="PROVIDED" name="json (v2.0.4, ruby-2.2.5-p319) [gem]" level="application" />
15
+ <orderEntry type="library" scope="PROVIDED" name="parser (v2.4.0.0, ruby-2.2.5-p319) [gem]" level="application" />
16
+ <orderEntry type="library" scope="PROVIDED" name="powerpack (v0.1.1, ruby-2.2.5-p319) [gem]" level="application" />
17
+ <orderEntry type="library" scope="PROVIDED" name="rainbow (v2.2.1, ruby-2.2.5-p319) [gem]" level="application" />
18
+ <orderEntry type="library" scope="PROVIDED" name="rake (v12.0.0, ruby-2.2.5-p319) [gem]" level="application" />
19
+ <orderEntry type="library" scope="PROVIDED" name="rspec (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
20
+ <orderEntry type="library" scope="PROVIDED" name="rspec-core (v3.5.4, ruby-2.2.5-p319) [gem]" level="application" />
21
+ <orderEntry type="library" scope="PROVIDED" name="rspec-expectations (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
22
+ <orderEntry type="library" scope="PROVIDED" name="rspec-mocks (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
23
+ <orderEntry type="library" scope="PROVIDED" name="rspec-support (v3.5.0, ruby-2.2.5-p319) [gem]" level="application" />
24
+ <orderEntry type="library" scope="PROVIDED" name="rubocop (v0.48.1, ruby-2.2.5-p319) [gem]" level="application" />
25
+ <orderEntry type="library" scope="PROVIDED" name="ruby-progressbar (v1.8.1, ruby-2.2.5-p319) [gem]" level="application" />
26
+ <orderEntry type="library" scope="PROVIDED" name="simplecov (v0.14.1, ruby-2.2.5-p319) [gem]" level="application" />
27
+ <orderEntry type="library" scope="PROVIDED" name="simplecov-console (v0.4.1, ruby-2.2.5-p319) [gem]" level="application" />
28
+ <orderEntry type="library" scope="PROVIDED" name="simplecov-html (v0.10.0, ruby-2.2.5-p319) [gem]" level="application" />
29
+ <orderEntry type="library" scope="PROVIDED" name="typesafe_enum (v0.1.7, ruby-2.2.5-p319) [gem]" level="application" />
30
+ <orderEntry type="library" scope="PROVIDED" name="unicode-display_width (v1.2.1, ruby-2.2.5-p319) [gem]" level="application" />
31
+ <orderEntry type="library" scope="PROVIDED" name="yard (v0.9.8, ruby-2.2.5-p319) [gem]" level="application" />
32
+ </component>
33
+ <component name="RModuleSettingsStorage">
34
+ <LOAD_PATH number="2" string0="$MODULE_DIR$/lib" string1="$MODULE_DIR$/spec" />
35
+ <I18N_FOLDERS number="0" />
36
+ </component>
37
+ </module>
data/spec/.rubocop.yml ADDED
@@ -0,0 +1,10 @@
1
+ inherit_from: ../.rubocop.yml
2
+
3
+ Metrics/MethodLength:
4
+ Enabled: false
5
+
6
+ Metrics/ModuleLength:
7
+ Enabled: false
8
+
9
+ Metrics/BlockLength:
10
+ Enabled: false
@@ -0,0 +1,8 @@
1
+ #%checkm_0.7
2
+ #%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest
3
+ #%prefix | mrt: | http://merritt.cdlib.org/terms#
4
+ #%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
5
+ #%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType
6
+ http://merritt.cdlib.org/samples/4blocks.jpg | md5 | 0b21c6d48e815dd537d42dc1cfac0111 | | | 4blocks.jpg
7
+ http://merritt.cdlib.org/samples/4blocks.txt | md5 | ed04a855f89f31f8dc8e9bb946f5f159 | | | 4blocks.txt
8
+ #%eof
@@ -0,0 +1,32 @@
1
+ #%dataonem_0.1
2
+ #%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-dataone-manifest
3
+ #%prefix | dom: | http://uc3.cdlib.org/ontology/dataonem
4
+ #%prefix | mrt: | http://uc3.cdlib.org/ontology/mom
5
+ #%fields | dom:scienceMetadataFile | dom:scienceMetadataFormat | dom:scienceDataFile | mrt:mimeType
6
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | Laney_300394_Exempt_Determination_Letter.pdf | application/pdf
7
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | Laney_300394_Exempt_Determination_Letter.pdf | application/pdf
8
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | Laney_IRBProposal.docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document
9
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | Laney_IRBProposal.docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document
10
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | SensorSurvey_Printout.pdf | application/pdf
11
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | SensorSurvey_Printout.pdf | application/pdf
12
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | clean_survey_data_no_ids.csv | text/csv
13
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | clean_survey_data_no_ids.csv | text/csv
14
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | research_coords.csv | text/csv
15
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | research_coords.csv | text/csv
16
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q10/research_sites.R | text/plain
17
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q10/research_sites.R | text/plain
18
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q11-23/sensors_platforms.R | text/plain
19
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q11-23/sensors_platforms.R | text/plain
20
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q24/limitsToExpansion.R | text/plain
21
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q24/limitsToExpansion.R | text/plain
22
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q25-32/data_metadata_management.R | text/plain
23
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q25-32/data_metadata_management.R | text/plain
24
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q3-9/respondent_info.R | text/plain
25
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q3-9/respondent_info.R | text/plain
26
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q33-37/networking.R | text/plain
27
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q33-37/networking.R | text/plain
28
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey/Q38-42/publications.R | text/plain
29
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey/Q38-42/publications.R | text/plain
30
+ mrt-datacite.xml | http://datacite.org/schema/kernel-3.1 | survey_data_prep.R | text/plain
31
+ mrt-oaidc.xml | http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd | survey_data_prep.R | text/plain
32
+ #%eof
@@ -0,0 +1,9 @@
1
+ #%checkm_0.7
2
+ #%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-container-batch-manifest
3
+ #%prefix | mrt: | http://merritt.cdlib.org/terms#
4
+ #%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
5
+ #%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
6
+ http://merritt.cdlib.org/samples/huskyChicken.zip | | | | | huskyChicken.zip | | | Tracy Seneca | Husky Chicken | 2009-09-01T00:00:00
7
+ http://merritt.cdlib.org/samples/outdoorStore.zip | | | | | outdoorStore.zip | | | | The Outdoor Store, Portland OR
8
+ http://merritt.cdlib.org/samples/souvenirs.zip | | | | | souvenirs.zip | | | Tracy Seneca | SouvenirsLast Summer
9
+ #%eof
@@ -0,0 +1,9 @@
1
+ #%checkm_0.7
2
+ #%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-single-file-batch-manifest
3
+ #%prefix | mrt: | http://merritt.cdlib.org/terms#
4
+ #%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
5
+ #%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
6
+ http://merritt.cdlib.org/samples/goldenDragon.jpg | md5 | aa59e145dfb2237ecd8cb5ce4f4953ea | | | goldenDragon.jpg | | | Tracy Seneca | Golden Dragon Restaurant Sign | 2009-09-01T00:00:00
7
+ http://merritt.cdlib.org/samples/tumbleBug.jpg | md5 | 61224ef4f2a8dad552ff57980ed0bf49 | | | tumbleBug.jpg | | | Tracy Seneca | Tumble Bug, Luna Park. Sydney
8
+ http://merritt.cdlib.org/samples/generalDrapery.jpg | | | | | generalDrapery.jpg | | | | General Drapery
9
+ #%eof
@@ -0,0 +1,9 @@
1
+ #%checkm_0.7
2
+ #%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-batch-manifest
3
+ #%prefix | mrt: | http://merritt.cdlib.org/terms#
4
+ #%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
5
+ #%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
6
+ http://merritt.cdlib.org/samples/manifests/bigHunt.checkm | | | | | bigHunt.checkm | | | Tracy Seneca | | 2009-09-01T00:00:00
7
+ http://merritt.cdlib.org/samples/manifests/4blocks.checkm | | | | | 4blocks.checkm | | | Tracy Seneca | Motel Capri - manifest style | 2009-09-01T00:00:00
8
+ http://merritt.cdlib.org/samples/manifests/call911.checkm | | | | | call911.checkm | | | Tracy Seneca | If This Bell Rings Call 911 - manifest style | 2009-09-01T00:00:00
9
+ #%eof
@@ -0,0 +1,29 @@
1
+ # ------------------------------------------------------------
2
+ # SimpleCov setup
3
+
4
# Coverage is opt-in: it is only measured when COVERAGE is set in the
# environment, and the run then fails below 100% coverage.
if ENV['COVERAGE']
  %w[simplecov simplecov-console].each { |lib| require lib }

  report_formatters = [
    SimpleCov::Formatter::HTMLFormatter,
    SimpleCov::Formatter::Console
  ]

  SimpleCov.minimum_coverage(100)
  SimpleCov.start do
    add_filter('/spec/')
    SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[*report_formatters]
  end
end

# ------------------------------------------------------------
# Rspec configuration

RSpec.configure do |rspec|
  rspec.raise_errors_for_deprecations!
  rspec.mock_with(:rspec)
end
25
+
26
+ # ------------------------------------------------------------
27
+ # Merritt::Manifest
28
+
29
+ require 'merritt'
@@ -0,0 +1,122 @@
1
+ require 'spec_helper'
2
+ require 'ostruct'
3
+
4
require 'fileutils'
require 'tempfile'

module Merritt
  # Specs for the DataONE manifest: accessors, entry generation, and the
  # serialized output, which is compared against spec/data/mrt-dataone-manifest.txt.
  describe Manifest::DataONE do
    # File name of the expected-output fixture under spec/data
    let(:fixture_name) { 'mrt-dataone-manifest.txt' }

    # Expected manifest text, read from the fixture
    let(:expected) { File.read("spec/data/#{fixture_name}") }

    # File-like objects (responding to #name and #type) to build the manifest from
    let(:files) do
      {
        'Laney_300394_Exempt_Determination_Letter.pdf' => 'application/pdf',
        'Laney_IRBProposal.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'SensorSurvey_Printout.pdf' => 'application/pdf',
        'clean_survey_data_no_ids.csv' => 'text/csv',
        'research_coords.csv' => 'text/csv',
        'survey/Q10/research_sites.R' => 'text/plain',
        'survey/Q11-23/sensors_platforms.R' => 'text/plain',
        'survey/Q24/limitsToExpansion.R' => 'text/plain',
        'survey/Q25-32/data_metadata_management.R' => 'text/plain',
        'survey/Q3-9/respondent_info.R' => 'text/plain',
        'survey/Q33-37/networking.R' => 'text/plain',
        'survey/Q38-42/publications.R' => 'text/plain',
        'survey_data_prep.R' => 'text/plain'
      }.map { |name, type| OpenStruct.new(name: name, type: type) }
    end

    let(:manifest) { Manifest::DataONE.new(files: files) }

    # Saves both texts under tmp/ so a failed comparison can be diffed by hand.
    # @param expected_text [String] the fixture contents
    # @param actual_text [String] the generated manifest
    def dump_mismatch(expected_text, actual_text)
      now = Time.now.to_i
      FileUtils.mkdir('tmp') unless File.directory?('tmp')
      File.open("tmp/#{now}-expected-#{fixture_name}", 'w') { |f| f.write(expected_text) }
      File.open("tmp/#{now}-actual-#{fixture_name}", 'w') { |f| f.write(actual_text) }
    end

    describe :conformance do
      it 'returns DataONE 0.1' do
        expect(manifest.conformance).to eq('dataonem_0.1')
      end
    end

    describe :profile do
      it 'returns the DataONE manifest profile' do
        expect(manifest.profile).to eq(URI('http://uc3.cdlib.org/registry/ingest/manifest/mrt-dataone-manifest'))
      end
    end

    describe :prefixes do
      let(:prefixes) { manifest.prefixes }

      it 'includes :dom' do
        expect(prefixes[:dom]).to eq(URI('http://uc3.cdlib.org/ontology/dataonem'))
      end
      it 'includes :mrt' do
        expect(prefixes[:mrt]).to eq(URI('http://uc3.cdlib.org/ontology/mom'))
      end
    end

    describe :fields do
      it 'returns the expected fields' do
        expected_fields = %w[dom:scienceMetadataFile dom:scienceMetadataFormat dom:scienceDataFile mrt:mimeType]
        expect(manifest.fields).to eq(expected_fields)
      end
    end

    describe :entries do
      let(:entries) { manifest.entries }

      it 'converts files to entries' do
        # Each file yields two consecutive entries: DataCite first, then OAI-DC.
        expect(entries.size).to eq(2 * files.size)
        files.each_with_index do |file, index|
          dcs_index = 2 * index
          dcs_entry = entries[dcs_index]
          oai_entry = entries[1 + dcs_index]
          expect(dcs_entry['dom:scienceMetadataFile']).to eq('mrt-datacite.xml')
          expect(oai_entry['dom:scienceMetadataFile']).to eq('mrt-oaidc.xml')
          expect(dcs_entry['dom:scienceMetadataFormat']).to eq('http://datacite.org/schema/kernel-3.1')
          expect(oai_entry['dom:scienceMetadataFormat']).to eq('http://dublincore.org/schemas/xmls/qdc/2008/02/11/qualifieddc.xsd')

          [dcs_entry, oai_entry].each do |entry|
            expect(entry['dom:scienceDataFile']).to eq(file.name)
            expect(entry['mrt:mimeType']).to eq(file.type)
          end
        end
      end
    end

    describe :write_to_string do
      it 'writes a DataONE manifest' do
        actual = manifest.write_to_string
        dump_mismatch(expected, actual) if actual != expected
        expect(actual).to eq(expected)
      end
    end

    describe :write_to_file do
      it 'writes to a file' do
        file = Tempfile.new('manifest.txt')
        begin
          manifest.write_to(file)
          file.close
          # Compare what actually landed on disk, not a fresh in-memory rendering.
          actual = IO.read(file.path)
          dump_mismatch(expected, actual) if actual != expected
          expect(actual).to eq(expected)
        ensure
          file.close unless file.closed?
          file.delete
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+ require 'ostruct'
3
+
4
module Merritt
  # Specs for how the object-manifest fields extract their values from a
  # file-like object.
  describe Manifest::Fields::Object do
    describe :value_from do
      describe :FILE_URL do
        it 'returns a URI even if given a String' do
          source = OpenStruct.new(file_url: 'http://example.org/example.txt')
          result = Manifest::Fields::Object::FILE_URL.value_from(source)
          expect(result).to eq(URI('http://example.org/example.txt'))
        end

        it 'fails if not present' do
          source = OpenStruct.new
          expect { Manifest::Fields::Object::FILE_URL.value_from(source) }.to raise_error(ArgumentError)
        end
      end

      describe :FILE_SIZE do
        it 'returns nil for nil' do
          result = Manifest::Fields::Object::FILE_SIZE.value_from(OpenStruct.new)
          expect(result).to be_nil
        end
        it 'returns an integer even if given a String' do
          source = OpenStruct.new(file_size: '1234')
          result = Manifest::Fields::Object::FILE_SIZE.value_from(source)
          expect(result).to eq(1234)
        end
      end

      describe :FILE_NAME do
        it 'prefers the filename to the URI' do
          source = OpenStruct.new(file_url: URI('http://example.org/example.txt'), file_name: 'EXAMPLE.TXT')
          result = Manifest::Fields::Object::FILE_NAME.value_from(source)
          expect(result).to eq('EXAMPLE.TXT')
        end

        it 'extracts the filename from the URI if not present' do
          source = OpenStruct.new(file_url: URI('http://example.org/example.txt'))
          result = Manifest::Fields::Object::FILE_NAME.value_from(source)
          expect(result).to eq('example.txt')
        end
      end
    end
  end
end