text-checkm 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ require 'text/checkm/manifest'
2
+ require 'text/checkm/entry'
@@ -0,0 +1,40 @@
1
+ require 'digest'
2
+
3
module Text
  module Checkm
    class << self
      # Size (in bytes) of each chunk read while computing a checksum, so that
      # large files are never loaded into memory in one piece.
      CHUNK_SIZE = 8 * 1024 * 1024

      # Computes the checksum `alg` for a file.
      #
      # @param [File, IO, String] file an open, readable handle positioned at
      #   the start of the data, or a filesystem path. A path is opened (and
      #   closed again) here, so callers do not have to manage a handle.
      # @param [String, nil] alg md5, sha1, sha256, dir
      # @return [true] when +alg+ is nil (nothing to verify)
      # @return [Boolean] for "dir": whether +file+ is a directory
      # @return [false] when +alg+ is not a supported algorithm
      # @return [String] the hex digest otherwise
      def checksum(file, alg)
        return true unless alg # TODO: something less counterintuitive
        return File.directory?(file) if alg =~ /dir/

        digest_alg = digest_for(alg)
        return false unless digest_alg # TODO: something less counterintuitive

        return digest_stream(file, digest_alg) if file.respond_to?(:read)

        # Block form guarantees the handle opened here is closed again.
        File.open(file, 'rb') { |io| digest_stream(io, digest_alg) }
      end

      private

      # Feeds everything readable from +io+ into +digest_alg+, one chunk at a
      # time, and returns the resulting hex digest.
      def digest_stream(io, digest_alg)
        # IO#read(length) returns nil at end of stream, which ends the loop.
        while (chunk = io.read(CHUNK_SIZE))
          digest_alg << chunk
        end
        digest_alg.hexdigest
      end

      # Returns a fresh digest object for +alg+, or nil when unsupported.
      def digest_for(alg)
        case alg
        when /md5/
          Digest::MD5.new
        when /sha1/
          Digest::SHA1.new
        when /sha256/
          Digest::SHA2.new(256)
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'open-uri'
2
+ require 'time'
3
+
4
+ require 'text/checkm/checksum'
5
+
6
module Text
  module Checkm
    # A single entry (one pipe-delimited line) of a Checkm manifest.
    #
    # Field values are exposed through dynamic readers named after the
    # manifest's `#%fields` header (when the owning manifest declares one)
    # and after BASE_FIELDS, e.g. `entry.alg` or `entry.digest`. Both sets of
    # names map positionally onto the same values.
    class Entry
      BASE_FIELDS = %w[sourcefileorurl alg digest length modtime targetfileorurl].freeze
      attr_reader :values

      # Builds a manifest line describing the file at +path+.
      #
      # @param [String] path file path, relative to the base directory
      # @param [Hash] args options: `:base` (directory +path+ is relative to,
      #   defaults to the current directory) and `:alg` (defaults to 'md5')
      # @return [String] a line of the form
      #   `path | alg | checksum | size | mtime | ` (empty target field)
      def self.create(path, args = {}) # TODO: why is this in this class?
        base = args[:base] || Dir.pwd
        alg = args[:alg] || 'md5'

        # Block form so the handle is closed once the line has been built.
        File.open(File.join(base, path)) do |file|
          format('%s | %s | %s | %s | %s | %s', path, alg, Checkm.checksum(file, alg), File.size(file.path), file.mtime.utc.xmlschema, nil)
        end
      end

      # @param [String] line the raw manifest line
      # @param [Manifest, nil] manifest the owning manifest; supplies custom
      #   field names and the base path used to resolve the source file
      def initialize(line, manifest = nil)
        @line = line.strip
        @include = false
        @fields = BASE_FIELDS
        @fields = manifest.fields if manifest && manifest.fields
        @values = line.split('|').map(&:strip)
        @manifest = manifest
      end

      # Resolves field names to the value in the matching column. Names that
      # are not fields are passed to +super+, so a typo raises NoMethodError
      # rather than an obscure TypeError from indexing with nil.
      def method_missing(sym, *args, &block)
        # TODO: something less extreme
        index = field_index(sym)
        return super if index.nil?

        @values[index]
      end

      def respond_to_missing?(sym, include_private = false)
        !field_index(sym).nil? || super
      end

      # An entry is valid when its source exists, its checksum matches and,
      # for an included ("@"-prefixed) manifest, that manifest can be parsed.
      # @return [Object] false when a check fails, otherwise a truthy value
      def valid?
        source_exists? && valid_checksum? && valid_multilevel? # xxx && valid_length? && valid_modtime?
      end

      private

      # Column index for the field named +sym+ (case-insensitive), looking at
      # the manifest-declared fields first and BASE_FIELDS second.
      def field_index(sym)
        name = sym.to_s.downcase
        @fields.index(name) || BASE_FIELDS.index(name)
      end

      # Directory that relative sources are resolved against. Entries built
      # without a manifest fall back to the current directory, the same
      # default Manifest itself uses.
      def base_path
        @manifest ? @manifest.path : Dir.pwd
      end

      # Filesystem path of the source, without the "@" include marker.
      def source
        File.join(base_path, sourcefileorurl.delete_prefix('@'))
      end

      def source_exists?
        File.exist? source
      end

      def valid_checksum?
        checksum = File.open(source) { |file| Checkm.checksum(file, alg) }
        # Checkm.checksum returns true when there is nothing to verify.
        [true, digest].include?(checksum) # TODO: something less counterintuitive
      end

      # def valid_length?
      #   throw NotImplementedError
      # end
      #
      # def valid_modtime?
      #   throw NotImplementedError
      # end

      # For an included manifest, parses it relative to its own directory.
      # Only parseability is checked; the nested entries are not validated.
      # @return [true, Manifest] true for ordinary entries, else the manifest
      def valid_multilevel?
        return true unless sourcefileorurl.start_with?('@')

        # Block form closes the handle as soon as the content has been read.
        Manifest.parse(URI.open(source, &:read), path: File.dirname(source))
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ require 'text/checkm/entry'
2
+
3
module Text
  module Checkm
    # A parsed Checkm manifest: its version, optional custom field names and
    # one Entry per data line. Instances are not mutated by #add / #remove;
    # both return a new Manifest.
    class Manifest
      # @param [String] str manifest text
      # @param [Hash] args options; `:path` is the directory entries are
      #   resolved against (defaults to the current directory)
      # @return [Manifest]
      def self.parse(str, args = {})
        Manifest.new str, args
      end

      attr_reader :version
      attr_reader :entries
      attr_reader :fields
      attr_reader :path

      # @param [String] checkm manifest text
      # @param [Hash] args see Manifest.parse
      def initialize(checkm, args = {})
        @args = args
        @version = nil
        @checkm = checkm
        @lines = checkm.split "\n"
        @entries = []
        @eof = false
        @fields = nil
        @path = args[:path] || Dir.pwd # TODO: something less hacky

        parse_lines
        # xxx error on empty entries?
        return unless @version.nil?

        # No version header found: assume 0.7 and make it explicit in #to_s.
        @lines.unshift('#%checkm_0.7')
        @version = '0.7'
      end

      # @return [Boolean] true when no entry reports itself invalid (an empty
      #   manifest is valid). Stops at the first invalid entry.
      def valid?
        # Entry#valid? may return a truthy non-boolean, so only an explicit
        # false counts as a failure.
        @entries.none? { |entry| entry.valid? == false }
      end

      # @param [String] path file to add; see Entry.create for +args+
      # @return [Manifest] a new manifest with an entry for +path+ appended
      def add(path, args = {})
        line = Entry.create path, args

        Manifest.new (@lines + [line]).join("\n"), @args
      end

      # @param [String] path source name to remove
      # @return [Manifest] a new manifest without the lines whose source
      #   (optionally "@"-prefixed) starts with +path+
      def remove(path)
        # Escaped so that +path+ is matched literally: "." must not match any
        # character, and names such as "a(b" must not raise RegexpError.
        pattern = /^@?#{Regexp.escape(path.to_s)}/
        Manifest.new @lines.reject { |line| line =~ pattern }.join("\n"), @args
      end

      def to_s
        @lines.join("\n")
      end

      # @return [Hash{String => Array<Entry>}] entries grouped by source
      def to_h
        entries.group_by(&:sourcefileorurl)
      end

      private

      # Dispatches every line to the matching handler.
      def parse_lines
        @lines.each do |line|
          case line
          when /^#%/
            parse_header line
          when /^#/
            parse_comment line
          when /\A\s*\z/
            # blank or whitespace-only (e.g. the "\r" left over from CRLF
            # input): not an entry, do nothing
          else
            # regular and "@"-prefixed (included manifest) entries alike
            parse_line line
          end
        end
      end

      # Handles `#%` directive lines.
      def parse_header(line)
        case line
        when /^#%checkm/
          match = /^#%checkm_(\d+)\.(\d+)/.match line
          @version = "#{match[1]}.#{match[2]}" if match
        when /^#%eof/
          @eof = true
        when /^#%fields/
          # First segment is the directive itself; the rest are field names.
          @fields = line.split('|').drop(1).map { |v| v.strip.downcase }
        when /^#%prefix/
          # do nothing
        when /^#%profile/
          # do nothing
        end
      end

      def parse_comment(_line)
        # do nothing
      end

      def parse_line(line)
        @entries << Entry.new(line, self)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Text
  module Checkm
    # Static metadata describing the gem. Used purely as a namespace for
    # constants, so it cannot be instantiated.
    class ModuleInfo
      class << self
        private :new
      end

      # Identity
      NAME = 'text-checkm'.freeze
      VERSION = '0.7'.freeze
      LICENSE = 'MIT'.freeze
      HOMEPAGE = 'https://github.com/dmolesUC/checkm'.freeze

      # Authorship
      AUTHOR = 'David Moles'.freeze
      AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze

      # Descriptions
      SUMMARY = 'Ruby implementation of the Checkm text manifest format'.freeze
      DESCRIPTION = 'A Ruby implementation of the Checkm checksum-based text manifest format'.freeze
    end
  end
end
@@ -0,0 +1,8 @@
1
# Rake task wrapping the bundler-audit command line.
namespace :bundle do
  desc 'Updates the ruby-advisory-db then runs bundle-audit'
  task :audit do
    # Loaded lazily so that merely defining the task does not need the gem.
    require 'bundler/audit/cli'

    # Refresh the advisory database first, then check against it.
    [
      ['update'],
      ['check', '--ignore', 'CVE-2015-9284']
    ].each { |argv| Bundler::Audit::CLI.start argv }
  end
end
@@ -0,0 +1,5 @@
1
desc 'Run all specs in spec directory, with coverage'
task :coverage do
  # Respect a value the caller already exported; only default when unset.
  ENV['COVERAGE'] = 'true' unless ENV['COVERAGE']

  spec_task = Rake::Task[:spec]
  spec_task.invoke
end
@@ -0,0 +1,16 @@
1
+ require 'rubocop'
2
+ require 'rubocop/rake_task'
3
+
4
desc 'Run rubocop with HTML output'
RuboCop::RakeTask.new(:rubocop) do |cop|
  # Report location can be overridden through the RUBOCOP_OUTPUT variable.
  report_path = ENV.fetch('RUBOCOP_OUTPUT', 'artifacts/reports/rubocop/index.html')

  cop.formatters = %w[html]
  cop.options = ['--out', report_path]
end

desc 'Run RuboCop with auto-correct, and output results to console'
task :ra do
  # b/c we want console output, we can't just use `rubocop:auto_correct`
  cli = RuboCop::CLI.new
  cli.run(%w[--auto-correct])
end
@@ -0,0 +1,19 @@
1
+ inherit_from: ../.rubocop.yml
2
+
3
+ Style/ClassAndModuleChildren:
4
+ Enabled: false
5
+
6
+ Layout/LineLength:
7
+ Enabled: false
8
+
9
+ Metrics/BlockLength:
10
+ Enabled: false
11
+
12
+ Metrics/ClassLength:
13
+ Enabled: false
14
+
15
+ Metrics/ModuleLength:
16
+ Enabled: false
17
+
18
+ Metrics/MethodLength:
19
+ Enabled: false
@@ -0,0 +1,9 @@
1
+ #%checkm_0.7
2
+ #%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-single-file-batch-manifest
3
+ #%prefix | mrt: | http://merritt.cdlib.org/terms#
4
+ #%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
5
+ #%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
6
+ http://merritt.cdlib.org/samples/goldenDragon.jpg | md5 | aa59e145dfb2237ecd8cb5ce4f4953ea | | | goldenDragon.jpg | | | Tracy Seneca | Golden Dragon Restaurant Sign | 2009-09-01T00:00:00
7
+ http://merritt.cdlib.org/samples/tumbleBug.jpg | md5 | 61224ef4f2a8dad552ff57980ed0bf49 | | | tumbleBug.jpg | | | Tracy Seneca | Tumble Bug, Luna Park. Sydney
8
+ http://merritt.cdlib.org/samples/generalDrapery.jpg | | | | | generalDrapery.jpg | | | | General Drapery
9
+ #%eof
@@ -0,0 +1,5 @@
1
+ #%checkm_0.7
2
+ # My first manifest. Two files total.
3
+ # Filename |Algorithm| Digest
4
+ book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9
5
+ images/r862.png | md5 | 408ad21d50cef31da4df6d9ed81b01a7
@@ -0,0 +1 @@
1
+ 1
@@ -0,0 +1,8 @@
1
+ #%checkm_0.7
2
+ # A two-level manifest.
3
+
4
+ #Filename|Alg |Checksum |Length
5
+ foo.bar |sha1|2eacd0da7aa89b094f5121eb2901bf4de2219ef1 | 366
6
+ foo.bar |md5 |3e83471320227c0797a0c251f28db0c5 | 366
7
+ # This next line "includes" the manifest in file "myfirst".
8
+ @myfirst.checkm |md5 |1ad6bcd5e4c49420ead2df43406d37fb | 218
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
module Text
  module Checkm
    describe :checksum do
      # Known digests of the fixture file, keyed by algorithm name.
      it 'calculates the checksum' do
        file = 'spec/data/test_1/1'
        expected = {
          'md5' => 'b026324c6904b2a9cb4b88d6d61c81d1',
          'sha1' => 'e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e',
          'sha256' => '4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865'
        }

        # Aggregated so that one wrong algorithm does not hide the others.
        aggregate_failures 'checksums' do
          expected.each_pair do |alg, checksum|
            # A fresh handle per algorithm: each run must start at offset 0.
            actual = File.open(file, 'r') { |f| Checkm.checksum(f, alg) }
            expect(actual).to eq(checksum), "Wrong #{alg} checksum for #{file}, expected #{checksum}, was #{actual}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
module Text
  module Checkm
    describe Entry do
      describe :create do
        it 'should return a manifest line' do
          res = Entry.create('LICENSE.md')
          # The field separators must be escaped: a bare "|" is regex
          # alternation, which would make this pass for any string that
          # merely contains "LICENSE.md ". The digest, size and timestamp are
          # matched by shape so the example does not break whenever the
          # license text is edited.
          expect(res).to match(/\ALICENSE\.md \| md5 \| \h{32} \| \d+ \| \d{4}-\d{2}-\d{2}T/)
        end
      end

      describe :respond_to_missing? do
        it 'should return true for all base fields' do
          entry = Entry.new('book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9')
          Entry::BASE_FIELDS.each do |f|
            expect(entry.respond_to?(f.to_sym)).to eq(true)
          end
        end

        it 'should return true for all custom fields' do
          # Field names declared by the `#%fields` header of the fixture.
          expected_fields = %w[
            nfo:fileUrl
            nfo:hashAlgorithm
            nfo:hashValue
            nfo:fileSize
            nfo:fileLastModified
            nfo:fileName
            mrt:primaryIdentifier
            mrt:localIdentifier
            mrt:creator
            mrt:title
            mrt:date
          ]
          manifest = Manifest.parse(File.read('spec/data/merritt-manifest.checkm'))
          entry = manifest.entries[0]
          expected_fields.each do |f|
            expect(entry.respond_to?(f.to_sym)).to eq(true)
          end
        end
      end

      describe :valid do
        it 'handles multi-level manifests' do
          manifest = Manifest.parse(File.read('spec/data/two-level-manifest.checkm'), path: 'spec/data')
          # Third entry is the "@myfirst.checkm" include line.
          entry = manifest.entries[2]
          expect(entry.valid?).to be_truthy # TODO: something less hacky
        end
      end
    end
  end
end
@@ -0,0 +1,157 @@
1
+ require 'spec_helper'
2
+
3
module Text
  module Checkm
    describe Manifest do
      # --- parsing of manifests without entries ---

      it 'should be valid if empty' do
        res = Manifest.parse('')
        expect(res.entries).to be_empty
        expect(res).to be_valid
      end

      it 'should ignore comments' do
        res = Manifest.parse('#')
        expect(res.entries).to be_empty
        expect(res).to be_valid
      end

      it 'should parse the checkm version' do
        res = Manifest.parse('#%checkm_0.7')
        expect(res.entries).to be_empty
        expect(res).to be_valid
        expect(res.version).to eq('0.7')
      end

      # --- a single entry line ---

      describe 'simple checkm line' do
        let(:checkm) { 'book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9' }
        let(:result) { Manifest.parse(checkm) }
        let(:line) { result.entries.first }

        it 'should parse one entry' do
          expect(result.entries.size).to eq(1)
        end

        it 'should parse a checkm line' do
          expect(line.values[0]).to eq('book/Chapter9.xml')
          expect(line.values[1]).to eq('md5')
          expect(line.values[2]).to eq('49afbd86a1ca9f34b677a3f09655eae9')
        end

        it 'should allow name-based lookups' do
          expect(line.sourcefileorurl).to eq('book/Chapter9.xml')
          expect(line.alg).to eq('md5')
          expect(line.digest).to eq('49afbd86a1ca9f34b677a3f09655eae9')
        end
      end

      it 'should support custom field names' do
        checkm = <<~CHECKM.strip
          #%fields | testa | test b
          book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9
        CHECKM

        line = Manifest.parse(checkm).entries.first

        # Custom names and base names address the same columns.
        expect(line.sourcefileorurl).to eq('book/Chapter9.xml')
        expect(line.testa).to eq('book/Chapter9.xml')
        expect(line.alg).to eq('md5')
        expect(line.send(:'test b')).to eq('md5')
        expect(line.digest).to eq('49afbd86a1ca9f34b677a3f09655eae9')
      end

      # --- validation against the filesystem ---

      describe 'validity check' do
        it 'should be valid if the file exists' do
          res = Manifest.parse('1 | md5 | b026324c6904b2a9cb4b88d6d61c81d1', path: 'spec/data/test_1')
          expect(res.entries.size).to eq(1)
          expect(res).to be_valid
        end

        it 'should be valid if the directory exists' do
          res = Manifest.parse('test_1 | dir', path: 'spec/data')
          expect(res.entries.size).to eq(1)
          expect(res).to be_valid
        end

        it 'should be invalid if a file is missing' do
          res = Manifest.parse('2 | md5 | b026324c6904b2a9cb4b88d6d61c81d1', path: 'spec/data/test_1')
          expect(res.entries.size).to eq(1)
          expect(res).not_to be_valid
        end

        it 'should be invalid if the checksum is different' do
          res = Manifest.parse('1 | md5 | zzz', path: 'spec/data/test_1')
          expect(res.entries.size).to eq(1)
          expect(res).not_to be_valid
        end
      end

      # --- building and serializing ---

      describe 'manipulate manifest' do
        it 'should allow files to be added to an existing manifest' do
          res = Manifest.parse('').add('LICENSE.md')
          expect(res.entries.size).to eq(1)
          expect(res).to be_valid
        end
      end

      it 'should be serializable to a string' do
        serialized = Manifest.parse('').add('LICENSE.md').to_s
        lines = serialized.split "\n"
        expect(lines[0]).to eq('#%checkm_0.7')
        expect(lines[1]).to match(/^LICENSE\.md/)
      end

      it 'parses a two-level manifest' do
        entries = Manifest.parse(File.read('spec/data/two-level-manifest.checkm')).entries
        expect(entries.size).to eq(3)
        expect(entries[2].sourcefileorurl).to eq('@myfirst.checkm')
      end

      describe :remove do
        let(:original) { Manifest.parse(File.read('spec/data/two-level-manifest.checkm')) }
        # Eager (let!) so the removal has always happened before an example
        # inspects the original manifest.
        let!(:modified) { original.remove('foo.bar') }

        it 'removes entries by name' do
          expect(modified.entries.size).to eq(1)
          expect(modified.entries[0].sourcefileorurl).to eq('@myfirst.checkm')
        end

        it 'does not modify the original' do
          expect(original.entries.size).to eq(3)
          expect(original.entries[0].sourcefileorurl).to eq('foo.bar')
          expect(original.entries[1].sourcefileorurl).to eq('foo.bar')
        end
      end

      describe :to_h do
        it 'returns a hash of entries by source' do
          manifest = Manifest.parse(File.read('spec/data/two-level-manifest.checkm'))
          by_source = manifest.to_h
          manifest.entries.each do |entry|
            expect(by_source[entry.sourcefileorurl]).to include(entry)
          end
        end
      end
    end
  end
end