text-checkm 0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2 @@
1
+ require 'text/checkm/manifest'
2
+ require 'text/checkm/entry'
@@ -0,0 +1,40 @@
1
+ require 'digest'
2
+
3
module Text
  module Checkm
    class << self
      # Size (in bytes) of each chunk read while computing a checksum
      CHUNK_SIZE = 8 * 1024 * 1024

      # Compute the checksum 'alg' for a file.
      #
      # The return value is deliberately loose, because callers compare it
      # against either +true+ or the digest recorded in a manifest line.
      #
      # @param [IO, String, Pathname] file an open, readable handle (read from
      #   its current position and left open), or the path of a file, which is
      #   opened in binary mode and closed again before returning
      # @param [String, nil] alg md5, sha1, sha256, dir
      # @return [true] if no algorithm was given
      # @return [Boolean] for 'dir': whether +file+ is a directory
      # @return [false] if the algorithm is not recognized
      # @return [String] otherwise, the hex digest of the file content
      def checksum(file, alg) # TODO: don't pass file handles around
        return true unless alg # TODO: something less counterintuitive
        return File.directory?(file) if alg =~ /dir/

        digest_alg = digest_for(alg)
        return false unless digest_alg # TODO: something less counterintuitive

        # Anything that can be read in chunks is treated as an open handle;
        # everything else is taken to be a path.
        return digest_stream(file, digest_alg) if file.respond_to?(:readpartial)

        File.open(file, 'rb') { |io| digest_stream(io, digest_alg) }
      end

      private

      # Feeds the remaining content of +io+ to +digest_alg+, CHUNK_SIZE bytes
      # at a time, and returns the resulting hex digest.
      def digest_stream(io, digest_alg)
        digest_alg << io.readpartial(CHUNK_SIZE) until io.eof?
        digest_alg.hexdigest
      end

      # Returns a fresh digest object for the named algorithm, or nil if the
      # name matches none of the supported algorithms.
      def digest_for(alg)
        case alg
        when /md5/
          Digest::MD5.new
        when /sha1/
          Digest::SHA1.new
        when /sha256/
          Digest::SHA2.new(256)
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ require 'open-uri'
2
+ require 'time'
3
+
4
+ require 'text/checkm/checksum'
5
+
6
module Text
  module Checkm
    # One line of a Checkm manifest: a list of '|'-separated values that can
    # be read positionally (#values) or by field name (via method_missing),
    # using the owning manifest's custom field names when it declares any and
    # the BASE_FIELDS names otherwise.
    class Entry
      BASE_FIELDS = %w[sourcefileorurl alg digest length modtime targetfileorurl].freeze
      attr_reader :values

      # Builds the text of a manifest line for an existing file.
      #
      # @param [String] path path of the file, relative to args[:base]
      # @param [Hash] args options
      # @option args [String] :base directory +path+ is relative to (default: the working directory)
      # @option args [String] :alg checksum algorithm (default: 'md5')
      # @return [String] 'path | alg | checksum | size | modtime | ' (the target field is left empty)
      def self.create(path, args = {}) # TODO: why is this in this class?
        base = args[:base] || Dir.pwd
        alg = args[:alg] || 'md5'

        # Block form so the handle is closed again; binary mode so the bytes
        # that get checksummed are exactly the bytes on disk.
        File.open(File.join(base, path), 'rb') do |file|
          format('%s | %s | %s | %s | %s | %s', path, alg, Checkm.checksum(file, alg), File.size(file.path), file.mtime.utc.xmlschema, nil)
        end
      end

      # @param [String] line a single manifest line
      # @param [Manifest, nil] manifest the manifest the line belongs to; supplies
      #   custom field names and the base path used to locate the source
      def initialize(line, manifest = nil)
        @line = line.strip
        @include = false
        @fields = BASE_FIELDS
        @fields = manifest.fields if manifest && manifest.fields
        @values = line.split('|').map(&:strip)
        @manifest = manifest
      end

      # Resolves field names (case-insensitively) to the value in the matching
      # column. Names that are not fields go to the default implementation,
      # which raises NoMethodError.
      def method_missing(sym, *args, &block)
        # TODO: something less extreme
        index = field_index(sym)
        return super if index.nil?

        @values[index]
      end

      def respond_to_missing?(sym, *)
        !field_index(sym).nil?
      end

      # @return [Boolean, Manifest] a falsy value unless the source exists and
      #   its checksum matches; for an '@'-included manifest the parsed nested
      #   manifest is returned, so treat the result as truthy/falsy only
      def valid?
        source_exists? && valid_checksum? && valid_multilevel? # xxx && valid_length? && valid_modtime?
      end

      private

      # Column index for a field name: the manifest's custom names are tried
      # first, then BASE_FIELDS. Returns nil when the name is neither.
      def field_index(sym)
        name = sym.to_s.downcase
        @fields.index(name) || BASE_FIELDS.index(name)
      end

      # Path of the source file under the manifest's base path, with the '@'
      # include marker removed.
      def source
        file = sourcefileorurl
        file = file[1..] if file =~ /^@/
        File.join(@manifest.path, file)
      end

      def source_exists?
        File.exist? source
      end

      # True when Checkm.checksum reports +true+ (no algorithm given, or a
      # directory for 'dir') or returns exactly the recorded digest.
      def valid_checksum?
        checksum = File.open(source, 'rb') { |file| Checkm.checksum(file, alg) }
        [true, digest].include?(checksum) # TODO: something less counterintuitive
      end

      # def valid_length?
      #   throw NotImplementedError
      # end
      #
      # def valid_modtime?
      #   throw NotImplementedError
      # end

      # For an '@'-included manifest, reads and parses the nested manifest and
      # returns it; the nested manifest is only parsed, not validated.
      def valid_multilevel?
        return true unless sourcefileorurl =~ /^@/

        # The block form closes the underlying handle once the content is read.
        Manifest.parse(URI.open(source, &:read), path: File.dirname(source))
      end
    end
  end
end
@@ -0,0 +1,115 @@
1
+ require 'text/checkm/entry'
2
+
3
module Text
  module Checkm
    # A parsed Checkm manifest. Keeps the original lines (for #to_s) alongside
    # the Entry objects built from them. #add and #remove return new manifests
    # and leave the receiver untouched.
    class Manifest
      # @param [String] str the manifest text
      # @param [Hash] args options, see #initialize
      # @return [Manifest]
      def self.parse(str, args = {})
        Manifest.new str, args
      end

      attr_reader :version
      attr_reader :entries
      attr_reader :fields
      attr_reader :path

      # rubocop:disable Metrics/MethodLength

      # @param [String] checkm the manifest text
      # @param [Hash] args options
      # @option args [String] :path directory entry sources are resolved
      #   against (default: the working directory)
      def initialize(checkm, args = {})
        @args = args
        @version = nil
        @checkm = checkm
        @lines = checkm.split "\n"
        @entries = []
        @eof = false
        @fields = nil
        @path = args[:path] || Dir.pwd # TODO: something less hacky

        parse_lines
        # xxx error on empty entries?
        return unless @version.nil?

        # No version header found: prepend one so #to_s emits a complete manifest.
        @lines.unshift('#%checkm_0.7')
        @version = '0.7'
      end

      # rubocop:enable Metrics/MethodLength

      # @return [Boolean] true unless some entry reports itself invalid (an
      #   empty manifest is valid); stops at the first invalid entry
      def valid?
        # Entry#valid? may return a truthy non-boolean, so only an explicit
        # false counts as a failure.
        @entries.none? { |entry| entry.valid? == false }
      end

      # @param [String] path file to add, see Entry.create
      # @param [Hash] args options passed on to Entry.create
      # @return [Manifest] a new manifest with a line for +path+ appended
      def add(path, args = {})
        line = Entry.create path, args

        Manifest.new [@lines, line].flatten.join("\n"), @args
      end

      # @param [String] path literal source path; every line starting with it
      #   (optionally preceded by the '@' include marker) is dropped
      # @return [Manifest] a new manifest without the matching lines
      def remove(path)
        # Escape the path so characters such as '.', '(' or '+' match themselves.
        pattern = /^@?#{Regexp.escape(path)}/
        Manifest.new @lines.reject { |line| line.match?(pattern) }.join("\n"), @args
      end

      # @return [String] the manifest lines, including any version header added while parsing
      def to_s
        @lines.join("\n")
      end

      # @return [Hash{String => Array<Entry>}] entries grouped by their source field
      def to_h
        entries.group_by(&:sourcefileorurl)
      end

      private

      # Dispatches every line: '#%' headers, '#' comments, blank lines (skipped)
      # and everything else, including '@' includes, as entries.
      def parse_lines
        @lines.each do |line|
          case line
          when /^#%/ then parse_header line
          when /^#/ then parse_comment line
          when /^\s*$/ then nil # blank, whitespace-only, or a stray "\r" from CRLF input
          else parse_line line
          end
        end
      end

      # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
      def parse_header(line)
        case line
        when /^#%checkm/
          match = /^#%checkm_(\d+)\.(\d+)/.match line
          @version = "#{match[1]}.#{match[2]}" if match
        when /^#%eof/
          # Recorded only; nothing in this class reads the flag afterwards.
          @eof = true
        when /^#%fields/
          list = line.split('|')
          list.shift # drop the '#%fields' token itself
          @fields = list.map { |v| v.strip.downcase }
        when /^#%prefix/
          # do nothing
        when /^#%profile/
          # do nothing
        end
      end

      # rubocop:enable Metrics/MethodLength, Metrics/CyclomaticComplexity

      def parse_comment(_line)
        # do nothing
      end

      def parse_line(line)
        @entries << Entry.new(line, self)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Text
  module Checkm
    # Gem metadata, held as frozen string constants. The class is a pure
    # constant holder and cannot be instantiated.
    class ModuleInfo
      class << self
        # Same effect as `private_class_method :new`.
        private :new
      end

      # Identity
      NAME = 'text-checkm'.freeze
      AUTHOR = 'David Moles'.freeze
      AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze

      # Descriptions
      SUMMARY = 'Ruby implementation of the Checkm text manifest format'.freeze
      DESCRIPTION = 'A Ruby implementation of the Checkm checksum-based text manifest format'.freeze

      # Release information
      LICENSE = 'MIT'.freeze
      VERSION = '0.7'.freeze
      HOMEPAGE = 'https://github.com/dmolesUC/checkm'.freeze
    end
  end
end
@@ -0,0 +1,8 @@
1
namespace :bundle do
  desc 'Updates the ruby-advisory-db then runs bundle-audit'
  task :audit do
    # Loaded lazily so bundler-audit is only needed when this task runs.
    require 'bundler/audit/cli'

    # First refresh the advisory database, then check while ignoring CVE-2015-9284.
    [
      %w[update],
      %w[check --ignore CVE-2015-9284]
    ].each { |argv| Bundler::Audit::CLI.start argv }
  end
end
@@ -0,0 +1,5 @@
1
desc 'Run all specs in spec directory, with coverage'
task :coverage do
  # Respect a COVERAGE value that is already set; otherwise switch it on.
  ENV['COVERAGE'] = 'true' unless ENV['COVERAGE']
  Rake::Task[:spec].invoke
end
@@ -0,0 +1,16 @@
1
+ require 'rubocop'
2
+ require 'rubocop/rake_task'
3
+
4
desc 'Run rubocop with HTML output'
RuboCop::RakeTask.new(:rubocop) do |cop|
  # The report location can be overridden through RUBOCOP_OUTPUT.
  report_path = ENV.fetch('RUBOCOP_OUTPUT', 'artifacts/reports/rubocop/index.html')

  cop.formatters = %w[html]
  cop.options = ['--out', report_path]
end

desc 'Run RuboCop with auto-correct, and output results to console'
task :ra do
  # b/c we want console output, we can't just use `rubocop:auto_correct`
  cli = RuboCop::CLI.new
  cli.run(%w[--auto-correct])
end
@@ -0,0 +1,19 @@
1
+ inherit_from: ../.rubocop.yml
2
+
3
+ Style/ClassAndModuleChildren:
4
+ Enabled: false
5
+
6
+ Layout/LineLength:
7
+ Enabled: false
8
+
9
+ Metrics/BlockLength:
10
+ Enabled: false
11
+
12
+ Metrics/ClassLength:
13
+ Enabled: false
14
+
15
+ Metrics/ModuleLength:
16
+ Enabled: false
17
+
18
+ Metrics/MethodLength:
19
+ Enabled: false
@@ -0,0 +1,9 @@
1
+ #%checkm_0.7
2
+ #%profile | http://uc3.cdlib.org/registry/ingest/manifest/mrt-single-file-batch-manifest
3
+ #%prefix | mrt: | http://merritt.cdlib.org/terms#
4
+ #%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#
5
+ #%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:primaryIdentifier | mrt:localIdentifier | mrt:creator | mrt:title | mrt:date
6
+ http://merritt.cdlib.org/samples/goldenDragon.jpg | md5 | aa59e145dfb2237ecd8cb5ce4f4953ea | | | goldenDragon.jpg | | | Tracy Seneca | Golden Dragon Restaurant Sign | 2009-09-01T00:00:00
7
+ http://merritt.cdlib.org/samples/tumbleBug.jpg | md5 | 61224ef4f2a8dad552ff57980ed0bf49 | | | tumbleBug.jpg | | | Tracy Seneca | Tumble Bug, Luna Park. Sydney
8
+ http://merritt.cdlib.org/samples/generalDrapery.jpg | | | | | generalDrapery.jpg | | | | General Drapery
9
+ #%eof
@@ -0,0 +1,5 @@
1
+ #%checkm_0.7
2
+ # My first manifest. Two files total.
3
+ # Filename |Algorithm| Digest
4
+ book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9
5
+ images/r862.png | md5 | 408ad21d50cef31da4df6d9ed81b01a7
@@ -0,0 +1 @@
1
+ 1
@@ -0,0 +1,8 @@
1
+ #%checkm_0.7
2
+ # A two-level manifest.
3
+
4
+ #Filename|Alg |Checksum |Length
5
+ foo.bar |sha1|2eacd0da7aa89b094f5121eb2901bf4de2219ef1 | 366
6
+ foo.bar |md5 |3e83471320227c0797a0c251f28db0c5 | 366
7
+ # This next line "includes" the manifest in file "myfirst".
8
+ @myfirst.checkm |md5 |1ad6bcd5e4c49420ead2df43406d37fb | 218
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
module Text
  module Checkm
    describe :checksum do
      # Known digests of the fixture file, keyed by algorithm name.
      it 'calculates the checksum' do
        file = 'spec/data/test_1/1'
        expected = {
          'md5' => 'b026324c6904b2a9cb4b88d6d61c81d1',
          'sha1' => 'e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e',
          'sha256' => '4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865'
        }
        aggregate_failures 'checksums' do
          expected.each_pair do |alg, checksum|
            # A fresh handle per algorithm, so every digest starts at byte 0.
            actual = File.open(file, 'r') { |f| Checkm.checksum(f, alg) }
            expect(actual).to eq(checksum), "Wrong #{alg} checksum for #{file}, expected #{checksum}, was #{actual}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
module Text
  module Checkm
    describe Entry do
      describe :create do
        it 'should return a manifest line' do
          res = Entry.create('LICENSE.md')

          # The field separators must be escaped: a bare '|' is regex
          # alternation, which would let the expectation pass as soon as any
          # single fragment matched. The digest and size are taken from the
          # file itself so the assertion stays accurate when LICENSE.md changes.
          # (Digest is already loaded by the library's checksum code.)
          md5 = Digest::MD5.file('LICENSE.md').hexdigest
          size = File.size('LICENSE.md')
          expect(res).to match(/\ALICENSE\.md \| md5 \| #{md5} \| #{size} \| \d{4}/)
        end
      end

      describe :respond_to_missing? do
        it 'should return true for all base fields' do
          entry = Entry.new('book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9')
          Entry::BASE_FIELDS.each do |f|
            expect(entry.respond_to?(f.to_sym)).to eq(true)
          end
        end

        it 'should return true for all custom fields' do
          # Field names declared by the '#%fields' header of the fixture.
          expected_fields = %w[
            nfo:fileUrl
            nfo:hashAlgorithm
            nfo:hashValue
            nfo:fileSize
            nfo:fileLastModified
            nfo:fileName
            mrt:primaryIdentifier
            mrt:localIdentifier
            mrt:creator
            mrt:title
            mrt:date
          ]
          manifest = Manifest.parse(File.read('spec/data/merritt-manifest.checkm'))
          entry = manifest.entries[0]
          expected_fields.each do |f|
            expect(entry.respond_to?(f.to_sym)).to eq(true)
          end
        end
      end

      describe :valid do
        it 'handles multi-level manifests' do
          manifest = Manifest.parse(File.read('spec/data/two-level-manifest.checkm'), path: 'spec/data')
          # The third entry is the '@myfirst.checkm' include line.
          entry = manifest.entries[2]
          expect(entry.valid?).to be_truthy # TODO: something less hacky
        end
      end
    end
  end
end
@@ -0,0 +1,157 @@
1
+ require 'spec_helper'
2
+
3
module Text
  module Checkm
    describe Manifest do
      # Text of the fixture that lists foo.bar twice and includes a second manifest.
      let(:two_level_text) { File.read('spec/data/two-level-manifest.checkm') }

      it 'should be valid if empty' do
        manifest = Manifest.parse('')
        expect(manifest.entries).to be_empty
        expect(manifest).to be_valid
      end

      it 'should ignore comments' do
        manifest = Manifest.parse('#')
        expect(manifest.entries).to be_empty
        expect(manifest).to be_valid
      end

      it 'should parse the checkm version' do
        manifest = Manifest.parse('#%checkm_0.7')
        expect(manifest.entries).to be_empty
        expect(manifest).to be_valid
        expect(manifest.version).to eq('0.7')
      end

      describe 'simple checkm line' do
        let(:checkm) { 'book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9' }
        let(:result) { Manifest.parse(checkm) }
        let(:line) { result.entries.first }

        it 'should parse one entry' do
          expect(result.entries.size).to eq(1)
        end

        it 'should parse a checkm line' do
          source, alg, digest = line.values
          expect(source).to eq('book/Chapter9.xml')
          expect(alg).to eq('md5')
          expect(digest).to eq('49afbd86a1ca9f34b677a3f09655eae9')
        end

        it 'should allow name-based lookups' do
          expect(line.sourcefileorurl).to eq('book/Chapter9.xml')
          expect(line.alg).to eq('md5')
          expect(line.digest).to eq('49afbd86a1ca9f34b677a3f09655eae9')
        end
      end

      it 'should support custom field names' do
        checkm = <<~CHECKM.strip
          #%fields | testa | test b
          book/Chapter9.xml | md5 | 49afbd86a1ca9f34b677a3f09655eae9
        CHECKM

        line = Manifest.parse(checkm).entries.first

        # Custom names and base names address the same columns.
        expect(line.sourcefileorurl).to eq('book/Chapter9.xml')
        expect(line.testa).to eq('book/Chapter9.xml')
        expect(line.alg).to eq('md5')
        expect(line.send(:'test b')).to eq('md5')
        expect(line.digest).to eq('49afbd86a1ca9f34b677a3f09655eae9')
      end

      describe 'validity check' do
        it 'should be valid if the file exists' do
          manifest = Manifest.parse('1 | md5 | b026324c6904b2a9cb4b88d6d61c81d1', path: 'spec/data/test_1')
          expect(manifest.entries.size).to eq(1)
          expect(manifest).to be_valid
        end

        it 'should be valid if the directory exists' do
          manifest = Manifest.parse('test_1 | dir', path: 'spec/data')
          expect(manifest.entries.size).to eq(1)
          expect(manifest).to be_valid
        end

        it 'should be invalid if a file is missing' do
          manifest = Manifest.parse('2 | md5 | b026324c6904b2a9cb4b88d6d61c81d1', path: 'spec/data/test_1')
          expect(manifest.entries.size).to eq(1)
          expect(manifest).not_to be_valid
        end

        it 'should be invalid if the checksum is different' do
          manifest = Manifest.parse('1 | md5 | zzz', path: 'spec/data/test_1')
          expect(manifest.entries.size).to eq(1)
          expect(manifest).not_to be_valid
        end
      end

      describe 'manipulate manifest' do
        it 'should allow files to be added to an existing manifest' do
          extended = Manifest.parse('').add('LICENSE.md')
          expect(extended.entries.size).to eq(1)
          expect(extended).to be_valid
        end
      end

      it 'should be serializable to a string' do
        serialized = Manifest.parse('').add('LICENSE.md').to_s
        header, first_entry = serialized.split "\n"
        expect(header).to eq('#%checkm_0.7')
        expect(first_entry).to match(/^LICENSE\.md/)
      end

      it 'parses a two-level manifest' do
        entries = Manifest.parse(two_level_text).entries
        expect(entries.size).to eq(3)
        expect(entries[2].sourcefileorurl).to eq('@myfirst.checkm')
      end

      describe :remove do
        let(:original) { Manifest.parse(two_level_text) }
        # Eager, so the removal has always happened before an example inspects
        # the original manifest.
        let!(:modified) { original.remove('foo.bar') }

        it 'removes entries by name' do
          expect(modified.entries.size).to eq(1)
          expect(modified.entries[0].sourcefileorurl).to eq('@myfirst.checkm')
        end

        it 'does not modify the original' do
          expect(original.entries.size).to eq(3)
          expect(original.entries[0].sourcefileorurl).to eq('foo.bar')
          expect(original.entries[1].sourcefileorurl).to eq('foo.bar')
        end
      end

      describe :to_h do
        it 'returns a hash of entries by source' do
          manifest = Manifest.parse(two_level_text)
          by_source = manifest.to_h
          manifest.entries.each do |entry|
            expect(by_source[entry.sourcefileorurl]).to include(entry)
          end
        end
      end
    end
  end
end