fingerprint 1.4.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.travis.yml +23 -0
- data/GUIDE.md +233 -0
- data/Gemfile +1 -1
- data/README.md +113 -18
- data/Rakefile +6 -0
- data/bin/fingerprint +6 -197
- data/fingerprint.gemspec +8 -7
- data/lib/fingerprint.rb +16 -3
- data/lib/fingerprint/checker.rb +7 -7
- data/lib/fingerprint/checksums.rb +33 -0
- data/lib/fingerprint/command.rb +90 -0
- data/lib/fingerprint/command/analyze.rb +74 -0
- data/lib/fingerprint/command/compare.rb +55 -0
- data/lib/fingerprint/command/duplicates.rb +97 -0
- data/lib/fingerprint/command/scan.rb +61 -0
- data/lib/fingerprint/command/verify.rb +86 -0
- data/lib/fingerprint/find.rb +40 -0
- data/lib/fingerprint/record.rb +20 -1
- data/lib/fingerprint/scanner.rb +134 -91
- data/lib/fingerprint/version.rb +2 -3
- data/spec/fingerprint/command/analyze_spec.rb +48 -0
- data/spec/fingerprint/command/duplicates_spec.rb +53 -0
- data/spec/fingerprint/command/source_fingerprint.rb +32 -0
- data/spec/fingerprint/command/verify_spec.rb +39 -0
- data/spec/fingerprint/corpus/README.md +3 -0
- data/spec/fingerprint/scanner_spec.rb +67 -0
- data/{test/test_fingerprint.rb → spec/fingerprint_spec.rb} +8 -36
- data/spec/spec_helper.rb +31 -0
- metadata +88 -23
- data/bin/fingerprint-diff +0 -145
- data/rakefile.rb +0 -9
data/lib/fingerprint/scanner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright, 2011, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
2
|
#
|
3
3
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
# of this software and associated documentation files (the "Software"), to deal
|
@@ -19,28 +19,33 @@
|
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
21
|
require 'stringio'
|
22
|
-
require 'find'
|
23
22
|
require 'etc'
|
24
23
|
require 'digest/sha2'
|
25
24
|
|
26
|
-
|
25
|
+
require_relative 'find'
|
26
|
+
require_relative 'record'
|
27
|
+
require_relative 'version'
|
27
28
|
|
29
|
+
module Fingerprint
|
30
|
+
INDEX_FINGERPRINT = "index.fingerprint"
|
31
|
+
|
28
32
|
CHECKSUMS = {
|
29
33
|
'MD5' => lambda { Digest::MD5.new },
|
30
34
|
'SHA1' => lambda { Digest::SHA1.new },
|
31
35
|
'SHA2.256' => lambda { Digest::SHA2.new(256) },
|
36
|
+
'SHA2.384' => lambda { Digest::SHA2.new(384) },
|
32
37
|
'SHA2.512' => lambda { Digest::SHA2.new(512) },
|
33
38
|
}
|
34
39
|
|
35
|
-
DEFAULT_CHECKSUMS = ['
|
40
|
+
DEFAULT_CHECKSUMS = ['SHA2.256']
|
36
41
|
|
37
42
|
# The scanner class can scan a set of directories and produce an index.
|
38
43
|
class Scanner
|
39
44
|
# Initialize the scanner to scan a given set of directories in order.
|
40
45
|
# [+options[:excludes]+] An array of regular expressions of files to avoid indexing.
|
41
46
|
# [+options[:output]+] An +IO+ where the results will be written.
|
42
|
-
def initialize(roots,
|
43
|
-
@roots = roots
|
47
|
+
def initialize(roots, pwd: Dir.pwd, **options)
|
48
|
+
@roots = roots.collect{|root| File.expand_path(root, pwd)}
|
44
49
|
|
45
50
|
@excludes = options[:excludes] || []
|
46
51
|
@options = options
|
@@ -84,14 +89,14 @@ module Fingerprint
|
|
84
89
|
end
|
85
90
|
|
86
91
|
File.open(path, "rb") do |file|
|
87
|
-
|
88
|
-
while file.read(1024 * 1024 * 10,
|
89
|
-
total +=
|
92
|
+
buffer = ""
|
93
|
+
while file.read(1024 * 1024 * 10, buffer)
|
94
|
+
total += buffer.bytesize
|
90
95
|
|
91
96
|
@progress.call(total) if @progress
|
92
97
|
|
93
98
|
@digests.each do |key, digest|
|
94
|
-
digest <<
|
99
|
+
digest << buffer
|
95
100
|
end
|
96
101
|
end
|
97
102
|
end
|
@@ -106,35 +111,61 @@ module Fingerprint
|
|
106
111
|
end
|
107
112
|
|
108
113
|
def metadata_for(type, path)
|
109
|
-
stat = File.stat(path)
|
110
114
|
metadata = {}
|
115
|
+
|
116
|
+
if type == :link
|
117
|
+
metadata['file.symlink'] = File.readlink(path)
|
118
|
+
else
|
119
|
+
stat = File.stat(path)
|
120
|
+
|
121
|
+
if type == :file
|
122
|
+
metadata['file.size'] = stat.size
|
123
|
+
digests = digests_for(path)
|
124
|
+
metadata.merge!(digests)
|
125
|
+
elsif type == :blockdev or type == :chardev
|
126
|
+
metadata['file.dev_major'] = stat.dev_major
|
127
|
+
metadata['file.dev_minor'] = stat.dev_minor
|
128
|
+
end
|
111
129
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
metadata.merge!(digests)
|
116
|
-
end
|
117
|
-
|
118
|
-
# Extended information
|
119
|
-
if @options[:extended]
|
120
|
-
metadata['posix.time.modified'] = File.mtime(path)
|
130
|
+
# Extended information
|
131
|
+
if @options[:extended]
|
132
|
+
metadata['posix.time.modified'] = File.mtime(path)
|
121
133
|
|
122
|
-
|
134
|
+
metadata['posix.mode'] = stat.mode.to_s(8)
|
123
135
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
136
|
+
metadata['posix.permissions.user.id'] = stat.uid
|
137
|
+
metadata['posix.permissions.user.name'] = Etc.getpwuid(stat.uid).name
|
138
|
+
metadata['posix.permissions.group.id'] = stat.gid
|
139
|
+
metadata['posix.permissions.group.name'] = Etc.getgrgid(stat.gid).name
|
140
|
+
end
|
128
141
|
end
|
129
|
-
|
142
|
+
|
130
143
|
return metadata
|
131
144
|
end
|
132
145
|
|
133
146
|
# Output a directory header.
|
134
147
|
def directory_record_for(path)
|
135
|
-
Record.new(:directory, path, metadata_for(:directory, path))
|
148
|
+
Record.new(:directory, path.relative_path, metadata_for(:directory, path))
|
136
149
|
end
|
137
150
|
|
151
|
+
def link_record_for(path)
|
152
|
+
metadata = metadata_for(:link, path)
|
153
|
+
|
154
|
+
Record.new(:link, path.relative_path, metadata)
|
155
|
+
end
|
156
|
+
|
157
|
+
def blockdev_record_for(path)
|
158
|
+
metadata = metadata_for(:blockdev, path)
|
159
|
+
|
160
|
+
Record.new(:blockdev, path.relative_path, metadata)
|
161
|
+
end
|
162
|
+
|
163
|
+
def chardev_record_for(path)
|
164
|
+
metadata = metadata_for(:chardev, path)
|
165
|
+
|
166
|
+
Record.new(:chardev, path.relative_path, metadata)
|
167
|
+
end
|
168
|
+
|
138
169
|
# Output a file and associated metadata.
|
139
170
|
def file_record_for(path)
|
140
171
|
metadata = metadata_for(:file, path)
|
@@ -142,12 +173,30 @@ module Fingerprint
|
|
142
173
|
# Should this be here or in metadata_for?
|
143
174
|
# metadata.merge!(digests_for(path))
|
144
175
|
|
145
|
-
Record.new(:file, path, metadata)
|
176
|
+
Record.new(:file, path.relative_path, metadata)
|
146
177
|
end
|
147
178
|
|
148
179
|
# Add information about excluded paths.
|
149
180
|
def excluded_record_for(path)
|
150
|
-
Record.new(:excluded, path)
|
181
|
+
Record.new(:excluded, path.relative_path)
|
182
|
+
end
|
183
|
+
|
184
|
+
def record_for(path)
|
185
|
+
stat = File.stat(path)
|
186
|
+
|
187
|
+
if stat.symlink?
|
188
|
+
return link_record_for(path)
|
189
|
+
elsif stat.blockdev?
|
190
|
+
return blockdev_record_for(path)
|
191
|
+
elsif stat.chardev?
|
192
|
+
return chardev_record_for(path)
|
193
|
+
elsif stat.socket?
|
194
|
+
return socket_record_for(path)
|
195
|
+
elsif stat.file?
|
196
|
+
return file_record_for(path)
|
197
|
+
end
|
198
|
+
rescue Errno::ENOENT
|
199
|
+
return nil
|
151
200
|
end
|
152
201
|
|
153
202
|
public
|
@@ -163,17 +212,13 @@ module Fingerprint
|
|
163
212
|
return false
|
164
213
|
end
|
165
214
|
|
166
|
-
def valid_file?(path)
|
167
|
-
!(excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path))
|
168
|
-
end
|
169
|
-
|
170
215
|
def scan_path(path)
|
216
|
+
return nil if excluded?(path)
|
217
|
+
|
171
218
|
@roots.each do |root|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
end
|
176
|
-
end
|
219
|
+
full_path = Build::Files::Path.join(root, path)
|
220
|
+
|
221
|
+
return record_for(full_path)
|
177
222
|
end
|
178
223
|
|
179
224
|
return nil
|
@@ -192,23 +237,21 @@ module Fingerprint
|
|
192
237
|
# Estimate the number of files and amount of data to process..
|
193
238
|
if @options[:progress]
|
194
239
|
@roots.each do |root|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
end
|
211
|
-
end
|
240
|
+
Find.find(root) do |path|
|
241
|
+
# Some special files fail here, and this was the simplest fix.
|
242
|
+
Find.prune unless File.exist?(path)
|
243
|
+
|
244
|
+
if @options[:progress]
|
245
|
+
$stderr.puts "# Scanning: #{path}"
|
246
|
+
end
|
247
|
+
|
248
|
+
if excluded?(path)
|
249
|
+
Find.prune if path.directory?
|
250
|
+
elsif path.symlink?
|
251
|
+
total_count += 1
|
252
|
+
elsif path.file?
|
253
|
+
total_count += 1
|
254
|
+
total_size += File.size(path)
|
212
255
|
end
|
213
256
|
end
|
214
257
|
end
|
@@ -221,47 +264,47 @@ module Fingerprint
|
|
221
264
|
end
|
222
265
|
|
223
266
|
@roots.each do |root|
|
224
|
-
|
225
|
-
|
267
|
+
recordset << header_for(root)
|
268
|
+
|
269
|
+
Find.find(root) do |path|
|
270
|
+
# Some special files fail here, and this was the simplest fix.
|
271
|
+
Find.prune unless File.exist?(path)
|
226
272
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
273
|
+
if @options[:progress]
|
274
|
+
$stderr.puts "# Path: #{path.relative_path}"
|
275
|
+
end
|
276
|
+
|
277
|
+
if excluded?(path)
|
278
|
+
excluded_count += 1
|
231
279
|
|
232
|
-
if
|
233
|
-
|
234
|
-
excluded_count += 1
|
235
|
-
|
236
|
-
if @options[:verbose]
|
237
|
-
recordset << excluded_record_for(path)
|
238
|
-
end
|
239
|
-
|
240
|
-
Find.prune # Ignore this directory
|
241
|
-
else
|
242
|
-
directory_count += 1
|
243
|
-
|
244
|
-
recordset << directory_record_for(path)
|
245
|
-
end
|
246
|
-
else
|
247
|
-
# Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
|
248
|
-
if valid_file?(path)
|
249
|
-
recordset << file_record_for(path)
|
250
|
-
|
251
|
-
processed_count += 1
|
252
|
-
processed_size += File.size(path)
|
253
|
-
else
|
254
|
-
excluded_count += 1
|
255
|
-
|
256
|
-
if @options[:verbose]
|
257
|
-
recordset << excluded_record_for(path)
|
258
|
-
end
|
259
|
-
end
|
280
|
+
if @options[:verbose]
|
281
|
+
recordset << excluded_record_for(path)
|
260
282
|
end
|
261
283
|
|
262
|
-
|
263
|
-
|
284
|
+
Find.prune if path.directory?
|
285
|
+
elsif path.directory?
|
286
|
+
directory_count += 1
|
287
|
+
|
288
|
+
recordset << directory_record_for(path)
|
289
|
+
elsif path.symlink?
|
290
|
+
recordset << link_record_for(path)
|
291
|
+
|
292
|
+
processed_count += 1
|
293
|
+
elsif path.file?
|
294
|
+
recordset << file_record_for(path)
|
295
|
+
|
296
|
+
processed_count += 1
|
297
|
+
processed_size += File.size(path)
|
298
|
+
else
|
299
|
+
excluded_count += 1
|
300
|
+
|
301
|
+
if @options[:verbose]
|
302
|
+
recordset << excluded_record_for(path)
|
303
|
+
end
|
264
304
|
end
|
305
|
+
|
306
|
+
# Print out a progress summary if requested
|
307
|
+
@progress.call(0) if @progress
|
265
308
|
end
|
266
309
|
end
|
267
310
|
|
data/lib/fingerprint/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright, 2011, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
2
|
#
|
3
3
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
# of this software and associated documentation files (the "Software"), to deal
|
@@ -18,7 +18,6 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
|
-
|
22
21
|
module Fingerprint
|
23
|
-
VERSION = "
|
22
|
+
VERSION = "3.0.1"
|
24
23
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env rspec
|
2
|
+
|
3
|
+
# Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'fingerprint/command'
|
24
|
+
|
25
|
+
require_relative 'source_fingerprint'
|
26
|
+
|
27
|
+
RSpec.describe Fingerprint::Command::Analyze do
|
28
|
+
include_context "source fingerprint"
|
29
|
+
|
30
|
+
it "should analyze and verify files" do
|
31
|
+
Fingerprint::Command::Top["analyze", "-f"].call
|
32
|
+
|
33
|
+
expect(File).to be_exist(Fingerprint::INDEX_FINGERPRINT)
|
34
|
+
|
35
|
+
top = Fingerprint::Command::Top["verify"].tap(&:call)
|
36
|
+
|
37
|
+
expect(top.command.error_count).to be == 0
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should analyze path but exclude specified file" do
|
41
|
+
Fingerprint::Command::Top["analyze", "-n", fingerprint_name, "-f", source_directory].call
|
42
|
+
|
43
|
+
expect(File).to be_exist(fingerprint_name)
|
44
|
+
|
45
|
+
record_set = Fingerprint::RecordSet.load_file(fingerprint_name)
|
46
|
+
expect(record_set).to_not be_include(File.basename(fingerprint_name))
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env rspec
|
2
|
+
|
3
|
+
# Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'fingerprint/command'
|
24
|
+
|
25
|
+
require_relative 'source_fingerprint'
|
26
|
+
|
27
|
+
RSpec.describe Fingerprint::Command::Duplicates do
|
28
|
+
include_context "source fingerprint"
|
29
|
+
|
30
|
+
it "should have no duplicates" do
|
31
|
+
Fingerprint::Command::Top["analyze", "-n", fingerprint_name, "-f", source_directory].call
|
32
|
+
|
33
|
+
expect(File).to be_exist(fingerprint_name)
|
34
|
+
|
35
|
+
record_set = Fingerprint::RecordSet.load_file(fingerprint_name)
|
36
|
+
|
37
|
+
top = Fingerprint::Command::Top["duplicates", fingerprint_name].tap(&:call)
|
38
|
+
|
39
|
+
expect(top.command.duplicates_recordset).to be_empty
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should have duplicates" do
|
43
|
+
Fingerprint::Command::Top["analyze", "-n", fingerprint_name, "-f", source_directory].call
|
44
|
+
|
45
|
+
expect(File).to be_exist(fingerprint_name)
|
46
|
+
|
47
|
+
record_set = Fingerprint::RecordSet.load_file(fingerprint_name)
|
48
|
+
|
49
|
+
top = Fingerprint::Command::Top["duplicates", fingerprint_name, fingerprint_name].tap(&:call)
|
50
|
+
|
51
|
+
expect(top.command.duplicates_recordset).to_not be_empty
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env rspec
|
2
|
+
|
3
|
+
# Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'fileutils'
|
24
|
+
|
25
|
+
RSpec.shared_context "source fingerprint" do
|
26
|
+
let(:source_directory) {File.expand_path("../../../lib", __dir__)}
|
27
|
+
let(:fingerprint_name) {File.join(__dir__, "test.fingerprint")}
|
28
|
+
|
29
|
+
after(:each) do
|
30
|
+
FileUtils.rm_rf(fingerprint_name)
|
31
|
+
end
|
32
|
+
end
|