fingerprint 1.4.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.travis.yml +23 -0
- data/GUIDE.md +233 -0
- data/Gemfile +1 -1
- data/README.md +113 -18
- data/Rakefile +6 -0
- data/bin/fingerprint +6 -197
- data/fingerprint.gemspec +8 -7
- data/lib/fingerprint.rb +16 -3
- data/lib/fingerprint/checker.rb +7 -7
- data/lib/fingerprint/checksums.rb +33 -0
- data/lib/fingerprint/command.rb +90 -0
- data/lib/fingerprint/command/analyze.rb +74 -0
- data/lib/fingerprint/command/compare.rb +55 -0
- data/lib/fingerprint/command/duplicates.rb +97 -0
- data/lib/fingerprint/command/scan.rb +61 -0
- data/lib/fingerprint/command/verify.rb +86 -0
- data/lib/fingerprint/find.rb +40 -0
- data/lib/fingerprint/record.rb +20 -1
- data/lib/fingerprint/scanner.rb +134 -91
- data/lib/fingerprint/version.rb +2 -3
- data/spec/fingerprint/command/analyze_spec.rb +48 -0
- data/spec/fingerprint/command/duplicates_spec.rb +53 -0
- data/spec/fingerprint/command/source_fingerprint.rb +32 -0
- data/spec/fingerprint/command/verify_spec.rb +39 -0
- data/spec/fingerprint/corpus/README.md +3 -0
- data/spec/fingerprint/scanner_spec.rb +67 -0
- data/{test/test_fingerprint.rb → spec/fingerprint_spec.rb} +8 -36
- data/spec/spec_helper.rb +31 -0
- metadata +88 -23
- data/bin/fingerprint-diff +0 -145
- data/rakefile.rb +0 -9
data/lib/fingerprint/scanner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright, 2011, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
2
|
#
|
3
3
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
# of this software and associated documentation files (the "Software"), to deal
|
@@ -19,28 +19,33 @@
|
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
21
|
require 'stringio'
|
22
|
-
require 'find'
|
23
22
|
require 'etc'
|
24
23
|
require 'digest/sha2'
|
25
24
|
|
26
|
-
|
25
|
+
require_relative 'find'
|
26
|
+
require_relative 'record'
|
27
|
+
require_relative 'version'
|
27
28
|
|
29
|
+
module Fingerprint
|
30
|
+
INDEX_FINGERPRINT = "index.fingerprint"
|
31
|
+
|
28
32
|
CHECKSUMS = {
|
29
33
|
'MD5' => lambda { Digest::MD5.new },
|
30
34
|
'SHA1' => lambda { Digest::SHA1.new },
|
31
35
|
'SHA2.256' => lambda { Digest::SHA2.new(256) },
|
36
|
+
'SHA2.384' => lambda { Digest::SHA2.new(384) },
|
32
37
|
'SHA2.512' => lambda { Digest::SHA2.new(512) },
|
33
38
|
}
|
34
39
|
|
35
|
-
DEFAULT_CHECKSUMS = ['
|
40
|
+
DEFAULT_CHECKSUMS = ['SHA2.256']
|
36
41
|
|
37
42
|
# The scanner class can scan a set of directories and produce an index.
|
38
43
|
class Scanner
|
39
44
|
# Initialize the scanner to scan a given set of directories in order.
|
40
45
|
# [+options[:excludes]+] An array of regular expressions of files to avoid indexing.
|
41
46
|
# [+options[:output]+] An +IO+ where the results will be written.
|
42
|
-
def initialize(roots,
|
43
|
-
@roots = roots
|
47
|
+
def initialize(roots, pwd: Dir.pwd, **options)
|
48
|
+
@roots = roots.collect{|root| File.expand_path(root, pwd)}
|
44
49
|
|
45
50
|
@excludes = options[:excludes] || []
|
46
51
|
@options = options
|
@@ -84,14 +89,14 @@ module Fingerprint
|
|
84
89
|
end
|
85
90
|
|
86
91
|
File.open(path, "rb") do |file|
|
87
|
-
|
88
|
-
while file.read(1024 * 1024 * 10,
|
89
|
-
total +=
|
92
|
+
buffer = ""
|
93
|
+
while file.read(1024 * 1024 * 10, buffer)
|
94
|
+
total += buffer.bytesize
|
90
95
|
|
91
96
|
@progress.call(total) if @progress
|
92
97
|
|
93
98
|
@digests.each do |key, digest|
|
94
|
-
digest <<
|
99
|
+
digest << buffer
|
95
100
|
end
|
96
101
|
end
|
97
102
|
end
|
@@ -106,35 +111,61 @@ module Fingerprint
|
|
106
111
|
end
|
107
112
|
|
108
113
|
def metadata_for(type, path)
|
109
|
-
stat = File.stat(path)
|
110
114
|
metadata = {}
|
115
|
+
|
116
|
+
if type == :link
|
117
|
+
metadata['file.symlink'] = File.readlink(path)
|
118
|
+
else
|
119
|
+
stat = File.stat(path)
|
120
|
+
|
121
|
+
if type == :file
|
122
|
+
metadata['file.size'] = stat.size
|
123
|
+
digests = digests_for(path)
|
124
|
+
metadata.merge!(digests)
|
125
|
+
elsif type == :blockdev or type == :chardev
|
126
|
+
metadata['file.dev_major'] = stat.dev_major
|
127
|
+
metadata['file.dev_minor'] = stat.dev_minor
|
128
|
+
end
|
111
129
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
metadata.merge!(digests)
|
116
|
-
end
|
117
|
-
|
118
|
-
# Extended information
|
119
|
-
if @options[:extended]
|
120
|
-
metadata['posix.time.modified'] = File.mtime(path)
|
130
|
+
# Extended information
|
131
|
+
if @options[:extended]
|
132
|
+
metadata['posix.time.modified'] = File.mtime(path)
|
121
133
|
|
122
|
-
|
134
|
+
metadata['posix.mode'] = stat.mode.to_s(8)
|
123
135
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
136
|
+
metadata['posix.permissions.user.id'] = stat.uid
|
137
|
+
metadata['posix.permissions.user.name'] = Etc.getpwuid(stat.uid).name
|
138
|
+
metadata['posix.permissions.group.id'] = stat.gid
|
139
|
+
metadata['posix.permissions.group.name'] = Etc.getgrgid(stat.gid).name
|
140
|
+
end
|
128
141
|
end
|
129
|
-
|
142
|
+
|
130
143
|
return metadata
|
131
144
|
end
|
132
145
|
|
133
146
|
# Output a directory header.
|
134
147
|
def directory_record_for(path)
|
135
|
-
Record.new(:directory, path, metadata_for(:directory, path))
|
148
|
+
Record.new(:directory, path.relative_path, metadata_for(:directory, path))
|
136
149
|
end
|
137
150
|
|
151
|
+
def link_record_for(path)
|
152
|
+
metadata = metadata_for(:link, path)
|
153
|
+
|
154
|
+
Record.new(:link, path.relative_path, metadata)
|
155
|
+
end
|
156
|
+
|
157
|
+
def blockdev_record_for(path)
|
158
|
+
metadata = metadata_for(:blockdev, path)
|
159
|
+
|
160
|
+
Record.new(:blockdev, path.relative_path, metadata)
|
161
|
+
end
|
162
|
+
|
163
|
+
def chardev_record_for(path)
|
164
|
+
metadata = metadata_for(:chardev, path)
|
165
|
+
|
166
|
+
Record.new(:chardev, path.relative_path, metadata)
|
167
|
+
end
|
168
|
+
|
138
169
|
# Output a file and associated metadata.
|
139
170
|
def file_record_for(path)
|
140
171
|
metadata = metadata_for(:file, path)
|
@@ -142,12 +173,30 @@ module Fingerprint
|
|
142
173
|
# Should this be here or in metadata_for?
|
143
174
|
# metadata.merge!(digests_for(path))
|
144
175
|
|
145
|
-
Record.new(:file, path, metadata)
|
176
|
+
Record.new(:file, path.relative_path, metadata)
|
146
177
|
end
|
147
178
|
|
148
179
|
# Add information about excluded paths.
|
149
180
|
def excluded_record_for(path)
|
150
|
-
Record.new(:excluded, path)
|
181
|
+
Record.new(:excluded, path.relative_path)
|
182
|
+
end
|
183
|
+
|
184
|
+
def record_for(path)
|
185
|
+
stat = File.stat(path)
|
186
|
+
|
187
|
+
if stat.symlink?
|
188
|
+
return link_record_for(path)
|
189
|
+
elsif stat.blockdev?
|
190
|
+
return blockdev_record_for(path)
|
191
|
+
elsif stat.chardev?
|
192
|
+
return chardev_record_for(path)
|
193
|
+
elsif stat.socket?
|
194
|
+
return socket_record_for(path)
|
195
|
+
elsif stat.file?
|
196
|
+
return file_record_for(path)
|
197
|
+
end
|
198
|
+
rescue Errno::ENOENT
|
199
|
+
return nil
|
151
200
|
end
|
152
201
|
|
153
202
|
public
|
@@ -163,17 +212,13 @@ module Fingerprint
|
|
163
212
|
return false
|
164
213
|
end
|
165
214
|
|
166
|
-
def valid_file?(path)
|
167
|
-
!(excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path))
|
168
|
-
end
|
169
|
-
|
170
215
|
def scan_path(path)
|
216
|
+
return nil if excluded?(path)
|
217
|
+
|
171
218
|
@roots.each do |root|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
end
|
176
|
-
end
|
219
|
+
full_path = Build::Files::Path.join(root, path)
|
220
|
+
|
221
|
+
return record_for(full_path)
|
177
222
|
end
|
178
223
|
|
179
224
|
return nil
|
@@ -192,23 +237,21 @@ module Fingerprint
|
|
192
237
|
# Estimate the number of files and amount of data to process..
|
193
238
|
if @options[:progress]
|
194
239
|
@roots.each do |root|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
end
|
211
|
-
end
|
240
|
+
Find.find(root) do |path|
|
241
|
+
# Some special files fail here, and this was the simplest fix.
|
242
|
+
Find.prune unless File.exist?(path)
|
243
|
+
|
244
|
+
if @options[:progress]
|
245
|
+
$stderr.puts "# Scanning: #{path}"
|
246
|
+
end
|
247
|
+
|
248
|
+
if excluded?(path)
|
249
|
+
Find.prune if path.directory?
|
250
|
+
elsif path.symlink?
|
251
|
+
total_count += 1
|
252
|
+
elsif path.file?
|
253
|
+
total_count += 1
|
254
|
+
total_size += File.size(path)
|
212
255
|
end
|
213
256
|
end
|
214
257
|
end
|
@@ -221,47 +264,47 @@ module Fingerprint
|
|
221
264
|
end
|
222
265
|
|
223
266
|
@roots.each do |root|
|
224
|
-
|
225
|
-
|
267
|
+
recordset << header_for(root)
|
268
|
+
|
269
|
+
Find.find(root) do |path|
|
270
|
+
# Some special files fail here, and this was the simplest fix.
|
271
|
+
Find.prune unless File.exist?(path)
|
226
272
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
273
|
+
if @options[:progress]
|
274
|
+
$stderr.puts "# Path: #{path.relative_path}"
|
275
|
+
end
|
276
|
+
|
277
|
+
if excluded?(path)
|
278
|
+
excluded_count += 1
|
231
279
|
|
232
|
-
if
|
233
|
-
|
234
|
-
excluded_count += 1
|
235
|
-
|
236
|
-
if @options[:verbose]
|
237
|
-
recordset << excluded_record_for(path)
|
238
|
-
end
|
239
|
-
|
240
|
-
Find.prune # Ignore this directory
|
241
|
-
else
|
242
|
-
directory_count += 1
|
243
|
-
|
244
|
-
recordset << directory_record_for(path)
|
245
|
-
end
|
246
|
-
else
|
247
|
-
# Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
|
248
|
-
if valid_file?(path)
|
249
|
-
recordset << file_record_for(path)
|
250
|
-
|
251
|
-
processed_count += 1
|
252
|
-
processed_size += File.size(path)
|
253
|
-
else
|
254
|
-
excluded_count += 1
|
255
|
-
|
256
|
-
if @options[:verbose]
|
257
|
-
recordset << excluded_record_for(path)
|
258
|
-
end
|
259
|
-
end
|
280
|
+
if @options[:verbose]
|
281
|
+
recordset << excluded_record_for(path)
|
260
282
|
end
|
261
283
|
|
262
|
-
|
263
|
-
|
284
|
+
Find.prune if path.directory?
|
285
|
+
elsif path.directory?
|
286
|
+
directory_count += 1
|
287
|
+
|
288
|
+
recordset << directory_record_for(path)
|
289
|
+
elsif path.symlink?
|
290
|
+
recordset << link_record_for(path)
|
291
|
+
|
292
|
+
processed_count += 1
|
293
|
+
elsif path.file?
|
294
|
+
recordset << file_record_for(path)
|
295
|
+
|
296
|
+
processed_count += 1
|
297
|
+
processed_size += File.size(path)
|
298
|
+
else
|
299
|
+
excluded_count += 1
|
300
|
+
|
301
|
+
if @options[:verbose]
|
302
|
+
recordset << excluded_record_for(path)
|
303
|
+
end
|
264
304
|
end
|
305
|
+
|
306
|
+
# Print out a progress summary if requested
|
307
|
+
@progress.call(0) if @progress
|
265
308
|
end
|
266
309
|
end
|
267
310
|
|
data/lib/fingerprint/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright, 2011, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
2
|
#
|
3
3
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
# of this software and associated documentation files (the "Software"), to deal
|
@@ -18,7 +18,6 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
|
-
|
22
21
|
module Fingerprint
|
23
|
-
VERSION = "1.4.0"
|
22
|
+
VERSION = "3.0.1"
|
24
23
|
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env rspec
|
2
|
+
|
3
|
+
# Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'fingerprint/command'
|
24
|
+
|
25
|
+
require_relative 'source_fingerprint'
|
26
|
+
|
27
|
+
RSpec.describe Fingerprint::Command::Analyze do
|
28
|
+
include_context "source fingerprint"
|
29
|
+
|
30
|
+
it "should analyze and verify files" do
|
31
|
+
Fingerprint::Command::Top["analyze", "-f"].call
|
32
|
+
|
33
|
+
expect(File).to be_exist(Fingerprint::INDEX_FINGERPRINT)
|
34
|
+
|
35
|
+
top = Fingerprint::Command::Top["verify"].tap(&:call)
|
36
|
+
|
37
|
+
expect(top.command.error_count).to be == 0
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should analyze path but exclude specified file" do
|
41
|
+
Fingerprint::Command::Top["analyze", "-n", fingerprint_name, "-f", source_directory].call
|
42
|
+
|
43
|
+
expect(File).to be_exist(fingerprint_name)
|
44
|
+
|
45
|
+
record_set = Fingerprint::RecordSet.load_file(fingerprint_name)
|
46
|
+
expect(record_set).to_not be_include(File.basename(fingerprint_name))
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env rspec
|
2
|
+
|
3
|
+
# Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'fingerprint/command'
|
24
|
+
|
25
|
+
require_relative 'source_fingerprint'
|
26
|
+
|
27
|
+
RSpec.describe Fingerprint::Command::Duplicates do
|
28
|
+
include_context "source fingerprint"
|
29
|
+
|
30
|
+
it "should have no duplicates" do
|
31
|
+
Fingerprint::Command::Top["analyze", "-n", fingerprint_name, "-f", source_directory].call
|
32
|
+
|
33
|
+
expect(File).to be_exist(fingerprint_name)
|
34
|
+
|
35
|
+
record_set = Fingerprint::RecordSet.load_file(fingerprint_name)
|
36
|
+
|
37
|
+
top = Fingerprint::Command::Top["duplicates", fingerprint_name].tap(&:call)
|
38
|
+
|
39
|
+
expect(top.command.duplicates_recordset).to be_empty
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should have duplicates" do
|
43
|
+
Fingerprint::Command::Top["analyze", "-n", fingerprint_name, "-f", source_directory].call
|
44
|
+
|
45
|
+
expect(File).to be_exist(fingerprint_name)
|
46
|
+
|
47
|
+
record_set = Fingerprint::RecordSet.load_file(fingerprint_name)
|
48
|
+
|
49
|
+
top = Fingerprint::Command::Top["duplicates", fingerprint_name, fingerprint_name].tap(&:call)
|
50
|
+
|
51
|
+
expect(top.command.duplicates_recordset).to_not be_empty
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env rspec
|
2
|
+
|
3
|
+
# Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
23
|
+
require 'fileutils'
|
24
|
+
|
25
|
+
RSpec.shared_context "source fingerprint" do
|
26
|
+
let(:source_directory) {File.expand_path("../../../lib", __dir__)}
|
27
|
+
let(:fingerprint_name) {File.join(__dir__, "test.fingerprint")}
|
28
|
+
|
29
|
+
after(:each) do
|
30
|
+
FileUtils.rm_rf(fingerprint_name)
|
31
|
+
end
|
32
|
+
end
|