fingerprint 1.3.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/fingerprint +6 -215
- data/lib/.DS_Store +0 -0
- data/lib/fingerprint.rb +19 -5
- data/lib/fingerprint/checker.rb +45 -17
- data/lib/fingerprint/checksums.rb +33 -0
- data/lib/fingerprint/command.rb +90 -0
- data/lib/fingerprint/command/analyze.rb +74 -0
- data/lib/fingerprint/command/compare.rb +55 -0
- data/lib/fingerprint/command/duplicates.rb +98 -0
- data/lib/fingerprint/command/scan.rb +61 -0
- data/lib/fingerprint/command/verify.rb +86 -0
- data/lib/fingerprint/find.rb +40 -0
- data/lib/fingerprint/record.rb +43 -16
- data/lib/fingerprint/scanner.rb +147 -96
- data/lib/fingerprint/version.rb +2 -9
- metadata +109 -63
- data/README.md +0 -52
@@ -0,0 +1,40 @@
|
|
1
|
+
# Copyright, 2016, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require 'find'
|
22
|
+
require 'build/files/path'
|
23
|
+
require 'build/files/system'
|
24
|
+
|
25
|
+
module Fingerprint
|
26
|
+
module Find
|
27
|
+
def self.find(root)
|
28
|
+
# Ensure root is a directory:
|
29
|
+
root += File::SEPARATOR unless root.end_with?(File::SEPARATOR)
|
30
|
+
|
31
|
+
::Find.find(root) do |path|
|
32
|
+
yield Build::Files::Path.new(path, root)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.prune
|
37
|
+
::Find.prune
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
data/lib/fingerprint/record.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright, 2011, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
2
|
#
|
3
3
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
# of this software and associated documentation files (the "Software"), to deal
|
@@ -25,6 +25,7 @@ module Fingerprint
|
|
25
25
|
MODES = {
|
26
26
|
:configuration => 'C',
|
27
27
|
:file => 'F',
|
28
|
+
:link => 'L',
|
28
29
|
:directory => 'D',
|
29
30
|
:summary => 'S',
|
30
31
|
:warning => 'W',
|
@@ -67,7 +68,7 @@ module Fingerprint
|
|
67
68
|
options = {}
|
68
69
|
|
69
70
|
options[:extended] = true if @metadata['options.extended'] == 'true'
|
70
|
-
options[:
|
71
|
+
options[:checksums] = @metadata['options.checksums'].split(/[\s,]+/) if @metadata['options.checksums']
|
71
72
|
|
72
73
|
return options
|
73
74
|
end
|
@@ -83,12 +84,22 @@ module Fingerprint
|
|
83
84
|
end
|
84
85
|
end
|
85
86
|
|
86
|
-
class
|
87
|
+
class RecordSet
|
88
|
+
def self.load_file(path)
|
89
|
+
File.open(path, "r") do |io|
|
90
|
+
self.load(io)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def self.load(io)
|
95
|
+
self.new.tap{|record_set| record_set.parse(io)}
|
96
|
+
end
|
97
|
+
|
87
98
|
def initialize
|
88
99
|
@records = []
|
89
100
|
@paths = {}
|
90
101
|
@keys = {}
|
91
|
-
|
102
|
+
|
92
103
|
@configuration = nil
|
93
104
|
|
94
105
|
@callback = nil
|
@@ -107,22 +118,30 @@ module Fingerprint
|
|
107
118
|
@configuration = record
|
108
119
|
else
|
109
120
|
@paths[record.path] = record
|
110
|
-
record.keys.each do |key
|
111
|
-
@keys[key]
|
121
|
+
record.keys.each do |key|
|
122
|
+
@keys[key] ||= {}
|
123
|
+
|
124
|
+
@keys[key][record[key]] = record
|
112
125
|
end
|
113
126
|
end
|
114
127
|
end
|
115
128
|
|
129
|
+
def include?(path)
|
130
|
+
@paths.include?(path)
|
131
|
+
end
|
132
|
+
|
133
|
+
def empty?
|
134
|
+
@paths.empty?
|
135
|
+
end
|
136
|
+
|
116
137
|
def lookup(path)
|
117
138
|
return @paths[path]
|
118
139
|
end
|
119
140
|
|
120
|
-
def
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
@record.keys.each do |key, value|
|
141
|
+
def find_by_key(record)
|
142
|
+
record.keys.each do |key|
|
143
|
+
value = record[key]
|
144
|
+
|
126
145
|
result = @keys[key][value]
|
127
146
|
|
128
147
|
return result if result
|
@@ -131,6 +150,14 @@ module Fingerprint
|
|
131
150
|
return nil
|
132
151
|
end
|
133
152
|
|
153
|
+
def find(record)
|
154
|
+
result = lookup(record.path)
|
155
|
+
|
156
|
+
return result if result
|
157
|
+
|
158
|
+
return find_by_key(record)
|
159
|
+
end
|
160
|
+
|
134
161
|
def compare(other)
|
135
162
|
main = lookup(other.path)
|
136
163
|
|
@@ -218,7 +245,7 @@ module Fingerprint
|
|
218
245
|
end
|
219
246
|
|
220
247
|
# This record set dynamically computes data from the disk as required.
|
221
|
-
class
|
248
|
+
class SparseRecordSet < RecordSet
|
222
249
|
def initialize(scanner)
|
223
250
|
super()
|
224
251
|
|
@@ -234,7 +261,7 @@ module Fingerprint
|
|
234
261
|
end
|
235
262
|
end
|
236
263
|
|
237
|
-
class
|
264
|
+
class RecordSetWrapper
|
238
265
|
def initialize(recordset)
|
239
266
|
@recordset = recordset
|
240
267
|
end
|
@@ -248,7 +275,7 @@ module Fingerprint
|
|
248
275
|
end
|
249
276
|
end
|
250
277
|
|
251
|
-
class
|
278
|
+
class RecordSetPrinter < RecordSetWrapper
|
252
279
|
def initialize(recordset, output)
|
253
280
|
super(recordset)
|
254
281
|
@output = output
|
@@ -256,7 +283,7 @@ module Fingerprint
|
|
256
283
|
|
257
284
|
def <<(record)
|
258
285
|
record.write(@output)
|
259
|
-
@recordset << record
|
286
|
+
@recordset << record if @recordset
|
260
287
|
end
|
261
288
|
end
|
262
289
|
end
|
data/lib/fingerprint/scanner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright, 2011, by Samuel G. D. Williams. <http://www.codeotaku.com>
|
2
2
|
#
|
3
3
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
4
|
# of this software and associated documentation files (the "Software"), to deal
|
@@ -19,28 +19,33 @@
|
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
21
|
require 'stringio'
|
22
|
-
require 'find'
|
23
22
|
require 'etc'
|
24
23
|
require 'digest/sha2'
|
25
24
|
|
26
|
-
|
25
|
+
require_relative 'find'
|
26
|
+
require_relative 'record'
|
27
|
+
require_relative 'version'
|
27
28
|
|
29
|
+
module Fingerprint
|
30
|
+
INDEX_FINGERPRINT = "index.fingerprint"
|
31
|
+
|
28
32
|
CHECKSUMS = {
|
29
33
|
'MD5' => lambda { Digest::MD5.new },
|
30
34
|
'SHA1' => lambda { Digest::SHA1.new },
|
31
35
|
'SHA2.256' => lambda { Digest::SHA2.new(256) },
|
36
|
+
'SHA2.384' => lambda { Digest::SHA2.new(384) },
|
32
37
|
'SHA2.512' => lambda { Digest::SHA2.new(512) },
|
33
38
|
}
|
34
39
|
|
35
|
-
DEFAULT_CHECKSUMS = ['
|
40
|
+
DEFAULT_CHECKSUMS = ['SHA2.256']
|
36
41
|
|
37
42
|
# The scanner class can scan a set of directories and produce an index.
|
38
43
|
class Scanner
|
39
44
|
# Initialize the scanner to scan a given set of directories in order.
|
40
45
|
# [+options[:excludes]+] An array of regular expressions of files to avoid indexing.
|
41
46
|
# [+options[:output]+] An +IO+ where the results will be written.
|
42
|
-
def initialize(roots,
|
43
|
-
@roots = roots
|
47
|
+
def initialize(roots, pwd: Dir.pwd, **options)
|
48
|
+
@roots = roots.collect{|root| File.expand_path(root, pwd)}
|
44
49
|
|
45
50
|
@excludes = options[:excludes] || []
|
46
51
|
@options = options
|
@@ -71,7 +76,7 @@ module Fingerprint
|
|
71
76
|
'options.extended' => @options[:extended] == true,
|
72
77
|
'options.checksums' => @options[:checksums].join(', '),
|
73
78
|
'summary.time.start' => Time.now,
|
74
|
-
'fingerprint.version' => Fingerprint::VERSION
|
79
|
+
'fingerprint.version' => Fingerprint::VERSION
|
75
80
|
})
|
76
81
|
end
|
77
82
|
|
@@ -84,14 +89,14 @@ module Fingerprint
|
|
84
89
|
end
|
85
90
|
|
86
91
|
File.open(path, "rb") do |file|
|
87
|
-
|
88
|
-
while file.read(1024 * 1024 * 10,
|
89
|
-
total +=
|
92
|
+
buffer = ""
|
93
|
+
while file.read(1024 * 1024 * 10, buffer)
|
94
|
+
total += buffer.bytesize
|
90
95
|
|
91
96
|
@progress.call(total) if @progress
|
92
97
|
|
93
98
|
@digests.each do |key, digest|
|
94
|
-
digest <<
|
99
|
+
digest << buffer
|
95
100
|
end
|
96
101
|
end
|
97
102
|
end
|
@@ -106,35 +111,61 @@ module Fingerprint
|
|
106
111
|
end
|
107
112
|
|
108
113
|
def metadata_for(type, path)
|
109
|
-
stat = File.stat(path)
|
110
114
|
metadata = {}
|
115
|
+
|
116
|
+
if type == :link
|
117
|
+
metadata['file.symlink'] = File.readlink(path)
|
118
|
+
else
|
119
|
+
stat = File.stat(path)
|
120
|
+
|
121
|
+
if type == :file
|
122
|
+
metadata['file.size'] = stat.size
|
123
|
+
digests = digests_for(path)
|
124
|
+
metadata.merge!(digests)
|
125
|
+
elsif type == :blockdev or type == :chardev
|
126
|
+
metadata['file.dev_major'] = stat.dev_major
|
127
|
+
metadata['file.dev_minor'] = stat.dev_minor
|
128
|
+
end
|
111
129
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
metadata.merge!(digests)
|
116
|
-
end
|
117
|
-
|
118
|
-
# Extended information
|
119
|
-
if @options[:extended]
|
120
|
-
metadata['posix.time.modified'] = File.mtime(path)
|
130
|
+
# Extended information
|
131
|
+
if @options[:extended]
|
132
|
+
metadata['posix.time.modified'] = File.mtime(path)
|
121
133
|
|
122
|
-
|
134
|
+
metadata['posix.mode'] = stat.mode.to_s(8)
|
123
135
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
136
|
+
metadata['posix.permissions.user.id'] = stat.uid
|
137
|
+
metadata['posix.permissions.user.name'] = Etc.getpwuid(stat.uid).name
|
138
|
+
metadata['posix.permissions.group.id'] = stat.gid
|
139
|
+
metadata['posix.permissions.group.name'] = Etc.getgrgid(stat.gid).name
|
140
|
+
end
|
128
141
|
end
|
129
|
-
|
142
|
+
|
130
143
|
return metadata
|
131
144
|
end
|
132
145
|
|
133
146
|
# Output a directory header.
|
134
147
|
def directory_record_for(path)
|
135
|
-
Record.new(:directory, path, metadata_for(:directory, path))
|
148
|
+
Record.new(:directory, path.relative_path, metadata_for(:directory, path))
|
136
149
|
end
|
137
150
|
|
151
|
+
def link_record_for(path)
|
152
|
+
metadata = metadata_for(:link, path)
|
153
|
+
|
154
|
+
Record.new(:link, path.relative_path, metadata)
|
155
|
+
end
|
156
|
+
|
157
|
+
def blockdev_record_for(path)
|
158
|
+
metadata = metadata_for(:blockdev, path)
|
159
|
+
|
160
|
+
Record.new(:blockdev, path.relative_path, metadata)
|
161
|
+
end
|
162
|
+
|
163
|
+
def chardev_record_for(path)
|
164
|
+
metadata = metadata_for(:chardev, path)
|
165
|
+
|
166
|
+
Record.new(:chardev, path.relative_path, metadata)
|
167
|
+
end
|
168
|
+
|
138
169
|
# Output a file and associated metadata.
|
139
170
|
def file_record_for(path)
|
140
171
|
metadata = metadata_for(:file, path)
|
@@ -142,12 +173,30 @@ module Fingerprint
|
|
142
173
|
# Should this be here or in metadata_for?
|
143
174
|
# metadata.merge!(digests_for(path))
|
144
175
|
|
145
|
-
Record.new(:file, path, metadata)
|
176
|
+
Record.new(:file, path.relative_path, metadata)
|
146
177
|
end
|
147
178
|
|
148
179
|
# Add information about excluded paths.
|
149
180
|
def excluded_record_for(path)
|
150
|
-
Record.new(:excluded, path)
|
181
|
+
Record.new(:excluded, path.relative_path)
|
182
|
+
end
|
183
|
+
|
184
|
+
def record_for(path)
|
185
|
+
stat = File.stat(path)
|
186
|
+
|
187
|
+
if stat.symlink?
|
188
|
+
return link_record_for(path)
|
189
|
+
elsif stat.blockdev?
|
190
|
+
return blockdev_record_for(path)
|
191
|
+
elsif stat.chardev?
|
192
|
+
return chardev_record_for(path)
|
193
|
+
elsif stat.socket?
|
194
|
+
return socket_record_for(path)
|
195
|
+
elsif stat.file?
|
196
|
+
return file_record_for(path)
|
197
|
+
end
|
198
|
+
rescue Errno::ENOENT
|
199
|
+
return nil
|
151
200
|
end
|
152
201
|
|
153
202
|
public
|
@@ -163,17 +212,13 @@ module Fingerprint
|
|
163
212
|
return false
|
164
213
|
end
|
165
214
|
|
166
|
-
def valid_file?(path)
|
167
|
-
!(excluded?(path) || File.symlink?(path) || !File.file?(path) || !File.readable?(path))
|
168
|
-
end
|
169
|
-
|
170
215
|
def scan_path(path)
|
216
|
+
return nil if excluded?(path)
|
217
|
+
|
171
218
|
@roots.each do |root|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
end
|
176
|
-
end
|
219
|
+
full_path = Build::Files::Path.join(root, path)
|
220
|
+
|
221
|
+
return record_for(full_path)
|
177
222
|
end
|
178
223
|
|
179
224
|
return nil
|
@@ -192,23 +237,21 @@ module Fingerprint
|
|
192
237
|
# Estimate the number of files and amount of data to process..
|
193
238
|
if @options[:progress]
|
194
239
|
@roots.each do |root|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
end
|
211
|
-
end
|
240
|
+
Find.find(root) do |path|
|
241
|
+
# Some special files fail here, and this was the simplest fix.
|
242
|
+
Find.prune unless File.exist?(path)
|
243
|
+
|
244
|
+
if @options[:progress]
|
245
|
+
$stderr.puts "# Scanning: #{path}"
|
246
|
+
end
|
247
|
+
|
248
|
+
if excluded?(path)
|
249
|
+
Find.prune if path.directory?
|
250
|
+
elsif path.symlink?
|
251
|
+
total_count += 1
|
252
|
+
elsif path.file?
|
253
|
+
total_count += 1
|
254
|
+
total_size += File.size(path)
|
212
255
|
end
|
213
256
|
end
|
214
257
|
end
|
@@ -221,52 +264,54 @@ module Fingerprint
|
|
221
264
|
end
|
222
265
|
|
223
266
|
@roots.each do |root|
|
224
|
-
|
225
|
-
|
267
|
+
recordset << header_for(root)
|
268
|
+
|
269
|
+
Find.find(root) do |path|
|
270
|
+
# Some special files fail here, and this was the simplest fix.
|
271
|
+
Find.prune unless File.exist?(path)
|
226
272
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
273
|
+
if @options[:progress]
|
274
|
+
$stderr.puts "# Path: #{path.relative_path}"
|
275
|
+
end
|
276
|
+
|
277
|
+
if excluded?(path)
|
278
|
+
excluded_count += 1
|
231
279
|
|
232
|
-
if
|
233
|
-
|
234
|
-
excluded_count += 1
|
235
|
-
|
236
|
-
if @options[:verbose]
|
237
|
-
recordset << excluded_record_for(path)
|
238
|
-
end
|
239
|
-
|
240
|
-
Find.prune # Ignore this directory
|
241
|
-
else
|
242
|
-
directory_count += 1
|
243
|
-
|
244
|
-
recordset << directory_record_for(path)
|
245
|
-
end
|
246
|
-
else
|
247
|
-
# Skip anything that isn't a valid file (e.g. pipes, sockets, symlinks).
|
248
|
-
if valid_file?(path)
|
249
|
-
recordset << file_record_for(path)
|
250
|
-
|
251
|
-
processed_count += 1
|
252
|
-
processed_size += File.size(path)
|
253
|
-
else
|
254
|
-
excluded_count += 1
|
255
|
-
|
256
|
-
if @options[:verbose]
|
257
|
-
recordset << excluded_record_for(path)
|
258
|
-
end
|
259
|
-
end
|
280
|
+
if @options[:verbose]
|
281
|
+
recordset << excluded_record_for(path)
|
260
282
|
end
|
261
283
|
|
262
|
-
|
263
|
-
|
284
|
+
Find.prune if path.directory?
|
285
|
+
elsif path.directory?
|
286
|
+
directory_count += 1
|
287
|
+
|
288
|
+
recordset << directory_record_for(path)
|
289
|
+
elsif path.symlink?
|
290
|
+
recordset << link_record_for(path)
|
291
|
+
|
292
|
+
processed_count += 1
|
293
|
+
elsif path.file?
|
294
|
+
recordset << file_record_for(path)
|
295
|
+
|
296
|
+
processed_count += 1
|
297
|
+
processed_size += File.size(path)
|
298
|
+
else
|
299
|
+
excluded_count += 1
|
300
|
+
|
301
|
+
if @options[:verbose]
|
302
|
+
recordset << excluded_record_for(path)
|
303
|
+
end
|
264
304
|
end
|
305
|
+
|
306
|
+
# Print out a progress summary if requested
|
307
|
+
@progress.call(0) if @progress
|
265
308
|
end
|
266
309
|
end
|
310
|
+
|
311
|
+
summary_message = "#{processed_count} files processed."
|
267
312
|
|
268
313
|
# Output summary
|
269
|
-
recordset << Record.new(:summary,
|
314
|
+
recordset << Record.new(:summary, summary_message, {
|
270
315
|
'summary.directories' => directory_count,
|
271
316
|
'summary.files' => processed_count,
|
272
317
|
'summary.size' => processed_size,
|
@@ -278,12 +323,18 @@ module Fingerprint
|
|
278
323
|
end
|
279
324
|
|
280
325
|
# A helper function to scan a set of directories.
|
281
|
-
def self.scan_paths(paths, options
|
326
|
+
def self.scan_paths(paths, **options)
|
282
327
|
if options[:output]
|
283
|
-
options
|
328
|
+
if options.key? :recordset
|
329
|
+
recordset = options[:recordset]
|
330
|
+
else
|
331
|
+
recordset = RecordSet.new
|
332
|
+
end
|
333
|
+
|
334
|
+
options[:recordset] = RecordSetPrinter.new(recordset, options[:output])
|
284
335
|
end
|
285
336
|
|
286
|
-
scanner = Scanner.new(paths, options)
|
337
|
+
scanner = Scanner.new(paths, **options)
|
287
338
|
|
288
339
|
scanner.scan(options[:recordset])
|
289
340
|
|