file-digests 0.0.24 → 0.0.29
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +123 -80
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cab5bc80dc7949984501c3068bf99f44f82564774733e84b1cd6810fd0cb6a05
|
4
|
+
data.tar.gz: 97a4220b83a08408345b21a60256e55bbbe0a5fb30e6c97efa05a4faac42d356
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5eaae0823ccb95bb3db6245e0863ff37aaa6f14f599bd92d040c8085e753aaa8d3be7903d1039a433bded9011bdc73c6a0d42e3e10d1e2c52eb81b6b791a5bc1
|
7
|
+
data.tar.gz: 97f7fa2ac8605fc1775570accf44a93c73e67a11699b7e98bfa9687f9e2c0219ad1890c1a293f52ca901079e602f6cf6863b5f57de174dd39e36ec338d6597f7
|
data/lib/file-digests.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
12
|
+
LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
|
12
13
|
|
13
14
|
def self.canonical_digest_algorithm_name(string)
|
14
15
|
if string
|
15
|
-
|
16
|
-
index
|
16
|
+
algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
|
17
|
+
index = algorithms.map(&:downcase).index(string.downcase)
|
18
|
+
index && algorithms[index]
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
@@ -27,55 +29,60 @@ class FileDigests
|
|
27
29
|
|
28
30
|
def self.parse_cli_options
|
29
31
|
options = {}
|
30
|
-
|
32
|
+
|
31
33
|
OptionParser.new do |opts|
|
32
34
|
opts.banner = [
|
33
35
|
"Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
|
34
36
|
" By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
|
35
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
36
38
|
].join "\n"
|
37
|
-
|
38
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
39
|
+
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
39
41
|
options[:auto] = true
|
40
42
|
end
|
41
43
|
|
42
44
|
opts.on(
|
43
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
44
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
45
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
46
48
|
"#{digest_algorithms_list_text}.",
|
47
|
-
|
48
|
-
|
49
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
50
52
|
) do |value|
|
51
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
52
|
-
unless digest_algorithm
|
53
|
-
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
|
+
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
55
|
+
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
56
|
exit 1
|
55
57
|
end
|
56
58
|
options[:digest_algorithm] = digest_algorithm
|
57
59
|
end
|
58
60
|
|
59
|
-
opts.on("-
|
60
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
61
63
|
end
|
62
64
|
|
63
|
-
opts.on("-
|
64
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
65
68
|
end
|
66
69
|
|
67
|
-
opts.on("-
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
72
|
+
end
|
73
|
+
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
68
75
|
options[:quiet] = true
|
69
76
|
end
|
70
77
|
|
71
|
-
opts.on("-
|
72
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
73
80
|
end
|
74
81
|
|
75
|
-
opts.on("-
|
76
|
-
|
77
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
78
84
|
end
|
85
|
+
|
79
86
|
end.parse!
|
80
87
|
options
|
81
88
|
end
|
@@ -93,15 +100,20 @@ class FileDigests
|
|
93
100
|
initialize_paths files_path, digest_database_path
|
94
101
|
initialize_database
|
95
102
|
|
96
|
-
|
97
|
-
if
|
98
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if db_digest_algorithm = get_metadata("digest_algorithm")
|
105
|
+
if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
|
106
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
107
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
108
|
+
end
|
109
|
+
else
|
110
|
+
raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
|
111
|
+
end
|
112
|
+
else
|
113
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
114
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
99
115
|
end
|
100
|
-
else
|
101
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
102
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
103
116
|
end
|
104
|
-
|
105
117
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
106
118
|
end
|
107
119
|
|
@@ -111,8 +123,10 @@ class FileDigests
|
|
111
123
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
112
124
|
|
113
125
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
114
|
-
@digest_database_path +=
|
115
|
-
|
126
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
127
|
+
ensure_dir_exist @digest_database_path.dirname
|
128
|
+
|
129
|
+
@digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
|
116
130
|
|
117
131
|
if @options[:verbose]
|
118
132
|
puts "Target directory: #{@files_path}"
|
@@ -123,14 +137,17 @@ class FileDigests
|
|
123
137
|
def initialize_database
|
124
138
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
125
139
|
@db.results_as_hash = true
|
140
|
+
@db.busy_timeout = 5000
|
126
141
|
|
127
142
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
128
143
|
|
129
|
-
execute
|
130
|
-
execute
|
131
|
-
execute
|
132
|
-
execute
|
133
|
-
execute
|
144
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
145
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
146
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
147
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
148
|
+
execute "PRAGMA cache_size = '5000'"
|
149
|
+
|
150
|
+
integrity_check
|
134
151
|
|
135
152
|
@db.transaction(:exclusive) do
|
136
153
|
metadata_table_was_created = false
|
@@ -181,7 +198,7 @@ class FileDigests
|
|
181
198
|
# Convert database from 1st to 2nd version
|
182
199
|
unless get_metadata("digest_algorithm")
|
183
200
|
if get_metadata("database_version") == "1"
|
184
|
-
if File.exist?(@digest_database_path.dirname +
|
201
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
185
202
|
set_metadata("digest_algorithm", "SHA512")
|
186
203
|
else
|
187
204
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -191,7 +208,7 @@ class FileDigests
|
|
191
208
|
end
|
192
209
|
|
193
210
|
if get_metadata("database_version") != "2"
|
194
|
-
STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
211
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
195
212
|
raise "Incompatible database version"
|
196
213
|
end
|
197
214
|
end
|
@@ -211,15 +228,19 @@ class FileDigests
|
|
211
228
|
end
|
212
229
|
end
|
213
230
|
|
214
|
-
|
215
|
-
|
231
|
+
nested_transaction do
|
232
|
+
track_renames
|
233
|
+
end
|
234
|
+
|
216
235
|
if any_missing_files?
|
217
236
|
if any_exceptions?
|
218
237
|
STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
|
219
238
|
else
|
220
239
|
print_missing_files
|
221
240
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
222
|
-
|
241
|
+
nested_transaction do
|
242
|
+
remove_missing_files
|
243
|
+
end
|
223
244
|
end
|
224
245
|
end
|
225
246
|
end
|
@@ -232,6 +253,7 @@ class FileDigests
|
|
232
253
|
update_digest_to_new_digest new_digest, old_digest
|
233
254
|
end
|
234
255
|
set_metadata "digest_algorithm", @new_digest_algorithm
|
256
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
235
257
|
end
|
236
258
|
end
|
237
259
|
|
@@ -241,6 +263,12 @@ class FileDigests
|
|
241
263
|
|
242
264
|
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
243
265
|
|
266
|
+
execute "PRAGMA optimize"
|
267
|
+
execute "VACUUM"
|
268
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
269
|
+
|
270
|
+
hide_database_files
|
271
|
+
|
244
272
|
print_counters
|
245
273
|
end
|
246
274
|
end
|
@@ -248,12 +276,12 @@ class FileDigests
|
|
248
276
|
def show_duplicates
|
249
277
|
current_digest = nil
|
250
278
|
query_duplicates.each do |found|
|
251
|
-
if current_digest != found[
|
279
|
+
if current_digest != found["digest"]
|
252
280
|
puts "" if current_digest
|
253
|
-
current_digest = found[
|
254
|
-
puts "#{found[
|
281
|
+
current_digest = found["digest"]
|
282
|
+
puts "#{found["digest"]}:"
|
255
283
|
end
|
256
|
-
puts " #{found[
|
284
|
+
puts " #{found["filename"]}"
|
257
285
|
end
|
258
286
|
end
|
259
287
|
|
@@ -272,17 +300,18 @@ class FileDigests
|
|
272
300
|
|
273
301
|
raise "File is not readable" unless stat.readable?
|
274
302
|
|
275
|
-
if filename
|
276
|
-
filename == "#{@digest_database_path}-wal" ||
|
277
|
-
filename == "#{@digest_database_path}-shm"
|
303
|
+
if @digest_database_files.include?(filename)
|
278
304
|
puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
|
279
305
|
return
|
280
306
|
end
|
281
307
|
|
282
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
308
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
283
309
|
mtime_string = time_to_database stat.mtime
|
310
|
+
digest = get_file_digest(filename)
|
284
311
|
|
285
|
-
|
312
|
+
nested_transaction do
|
313
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
314
|
+
end
|
286
315
|
|
287
316
|
rescue => exception
|
288
317
|
@counters[:exceptions] += 1
|
@@ -299,25 +328,25 @@ class FileDigests
|
|
299
328
|
|
300
329
|
def process_previously_seen_file found, filename, mtime, digest
|
301
330
|
@missing_files.delete(filename)
|
302
|
-
if found[
|
331
|
+
if found["digest"] == digest
|
303
332
|
@counters[:good] += 1
|
304
333
|
puts "GOOD: #{filename}" if @options[:verbose]
|
305
334
|
unless @options[:test_only]
|
306
|
-
if found[
|
307
|
-
touch_digest_check_time found[
|
335
|
+
if found["mtime"] == mtime
|
336
|
+
touch_digest_check_time found["id"]
|
308
337
|
else
|
309
|
-
update_mtime mtime, found[
|
338
|
+
update_mtime mtime, found["id"]
|
310
339
|
end
|
311
340
|
end
|
312
341
|
else
|
313
|
-
if found[
|
342
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
314
343
|
@counters[:likely_damaged] += 1
|
315
344
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
316
345
|
else
|
317
346
|
@counters[:updated] += 1
|
318
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
347
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
319
348
|
unless @options[:test_only]
|
320
|
-
update_mtime_and_digest mtime, digest, found[
|
349
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
321
350
|
end
|
322
351
|
end
|
323
352
|
end
|
@@ -355,12 +384,10 @@ class FileDigests
|
|
355
384
|
end
|
356
385
|
|
357
386
|
def remove_missing_files
|
358
|
-
|
359
|
-
|
360
|
-
delete_by_filename filename
|
361
|
-
end
|
362
|
-
@missing_files = {}
|
387
|
+
@missing_files.each do |filename, digest|
|
388
|
+
delete_by_filename filename
|
363
389
|
end
|
390
|
+
@missing_files = {}
|
364
391
|
end
|
365
392
|
|
366
393
|
|
@@ -370,7 +397,13 @@ class FileDigests
|
|
370
397
|
@db.execute *args, &block
|
371
398
|
end
|
372
399
|
|
373
|
-
def
|
400
|
+
def integrity_check
|
401
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
402
|
+
raise "Database integrity check failed"
|
403
|
+
end
|
404
|
+
end
|
405
|
+
|
406
|
+
def nested_transaction(mode = :deferred)
|
374
407
|
if @db.transaction_active?
|
375
408
|
yield
|
376
409
|
else
|
@@ -380,9 +413,9 @@ class FileDigests
|
|
380
413
|
end
|
381
414
|
end
|
382
415
|
|
383
|
-
def perhaps_transaction(condition, mode)
|
416
|
+
def perhaps_transaction(condition, mode = :deferred)
|
384
417
|
if condition
|
385
|
-
|
418
|
+
nested_transaction(mode) do
|
386
419
|
yield
|
387
420
|
end
|
388
421
|
else
|
@@ -391,7 +424,7 @@ class FileDigests
|
|
391
424
|
end
|
392
425
|
|
393
426
|
def table_exist? table_name
|
394
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
427
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
395
428
|
end
|
396
429
|
|
397
430
|
def prepare_method name, query
|
@@ -426,21 +459,31 @@ class FileDigests
|
|
426
459
|
end
|
427
460
|
|
428
461
|
def time_to_database time
|
429
|
-
time.utc.strftime(
|
462
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
463
|
+
end
|
464
|
+
|
465
|
+
def hide_database_files
|
466
|
+
if Gem.win_platform?
|
467
|
+
@digest_database_files.each do |file|
|
468
|
+
if File.exist?(file)
|
469
|
+
system "attrib", "+H", file, exception: true
|
470
|
+
end
|
471
|
+
end
|
472
|
+
end
|
430
473
|
end
|
431
474
|
|
432
475
|
|
433
476
|
# Filesystem-related helpers
|
434
477
|
|
435
478
|
def patch_path_string path
|
436
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
479
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
437
480
|
end
|
438
481
|
|
439
482
|
def cleanup_path path
|
440
483
|
Pathname.new(patch_path_string(path)).cleanpath
|
441
484
|
end
|
442
485
|
|
443
|
-
def
|
486
|
+
def ensure_dir_exist path
|
444
487
|
if File.exist?(path)
|
445
488
|
unless File.directory?(path)
|
446
489
|
raise "#{path} is not a directory"
|
@@ -451,13 +494,13 @@ class FileDigests
|
|
451
494
|
end
|
452
495
|
|
453
496
|
def walk_files
|
454
|
-
Dir.glob(@files_path +
|
497
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
455
498
|
yield filename
|
456
499
|
end
|
457
500
|
end
|
458
501
|
|
459
502
|
def get_file_digest filename
|
460
|
-
File.open(filename,
|
503
|
+
File.open(filename, "rb") do |io|
|
461
504
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
462
505
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
463
506
|
|
@@ -499,13 +542,13 @@ class FileDigests
|
|
499
542
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
500
543
|
yield
|
501
544
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
502
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
545
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
503
546
|
end
|
504
547
|
|
505
548
|
def print_file_exception exception, filename
|
506
549
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
507
550
|
begin
|
508
|
-
STDERR.print filename.encode(
|
551
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
509
552
|
rescue
|
510
553
|
STDERR.print "(Unable to encode file name to utf-8) "
|
511
554
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.29
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|