file-digests 0.0.25 → 0.0.30
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +110 -70
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99b58aede8267994cc69da0ac1fd2e35d6661fd666257e9f3dfcde5054b3b6f8
|
4
|
+
data.tar.gz: 4b733dcf4be1f14b4a08931d15d0531c18e161f3d6d3af3e3025b36266f67e9d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cf4c3df2b8f206b54689f2506de474ca3b021c572869765caf6e1424dac73a7c6dca3b67918ba5cb88f712353febe9aebe5fc40f7da24f6dd4019300b052436
|
7
|
+
data.tar.gz: cb0904f141e8861c9923647214c92bb8ce022952c53e8a96cee9819a43f4192bf550b47c927cc0ea8f5a16f2c58c3013561555c2afde9c093dd6cd7997f25c94
|
data/lib/file-digests.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
@@ -37,18 +37,18 @@ class FileDigests
|
|
37
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
38
38
|
].join "\n"
|
39
39
|
|
40
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
41
41
|
options[:auto] = true
|
42
42
|
end
|
43
43
|
|
44
44
|
opts.on(
|
45
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
46
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
47
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
48
48
|
"#{digest_algorithms_list_text}.",
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
52
52
|
) do |value|
|
53
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
54
54
|
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
@@ -58,26 +58,31 @@ class FileDigests
|
|
58
58
|
options[:digest_algorithm] = digest_algorithm
|
59
59
|
end
|
60
60
|
|
61
|
-
opts.on("-
|
62
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
63
63
|
end
|
64
64
|
|
65
|
-
opts.on("-
|
66
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
67
68
|
end
|
68
69
|
|
69
|
-
opts.on("-
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
72
|
+
end
|
73
|
+
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
70
75
|
options[:quiet] = true
|
71
76
|
end
|
72
77
|
|
73
|
-
opts.on("-
|
74
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
75
80
|
end
|
76
81
|
|
77
|
-
opts.on("-
|
78
|
-
|
79
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
80
84
|
end
|
85
|
+
|
81
86
|
end.parse!
|
82
87
|
options
|
83
88
|
end
|
@@ -95,15 +100,20 @@ class FileDigests
|
|
95
100
|
initialize_paths files_path, digest_database_path
|
96
101
|
initialize_database
|
97
102
|
|
98
|
-
|
99
|
-
if
|
100
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if db_digest_algorithm = get_metadata("digest_algorithm")
|
105
|
+
if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
|
106
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
107
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
108
|
+
end
|
109
|
+
else
|
110
|
+
raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
|
111
|
+
end
|
112
|
+
else
|
113
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
114
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
101
115
|
end
|
102
|
-
else
|
103
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
104
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
105
116
|
end
|
106
|
-
|
107
117
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
108
118
|
end
|
109
119
|
|
@@ -113,8 +123,10 @@ class FileDigests
|
|
113
123
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
114
124
|
|
115
125
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
116
|
-
@digest_database_path +=
|
117
|
-
|
126
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
127
|
+
ensure_dir_exist @digest_database_path.dirname
|
128
|
+
|
129
|
+
@digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
|
118
130
|
|
119
131
|
if @options[:verbose]
|
120
132
|
puts "Target directory: #{@files_path}"
|
@@ -125,14 +137,17 @@ class FileDigests
|
|
125
137
|
def initialize_database
|
126
138
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
127
139
|
@db.results_as_hash = true
|
140
|
+
@db.busy_timeout = 5000
|
128
141
|
|
129
142
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
130
143
|
|
131
|
-
execute
|
132
|
-
execute
|
133
|
-
execute
|
134
|
-
execute
|
135
|
-
execute
|
144
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
145
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
146
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
147
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
148
|
+
execute "PRAGMA cache_size = '5000'"
|
149
|
+
|
150
|
+
integrity_check
|
136
151
|
|
137
152
|
@db.transaction(:exclusive) do
|
138
153
|
metadata_table_was_created = false
|
@@ -183,7 +198,7 @@ class FileDigests
|
|
183
198
|
# Convert database from 1st to 2nd version
|
184
199
|
unless get_metadata("digest_algorithm")
|
185
200
|
if get_metadata("database_version") == "1"
|
186
|
-
if File.exist?(@digest_database_path.dirname +
|
201
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
187
202
|
set_metadata("digest_algorithm", "SHA512")
|
188
203
|
else
|
189
204
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -193,7 +208,7 @@ class FileDigests
|
|
193
208
|
end
|
194
209
|
|
195
210
|
if get_metadata("database_version") != "2"
|
196
|
-
STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
211
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
197
212
|
raise "Incompatible database version"
|
198
213
|
end
|
199
214
|
end
|
@@ -213,7 +228,9 @@ class FileDigests
|
|
213
228
|
end
|
214
229
|
end
|
215
230
|
|
216
|
-
|
231
|
+
nested_transaction do
|
232
|
+
track_renames
|
233
|
+
end
|
217
234
|
|
218
235
|
if any_missing_files?
|
219
236
|
if any_exceptions?
|
@@ -221,7 +238,9 @@ class FileDigests
|
|
221
238
|
else
|
222
239
|
print_missing_files
|
223
240
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
224
|
-
|
241
|
+
nested_transaction do
|
242
|
+
remove_missing_files
|
243
|
+
end
|
225
244
|
end
|
226
245
|
end
|
227
246
|
end
|
@@ -234,6 +253,7 @@ class FileDigests
|
|
234
253
|
update_digest_to_new_digest new_digest, old_digest
|
235
254
|
end
|
236
255
|
set_metadata "digest_algorithm", @new_digest_algorithm
|
256
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
237
257
|
end
|
238
258
|
end
|
239
259
|
|
@@ -245,17 +265,22 @@ class FileDigests
|
|
245
265
|
|
246
266
|
print_counters
|
247
267
|
end
|
268
|
+
execute "PRAGMA optimize"
|
269
|
+
execute "VACUUM"
|
270
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
271
|
+
|
272
|
+
hide_database_files
|
248
273
|
end
|
249
274
|
|
250
275
|
def show_duplicates
|
251
276
|
current_digest = nil
|
252
277
|
query_duplicates.each do |found|
|
253
|
-
if current_digest != found[
|
278
|
+
if current_digest != found["digest"]
|
254
279
|
puts "" if current_digest
|
255
|
-
current_digest = found[
|
256
|
-
puts "#{found[
|
280
|
+
current_digest = found["digest"]
|
281
|
+
puts "#{found["digest"]}:"
|
257
282
|
end
|
258
|
-
puts " #{found[
|
283
|
+
puts " #{found["filename"]}"
|
259
284
|
end
|
260
285
|
end
|
261
286
|
|
@@ -274,17 +299,18 @@ class FileDigests
|
|
274
299
|
|
275
300
|
raise "File is not readable" unless stat.readable?
|
276
301
|
|
277
|
-
if filename
|
278
|
-
filename == "#{@digest_database_path}-wal" ||
|
279
|
-
filename == "#{@digest_database_path}-shm"
|
302
|
+
if @digest_database_files.include?(filename)
|
280
303
|
puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
|
281
304
|
return
|
282
305
|
end
|
283
306
|
|
284
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
307
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
285
308
|
mtime_string = time_to_database stat.mtime
|
309
|
+
digest = get_file_digest(filename)
|
286
310
|
|
287
|
-
|
311
|
+
nested_transaction do
|
312
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
313
|
+
end
|
288
314
|
|
289
315
|
rescue => exception
|
290
316
|
@counters[:exceptions] += 1
|
@@ -301,25 +327,25 @@ class FileDigests
|
|
301
327
|
|
302
328
|
def process_previously_seen_file found, filename, mtime, digest
|
303
329
|
@missing_files.delete(filename)
|
304
|
-
if found[
|
330
|
+
if found["digest"] == digest
|
305
331
|
@counters[:good] += 1
|
306
332
|
puts "GOOD: #{filename}" if @options[:verbose]
|
307
333
|
unless @options[:test_only]
|
308
|
-
if found[
|
309
|
-
touch_digest_check_time found[
|
334
|
+
if found["mtime"] == mtime
|
335
|
+
touch_digest_check_time found["id"]
|
310
336
|
else
|
311
|
-
update_mtime mtime, found[
|
337
|
+
update_mtime mtime, found["id"]
|
312
338
|
end
|
313
339
|
end
|
314
340
|
else
|
315
|
-
if found[
|
341
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
316
342
|
@counters[:likely_damaged] += 1
|
317
343
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
318
344
|
else
|
319
345
|
@counters[:updated] += 1
|
320
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
346
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
321
347
|
unless @options[:test_only]
|
322
|
-
update_mtime_and_digest mtime, digest, found[
|
348
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
323
349
|
end
|
324
350
|
end
|
325
351
|
end
|
@@ -357,12 +383,10 @@ class FileDigests
|
|
357
383
|
end
|
358
384
|
|
359
385
|
def remove_missing_files
|
360
|
-
|
361
|
-
|
362
|
-
delete_by_filename filename
|
363
|
-
end
|
364
|
-
@missing_files = {}
|
386
|
+
@missing_files.each do |filename, digest|
|
387
|
+
delete_by_filename filename
|
365
388
|
end
|
389
|
+
@missing_files = {}
|
366
390
|
end
|
367
391
|
|
368
392
|
|
@@ -372,6 +396,12 @@ class FileDigests
|
|
372
396
|
@db.execute *args, &block
|
373
397
|
end
|
374
398
|
|
399
|
+
def integrity_check
|
400
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
401
|
+
raise "Database integrity check failed"
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
375
405
|
def nested_transaction(mode = :deferred)
|
376
406
|
if @db.transaction_active?
|
377
407
|
yield
|
@@ -393,7 +423,7 @@ class FileDigests
|
|
393
423
|
end
|
394
424
|
|
395
425
|
def table_exist? table_name
|
396
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
426
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
397
427
|
end
|
398
428
|
|
399
429
|
def prepare_method name, query
|
@@ -428,21 +458,31 @@ class FileDigests
|
|
428
458
|
end
|
429
459
|
|
430
460
|
def time_to_database time
|
431
|
-
time.utc.strftime(
|
461
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
462
|
+
end
|
463
|
+
|
464
|
+
def hide_database_files
|
465
|
+
if Gem.win_platform?
|
466
|
+
@digest_database_files.each do |file|
|
467
|
+
if File.exist?(file)
|
468
|
+
system "attrib", "+H", file, exception: true
|
469
|
+
end
|
470
|
+
end
|
471
|
+
end
|
432
472
|
end
|
433
473
|
|
434
474
|
|
435
475
|
# Filesystem-related helpers
|
436
476
|
|
437
477
|
def patch_path_string path
|
438
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
478
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
439
479
|
end
|
440
480
|
|
441
481
|
def cleanup_path path
|
442
482
|
Pathname.new(patch_path_string(path)).cleanpath
|
443
483
|
end
|
444
484
|
|
445
|
-
def
|
485
|
+
def ensure_dir_exist path
|
446
486
|
if File.exist?(path)
|
447
487
|
unless File.directory?(path)
|
448
488
|
raise "#{path} is not a directory"
|
@@ -453,13 +493,13 @@ class FileDigests
|
|
453
493
|
end
|
454
494
|
|
455
495
|
def walk_files
|
456
|
-
Dir.glob(@files_path +
|
496
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
457
497
|
yield filename
|
458
498
|
end
|
459
499
|
end
|
460
500
|
|
461
501
|
def get_file_digest filename
|
462
|
-
File.open(filename,
|
502
|
+
File.open(filename, "rb") do |io|
|
463
503
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
464
504
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
465
505
|
|
@@ -501,13 +541,13 @@ class FileDigests
|
|
501
541
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
502
542
|
yield
|
503
543
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
504
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
544
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
505
545
|
end
|
506
546
|
|
507
547
|
def print_file_exception exception, filename
|
508
548
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
509
549
|
begin
|
510
|
-
STDERR.print filename.encode(
|
550
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
511
551
|
rescue
|
512
552
|
STDERR.print "(Unable to encode file name to utf-8) "
|
513
553
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.30
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|