file-digests 0.0.25 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +110 -70
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99b58aede8267994cc69da0ac1fd2e35d6661fd666257e9f3dfcde5054b3b6f8
|
4
|
+
data.tar.gz: 4b733dcf4be1f14b4a08931d15d0531c18e161f3d6d3af3e3025b36266f67e9d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cf4c3df2b8f206b54689f2506de474ca3b021c572869765caf6e1424dac73a7c6dca3b67918ba5cb88f712353febe9aebe5fc40f7da24f6dd4019300b052436
|
7
|
+
data.tar.gz: cb0904f141e8861c9923647214c92bb8ce022952c53e8a96cee9819a43f4192bf550b47c927cc0ea8f5a16f2c58c3013561555c2afde9c093dd6cd7997f25c94
|
data/lib/file-digests.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
@@ -37,18 +37,18 @@ class FileDigests
|
|
37
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
38
38
|
].join "\n"
|
39
39
|
|
40
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
41
41
|
options[:auto] = true
|
42
42
|
end
|
43
43
|
|
44
44
|
opts.on(
|
45
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
46
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
47
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
48
48
|
"#{digest_algorithms_list_text}.",
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
52
52
|
) do |value|
|
53
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
54
54
|
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
@@ -58,26 +58,31 @@ class FileDigests
|
|
58
58
|
options[:digest_algorithm] = digest_algorithm
|
59
59
|
end
|
60
60
|
|
61
|
-
opts.on("-
|
62
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
63
63
|
end
|
64
64
|
|
65
|
-
opts.on("-
|
66
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
67
68
|
end
|
68
69
|
|
69
|
-
opts.on("-
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
72
|
+
end
|
73
|
+
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
70
75
|
options[:quiet] = true
|
71
76
|
end
|
72
77
|
|
73
|
-
opts.on("-
|
74
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
75
80
|
end
|
76
81
|
|
77
|
-
opts.on("-
|
78
|
-
|
79
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
80
84
|
end
|
85
|
+
|
81
86
|
end.parse!
|
82
87
|
options
|
83
88
|
end
|
@@ -95,15 +100,20 @@ class FileDigests
|
|
95
100
|
initialize_paths files_path, digest_database_path
|
96
101
|
initialize_database
|
97
102
|
|
98
|
-
|
99
|
-
if
|
100
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if db_digest_algorithm = get_metadata("digest_algorithm")
|
105
|
+
if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
|
106
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
107
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
108
|
+
end
|
109
|
+
else
|
110
|
+
raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
|
111
|
+
end
|
112
|
+
else
|
113
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
114
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
101
115
|
end
|
102
|
-
else
|
103
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
104
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
105
116
|
end
|
106
|
-
|
107
117
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
108
118
|
end
|
109
119
|
|
@@ -113,8 +123,10 @@ class FileDigests
|
|
113
123
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
114
124
|
|
115
125
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
116
|
-
@digest_database_path +=
|
117
|
-
|
126
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
127
|
+
ensure_dir_exist @digest_database_path.dirname
|
128
|
+
|
129
|
+
@digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
|
118
130
|
|
119
131
|
if @options[:verbose]
|
120
132
|
puts "Target directory: #{@files_path}"
|
@@ -125,14 +137,17 @@ class FileDigests
|
|
125
137
|
def initialize_database
|
126
138
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
127
139
|
@db.results_as_hash = true
|
140
|
+
@db.busy_timeout = 5000
|
128
141
|
|
129
142
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
130
143
|
|
131
|
-
execute
|
132
|
-
execute
|
133
|
-
execute
|
134
|
-
execute
|
135
|
-
execute
|
144
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
145
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
146
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
147
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
148
|
+
execute "PRAGMA cache_size = '5000'"
|
149
|
+
|
150
|
+
integrity_check
|
136
151
|
|
137
152
|
@db.transaction(:exclusive) do
|
138
153
|
metadata_table_was_created = false
|
@@ -183,7 +198,7 @@ class FileDigests
|
|
183
198
|
# Convert database from 1st to 2nd version
|
184
199
|
unless get_metadata("digest_algorithm")
|
185
200
|
if get_metadata("database_version") == "1"
|
186
|
-
if File.exist?(@digest_database_path.dirname +
|
201
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
187
202
|
set_metadata("digest_algorithm", "SHA512")
|
188
203
|
else
|
189
204
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -193,7 +208,7 @@ class FileDigests
|
|
193
208
|
end
|
194
209
|
|
195
210
|
if get_metadata("database_version") != "2"
|
196
|
-
STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
211
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
197
212
|
raise "Incompatible database version"
|
198
213
|
end
|
199
214
|
end
|
@@ -213,7 +228,9 @@ class FileDigests
|
|
213
228
|
end
|
214
229
|
end
|
215
230
|
|
216
|
-
|
231
|
+
nested_transaction do
|
232
|
+
track_renames
|
233
|
+
end
|
217
234
|
|
218
235
|
if any_missing_files?
|
219
236
|
if any_exceptions?
|
@@ -221,7 +238,9 @@ class FileDigests
|
|
221
238
|
else
|
222
239
|
print_missing_files
|
223
240
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
224
|
-
|
241
|
+
nested_transaction do
|
242
|
+
remove_missing_files
|
243
|
+
end
|
225
244
|
end
|
226
245
|
end
|
227
246
|
end
|
@@ -234,6 +253,7 @@ class FileDigests
|
|
234
253
|
update_digest_to_new_digest new_digest, old_digest
|
235
254
|
end
|
236
255
|
set_metadata "digest_algorithm", @new_digest_algorithm
|
256
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
237
257
|
end
|
238
258
|
end
|
239
259
|
|
@@ -245,17 +265,22 @@ class FileDigests
|
|
245
265
|
|
246
266
|
print_counters
|
247
267
|
end
|
268
|
+
execute "PRAGMA optimize"
|
269
|
+
execute "VACUUM"
|
270
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
271
|
+
|
272
|
+
hide_database_files
|
248
273
|
end
|
249
274
|
|
250
275
|
def show_duplicates
|
251
276
|
current_digest = nil
|
252
277
|
query_duplicates.each do |found|
|
253
|
-
if current_digest != found[
|
278
|
+
if current_digest != found["digest"]
|
254
279
|
puts "" if current_digest
|
255
|
-
current_digest = found[
|
256
|
-
puts "#{found[
|
280
|
+
current_digest = found["digest"]
|
281
|
+
puts "#{found["digest"]}:"
|
257
282
|
end
|
258
|
-
puts " #{found[
|
283
|
+
puts " #{found["filename"]}"
|
259
284
|
end
|
260
285
|
end
|
261
286
|
|
@@ -274,17 +299,18 @@ class FileDigests
|
|
274
299
|
|
275
300
|
raise "File is not readable" unless stat.readable?
|
276
301
|
|
277
|
-
if filename
|
278
|
-
filename == "#{@digest_database_path}-wal" ||
|
279
|
-
filename == "#{@digest_database_path}-shm"
|
302
|
+
if @digest_database_files.include?(filename)
|
280
303
|
puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
|
281
304
|
return
|
282
305
|
end
|
283
306
|
|
284
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
307
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
285
308
|
mtime_string = time_to_database stat.mtime
|
309
|
+
digest = get_file_digest(filename)
|
286
310
|
|
287
|
-
|
311
|
+
nested_transaction do
|
312
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
313
|
+
end
|
288
314
|
|
289
315
|
rescue => exception
|
290
316
|
@counters[:exceptions] += 1
|
@@ -301,25 +327,25 @@ class FileDigests
|
|
301
327
|
|
302
328
|
def process_previously_seen_file found, filename, mtime, digest
|
303
329
|
@missing_files.delete(filename)
|
304
|
-
if found[
|
330
|
+
if found["digest"] == digest
|
305
331
|
@counters[:good] += 1
|
306
332
|
puts "GOOD: #{filename}" if @options[:verbose]
|
307
333
|
unless @options[:test_only]
|
308
|
-
if found[
|
309
|
-
touch_digest_check_time found[
|
334
|
+
if found["mtime"] == mtime
|
335
|
+
touch_digest_check_time found["id"]
|
310
336
|
else
|
311
|
-
update_mtime mtime, found[
|
337
|
+
update_mtime mtime, found["id"]
|
312
338
|
end
|
313
339
|
end
|
314
340
|
else
|
315
|
-
if found[
|
341
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
316
342
|
@counters[:likely_damaged] += 1
|
317
343
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
318
344
|
else
|
319
345
|
@counters[:updated] += 1
|
320
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
346
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
321
347
|
unless @options[:test_only]
|
322
|
-
update_mtime_and_digest mtime, digest, found[
|
348
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
323
349
|
end
|
324
350
|
end
|
325
351
|
end
|
@@ -357,12 +383,10 @@ class FileDigests
|
|
357
383
|
end
|
358
384
|
|
359
385
|
def remove_missing_files
|
360
|
-
|
361
|
-
|
362
|
-
delete_by_filename filename
|
363
|
-
end
|
364
|
-
@missing_files = {}
|
386
|
+
@missing_files.each do |filename, digest|
|
387
|
+
delete_by_filename filename
|
365
388
|
end
|
389
|
+
@missing_files = {}
|
366
390
|
end
|
367
391
|
|
368
392
|
|
@@ -372,6 +396,12 @@ class FileDigests
|
|
372
396
|
@db.execute *args, &block
|
373
397
|
end
|
374
398
|
|
399
|
+
def integrity_check
|
400
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
401
|
+
raise "Database integrity check failed"
|
402
|
+
end
|
403
|
+
end
|
404
|
+
|
375
405
|
def nested_transaction(mode = :deferred)
|
376
406
|
if @db.transaction_active?
|
377
407
|
yield
|
@@ -393,7 +423,7 @@ class FileDigests
|
|
393
423
|
end
|
394
424
|
|
395
425
|
def table_exist? table_name
|
396
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
426
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
397
427
|
end
|
398
428
|
|
399
429
|
def prepare_method name, query
|
@@ -428,21 +458,31 @@ class FileDigests
|
|
428
458
|
end
|
429
459
|
|
430
460
|
def time_to_database time
|
431
|
-
time.utc.strftime(
|
461
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
462
|
+
end
|
463
|
+
|
464
|
+
def hide_database_files
|
465
|
+
if Gem.win_platform?
|
466
|
+
@digest_database_files.each do |file|
|
467
|
+
if File.exist?(file)
|
468
|
+
system "attrib", "+H", file, exception: true
|
469
|
+
end
|
470
|
+
end
|
471
|
+
end
|
432
472
|
end
|
433
473
|
|
434
474
|
|
435
475
|
# Filesystem-related helpers
|
436
476
|
|
437
477
|
def patch_path_string path
|
438
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
478
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
439
479
|
end
|
440
480
|
|
441
481
|
def cleanup_path path
|
442
482
|
Pathname.new(patch_path_string(path)).cleanpath
|
443
483
|
end
|
444
484
|
|
445
|
-
def
|
485
|
+
def ensure_dir_exist path
|
446
486
|
if File.exist?(path)
|
447
487
|
unless File.directory?(path)
|
448
488
|
raise "#{path} is not a directory"
|
@@ -453,13 +493,13 @@ class FileDigests
|
|
453
493
|
end
|
454
494
|
|
455
495
|
def walk_files
|
456
|
-
Dir.glob(@files_path +
|
496
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
457
497
|
yield filename
|
458
498
|
end
|
459
499
|
end
|
460
500
|
|
461
501
|
def get_file_digest filename
|
462
|
-
File.open(filename,
|
502
|
+
File.open(filename, "rb") do |io|
|
463
503
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
464
504
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
465
505
|
|
@@ -501,13 +541,13 @@ class FileDigests
|
|
501
541
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
502
542
|
yield
|
503
543
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
504
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
544
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
505
545
|
end
|
506
546
|
|
507
547
|
def print_file_exception exception, filename
|
508
548
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
509
549
|
begin
|
510
|
-
STDERR.print filename.encode(
|
550
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
511
551
|
rescue
|
512
552
|
STDERR.print "(Unable to encode file name to utf-8) "
|
513
553
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.30
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|