file-digests 0.0.26 → 0.0.27
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +89 -68
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
|
4
|
+
data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
|
7
|
+
data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
|
data/lib/file-digests.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
@@ -37,22 +37,18 @@ class FileDigests
|
|
37
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
38
38
|
].join "\n"
|
39
39
|
|
40
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
41
41
|
options[:auto] = true
|
42
42
|
end
|
43
43
|
|
44
|
-
opts.on("--accept-fate", "Accept the current state of files that are likely damaged and update their digest data") do
|
45
|
-
options[:accept_fate] = true
|
46
|
-
end
|
47
|
-
|
48
44
|
opts.on(
|
49
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
50
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
51
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
52
48
|
"#{digest_algorithms_list_text}.",
|
53
|
-
|
54
|
-
|
55
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
56
52
|
) do |value|
|
57
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
58
54
|
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
@@ -62,26 +58,31 @@ class FileDigests
|
|
62
58
|
options[:digest_algorithm] = digest_algorithm
|
63
59
|
end
|
64
60
|
|
65
|
-
opts.on("-
|
66
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
67
63
|
end
|
68
64
|
|
69
|
-
opts.on("-
|
70
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
71
68
|
end
|
72
69
|
|
73
|
-
opts.on("-
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
72
|
+
end
|
73
|
+
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
74
75
|
options[:quiet] = true
|
75
76
|
end
|
76
77
|
|
77
|
-
opts.on("-
|
78
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
79
80
|
end
|
80
81
|
|
81
|
-
opts.on("-
|
82
|
-
|
83
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
84
84
|
end
|
85
|
+
|
85
86
|
end.parse!
|
86
87
|
options
|
87
88
|
end
|
@@ -99,13 +100,15 @@ class FileDigests
|
|
99
100
|
initialize_paths files_path, digest_database_path
|
100
101
|
initialize_database
|
101
102
|
|
102
|
-
|
103
|
-
if @
|
104
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
|
105
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
106
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
107
|
+
end
|
108
|
+
else
|
109
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
110
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
105
111
|
end
|
106
|
-
else
|
107
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
108
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
109
112
|
end
|
110
113
|
|
111
114
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
@@ -117,7 +120,7 @@ class FileDigests
|
|
117
120
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
118
121
|
|
119
122
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
120
|
-
@digest_database_path +=
|
123
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
121
124
|
ensure_dir_exists @digest_database_path.dirname
|
122
125
|
|
123
126
|
if @options[:verbose]
|
@@ -129,14 +132,17 @@ class FileDigests
|
|
129
132
|
def initialize_database
|
130
133
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
131
134
|
@db.results_as_hash = true
|
135
|
+
@db.busy_timeout = 5000
|
132
136
|
|
133
137
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
134
138
|
|
135
|
-
execute
|
136
|
-
execute
|
137
|
-
execute
|
138
|
-
execute
|
139
|
-
execute
|
139
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
140
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
141
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
142
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
143
|
+
execute "PRAGMA cache_size = '5000'"
|
144
|
+
|
145
|
+
integrity_check
|
140
146
|
|
141
147
|
@db.transaction(:exclusive) do
|
142
148
|
metadata_table_was_created = false
|
@@ -187,7 +193,7 @@ class FileDigests
|
|
187
193
|
# Convert database from 1st to 2nd version
|
188
194
|
unless get_metadata("digest_algorithm")
|
189
195
|
if get_metadata("database_version") == "1"
|
190
|
-
if File.exist?(@digest_database_path.dirname +
|
196
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
191
197
|
set_metadata("digest_algorithm", "SHA512")
|
192
198
|
else
|
193
199
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -197,7 +203,7 @@ class FileDigests
|
|
197
203
|
end
|
198
204
|
|
199
205
|
if get_metadata("database_version") != "2"
|
200
|
-
STDERR.puts "This version of file-digests (#{file_digests_gem_version ||
|
206
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
201
207
|
raise "Incompatible database version"
|
202
208
|
end
|
203
209
|
end
|
@@ -217,7 +223,9 @@ class FileDigests
|
|
217
223
|
end
|
218
224
|
end
|
219
225
|
|
220
|
-
|
226
|
+
nested_transaction do
|
227
|
+
track_renames
|
228
|
+
end
|
221
229
|
|
222
230
|
if any_missing_files?
|
223
231
|
if any_exceptions?
|
@@ -225,7 +233,9 @@ class FileDigests
|
|
225
233
|
else
|
226
234
|
print_missing_files
|
227
235
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
228
|
-
|
236
|
+
nested_transaction do
|
237
|
+
remove_missing_files
|
238
|
+
end
|
229
239
|
end
|
230
240
|
end
|
231
241
|
end
|
@@ -248,6 +258,10 @@ class FileDigests
|
|
248
258
|
|
249
259
|
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
250
260
|
|
261
|
+
execute "PRAGMA optimize"
|
262
|
+
execute "VACUUM"
|
263
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
264
|
+
|
251
265
|
print_counters
|
252
266
|
end
|
253
267
|
end
|
@@ -255,12 +269,12 @@ class FileDigests
|
|
255
269
|
def show_duplicates
|
256
270
|
current_digest = nil
|
257
271
|
query_duplicates.each do |found|
|
258
|
-
if current_digest != found[
|
272
|
+
if current_digest != found["digest"]
|
259
273
|
puts "" if current_digest
|
260
|
-
current_digest = found[
|
261
|
-
puts "#{found[
|
274
|
+
current_digest = found["digest"]
|
275
|
+
puts "#{found["digest"]}:"
|
262
276
|
end
|
263
|
-
puts " #{found[
|
277
|
+
puts " #{found["filename"]}"
|
264
278
|
end
|
265
279
|
end
|
266
280
|
|
@@ -286,10 +300,13 @@ class FileDigests
|
|
286
300
|
return
|
287
301
|
end
|
288
302
|
|
289
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
303
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
290
304
|
mtime_string = time_to_database stat.mtime
|
305
|
+
digest = get_file_digest(filename)
|
291
306
|
|
292
|
-
|
307
|
+
nested_transaction do
|
308
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
309
|
+
end
|
293
310
|
|
294
311
|
rescue => exception
|
295
312
|
@counters[:exceptions] += 1
|
@@ -306,25 +323,25 @@ class FileDigests
|
|
306
323
|
|
307
324
|
def process_previously_seen_file found, filename, mtime, digest
|
308
325
|
@missing_files.delete(filename)
|
309
|
-
if found[
|
326
|
+
if found["digest"] == digest
|
310
327
|
@counters[:good] += 1
|
311
328
|
puts "GOOD: #{filename}" if @options[:verbose]
|
312
329
|
unless @options[:test_only]
|
313
|
-
if found[
|
314
|
-
touch_digest_check_time found[
|
330
|
+
if found["mtime"] == mtime
|
331
|
+
touch_digest_check_time found["id"]
|
315
332
|
else
|
316
|
-
update_mtime mtime, found[
|
333
|
+
update_mtime mtime, found["id"]
|
317
334
|
end
|
318
335
|
end
|
319
336
|
else
|
320
|
-
if found[
|
337
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
321
338
|
@counters[:likely_damaged] += 1
|
322
339
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
323
340
|
else
|
324
341
|
@counters[:updated] += 1
|
325
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
342
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
326
343
|
unless @options[:test_only]
|
327
|
-
update_mtime_and_digest mtime, digest, found[
|
344
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
328
345
|
end
|
329
346
|
end
|
330
347
|
end
|
@@ -362,12 +379,10 @@ class FileDigests
|
|
362
379
|
end
|
363
380
|
|
364
381
|
def remove_missing_files
|
365
|
-
|
366
|
-
|
367
|
-
delete_by_filename filename
|
368
|
-
end
|
369
|
-
@missing_files = {}
|
382
|
+
@missing_files.each do |filename, digest|
|
383
|
+
delete_by_filename filename
|
370
384
|
end
|
385
|
+
@missing_files = {}
|
371
386
|
end
|
372
387
|
|
373
388
|
|
@@ -377,6 +392,12 @@ class FileDigests
|
|
377
392
|
@db.execute *args, &block
|
378
393
|
end
|
379
394
|
|
395
|
+
def integrity_check
|
396
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
397
|
+
raise "Database integrity check failed"
|
398
|
+
end
|
399
|
+
end
|
400
|
+
|
380
401
|
def nested_transaction(mode = :deferred)
|
381
402
|
if @db.transaction_active?
|
382
403
|
yield
|
@@ -398,7 +419,7 @@ class FileDigests
|
|
398
419
|
end
|
399
420
|
|
400
421
|
def table_exist? table_name
|
401
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
422
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
402
423
|
end
|
403
424
|
|
404
425
|
def prepare_method name, query
|
@@ -433,14 +454,14 @@ class FileDigests
|
|
433
454
|
end
|
434
455
|
|
435
456
|
def time_to_database time
|
436
|
-
time.utc.strftime(
|
457
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
437
458
|
end
|
438
459
|
|
439
460
|
|
440
461
|
# Filesystem-related helpers
|
441
462
|
|
442
463
|
def patch_path_string path
|
443
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
464
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
444
465
|
end
|
445
466
|
|
446
467
|
def cleanup_path path
|
@@ -458,13 +479,13 @@ class FileDigests
|
|
458
479
|
end
|
459
480
|
|
460
481
|
def walk_files
|
461
|
-
Dir.glob(@files_path +
|
482
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
462
483
|
yield filename
|
463
484
|
end
|
464
485
|
end
|
465
486
|
|
466
487
|
def get_file_digest filename
|
467
|
-
File.open(filename,
|
488
|
+
File.open(filename, "rb") do |io|
|
468
489
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
469
490
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
470
491
|
|
@@ -506,13 +527,13 @@ class FileDigests
|
|
506
527
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
507
528
|
yield
|
508
529
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
509
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
530
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
510
531
|
end
|
511
532
|
|
512
533
|
def print_file_exception exception, filename
|
513
534
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
514
535
|
begin
|
515
|
-
STDERR.print filename.encode(
|
536
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
516
537
|
rescue
|
517
538
|
STDERR.print "(Unable to encode file name to utf-8) "
|
518
539
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|