file-digests 0.0.26 → 0.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +89 -68
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
|
4
|
+
data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
|
7
|
+
data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
|
data/lib/file-digests.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
@@ -37,22 +37,18 @@ class FileDigests
|
|
37
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
38
38
|
].join "\n"
|
39
39
|
|
40
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
41
41
|
options[:auto] = true
|
42
42
|
end
|
43
43
|
|
44
|
-
opts.on("--accept-fate", "Accept the current state of files that are likely damaged and update their digest data") do
|
45
|
-
options[:accept_fate] = true
|
46
|
-
end
|
47
|
-
|
48
44
|
opts.on(
|
49
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
50
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
51
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
52
48
|
"#{digest_algorithms_list_text}.",
|
53
|
-
|
54
|
-
|
55
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
56
52
|
) do |value|
|
57
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
58
54
|
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
@@ -62,26 +58,31 @@ class FileDigests
|
|
62
58
|
options[:digest_algorithm] = digest_algorithm
|
63
59
|
end
|
64
60
|
|
65
|
-
opts.on("-
|
66
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
67
63
|
end
|
68
64
|
|
69
|
-
opts.on("-
|
70
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
71
68
|
end
|
72
69
|
|
73
|
-
opts.on("-
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
72
|
+
end
|
73
|
+
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
74
75
|
options[:quiet] = true
|
75
76
|
end
|
76
77
|
|
77
|
-
opts.on("-
|
78
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
79
80
|
end
|
80
81
|
|
81
|
-
opts.on("-
|
82
|
-
|
83
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
84
84
|
end
|
85
|
+
|
85
86
|
end.parse!
|
86
87
|
options
|
87
88
|
end
|
@@ -99,13 +100,15 @@ class FileDigests
|
|
99
100
|
initialize_paths files_path, digest_database_path
|
100
101
|
initialize_database
|
101
102
|
|
102
|
-
|
103
|
-
if @
|
104
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
|
105
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
106
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
107
|
+
end
|
108
|
+
else
|
109
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
110
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
105
111
|
end
|
106
|
-
else
|
107
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
108
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
109
112
|
end
|
110
113
|
|
111
114
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
@@ -117,7 +120,7 @@ class FileDigests
|
|
117
120
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
118
121
|
|
119
122
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
120
|
-
@digest_database_path +=
|
123
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
121
124
|
ensure_dir_exists @digest_database_path.dirname
|
122
125
|
|
123
126
|
if @options[:verbose]
|
@@ -129,14 +132,17 @@ class FileDigests
|
|
129
132
|
def initialize_database
|
130
133
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
131
134
|
@db.results_as_hash = true
|
135
|
+
@db.busy_timeout = 5000
|
132
136
|
|
133
137
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
134
138
|
|
135
|
-
execute
|
136
|
-
execute
|
137
|
-
execute
|
138
|
-
execute
|
139
|
-
execute
|
139
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
140
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
141
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
142
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
143
|
+
execute "PRAGMA cache_size = '5000'"
|
144
|
+
|
145
|
+
integrity_check
|
140
146
|
|
141
147
|
@db.transaction(:exclusive) do
|
142
148
|
metadata_table_was_created = false
|
@@ -187,7 +193,7 @@ class FileDigests
|
|
187
193
|
# Convert database from 1st to 2nd version
|
188
194
|
unless get_metadata("digest_algorithm")
|
189
195
|
if get_metadata("database_version") == "1"
|
190
|
-
if File.exist?(@digest_database_path.dirname +
|
196
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
191
197
|
set_metadata("digest_algorithm", "SHA512")
|
192
198
|
else
|
193
199
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -197,7 +203,7 @@ class FileDigests
|
|
197
203
|
end
|
198
204
|
|
199
205
|
if get_metadata("database_version") != "2"
|
200
|
-
STDERR.puts "This version of file-digests (#{file_digests_gem_version ||
|
206
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
201
207
|
raise "Incompatible database version"
|
202
208
|
end
|
203
209
|
end
|
@@ -217,7 +223,9 @@ class FileDigests
|
|
217
223
|
end
|
218
224
|
end
|
219
225
|
|
220
|
-
|
226
|
+
nested_transaction do
|
227
|
+
track_renames
|
228
|
+
end
|
221
229
|
|
222
230
|
if any_missing_files?
|
223
231
|
if any_exceptions?
|
@@ -225,7 +233,9 @@ class FileDigests
|
|
225
233
|
else
|
226
234
|
print_missing_files
|
227
235
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
228
|
-
|
236
|
+
nested_transaction do
|
237
|
+
remove_missing_files
|
238
|
+
end
|
229
239
|
end
|
230
240
|
end
|
231
241
|
end
|
@@ -248,6 +258,10 @@ class FileDigests
|
|
248
258
|
|
249
259
|
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
250
260
|
|
261
|
+
execute "PRAGMA optimize"
|
262
|
+
execute "VACUUM"
|
263
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
264
|
+
|
251
265
|
print_counters
|
252
266
|
end
|
253
267
|
end
|
@@ -255,12 +269,12 @@ class FileDigests
|
|
255
269
|
def show_duplicates
|
256
270
|
current_digest = nil
|
257
271
|
query_duplicates.each do |found|
|
258
|
-
if current_digest != found[
|
272
|
+
if current_digest != found["digest"]
|
259
273
|
puts "" if current_digest
|
260
|
-
current_digest = found[
|
261
|
-
puts "#{found[
|
274
|
+
current_digest = found["digest"]
|
275
|
+
puts "#{found["digest"]}:"
|
262
276
|
end
|
263
|
-
puts " #{found[
|
277
|
+
puts " #{found["filename"]}"
|
264
278
|
end
|
265
279
|
end
|
266
280
|
|
@@ -286,10 +300,13 @@ class FileDigests
|
|
286
300
|
return
|
287
301
|
end
|
288
302
|
|
289
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
303
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
290
304
|
mtime_string = time_to_database stat.mtime
|
305
|
+
digest = get_file_digest(filename)
|
291
306
|
|
292
|
-
|
307
|
+
nested_transaction do
|
308
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
309
|
+
end
|
293
310
|
|
294
311
|
rescue => exception
|
295
312
|
@counters[:exceptions] += 1
|
@@ -306,25 +323,25 @@ class FileDigests
|
|
306
323
|
|
307
324
|
def process_previously_seen_file found, filename, mtime, digest
|
308
325
|
@missing_files.delete(filename)
|
309
|
-
if found[
|
326
|
+
if found["digest"] == digest
|
310
327
|
@counters[:good] += 1
|
311
328
|
puts "GOOD: #{filename}" if @options[:verbose]
|
312
329
|
unless @options[:test_only]
|
313
|
-
if found[
|
314
|
-
touch_digest_check_time found[
|
330
|
+
if found["mtime"] == mtime
|
331
|
+
touch_digest_check_time found["id"]
|
315
332
|
else
|
316
|
-
update_mtime mtime, found[
|
333
|
+
update_mtime mtime, found["id"]
|
317
334
|
end
|
318
335
|
end
|
319
336
|
else
|
320
|
-
if found[
|
337
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
321
338
|
@counters[:likely_damaged] += 1
|
322
339
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
323
340
|
else
|
324
341
|
@counters[:updated] += 1
|
325
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
342
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
326
343
|
unless @options[:test_only]
|
327
|
-
update_mtime_and_digest mtime, digest, found[
|
344
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
328
345
|
end
|
329
346
|
end
|
330
347
|
end
|
@@ -362,12 +379,10 @@ class FileDigests
|
|
362
379
|
end
|
363
380
|
|
364
381
|
def remove_missing_files
|
365
|
-
|
366
|
-
|
367
|
-
delete_by_filename filename
|
368
|
-
end
|
369
|
-
@missing_files = {}
|
382
|
+
@missing_files.each do |filename, digest|
|
383
|
+
delete_by_filename filename
|
370
384
|
end
|
385
|
+
@missing_files = {}
|
371
386
|
end
|
372
387
|
|
373
388
|
|
@@ -377,6 +392,12 @@ class FileDigests
|
|
377
392
|
@db.execute *args, &block
|
378
393
|
end
|
379
394
|
|
395
|
+
def integrity_check
|
396
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
397
|
+
raise "Database integrity check failed"
|
398
|
+
end
|
399
|
+
end
|
400
|
+
|
380
401
|
def nested_transaction(mode = :deferred)
|
381
402
|
if @db.transaction_active?
|
382
403
|
yield
|
@@ -398,7 +419,7 @@ class FileDigests
|
|
398
419
|
end
|
399
420
|
|
400
421
|
def table_exist? table_name
|
401
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
422
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
402
423
|
end
|
403
424
|
|
404
425
|
def prepare_method name, query
|
@@ -433,14 +454,14 @@ class FileDigests
|
|
433
454
|
end
|
434
455
|
|
435
456
|
def time_to_database time
|
436
|
-
time.utc.strftime(
|
457
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
437
458
|
end
|
438
459
|
|
439
460
|
|
440
461
|
# Filesystem-related helpers
|
441
462
|
|
442
463
|
def patch_path_string path
|
443
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
464
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
444
465
|
end
|
445
466
|
|
446
467
|
def cleanup_path path
|
@@ -458,13 +479,13 @@ class FileDigests
|
|
458
479
|
end
|
459
480
|
|
460
481
|
def walk_files
|
461
|
-
Dir.glob(@files_path +
|
482
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
462
483
|
yield filename
|
463
484
|
end
|
464
485
|
end
|
465
486
|
|
466
487
|
def get_file_digest filename
|
467
|
-
File.open(filename,
|
488
|
+
File.open(filename, "rb") do |io|
|
468
489
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
469
490
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
470
491
|
|
@@ -506,13 +527,13 @@ class FileDigests
|
|
506
527
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
507
528
|
yield
|
508
529
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
509
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
530
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
510
531
|
end
|
511
532
|
|
512
533
|
def print_file_exception exception, filename
|
513
534
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
514
535
|
begin
|
515
|
-
STDERR.print filename.encode(
|
536
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
516
537
|
rescue
|
517
538
|
STDERR.print "(Unable to encode file name to utf-8) "
|
518
539
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|