file-digests 0.0.23 → 0.0.28
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +109 -74
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 406b02c22923ae98c45dc92f2836a99dffa6dc8b2343ff62b5b5957a4a154bdc
|
4
|
+
data.tar.gz: 5898cbc3826818da8c3fa5cf16a334bcb4627f95dc2635a297cc9c7d81d25dfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '021494ba0a65daada30e55c63e489c60c018ae3b189409be200635948132f0ed5118d217388e81d8097bc991cf10fea2ce09c4a3a7c148c1fdaa8f66e6b8e074'
|
7
|
+
data.tar.gz: 3a28808aa979157a2597b5ea56ad006576d28f35678cbaa6e13d62be20aa37066a9d4684d48f5ed2990b24c42acb2ee8cd2d0f14f4f9b12f893c5b3946d5b76e
|
data/lib/file-digests.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
12
|
+
LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
|
12
13
|
|
13
14
|
def self.canonical_digest_algorithm_name(string)
|
14
15
|
if string
|
15
|
-
|
16
|
-
index
|
16
|
+
algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
|
17
|
+
index = algorithms.map(&:downcase).index(string.downcase)
|
18
|
+
index && algorithms[index]
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
@@ -27,55 +29,60 @@ class FileDigests
|
|
27
29
|
|
28
30
|
def self.parse_cli_options
|
29
31
|
options = {}
|
30
|
-
|
32
|
+
|
31
33
|
OptionParser.new do |opts|
|
32
34
|
opts.banner = [
|
33
35
|
"Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
|
34
36
|
" By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
|
35
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
36
38
|
].join "\n"
|
37
|
-
|
38
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
39
|
+
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
39
41
|
options[:auto] = true
|
40
42
|
end
|
41
43
|
|
42
44
|
opts.on(
|
43
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
44
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
45
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
46
48
|
"#{digest_algorithms_list_text}.",
|
47
|
-
|
48
|
-
|
49
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
50
52
|
) do |value|
|
51
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
52
|
-
unless digest_algorithm
|
53
|
-
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
|
+
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
55
|
+
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
56
|
exit 1
|
55
57
|
end
|
56
58
|
options[:digest_algorithm] = digest_algorithm
|
57
59
|
end
|
58
60
|
|
59
|
-
opts.on("-
|
60
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
61
63
|
end
|
62
64
|
|
63
|
-
opts.on("-
|
64
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
68
|
+
end
|
69
|
+
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
65
72
|
end
|
66
73
|
|
67
|
-
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
68
75
|
options[:quiet] = true
|
69
76
|
end
|
70
77
|
|
71
|
-
opts.on("-
|
72
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
73
80
|
end
|
74
81
|
|
75
|
-
opts.on("-
|
76
|
-
|
77
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
78
84
|
end
|
85
|
+
|
79
86
|
end.parse!
|
80
87
|
options
|
81
88
|
end
|
@@ -93,15 +100,20 @@ class FileDigests
|
|
93
100
|
initialize_paths files_path, digest_database_path
|
94
101
|
initialize_database
|
95
102
|
|
96
|
-
|
97
|
-
if
|
98
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if db_digest_algorithm = get_metadata("digest_algorithm")
|
105
|
+
if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
|
106
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
107
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
108
|
+
end
|
109
|
+
else
|
110
|
+
raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
|
111
|
+
end
|
112
|
+
else
|
113
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
114
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
99
115
|
end
|
100
|
-
else
|
101
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
102
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
103
116
|
end
|
104
|
-
|
105
117
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
106
118
|
end
|
107
119
|
|
@@ -111,7 +123,7 @@ class FileDigests
|
|
111
123
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
112
124
|
|
113
125
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
114
|
-
@digest_database_path +=
|
126
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
115
127
|
ensure_dir_exists @digest_database_path.dirname
|
116
128
|
|
117
129
|
if @options[:verbose]
|
@@ -123,14 +135,17 @@ class FileDigests
|
|
123
135
|
def initialize_database
|
124
136
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
125
137
|
@db.results_as_hash = true
|
138
|
+
@db.busy_timeout = 5000
|
126
139
|
|
127
140
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
128
141
|
|
129
|
-
execute
|
130
|
-
execute
|
131
|
-
execute
|
132
|
-
execute
|
133
|
-
execute
|
142
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
143
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
144
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
145
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
146
|
+
execute "PRAGMA cache_size = '5000'"
|
147
|
+
|
148
|
+
integrity_check
|
134
149
|
|
135
150
|
@db.transaction(:exclusive) do
|
136
151
|
metadata_table_was_created = false
|
@@ -181,7 +196,7 @@ class FileDigests
|
|
181
196
|
# Convert database from 1st to 2nd version
|
182
197
|
unless get_metadata("digest_algorithm")
|
183
198
|
if get_metadata("database_version") == "1"
|
184
|
-
if File.exist?(@digest_database_path.dirname +
|
199
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
185
200
|
set_metadata("digest_algorithm", "SHA512")
|
186
201
|
else
|
187
202
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -190,6 +205,10 @@ class FileDigests
|
|
190
205
|
end
|
191
206
|
end
|
192
207
|
|
208
|
+
if get_metadata("database_version") != "2"
|
209
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
210
|
+
raise "Incompatible database version"
|
211
|
+
end
|
193
212
|
end
|
194
213
|
end
|
195
214
|
|
@@ -207,15 +226,19 @@ class FileDigests
|
|
207
226
|
end
|
208
227
|
end
|
209
228
|
|
210
|
-
|
211
|
-
|
229
|
+
nested_transaction do
|
230
|
+
track_renames
|
231
|
+
end
|
232
|
+
|
212
233
|
if any_missing_files?
|
213
234
|
if any_exceptions?
|
214
235
|
STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
|
215
236
|
else
|
216
237
|
print_missing_files
|
217
238
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
218
|
-
|
239
|
+
nested_transaction do
|
240
|
+
remove_missing_files
|
241
|
+
end
|
219
242
|
end
|
220
243
|
end
|
221
244
|
end
|
@@ -228,6 +251,7 @@ class FileDigests
|
|
228
251
|
update_digest_to_new_digest new_digest, old_digest
|
229
252
|
end
|
230
253
|
set_metadata "digest_algorithm", @new_digest_algorithm
|
254
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
231
255
|
end
|
232
256
|
end
|
233
257
|
|
@@ -237,6 +261,10 @@ class FileDigests
|
|
237
261
|
|
238
262
|
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
239
263
|
|
264
|
+
execute "PRAGMA optimize"
|
265
|
+
execute "VACUUM"
|
266
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
267
|
+
|
240
268
|
print_counters
|
241
269
|
end
|
242
270
|
end
|
@@ -244,12 +272,12 @@ class FileDigests
|
|
244
272
|
def show_duplicates
|
245
273
|
current_digest = nil
|
246
274
|
query_duplicates.each do |found|
|
247
|
-
if current_digest != found[
|
275
|
+
if current_digest != found["digest"]
|
248
276
|
puts "" if current_digest
|
249
|
-
current_digest = found[
|
250
|
-
puts "#{found[
|
277
|
+
current_digest = found["digest"]
|
278
|
+
puts "#{found["digest"]}:"
|
251
279
|
end
|
252
|
-
puts " #{found[
|
280
|
+
puts " #{found["filename"]}"
|
253
281
|
end
|
254
282
|
end
|
255
283
|
|
@@ -275,10 +303,13 @@ class FileDigests
|
|
275
303
|
return
|
276
304
|
end
|
277
305
|
|
278
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
306
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
279
307
|
mtime_string = time_to_database stat.mtime
|
308
|
+
digest = get_file_digest(filename)
|
280
309
|
|
281
|
-
|
310
|
+
nested_transaction do
|
311
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
312
|
+
end
|
282
313
|
|
283
314
|
rescue => exception
|
284
315
|
@counters[:exceptions] += 1
|
@@ -295,25 +326,25 @@ class FileDigests
|
|
295
326
|
|
296
327
|
def process_previously_seen_file found, filename, mtime, digest
|
297
328
|
@missing_files.delete(filename)
|
298
|
-
if found[
|
329
|
+
if found["digest"] == digest
|
299
330
|
@counters[:good] += 1
|
300
331
|
puts "GOOD: #{filename}" if @options[:verbose]
|
301
332
|
unless @options[:test_only]
|
302
|
-
if found[
|
303
|
-
touch_digest_check_time found[
|
333
|
+
if found["mtime"] == mtime
|
334
|
+
touch_digest_check_time found["id"]
|
304
335
|
else
|
305
|
-
update_mtime mtime, found[
|
336
|
+
update_mtime mtime, found["id"]
|
306
337
|
end
|
307
338
|
end
|
308
339
|
else
|
309
|
-
if found[
|
340
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
310
341
|
@counters[:likely_damaged] += 1
|
311
342
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
312
343
|
else
|
313
344
|
@counters[:updated] += 1
|
314
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
345
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
315
346
|
unless @options[:test_only]
|
316
|
-
update_mtime_and_digest mtime, digest, found[
|
347
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
317
348
|
end
|
318
349
|
end
|
319
350
|
end
|
@@ -351,12 +382,10 @@ class FileDigests
|
|
351
382
|
end
|
352
383
|
|
353
384
|
def remove_missing_files
|
354
|
-
|
355
|
-
|
356
|
-
delete_by_filename filename
|
357
|
-
end
|
358
|
-
@missing_files = {}
|
385
|
+
@missing_files.each do |filename, digest|
|
386
|
+
delete_by_filename filename
|
359
387
|
end
|
388
|
+
@missing_files = {}
|
360
389
|
end
|
361
390
|
|
362
391
|
|
@@ -366,7 +395,13 @@ class FileDigests
|
|
366
395
|
@db.execute *args, &block
|
367
396
|
end
|
368
397
|
|
369
|
-
def
|
398
|
+
def integrity_check
|
399
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
400
|
+
raise "Database integrity check failed"
|
401
|
+
end
|
402
|
+
end
|
403
|
+
|
404
|
+
def nested_transaction(mode = :deferred)
|
370
405
|
if @db.transaction_active?
|
371
406
|
yield
|
372
407
|
else
|
@@ -376,9 +411,9 @@ class FileDigests
|
|
376
411
|
end
|
377
412
|
end
|
378
413
|
|
379
|
-
def perhaps_transaction(condition, mode)
|
414
|
+
def perhaps_transaction(condition, mode = :deferred)
|
380
415
|
if condition
|
381
|
-
|
416
|
+
nested_transaction(mode) do
|
382
417
|
yield
|
383
418
|
end
|
384
419
|
else
|
@@ -387,7 +422,7 @@ class FileDigests
|
|
387
422
|
end
|
388
423
|
|
389
424
|
def table_exist? table_name
|
390
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
425
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
391
426
|
end
|
392
427
|
|
393
428
|
def prepare_method name, query
|
@@ -422,14 +457,14 @@ class FileDigests
|
|
422
457
|
end
|
423
458
|
|
424
459
|
def time_to_database time
|
425
|
-
time.utc.strftime(
|
460
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
426
461
|
end
|
427
462
|
|
428
463
|
|
429
464
|
# Filesystem-related helpers
|
430
465
|
|
431
466
|
def patch_path_string path
|
432
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
467
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
433
468
|
end
|
434
469
|
|
435
470
|
def cleanup_path path
|
@@ -447,13 +482,13 @@ class FileDigests
|
|
447
482
|
end
|
448
483
|
|
449
484
|
def walk_files
|
450
|
-
Dir.glob(@files_path +
|
485
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
451
486
|
yield filename
|
452
487
|
end
|
453
488
|
end
|
454
489
|
|
455
490
|
def get_file_digest filename
|
456
|
-
File.open(filename,
|
491
|
+
File.open(filename, "rb") do |io|
|
457
492
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
458
493
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
459
494
|
|
@@ -495,13 +530,13 @@ class FileDigests
|
|
495
530
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
496
531
|
yield
|
497
532
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
498
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
533
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
499
534
|
end
|
500
535
|
|
501
536
|
def print_file_exception exception, filename
|
502
537
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
503
538
|
begin
|
504
|
-
STDERR.print filename.encode(
|
539
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
505
540
|
rescue
|
506
541
|
STDERR.print "(Unable to encode file name to utf-8) "
|
507
542
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.28
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|