file-digests 0.0.22 → 0.0.27
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +109 -74
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
|
4
|
+
data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
|
7
|
+
data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
|
data/lib/file-digests.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
12
|
+
LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
|
12
13
|
|
13
14
|
def self.canonical_digest_algorithm_name(string)
|
14
15
|
if string
|
15
|
-
|
16
|
-
index
|
16
|
+
algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
|
17
|
+
index = algorithms.map(&:downcase).index(string.downcase)
|
18
|
+
index && algorithms[index]
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
@@ -27,55 +29,60 @@ class FileDigests
|
|
27
29
|
|
28
30
|
def self.parse_cli_options
|
29
31
|
options = {}
|
30
|
-
|
32
|
+
|
31
33
|
OptionParser.new do |opts|
|
32
34
|
opts.banner = [
|
33
35
|
"Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
|
34
36
|
" By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
|
35
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
36
38
|
].join "\n"
|
37
|
-
|
38
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
39
|
+
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
39
41
|
options[:auto] = true
|
40
42
|
end
|
41
43
|
|
42
44
|
opts.on(
|
43
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
44
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
45
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
46
48
|
"#{digest_algorithms_list_text}.",
|
47
|
-
|
48
|
-
|
49
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
50
52
|
) do |value|
|
51
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
52
|
-
unless digest_algorithm
|
53
|
-
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
|
+
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
55
|
+
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
56
|
exit 1
|
55
57
|
end
|
56
58
|
options[:digest_algorithm] = digest_algorithm
|
57
59
|
end
|
58
60
|
|
59
|
-
opts.on("-
|
60
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
61
63
|
end
|
62
64
|
|
63
|
-
opts.on("-
|
64
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
68
|
+
end
|
69
|
+
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
65
72
|
end
|
66
73
|
|
67
|
-
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
68
75
|
options[:quiet] = true
|
69
76
|
end
|
70
77
|
|
71
|
-
opts.on("-
|
72
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
73
80
|
end
|
74
81
|
|
75
|
-
opts.on("-
|
76
|
-
|
77
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
78
84
|
end
|
85
|
+
|
79
86
|
end.parse!
|
80
87
|
options
|
81
88
|
end
|
@@ -93,13 +100,15 @@ class FileDigests
|
|
93
100
|
initialize_paths files_path, digest_database_path
|
94
101
|
initialize_database
|
95
102
|
|
96
|
-
|
97
|
-
if @
|
98
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
|
105
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
106
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
107
|
+
end
|
108
|
+
else
|
109
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
110
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
99
111
|
end
|
100
|
-
else
|
101
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
102
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
103
112
|
end
|
104
113
|
|
105
114
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
@@ -111,7 +120,7 @@ class FileDigests
|
|
111
120
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
112
121
|
|
113
122
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
114
|
-
@digest_database_path +=
|
123
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
115
124
|
ensure_dir_exists @digest_database_path.dirname
|
116
125
|
|
117
126
|
if @options[:verbose]
|
@@ -123,27 +132,33 @@ class FileDigests
|
|
123
132
|
def initialize_database
|
124
133
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
125
134
|
@db.results_as_hash = true
|
135
|
+
@db.busy_timeout = 5000
|
126
136
|
|
127
137
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
128
138
|
|
129
|
-
execute
|
130
|
-
execute
|
131
|
-
execute
|
132
|
-
execute
|
133
|
-
execute
|
139
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
140
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
141
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
142
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
143
|
+
execute "PRAGMA cache_size = '5000'"
|
144
|
+
|
145
|
+
integrity_check
|
134
146
|
|
135
147
|
@db.transaction(:exclusive) do
|
148
|
+
metadata_table_was_created = false
|
136
149
|
unless table_exist?("metadata")
|
137
150
|
execute "CREATE TABLE metadata (
|
138
151
|
key TEXT NOT NULL PRIMARY KEY,
|
139
152
|
value TEXT)"
|
140
153
|
execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
|
141
|
-
|
154
|
+
metadata_table_was_created = true
|
142
155
|
end
|
143
156
|
|
144
157
|
prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
|
145
158
|
prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"
|
146
159
|
|
160
|
+
set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created
|
161
|
+
|
147
162
|
# Heuristic to detect database version 1 (metadata was not stored back then)
|
148
163
|
unless get_metadata("database_version")
|
149
164
|
if table_exist?("digests")
|
@@ -178,7 +193,7 @@ class FileDigests
|
|
178
193
|
# Convert database from 1st to 2nd version
|
179
194
|
unless get_metadata("digest_algorithm")
|
180
195
|
if get_metadata("database_version") == "1"
|
181
|
-
if File.exist?(@digest_database_path.dirname +
|
196
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
182
197
|
set_metadata("digest_algorithm", "SHA512")
|
183
198
|
else
|
184
199
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -187,6 +202,10 @@ class FileDigests
|
|
187
202
|
end
|
188
203
|
end
|
189
204
|
|
205
|
+
if get_metadata("database_version") != "2"
|
206
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
207
|
+
raise "Incompatible database version"
|
208
|
+
end
|
190
209
|
end
|
191
210
|
end
|
192
211
|
|
@@ -204,15 +223,19 @@ class FileDigests
|
|
204
223
|
end
|
205
224
|
end
|
206
225
|
|
207
|
-
|
208
|
-
|
226
|
+
nested_transaction do
|
227
|
+
track_renames
|
228
|
+
end
|
229
|
+
|
209
230
|
if any_missing_files?
|
210
231
|
if any_exceptions?
|
211
232
|
STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
|
212
233
|
else
|
213
234
|
print_missing_files
|
214
235
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
215
|
-
|
236
|
+
nested_transaction do
|
237
|
+
remove_missing_files
|
238
|
+
end
|
216
239
|
end
|
217
240
|
end
|
218
241
|
end
|
@@ -225,6 +248,7 @@ class FileDigests
|
|
225
248
|
update_digest_to_new_digest new_digest, old_digest
|
226
249
|
end
|
227
250
|
set_metadata "digest_algorithm", @new_digest_algorithm
|
251
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
228
252
|
end
|
229
253
|
end
|
230
254
|
|
@@ -234,6 +258,10 @@ class FileDigests
|
|
234
258
|
|
235
259
|
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
236
260
|
|
261
|
+
execute "PRAGMA optimize"
|
262
|
+
execute "VACUUM"
|
263
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
264
|
+
|
237
265
|
print_counters
|
238
266
|
end
|
239
267
|
end
|
@@ -241,12 +269,12 @@ class FileDigests
|
|
241
269
|
def show_duplicates
|
242
270
|
current_digest = nil
|
243
271
|
query_duplicates.each do |found|
|
244
|
-
if current_digest != found[
|
272
|
+
if current_digest != found["digest"]
|
245
273
|
puts "" if current_digest
|
246
|
-
current_digest = found[
|
247
|
-
puts "#{found[
|
274
|
+
current_digest = found["digest"]
|
275
|
+
puts "#{found["digest"]}:"
|
248
276
|
end
|
249
|
-
puts " #{found[
|
277
|
+
puts " #{found["filename"]}"
|
250
278
|
end
|
251
279
|
end
|
252
280
|
|
@@ -272,10 +300,13 @@ class FileDigests
|
|
272
300
|
return
|
273
301
|
end
|
274
302
|
|
275
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
303
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
276
304
|
mtime_string = time_to_database stat.mtime
|
305
|
+
digest = get_file_digest(filename)
|
277
306
|
|
278
|
-
|
307
|
+
nested_transaction do
|
308
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
309
|
+
end
|
279
310
|
|
280
311
|
rescue => exception
|
281
312
|
@counters[:exceptions] += 1
|
@@ -292,25 +323,25 @@ class FileDigests
|
|
292
323
|
|
293
324
|
def process_previously_seen_file found, filename, mtime, digest
|
294
325
|
@missing_files.delete(filename)
|
295
|
-
if found[
|
326
|
+
if found["digest"] == digest
|
296
327
|
@counters[:good] += 1
|
297
328
|
puts "GOOD: #{filename}" if @options[:verbose]
|
298
329
|
unless @options[:test_only]
|
299
|
-
if found[
|
300
|
-
touch_digest_check_time found[
|
330
|
+
if found["mtime"] == mtime
|
331
|
+
touch_digest_check_time found["id"]
|
301
332
|
else
|
302
|
-
update_mtime mtime, found[
|
333
|
+
update_mtime mtime, found["id"]
|
303
334
|
end
|
304
335
|
end
|
305
336
|
else
|
306
|
-
if found[
|
337
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
307
338
|
@counters[:likely_damaged] += 1
|
308
339
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
309
340
|
else
|
310
341
|
@counters[:updated] += 1
|
311
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
342
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
312
343
|
unless @options[:test_only]
|
313
|
-
update_mtime_and_digest mtime, digest, found[
|
344
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
314
345
|
end
|
315
346
|
end
|
316
347
|
end
|
@@ -348,12 +379,10 @@ class FileDigests
|
|
348
379
|
end
|
349
380
|
|
350
381
|
def remove_missing_files
|
351
|
-
|
352
|
-
|
353
|
-
delete_by_filename filename
|
354
|
-
end
|
355
|
-
@missing_files = {}
|
382
|
+
@missing_files.each do |filename, digest|
|
383
|
+
delete_by_filename filename
|
356
384
|
end
|
385
|
+
@missing_files = {}
|
357
386
|
end
|
358
387
|
|
359
388
|
|
@@ -363,7 +392,13 @@ class FileDigests
|
|
363
392
|
@db.execute *args, &block
|
364
393
|
end
|
365
394
|
|
366
|
-
def
|
395
|
+
def integrity_check
|
396
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
397
|
+
raise "Database integrity check failed"
|
398
|
+
end
|
399
|
+
end
|
400
|
+
|
401
|
+
def nested_transaction(mode = :deferred)
|
367
402
|
if @db.transaction_active?
|
368
403
|
yield
|
369
404
|
else
|
@@ -373,9 +408,9 @@ class FileDigests
|
|
373
408
|
end
|
374
409
|
end
|
375
410
|
|
376
|
-
def perhaps_transaction(condition, mode)
|
411
|
+
def perhaps_transaction(condition, mode = :deferred)
|
377
412
|
if condition
|
378
|
-
|
413
|
+
nested_transaction(mode) do
|
379
414
|
yield
|
380
415
|
end
|
381
416
|
else
|
@@ -384,7 +419,7 @@ class FileDigests
|
|
384
419
|
end
|
385
420
|
|
386
421
|
def table_exist? table_name
|
387
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
422
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
388
423
|
end
|
389
424
|
|
390
425
|
def prepare_method name, query
|
@@ -419,14 +454,14 @@ class FileDigests
|
|
419
454
|
end
|
420
455
|
|
421
456
|
def time_to_database time
|
422
|
-
time.utc.strftime(
|
457
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
423
458
|
end
|
424
459
|
|
425
460
|
|
426
461
|
# Filesystem-related helpers
|
427
462
|
|
428
463
|
def patch_path_string path
|
429
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
464
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
430
465
|
end
|
431
466
|
|
432
467
|
def cleanup_path path
|
@@ -444,13 +479,13 @@ class FileDigests
|
|
444
479
|
end
|
445
480
|
|
446
481
|
def walk_files
|
447
|
-
Dir.glob(@files_path +
|
482
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
448
483
|
yield filename
|
449
484
|
end
|
450
485
|
end
|
451
486
|
|
452
487
|
def get_file_digest filename
|
453
|
-
File.open(filename,
|
488
|
+
File.open(filename, "rb") do |io|
|
454
489
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
455
490
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
456
491
|
|
@@ -492,13 +527,13 @@ class FileDigests
|
|
492
527
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
493
528
|
yield
|
494
529
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
495
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
530
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
496
531
|
end
|
497
532
|
|
498
533
|
def print_file_exception exception, filename
|
499
534
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
500
535
|
begin
|
501
|
-
STDERR.print filename.encode(
|
536
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
502
537
|
rescue
|
503
538
|
STDERR.print "(Unable to encode file name to utf-8) "
|
504
539
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|