file-digests 0.0.22 → 0.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +109 -74
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
|
4
|
+
data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
|
7
|
+
data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
|
data/lib/file-digests.rb
CHANGED
@@ -1,19 +1,21 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
1
|
+
require "date"
|
2
|
+
require "digest"
|
3
|
+
require "fileutils"
|
4
|
+
require "openssl"
|
5
|
+
require "optparse"
|
6
|
+
require "pathname"
|
7
|
+
require "set"
|
8
|
+
require "sqlite3"
|
9
9
|
|
10
10
|
class FileDigests
|
11
11
|
DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
|
12
|
+
LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
|
12
13
|
|
13
14
|
def self.canonical_digest_algorithm_name(string)
|
14
15
|
if string
|
15
|
-
|
16
|
-
index
|
16
|
+
algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
|
17
|
+
index = algorithms.map(&:downcase).index(string.downcase)
|
18
|
+
index && algorithms[index]
|
17
19
|
end
|
18
20
|
end
|
19
21
|
|
@@ -27,55 +29,60 @@ class FileDigests
|
|
27
29
|
|
28
30
|
def self.parse_cli_options
|
29
31
|
options = {}
|
30
|
-
|
32
|
+
|
31
33
|
OptionParser.new do |opts|
|
32
34
|
opts.banner = [
|
33
35
|
"Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
|
34
36
|
" By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
|
35
37
|
" Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
|
36
38
|
].join "\n"
|
37
|
-
|
38
|
-
opts.on("-a", "--auto", "Do not ask for any confirmation") do
|
39
|
+
|
40
|
+
opts.on("-a", "--auto", "Do not ask for any confirmation.") do
|
39
41
|
options[:auto] = true
|
40
42
|
end
|
41
43
|
|
42
44
|
opts.on(
|
43
|
-
|
45
|
+
"-d", "--digest DIGEST",
|
44
46
|
'Select a digest algorithm to use. Default is "BLAKE2b512".',
|
45
47
|
'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
|
46
48
|
"#{digest_algorithms_list_text}.",
|
47
|
-
|
48
|
-
|
49
|
-
|
49
|
+
"You only need to specify an algorithm on the first run, your choice will be saved to a database.",
|
50
|
+
"Any time later you could specify a new algorithm to change the current one.",
|
51
|
+
"Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
|
50
52
|
) do |value|
|
51
53
|
digest_algorithm = canonical_digest_algorithm_name(value)
|
52
|
-
unless digest_algorithm
|
53
|
-
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
|
+
unless DIGEST_ALGORITHMS.include?(digest_algorithm)
|
55
|
+
STDERR.puts "ERROR: #{digest_algorithms_list_text}"
|
54
56
|
exit 1
|
55
57
|
end
|
56
58
|
options[:digest_algorithm] = digest_algorithm
|
57
59
|
end
|
58
60
|
|
59
|
-
opts.on("-
|
60
|
-
options[:
|
61
|
+
opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
|
62
|
+
options[:accept_fate] = true
|
61
63
|
end
|
62
64
|
|
63
|
-
opts.on("-
|
64
|
-
|
65
|
+
opts.on("-h", "--help", "Prints this help.") do
|
66
|
+
puts opts
|
67
|
+
exit
|
68
|
+
end
|
69
|
+
|
70
|
+
opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
|
71
|
+
options[:action] = :show_duplicates
|
65
72
|
end
|
66
73
|
|
67
|
-
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
|
74
|
+
opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
|
68
75
|
options[:quiet] = true
|
69
76
|
end
|
70
77
|
|
71
|
-
opts.on("-
|
72
|
-
options[:
|
78
|
+
opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
|
79
|
+
options[:test_only] = true
|
73
80
|
end
|
74
81
|
|
75
|
-
opts.on("-
|
76
|
-
|
77
|
-
exit
|
82
|
+
opts.on("-v", "--verbose", "More verbose output.") do
|
83
|
+
options[:verbose] = true
|
78
84
|
end
|
85
|
+
|
79
86
|
end.parse!
|
80
87
|
options
|
81
88
|
end
|
@@ -93,13 +100,15 @@ class FileDigests
|
|
93
100
|
initialize_paths files_path, digest_database_path
|
94
101
|
initialize_database
|
95
102
|
|
96
|
-
|
97
|
-
if @
|
98
|
-
@
|
103
|
+
@db.transaction(:exclusive) do
|
104
|
+
if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
|
105
|
+
if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
|
106
|
+
@new_digest_algorithm = @options[:digest_algorithm]
|
107
|
+
end
|
108
|
+
else
|
109
|
+
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
110
|
+
set_metadata "digest_algorithm", @digest_algorithm
|
99
111
|
end
|
100
|
-
else
|
101
|
-
@digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
|
102
|
-
set_metadata "digest_algorithm", @digest_algorithm
|
103
112
|
end
|
104
113
|
|
105
114
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
@@ -111,7 +120,7 @@ class FileDigests
|
|
111
120
|
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
112
121
|
|
113
122
|
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
114
|
-
@digest_database_path +=
|
123
|
+
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
115
124
|
ensure_dir_exists @digest_database_path.dirname
|
116
125
|
|
117
126
|
if @options[:verbose]
|
@@ -123,27 +132,33 @@ class FileDigests
|
|
123
132
|
def initialize_database
|
124
133
|
@db = SQLite3::Database.new @digest_database_path.to_s
|
125
134
|
@db.results_as_hash = true
|
135
|
+
@db.busy_timeout = 5000
|
126
136
|
|
127
137
|
file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
|
128
138
|
|
129
|
-
execute
|
130
|
-
execute
|
131
|
-
execute
|
132
|
-
execute
|
133
|
-
execute
|
139
|
+
execute "PRAGMA encoding = 'UTF-8'"
|
140
|
+
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
141
|
+
execute "PRAGMA journal_mode = 'WAL'"
|
142
|
+
execute "PRAGMA synchronous = 'NORMAL'"
|
143
|
+
execute "PRAGMA cache_size = '5000'"
|
144
|
+
|
145
|
+
integrity_check
|
134
146
|
|
135
147
|
@db.transaction(:exclusive) do
|
148
|
+
metadata_table_was_created = false
|
136
149
|
unless table_exist?("metadata")
|
137
150
|
execute "CREATE TABLE metadata (
|
138
151
|
key TEXT NOT NULL PRIMARY KEY,
|
139
152
|
value TEXT)"
|
140
153
|
execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
|
141
|
-
|
154
|
+
metadata_table_was_created = true
|
142
155
|
end
|
143
156
|
|
144
157
|
prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
|
145
158
|
prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"
|
146
159
|
|
160
|
+
set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created
|
161
|
+
|
147
162
|
# Heuristic to detect database version 1 (metadata was not stored back then)
|
148
163
|
unless get_metadata("database_version")
|
149
164
|
if table_exist?("digests")
|
@@ -178,7 +193,7 @@ class FileDigests
|
|
178
193
|
# Convert database from 1st to 2nd version
|
179
194
|
unless get_metadata("digest_algorithm")
|
180
195
|
if get_metadata("database_version") == "1"
|
181
|
-
if File.exist?(@digest_database_path.dirname +
|
196
|
+
if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
|
182
197
|
set_metadata("digest_algorithm", "SHA512")
|
183
198
|
else
|
184
199
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -187,6 +202,10 @@ class FileDigests
|
|
187
202
|
end
|
188
203
|
end
|
189
204
|
|
205
|
+
if get_metadata("database_version") != "2"
|
206
|
+
STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
|
207
|
+
raise "Incompatible database version"
|
208
|
+
end
|
190
209
|
end
|
191
210
|
end
|
192
211
|
|
@@ -204,15 +223,19 @@ class FileDigests
|
|
204
223
|
end
|
205
224
|
end
|
206
225
|
|
207
|
-
|
208
|
-
|
226
|
+
nested_transaction do
|
227
|
+
track_renames
|
228
|
+
end
|
229
|
+
|
209
230
|
if any_missing_files?
|
210
231
|
if any_exceptions?
|
211
232
|
STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
|
212
233
|
else
|
213
234
|
print_missing_files
|
214
235
|
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
215
|
-
|
236
|
+
nested_transaction do
|
237
|
+
remove_missing_files
|
238
|
+
end
|
216
239
|
end
|
217
240
|
end
|
218
241
|
end
|
@@ -225,6 +248,7 @@ class FileDigests
|
|
225
248
|
update_digest_to_new_digest new_digest, old_digest
|
226
249
|
end
|
227
250
|
set_metadata "digest_algorithm", @new_digest_algorithm
|
251
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
228
252
|
end
|
229
253
|
end
|
230
254
|
|
@@ -234,6 +258,10 @@ class FileDigests
|
|
234
258
|
|
235
259
|
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
236
260
|
|
261
|
+
execute "PRAGMA optimize"
|
262
|
+
execute "VACUUM"
|
263
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
264
|
+
|
237
265
|
print_counters
|
238
266
|
end
|
239
267
|
end
|
@@ -241,12 +269,12 @@ class FileDigests
|
|
241
269
|
def show_duplicates
|
242
270
|
current_digest = nil
|
243
271
|
query_duplicates.each do |found|
|
244
|
-
if current_digest != found[
|
272
|
+
if current_digest != found["digest"]
|
245
273
|
puts "" if current_digest
|
246
|
-
current_digest = found[
|
247
|
-
puts "#{found[
|
274
|
+
current_digest = found["digest"]
|
275
|
+
puts "#{found["digest"]}:"
|
248
276
|
end
|
249
|
-
puts " #{found[
|
277
|
+
puts " #{found["filename"]}"
|
250
278
|
end
|
251
279
|
end
|
252
280
|
|
@@ -272,10 +300,13 @@ class FileDigests
|
|
272
300
|
return
|
273
301
|
end
|
274
302
|
|
275
|
-
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode(
|
303
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
276
304
|
mtime_string = time_to_database stat.mtime
|
305
|
+
digest = get_file_digest(filename)
|
277
306
|
|
278
|
-
|
307
|
+
nested_transaction do
|
308
|
+
process_file_indeed normalized_filename, mtime_string, digest
|
309
|
+
end
|
279
310
|
|
280
311
|
rescue => exception
|
281
312
|
@counters[:exceptions] += 1
|
@@ -292,25 +323,25 @@ class FileDigests
|
|
292
323
|
|
293
324
|
def process_previously_seen_file found, filename, mtime, digest
|
294
325
|
@missing_files.delete(filename)
|
295
|
-
if found[
|
326
|
+
if found["digest"] == digest
|
296
327
|
@counters[:good] += 1
|
297
328
|
puts "GOOD: #{filename}" if @options[:verbose]
|
298
329
|
unless @options[:test_only]
|
299
|
-
if found[
|
300
|
-
touch_digest_check_time found[
|
330
|
+
if found["mtime"] == mtime
|
331
|
+
touch_digest_check_time found["id"]
|
301
332
|
else
|
302
|
-
update_mtime mtime, found[
|
333
|
+
update_mtime mtime, found["id"]
|
303
334
|
end
|
304
335
|
end
|
305
336
|
else
|
306
|
-
if found[
|
337
|
+
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
307
338
|
@counters[:likely_damaged] += 1
|
308
339
|
STDERR.puts "LIKELY DAMAGED: #{filename}"
|
309
340
|
else
|
310
341
|
@counters[:updated] += 1
|
311
|
-
puts "UPDATED: #{filename}" unless @options[:quiet]
|
342
|
+
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
312
343
|
unless @options[:test_only]
|
313
|
-
update_mtime_and_digest mtime, digest, found[
|
344
|
+
update_mtime_and_digest mtime, digest, found["id"]
|
314
345
|
end
|
315
346
|
end
|
316
347
|
end
|
@@ -348,12 +379,10 @@ class FileDigests
|
|
348
379
|
end
|
349
380
|
|
350
381
|
def remove_missing_files
|
351
|
-
|
352
|
-
|
353
|
-
delete_by_filename filename
|
354
|
-
end
|
355
|
-
@missing_files = {}
|
382
|
+
@missing_files.each do |filename, digest|
|
383
|
+
delete_by_filename filename
|
356
384
|
end
|
385
|
+
@missing_files = {}
|
357
386
|
end
|
358
387
|
|
359
388
|
|
@@ -363,7 +392,13 @@ class FileDigests
|
|
363
392
|
@db.execute *args, &block
|
364
393
|
end
|
365
394
|
|
366
|
-
def
|
395
|
+
def integrity_check
|
396
|
+
if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
|
397
|
+
raise "Database integrity check failed"
|
398
|
+
end
|
399
|
+
end
|
400
|
+
|
401
|
+
def nested_transaction(mode = :deferred)
|
367
402
|
if @db.transaction_active?
|
368
403
|
yield
|
369
404
|
else
|
@@ -373,9 +408,9 @@ class FileDigests
|
|
373
408
|
end
|
374
409
|
end
|
375
410
|
|
376
|
-
def perhaps_transaction(condition, mode)
|
411
|
+
def perhaps_transaction(condition, mode = :deferred)
|
377
412
|
if condition
|
378
|
-
|
413
|
+
nested_transaction(mode) do
|
379
414
|
yield
|
380
415
|
end
|
381
416
|
else
|
@@ -384,7 +419,7 @@ class FileDigests
|
|
384
419
|
end
|
385
420
|
|
386
421
|
def table_exist? table_name
|
387
|
-
execute("SELECT name FROM sqlite_master WHERE type='table' AND name =
|
422
|
+
execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
|
388
423
|
end
|
389
424
|
|
390
425
|
def prepare_method name, query
|
@@ -419,14 +454,14 @@ class FileDigests
|
|
419
454
|
end
|
420
455
|
|
421
456
|
def time_to_database time
|
422
|
-
time.utc.strftime(
|
457
|
+
time.utc.strftime("%Y-%m-%d %H:%M:%S")
|
423
458
|
end
|
424
459
|
|
425
460
|
|
426
461
|
# Filesystem-related helpers
|
427
462
|
|
428
463
|
def patch_path_string path
|
429
|
-
Gem.win_platform? ? path.gsub(/\\/,
|
464
|
+
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
430
465
|
end
|
431
466
|
|
432
467
|
def cleanup_path path
|
@@ -444,13 +479,13 @@ class FileDigests
|
|
444
479
|
end
|
445
480
|
|
446
481
|
def walk_files
|
447
|
-
Dir.glob(@files_path +
|
482
|
+
Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
|
448
483
|
yield filename
|
449
484
|
end
|
450
485
|
end
|
451
486
|
|
452
487
|
def get_file_digest filename
|
453
|
-
File.open(filename,
|
488
|
+
File.open(filename, "rb") do |io|
|
454
489
|
digest = OpenSSL::Digest.new(@digest_algorithm)
|
455
490
|
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
456
491
|
|
@@ -492,13 +527,13 @@ class FileDigests
|
|
492
527
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
493
528
|
yield
|
494
529
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
|
495
|
-
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{
|
530
|
+
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
496
531
|
end
|
497
532
|
|
498
533
|
def print_file_exception exception, filename
|
499
534
|
STDERR.print "EXCEPTION: #{exception.message}, processing file: "
|
500
535
|
begin
|
501
|
-
STDERR.print filename.encode(
|
536
|
+
STDERR.print filename.encode("utf-8", universal_newline: true)
|
502
537
|
rescue
|
503
538
|
STDERR.print "(Unable to encode file name to utf-8) "
|
504
539
|
STDERR.print filename
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: openssl
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1
|
19
|
+
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1
|
26
|
+
version: '2.1'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '1.3'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '1.3'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|