file-digests 0.0.36 → 0.0.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +211 -138
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
|
4
|
+
data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
|
7
|
+
data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
|
data/lib/file-digests.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
1
3
|
# Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
|
2
4
|
#
|
3
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -15,10 +17,8 @@
|
|
15
17
|
require "date"
|
16
18
|
require "digest"
|
17
19
|
require "fileutils"
|
18
|
-
require "find"
|
19
20
|
require "openssl"
|
20
21
|
require "optparse"
|
21
|
-
require "pathname"
|
22
22
|
require "set"
|
23
23
|
require "sqlite3"
|
24
24
|
|
@@ -139,16 +139,106 @@ class FileDigests
|
|
139
139
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
140
140
|
end
|
141
141
|
|
142
|
+
def perform_check
|
143
|
+
measure_time do
|
144
|
+
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
145
|
+
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
146
|
+
|
147
|
+
walk_files(@files_path) do |filename|
|
148
|
+
process_file filename
|
149
|
+
end
|
150
|
+
|
151
|
+
nested_transaction do
|
152
|
+
puts "Tracking renames..." if @options[:verbose]
|
153
|
+
track_renames
|
154
|
+
end
|
155
|
+
|
156
|
+
if any_missing_files?
|
157
|
+
if any_exceptions?
|
158
|
+
STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
|
159
|
+
else
|
160
|
+
report_missing_files
|
161
|
+
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
162
|
+
nested_transaction do
|
163
|
+
puts "Removing missing files..." if @options[:verbose]
|
164
|
+
remove_missing_files
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
if @new_digest_algorithm && !@options[:test_only]
|
171
|
+
if any_missing_files? || any_likely_damaged? || any_exceptions?
|
172
|
+
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
173
|
+
else
|
174
|
+
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
175
|
+
digests_update_digests_to_new_digests
|
176
|
+
set_metadata "digest_algorithm", @new_digest_algorithm
|
177
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
if any_likely_damaged? || any_exceptions?
|
182
|
+
STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
|
183
|
+
STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
|
184
|
+
end
|
185
|
+
|
186
|
+
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
187
|
+
|
188
|
+
print_counters
|
189
|
+
end
|
190
|
+
|
191
|
+
puts "Performing database maintenance..." if @options[:verbose]
|
192
|
+
execute "PRAGMA optimize"
|
193
|
+
execute "VACUUM"
|
194
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
195
|
+
|
196
|
+
hide_database_files
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def show_duplicates
|
201
|
+
current_digest = nil
|
202
|
+
digests_select_duplicates.each do |found|
|
203
|
+
if current_digest != found["digest"]
|
204
|
+
puts "" if current_digest
|
205
|
+
current_digest = found["digest"]
|
206
|
+
puts "#{found["digest"]}:"
|
207
|
+
end
|
208
|
+
puts " #{found["filename"]}"
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
private
|
213
|
+
|
142
214
|
def initialize_paths files_path, digest_database_path
|
143
|
-
@files_path =
|
215
|
+
@files_path = realpath(files_path || ".")
|
216
|
+
|
217
|
+
unless File.directory?(@files_path) && File.readable?(@files_path)
|
218
|
+
raise "ERROR: Files path must be a readable directory"
|
219
|
+
end
|
144
220
|
|
145
|
-
|
221
|
+
@start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
|
222
|
+
|
223
|
+
@error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
|
224
|
+
@missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
|
146
225
|
|
147
|
-
@digest_database_path = digest_database_path ?
|
148
|
-
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
149
|
-
ensure_dir_exist @digest_database_path.dirname
|
226
|
+
@digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
|
150
227
|
|
151
|
-
|
228
|
+
if File.directory?(@digest_database_path)
|
229
|
+
@digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
|
230
|
+
end
|
231
|
+
|
232
|
+
@digest_database_files = [
|
233
|
+
@digest_database_path,
|
234
|
+
"#{@digest_database_path}-wal",
|
235
|
+
"#{@digest_database_path}-shm"
|
236
|
+
]
|
237
|
+
|
238
|
+
@skip_files = @digest_database_files + [
|
239
|
+
@error_log_path,
|
240
|
+
@missing_files_path
|
241
|
+
]
|
152
242
|
|
153
243
|
if @options[:verbose]
|
154
244
|
puts "Target directory: #{@files_path}"
|
@@ -157,7 +247,7 @@ class FileDigests
|
|
157
247
|
end
|
158
248
|
|
159
249
|
def initialize_database
|
160
|
-
@db = SQLite3::Database.new @digest_database_path
|
250
|
+
@db = SQLite3::Database.new @digest_database_path
|
161
251
|
@db.results_as_hash = true
|
162
252
|
@db.busy_timeout = 5000
|
163
253
|
|
@@ -216,7 +306,7 @@ class FileDigests
|
|
216
306
|
# Convert database from 1st to 2nd version
|
217
307
|
unless get_metadata("digest_algorithm")
|
218
308
|
if get_metadata("database_version") == "1"
|
219
|
-
if File.exist?(@digest_database_path.
|
309
|
+
if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
|
220
310
|
set_metadata("digest_algorithm", "SHA512")
|
221
311
|
else
|
222
312
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -269,81 +359,84 @@ class FileDigests
|
|
269
359
|
prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
|
270
360
|
end
|
271
361
|
|
272
|
-
def perform_check
|
273
|
-
measure_time do
|
274
|
-
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
275
|
-
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
276
362
|
|
277
|
-
|
278
|
-
process_file filename
|
279
|
-
end
|
363
|
+
# Files
|
280
364
|
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
end
|
365
|
+
def realpath path
|
366
|
+
realxpath path, :realpath
|
367
|
+
end
|
285
368
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
else
|
290
|
-
print_missing_files
|
291
|
-
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
292
|
-
nested_transaction do
|
293
|
-
puts "Removing missing files..." if @options[:verbose]
|
294
|
-
remove_missing_files
|
295
|
-
end
|
296
|
-
end
|
297
|
-
end
|
298
|
-
end
|
369
|
+
def realdirpath path
|
370
|
+
realxpath path, :realdirpath
|
371
|
+
end
|
299
372
|
|
300
|
-
|
301
|
-
|
302
|
-
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
303
|
-
else
|
304
|
-
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
305
|
-
digests_update_digests_to_new_digests
|
306
|
-
set_metadata "digest_algorithm", @new_digest_algorithm
|
307
|
-
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
308
|
-
end
|
309
|
-
end
|
373
|
+
def realxpath path, method_name
|
374
|
+
path = path.encode("utf-8")
|
310
375
|
|
311
|
-
|
312
|
-
|
313
|
-
|
376
|
+
if Gem.win_platform?
|
377
|
+
path = path.gsub(/\\/, "/")
|
378
|
+
end
|
314
379
|
|
315
|
-
|
380
|
+
path = File.send(method_name, path).encode("utf-8")
|
316
381
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
puts "Performing database maintenance..." if @options[:verbose]
|
321
|
-
execute "PRAGMA optimize"
|
322
|
-
execute "VACUUM"
|
323
|
-
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
382
|
+
if Gem.win_platform? && path[0] == "/"
|
383
|
+
path = Dir.pwd[0, 2].encode("utf-8") + path
|
384
|
+
end
|
324
385
|
|
325
|
-
|
386
|
+
path
|
387
|
+
end
|
388
|
+
|
389
|
+
def perhaps_nt_path path
|
390
|
+
if Gem.win_platform?
|
391
|
+
"\\??\\#{path.gsub(/\//,"\\")}"
|
392
|
+
else
|
393
|
+
path
|
326
394
|
end
|
327
395
|
end
|
328
396
|
|
329
|
-
def
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
397
|
+
def get_file_digest filename
|
398
|
+
File.open(filename, "rb") do |io|
|
399
|
+
digest = OpenSSL::Digest.new(@digest_algorithm)
|
400
|
+
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
401
|
+
|
402
|
+
buffer = ""
|
403
|
+
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
404
|
+
digest.update(buffer)
|
405
|
+
new_digest.update(buffer) if @new_digest_algorithm
|
336
406
|
end
|
337
|
-
|
407
|
+
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
338
408
|
end
|
339
409
|
end
|
340
410
|
|
341
|
-
|
411
|
+
def walk_files(path, &block)
|
412
|
+
Dir.each_child(path, encoding: "UTF-8") do |item|
|
413
|
+
item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
|
414
|
+
begin
|
415
|
+
item_perhaps_nt_path = perhaps_nt_path item
|
416
|
+
|
417
|
+
unless File.symlink? item_perhaps_nt_path
|
418
|
+
if File.directory?(item_perhaps_nt_path)
|
419
|
+
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
420
|
+
walk_files(item, &block)
|
421
|
+
else
|
422
|
+
yield item
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
rescue => exception
|
427
|
+
@counters[:exceptions] += 1
|
428
|
+
report_file_exception exception, item
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
342
432
|
|
343
433
|
def process_file filename
|
344
|
-
|
434
|
+
perhaps_nt_filename = perhaps_nt_path filename
|
435
|
+
|
436
|
+
# this is checked in the walk_files
|
437
|
+
# return if File.symlink? perhaps_nt_filename
|
345
438
|
|
346
|
-
stat = File.stat
|
439
|
+
stat = File.stat perhaps_nt_filename
|
347
440
|
|
348
441
|
return if stat.blockdev?
|
349
442
|
return if stat.chardev?
|
@@ -353,23 +446,19 @@ class FileDigests
|
|
353
446
|
|
354
447
|
raise "File is not readable" unless stat.readable?
|
355
448
|
|
356
|
-
if @
|
357
|
-
puts "SKIPPING
|
449
|
+
if @skip_files.include?(filename)
|
450
|
+
puts "SKIPPING FILE: #{filename}" if @options[:verbose]
|
358
451
|
return
|
359
452
|
end
|
360
453
|
|
361
|
-
normalized_filename = filename.delete_prefix("#{@files_path
|
454
|
+
normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
362
455
|
mtime_string = time_to_database stat.mtime
|
363
|
-
digest, new_digest = get_file_digest(
|
456
|
+
digest, new_digest = get_file_digest(perhaps_nt_filename)
|
364
457
|
|
365
458
|
nested_transaction do
|
366
459
|
new_digests_insert(normalized_filename, new_digest) if new_digest
|
367
460
|
process_file_indeed normalized_filename, mtime_string, digest
|
368
461
|
end
|
369
|
-
|
370
|
-
rescue => exception
|
371
|
-
@counters[:exceptions] += 1
|
372
|
-
print_file_exception exception, filename
|
373
462
|
end
|
374
463
|
|
375
464
|
def process_file_indeed filename, mtime, digest
|
@@ -395,7 +484,7 @@ class FileDigests
|
|
395
484
|
else
|
396
485
|
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
397
486
|
@counters[:likely_damaged] += 1
|
398
|
-
|
487
|
+
error_text "LIKELY DAMAGED: #{filename}"
|
399
488
|
else
|
400
489
|
@counters[:updated] += 1
|
401
490
|
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
@@ -425,10 +514,20 @@ class FileDigests
|
|
425
514
|
@counters[:renamed] = @db.changes
|
426
515
|
end
|
427
516
|
|
428
|
-
def print_missing_files
|
517
|
+
def report_missing_files
|
429
518
|
puts "\nMISSING FILES:"
|
519
|
+
write_missing_files STDOUT
|
520
|
+
if missing_files_count > 256
|
521
|
+
File.open(@missing_files_path, "a") do |f|
|
522
|
+
write_missing_files f
|
523
|
+
end
|
524
|
+
puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
|
525
|
+
end
|
526
|
+
end
|
527
|
+
|
528
|
+
def write_missing_files dest
|
430
529
|
missing_files_select_all_filenames.each do |record|
|
431
|
-
puts record["filename"]
|
530
|
+
dest.puts record["filename"]
|
432
531
|
end
|
433
532
|
end
|
434
533
|
|
@@ -446,6 +545,17 @@ class FileDigests
|
|
446
545
|
end
|
447
546
|
|
448
547
|
|
548
|
+
# Runtime state helpers
|
549
|
+
|
550
|
+
def any_exceptions?
|
551
|
+
@counters[:exceptions] > 0
|
552
|
+
end
|
553
|
+
|
554
|
+
def any_likely_damaged?
|
555
|
+
@counters[:likely_damaged] > 0
|
556
|
+
end
|
557
|
+
|
558
|
+
|
449
559
|
# Database helpers
|
450
560
|
|
451
561
|
def execute *args, &block
|
@@ -531,7 +641,7 @@ class FileDigests
|
|
531
641
|
def check_if_database_is_at_certain_version target_version
|
532
642
|
current_version = get_metadata("database_version")
|
533
643
|
if current_version != target_version
|
534
|
-
STDERR.puts "This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
|
644
|
+
STDERR.puts "ERROR: This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
|
535
645
|
raise "Incompatible database version"
|
536
646
|
end
|
537
647
|
end
|
@@ -541,57 +651,6 @@ class FileDigests
|
|
541
651
|
end
|
542
652
|
|
543
653
|
|
544
|
-
# Filesystem-related helpers
|
545
|
-
|
546
|
-
def patch_path_string path
|
547
|
-
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
548
|
-
end
|
549
|
-
|
550
|
-
def cleanup_path path
|
551
|
-
Pathname.new(patch_path_string(path)).cleanpath
|
552
|
-
end
|
553
|
-
|
554
|
-
def ensure_dir_exist path
|
555
|
-
if File.exist?(path)
|
556
|
-
unless File.directory?(path)
|
557
|
-
raise "#{path} is not a directory"
|
558
|
-
end
|
559
|
-
else
|
560
|
-
FileUtils.mkdir_p path
|
561
|
-
end
|
562
|
-
end
|
563
|
-
|
564
|
-
def walk_files
|
565
|
-
Find.find(@files_path) do |path|
|
566
|
-
yield path
|
567
|
-
end
|
568
|
-
end
|
569
|
-
|
570
|
-
def get_file_digest filename
|
571
|
-
File.open(filename, "rb") do |io|
|
572
|
-
digest = OpenSSL::Digest.new(@digest_algorithm)
|
573
|
-
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
574
|
-
|
575
|
-
buffer = ""
|
576
|
-
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
577
|
-
digest.update(buffer)
|
578
|
-
new_digest.update(buffer) if @new_digest_algorithm
|
579
|
-
end
|
580
|
-
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
581
|
-
end
|
582
|
-
end
|
583
|
-
|
584
|
-
|
585
|
-
# Runtime state helpers
|
586
|
-
|
587
|
-
def any_exceptions?
|
588
|
-
@counters[:exceptions] > 0
|
589
|
-
end
|
590
|
-
|
591
|
-
def any_likely_damaged?
|
592
|
-
@counters[:likely_damaged] > 0
|
593
|
-
end
|
594
|
-
|
595
654
|
# UI helpers
|
596
655
|
|
597
656
|
def confirm text
|
@@ -611,17 +670,31 @@ class FileDigests
|
|
611
670
|
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
612
671
|
end
|
613
672
|
|
614
|
-
def print_file_exception exception, filename
|
615
|
-
STDERR
|
673
|
+
def report_file_exception exception, filename
|
674
|
+
write_file_exception STDERR, exception, filename
|
675
|
+
File.open(@error_log_path, "a") do |f|
|
676
|
+
write_file_exception f, exception, filename
|
677
|
+
end
|
678
|
+
end
|
679
|
+
|
680
|
+
def write_file_exception dest, exception, filename
|
681
|
+
dest.print "ERROR: #{exception.message}, processing file: "
|
616
682
|
begin
|
617
|
-
|
683
|
+
dest.print filename.encode("utf-8", universal_newline: true)
|
618
684
|
rescue
|
619
|
-
|
620
|
-
|
685
|
+
dest.print "(Unable to encode file name to utf-8) "
|
686
|
+
dest.print filename
|
687
|
+
end
|
688
|
+
dest.print "\n"
|
689
|
+
dest.flush
|
690
|
+
exception.backtrace.each { |line| dest.puts " " + line }
|
691
|
+
end
|
692
|
+
|
693
|
+
def error_text text
|
694
|
+
STDERR.puts text
|
695
|
+
File.open(@error_log_path, "a") do |f|
|
696
|
+
f.puts text
|
621
697
|
end
|
622
|
-
STDERR.print "\n"
|
623
|
-
STDERR.flush
|
624
|
-
exception.backtrace.each { |line| STDERR.puts " " + line }
|
625
698
|
end
|
626
699
|
|
627
700
|
def print_counters
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.36
|
4
|
+
version: 0.0.41
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: openssl
|