file-digests 0.0.36 → 0.0.41
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +211 -138
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
|
4
|
+
data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
|
7
|
+
data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
|
data/lib/file-digests.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
1
3
|
# Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
|
2
4
|
#
|
3
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -15,10 +17,8 @@
|
|
15
17
|
require "date"
|
16
18
|
require "digest"
|
17
19
|
require "fileutils"
|
18
|
-
require "find"
|
19
20
|
require "openssl"
|
20
21
|
require "optparse"
|
21
|
-
require "pathname"
|
22
22
|
require "set"
|
23
23
|
require "sqlite3"
|
24
24
|
|
@@ -139,16 +139,106 @@ class FileDigests
|
|
139
139
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
140
140
|
end
|
141
141
|
|
142
|
+
def perform_check
|
143
|
+
measure_time do
|
144
|
+
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
145
|
+
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
146
|
+
|
147
|
+
walk_files(@files_path) do |filename|
|
148
|
+
process_file filename
|
149
|
+
end
|
150
|
+
|
151
|
+
nested_transaction do
|
152
|
+
puts "Tracking renames..." if @options[:verbose]
|
153
|
+
track_renames
|
154
|
+
end
|
155
|
+
|
156
|
+
if any_missing_files?
|
157
|
+
if any_exceptions?
|
158
|
+
STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
|
159
|
+
else
|
160
|
+
report_missing_files
|
161
|
+
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
162
|
+
nested_transaction do
|
163
|
+
puts "Removing missing files..." if @options[:verbose]
|
164
|
+
remove_missing_files
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
if @new_digest_algorithm && !@options[:test_only]
|
171
|
+
if any_missing_files? || any_likely_damaged? || any_exceptions?
|
172
|
+
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
173
|
+
else
|
174
|
+
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
175
|
+
digests_update_digests_to_new_digests
|
176
|
+
set_metadata "digest_algorithm", @new_digest_algorithm
|
177
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
if any_likely_damaged? || any_exceptions?
|
182
|
+
STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
|
183
|
+
STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
|
184
|
+
end
|
185
|
+
|
186
|
+
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
187
|
+
|
188
|
+
print_counters
|
189
|
+
end
|
190
|
+
|
191
|
+
puts "Performing database maintenance..." if @options[:verbose]
|
192
|
+
execute "PRAGMA optimize"
|
193
|
+
execute "VACUUM"
|
194
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
195
|
+
|
196
|
+
hide_database_files
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def show_duplicates
|
201
|
+
current_digest = nil
|
202
|
+
digests_select_duplicates.each do |found|
|
203
|
+
if current_digest != found["digest"]
|
204
|
+
puts "" if current_digest
|
205
|
+
current_digest = found["digest"]
|
206
|
+
puts "#{found["digest"]}:"
|
207
|
+
end
|
208
|
+
puts " #{found["filename"]}"
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
private
|
213
|
+
|
142
214
|
def initialize_paths files_path, digest_database_path
|
143
|
-
@files_path =
|
215
|
+
@files_path = realpath(files_path || ".")
|
216
|
+
|
217
|
+
unless File.directory?(@files_path) && File.readable?(@files_path)
|
218
|
+
raise "ERROR: Files path must be a readable directory"
|
219
|
+
end
|
144
220
|
|
145
|
-
|
221
|
+
@start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
|
222
|
+
|
223
|
+
@error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
|
224
|
+
@missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
|
146
225
|
|
147
|
-
@digest_database_path = digest_database_path ?
|
148
|
-
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
149
|
-
ensure_dir_exist @digest_database_path.dirname
|
226
|
+
@digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
|
150
227
|
|
151
|
-
|
228
|
+
if File.directory?(@digest_database_path)
|
229
|
+
@digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
|
230
|
+
end
|
231
|
+
|
232
|
+
@digest_database_files = [
|
233
|
+
@digest_database_path,
|
234
|
+
"#{@digest_database_path}-wal",
|
235
|
+
"#{@digest_database_path}-shm"
|
236
|
+
]
|
237
|
+
|
238
|
+
@skip_files = @digest_database_files + [
|
239
|
+
@error_log_path,
|
240
|
+
@missing_files_path
|
241
|
+
]
|
152
242
|
|
153
243
|
if @options[:verbose]
|
154
244
|
puts "Target directory: #{@files_path}"
|
@@ -157,7 +247,7 @@ class FileDigests
|
|
157
247
|
end
|
158
248
|
|
159
249
|
def initialize_database
|
160
|
-
@db = SQLite3::Database.new @digest_database_path
|
250
|
+
@db = SQLite3::Database.new @digest_database_path
|
161
251
|
@db.results_as_hash = true
|
162
252
|
@db.busy_timeout = 5000
|
163
253
|
|
@@ -216,7 +306,7 @@ class FileDigests
|
|
216
306
|
# Convert database from 1st to 2nd version
|
217
307
|
unless get_metadata("digest_algorithm")
|
218
308
|
if get_metadata("database_version") == "1"
|
219
|
-
if File.exist?(@digest_database_path.
|
309
|
+
if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
|
220
310
|
set_metadata("digest_algorithm", "SHA512")
|
221
311
|
else
|
222
312
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -269,81 +359,84 @@ class FileDigests
|
|
269
359
|
prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
|
270
360
|
end
|
271
361
|
|
272
|
-
def perform_check
|
273
|
-
measure_time do
|
274
|
-
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
275
|
-
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
276
362
|
|
277
|
-
|
278
|
-
process_file filename
|
279
|
-
end
|
363
|
+
# Files
|
280
364
|
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
end
|
365
|
+
def realpath path
|
366
|
+
realxpath path, :realpath
|
367
|
+
end
|
285
368
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
else
|
290
|
-
print_missing_files
|
291
|
-
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
292
|
-
nested_transaction do
|
293
|
-
puts "Removing missing files..." if @options[:verbose]
|
294
|
-
remove_missing_files
|
295
|
-
end
|
296
|
-
end
|
297
|
-
end
|
298
|
-
end
|
369
|
+
def realdirpath path
|
370
|
+
realxpath path, :realdirpath
|
371
|
+
end
|
299
372
|
|
300
|
-
|
301
|
-
|
302
|
-
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
303
|
-
else
|
304
|
-
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
305
|
-
digests_update_digests_to_new_digests
|
306
|
-
set_metadata "digest_algorithm", @new_digest_algorithm
|
307
|
-
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
308
|
-
end
|
309
|
-
end
|
373
|
+
def realxpath path, method_name
|
374
|
+
path = path.encode("utf-8")
|
310
375
|
|
311
|
-
|
312
|
-
|
313
|
-
|
376
|
+
if Gem.win_platform?
|
377
|
+
path = path.gsub(/\\/, "/")
|
378
|
+
end
|
314
379
|
|
315
|
-
|
380
|
+
path = File.send(method_name, path).encode("utf-8")
|
316
381
|
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
puts "Performing database maintenance..." if @options[:verbose]
|
321
|
-
execute "PRAGMA optimize"
|
322
|
-
execute "VACUUM"
|
323
|
-
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
382
|
+
if Gem.win_platform? && path[0] == "/"
|
383
|
+
path = Dir.pwd[0, 2].encode("utf-8") + path
|
384
|
+
end
|
324
385
|
|
325
|
-
|
386
|
+
path
|
387
|
+
end
|
388
|
+
|
389
|
+
def perhaps_nt_path path
|
390
|
+
if Gem.win_platform?
|
391
|
+
"\\??\\#{path.gsub(/\//,"\\")}"
|
392
|
+
else
|
393
|
+
path
|
326
394
|
end
|
327
395
|
end
|
328
396
|
|
329
|
-
def
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
397
|
+
def get_file_digest filename
|
398
|
+
File.open(filename, "rb") do |io|
|
399
|
+
digest = OpenSSL::Digest.new(@digest_algorithm)
|
400
|
+
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
401
|
+
|
402
|
+
buffer = ""
|
403
|
+
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
404
|
+
digest.update(buffer)
|
405
|
+
new_digest.update(buffer) if @new_digest_algorithm
|
336
406
|
end
|
337
|
-
|
407
|
+
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
338
408
|
end
|
339
409
|
end
|
340
410
|
|
341
|
-
|
411
|
+
def walk_files(path, &block)
|
412
|
+
Dir.each_child(path, encoding: "UTF-8") do |item|
|
413
|
+
item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
|
414
|
+
begin
|
415
|
+
item_perhaps_nt_path = perhaps_nt_path item
|
416
|
+
|
417
|
+
unless File.symlink? item_perhaps_nt_path
|
418
|
+
if File.directory?(item_perhaps_nt_path)
|
419
|
+
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
420
|
+
walk_files(item, &block)
|
421
|
+
else
|
422
|
+
yield item
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
rescue => exception
|
427
|
+
@counters[:exceptions] += 1
|
428
|
+
report_file_exception exception, item
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
342
432
|
|
343
433
|
def process_file filename
|
344
|
-
|
434
|
+
perhaps_nt_filename = perhaps_nt_path filename
|
435
|
+
|
436
|
+
# this is checked in the walk_files
|
437
|
+
# return if File.symlink? perhaps_nt_filename
|
345
438
|
|
346
|
-
stat = File.stat
|
439
|
+
stat = File.stat perhaps_nt_filename
|
347
440
|
|
348
441
|
return if stat.blockdev?
|
349
442
|
return if stat.chardev?
|
@@ -353,23 +446,19 @@ class FileDigests
|
|
353
446
|
|
354
447
|
raise "File is not readable" unless stat.readable?
|
355
448
|
|
356
|
-
if @
|
357
|
-
puts "SKIPPING
|
449
|
+
if @skip_files.include?(filename)
|
450
|
+
puts "SKIPPING FILE: #{filename}" if @options[:verbose]
|
358
451
|
return
|
359
452
|
end
|
360
453
|
|
361
|
-
normalized_filename = filename.delete_prefix("#{@files_path
|
454
|
+
normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
362
455
|
mtime_string = time_to_database stat.mtime
|
363
|
-
digest, new_digest = get_file_digest(
|
456
|
+
digest, new_digest = get_file_digest(perhaps_nt_filename)
|
364
457
|
|
365
458
|
nested_transaction do
|
366
459
|
new_digests_insert(normalized_filename, new_digest) if new_digest
|
367
460
|
process_file_indeed normalized_filename, mtime_string, digest
|
368
461
|
end
|
369
|
-
|
370
|
-
rescue => exception
|
371
|
-
@counters[:exceptions] += 1
|
372
|
-
print_file_exception exception, filename
|
373
462
|
end
|
374
463
|
|
375
464
|
def process_file_indeed filename, mtime, digest
|
@@ -395,7 +484,7 @@ class FileDigests
|
|
395
484
|
else
|
396
485
|
if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
|
397
486
|
@counters[:likely_damaged] += 1
|
398
|
-
|
487
|
+
error_text "LIKELY DAMAGED: #{filename}"
|
399
488
|
else
|
400
489
|
@counters[:updated] += 1
|
401
490
|
puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
|
@@ -425,10 +514,20 @@ class FileDigests
|
|
425
514
|
@counters[:renamed] = @db.changes
|
426
515
|
end
|
427
516
|
|
428
|
-
def
|
517
|
+
def report_missing_files
|
429
518
|
puts "\nMISSING FILES:"
|
519
|
+
write_missing_files STDOUT
|
520
|
+
if missing_files_count > 256
|
521
|
+
File.open(@missing_files_path, "a") do |f|
|
522
|
+
write_missing_files f
|
523
|
+
end
|
524
|
+
puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
|
525
|
+
end
|
526
|
+
end
|
527
|
+
|
528
|
+
def write_missing_files dest
|
430
529
|
missing_files_select_all_filenames.each do |record|
|
431
|
-
puts record["filename"]
|
530
|
+
dest.puts record["filename"]
|
432
531
|
end
|
433
532
|
end
|
434
533
|
|
@@ -446,6 +545,17 @@ class FileDigests
|
|
446
545
|
end
|
447
546
|
|
448
547
|
|
548
|
+
# Runtime state helpers
|
549
|
+
|
550
|
+
def any_exceptions?
|
551
|
+
@counters[:exceptions] > 0
|
552
|
+
end
|
553
|
+
|
554
|
+
def any_likely_damaged?
|
555
|
+
@counters[:likely_damaged] > 0
|
556
|
+
end
|
557
|
+
|
558
|
+
|
449
559
|
# Database helpers
|
450
560
|
|
451
561
|
def execute *args, &block
|
@@ -531,7 +641,7 @@ class FileDigests
|
|
531
641
|
def check_if_database_is_at_certain_version target_version
|
532
642
|
current_version = get_metadata("database_version")
|
533
643
|
if current_version != target_version
|
534
|
-
STDERR.puts "This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
|
644
|
+
STDERR.puts "ERROR: This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
|
535
645
|
raise "Incompatible database version"
|
536
646
|
end
|
537
647
|
end
|
@@ -541,57 +651,6 @@ class FileDigests
|
|
541
651
|
end
|
542
652
|
|
543
653
|
|
544
|
-
# Filesystem-related helpers
|
545
|
-
|
546
|
-
def patch_path_string path
|
547
|
-
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
548
|
-
end
|
549
|
-
|
550
|
-
def cleanup_path path
|
551
|
-
Pathname.new(patch_path_string(path)).cleanpath
|
552
|
-
end
|
553
|
-
|
554
|
-
def ensure_dir_exist path
|
555
|
-
if File.exist?(path)
|
556
|
-
unless File.directory?(path)
|
557
|
-
raise "#{path} is not a directory"
|
558
|
-
end
|
559
|
-
else
|
560
|
-
FileUtils.mkdir_p path
|
561
|
-
end
|
562
|
-
end
|
563
|
-
|
564
|
-
def walk_files
|
565
|
-
Find.find(@files_path) do |path|
|
566
|
-
yield path
|
567
|
-
end
|
568
|
-
end
|
569
|
-
|
570
|
-
def get_file_digest filename
|
571
|
-
File.open(filename, "rb") do |io|
|
572
|
-
digest = OpenSSL::Digest.new(@digest_algorithm)
|
573
|
-
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
574
|
-
|
575
|
-
buffer = ""
|
576
|
-
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
577
|
-
digest.update(buffer)
|
578
|
-
new_digest.update(buffer) if @new_digest_algorithm
|
579
|
-
end
|
580
|
-
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
581
|
-
end
|
582
|
-
end
|
583
|
-
|
584
|
-
|
585
|
-
# Runtime state helpers
|
586
|
-
|
587
|
-
def any_exceptions?
|
588
|
-
@counters[:exceptions] > 0
|
589
|
-
end
|
590
|
-
|
591
|
-
def any_likely_damaged?
|
592
|
-
@counters[:likely_damaged] > 0
|
593
|
-
end
|
594
|
-
|
595
654
|
# UI helpers
|
596
655
|
|
597
656
|
def confirm text
|
@@ -611,17 +670,31 @@ class FileDigests
|
|
611
670
|
puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
|
612
671
|
end
|
613
672
|
|
614
|
-
def
|
615
|
-
STDERR
|
673
|
+
def report_file_exception exception, filename
|
674
|
+
write_file_exception STDERR, exception, filename
|
675
|
+
File.open(@error_log_path, "a") do |f|
|
676
|
+
write_file_exception f, exception, filename
|
677
|
+
end
|
678
|
+
end
|
679
|
+
|
680
|
+
def write_file_exception dest, exception, filename
|
681
|
+
dest.print "ERROR: #{exception.message}, processing file: "
|
616
682
|
begin
|
617
|
-
|
683
|
+
dest.print filename.encode("utf-8", universal_newline: true)
|
618
684
|
rescue
|
619
|
-
|
620
|
-
|
685
|
+
dest.print "(Unable to encode file name to utf-8) "
|
686
|
+
dest.print filename
|
687
|
+
end
|
688
|
+
dest.print "\n"
|
689
|
+
dest.flush
|
690
|
+
exception.backtrace.each { |line| dest.puts " " + line }
|
691
|
+
end
|
692
|
+
|
693
|
+
def error_text text
|
694
|
+
STDERR.puts text
|
695
|
+
File.open(@error_log_path, "a") do |f|
|
696
|
+
f.puts text
|
621
697
|
end
|
622
|
-
STDERR.print "\n"
|
623
|
-
STDERR.flush
|
624
|
-
exception.backtrace.each { |line| STDERR.puts " " + line }
|
625
698
|
end
|
626
699
|
|
627
700
|
def print_counters
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.41
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-10-
|
11
|
+
date: 2020-10-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: openssl
|