file-digests 0.0.40 → 0.0.41
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +163 -163
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
|
4
|
+
data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
|
7
|
+
data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
|
data/lib/file-digests.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
1
3
|
# Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
|
2
4
|
#
|
3
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -17,7 +19,6 @@ require "digest"
|
|
17
19
|
require "fileutils"
|
18
20
|
require "openssl"
|
19
21
|
require "optparse"
|
20
|
-
require "pathname"
|
21
22
|
require "set"
|
22
23
|
require "sqlite3"
|
23
24
|
|
@@ -138,29 +139,105 @@ class FileDigests
|
|
138
139
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
139
140
|
end
|
140
141
|
|
142
|
+
def perform_check
|
143
|
+
measure_time do
|
144
|
+
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
145
|
+
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
146
|
+
|
147
|
+
walk_files(@files_path) do |filename|
|
148
|
+
process_file filename
|
149
|
+
end
|
150
|
+
|
151
|
+
nested_transaction do
|
152
|
+
puts "Tracking renames..." if @options[:verbose]
|
153
|
+
track_renames
|
154
|
+
end
|
155
|
+
|
156
|
+
if any_missing_files?
|
157
|
+
if any_exceptions?
|
158
|
+
STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
|
159
|
+
else
|
160
|
+
report_missing_files
|
161
|
+
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
162
|
+
nested_transaction do
|
163
|
+
puts "Removing missing files..." if @options[:verbose]
|
164
|
+
remove_missing_files
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
if @new_digest_algorithm && !@options[:test_only]
|
171
|
+
if any_missing_files? || any_likely_damaged? || any_exceptions?
|
172
|
+
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
173
|
+
else
|
174
|
+
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
175
|
+
digests_update_digests_to_new_digests
|
176
|
+
set_metadata "digest_algorithm", @new_digest_algorithm
|
177
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
if any_likely_damaged? || any_exceptions?
|
182
|
+
STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
|
183
|
+
STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
|
184
|
+
end
|
185
|
+
|
186
|
+
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
187
|
+
|
188
|
+
print_counters
|
189
|
+
end
|
190
|
+
|
191
|
+
puts "Performing database maintenance..." if @options[:verbose]
|
192
|
+
execute "PRAGMA optimize"
|
193
|
+
execute "VACUUM"
|
194
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
195
|
+
|
196
|
+
hide_database_files
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def show_duplicates
|
201
|
+
current_digest = nil
|
202
|
+
digests_select_duplicates.each do |found|
|
203
|
+
if current_digest != found["digest"]
|
204
|
+
puts "" if current_digest
|
205
|
+
current_digest = found["digest"]
|
206
|
+
puts "#{found["digest"]}:"
|
207
|
+
end
|
208
|
+
puts " #{found["filename"]}"
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
private
|
213
|
+
|
141
214
|
def initialize_paths files_path, digest_database_path
|
215
|
+
@files_path = realpath(files_path || ".")
|
216
|
+
|
217
|
+
unless File.directory?(@files_path) && File.readable?(@files_path)
|
218
|
+
raise "ERROR: Files path must be a readable directory"
|
219
|
+
end
|
220
|
+
|
142
221
|
@start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
|
143
|
-
|
144
|
-
|
145
|
-
@
|
222
|
+
|
223
|
+
@error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
|
224
|
+
@missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
|
146
225
|
|
147
|
-
@
|
148
|
-
|
226
|
+
@digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
|
227
|
+
|
228
|
+
if File.directory?(@digest_database_path)
|
229
|
+
@digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
|
230
|
+
end
|
149
231
|
|
150
|
-
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
151
|
-
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
152
|
-
ensure_dir_exist @digest_database_path.dirname
|
153
|
-
@digest_database_path = realdirpath_with_disk @digest_database_path
|
154
|
-
|
155
232
|
@digest_database_files = [
|
156
|
-
|
233
|
+
@digest_database_path,
|
157
234
|
"#{@digest_database_path}-wal",
|
158
235
|
"#{@digest_database_path}-shm"
|
159
236
|
]
|
160
237
|
|
161
238
|
@skip_files = @digest_database_files + [
|
162
|
-
@error_log_path
|
163
|
-
@missing_files_path
|
239
|
+
@error_log_path,
|
240
|
+
@missing_files_path
|
164
241
|
]
|
165
242
|
|
166
243
|
if @options[:verbose]
|
@@ -170,7 +247,7 @@ class FileDigests
|
|
170
247
|
end
|
171
248
|
|
172
249
|
def initialize_database
|
173
|
-
@db = SQLite3::Database.new @digest_database_path
|
250
|
+
@db = SQLite3::Database.new @digest_database_path
|
174
251
|
@db.results_as_hash = true
|
175
252
|
@db.busy_timeout = 5000
|
176
253
|
|
@@ -229,7 +306,7 @@ class FileDigests
|
|
229
306
|
# Convert database from 1st to 2nd version
|
230
307
|
unless get_metadata("digest_algorithm")
|
231
308
|
if get_metadata("database_version") == "1"
|
232
|
-
if File.exist?(@digest_database_path.
|
309
|
+
if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
|
233
310
|
set_metadata("digest_algorithm", "SHA512")
|
234
311
|
else
|
235
312
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -282,81 +359,82 @@ class FileDigests
|
|
282
359
|
prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
|
283
360
|
end
|
284
361
|
|
285
|
-
def perform_check
|
286
|
-
measure_time do
|
287
|
-
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
288
|
-
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
289
362
|
|
290
|
-
|
291
|
-
process_file filename
|
292
|
-
end
|
363
|
+
# Files
|
293
364
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
end
|
365
|
+
def realpath path
|
366
|
+
realxpath path, :realpath
|
367
|
+
end
|
298
368
|
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
else
|
303
|
-
report_missing_files
|
304
|
-
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
305
|
-
nested_transaction do
|
306
|
-
puts "Removing missing files..." if @options[:verbose]
|
307
|
-
remove_missing_files
|
308
|
-
end
|
309
|
-
end
|
310
|
-
end
|
311
|
-
end
|
369
|
+
def realdirpath path
|
370
|
+
realxpath path, :realdirpath
|
371
|
+
end
|
312
372
|
|
313
|
-
|
314
|
-
|
315
|
-
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
316
|
-
else
|
317
|
-
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
318
|
-
digests_update_digests_to_new_digests
|
319
|
-
set_metadata "digest_algorithm", @new_digest_algorithm
|
320
|
-
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
321
|
-
end
|
322
|
-
end
|
373
|
+
def realxpath path, method_name
|
374
|
+
path = path.encode("utf-8")
|
323
375
|
|
324
|
-
|
325
|
-
|
326
|
-
|
376
|
+
if Gem.win_platform?
|
377
|
+
path = path.gsub(/\\/, "/")
|
378
|
+
end
|
327
379
|
|
328
|
-
|
380
|
+
path = File.send(method_name, path).encode("utf-8")
|
329
381
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
puts "Performing database maintenance..." if @options[:verbose]
|
334
|
-
execute "PRAGMA optimize"
|
335
|
-
execute "VACUUM"
|
336
|
-
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
382
|
+
if Gem.win_platform? && path[0] == "/"
|
383
|
+
path = Dir.pwd[0, 2].encode("utf-8") + path
|
384
|
+
end
|
337
385
|
|
338
|
-
|
386
|
+
path
|
387
|
+
end
|
388
|
+
|
389
|
+
def perhaps_nt_path path
|
390
|
+
if Gem.win_platform?
|
391
|
+
"\\??\\#{path.gsub(/\//,"\\")}"
|
392
|
+
else
|
393
|
+
path
|
339
394
|
end
|
340
395
|
end
|
341
396
|
|
342
|
-
def
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
397
|
+
def get_file_digest filename
|
398
|
+
File.open(filename, "rb") do |io|
|
399
|
+
digest = OpenSSL::Digest.new(@digest_algorithm)
|
400
|
+
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
401
|
+
|
402
|
+
buffer = ""
|
403
|
+
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
404
|
+
digest.update(buffer)
|
405
|
+
new_digest.update(buffer) if @new_digest_algorithm
|
349
406
|
end
|
350
|
-
|
407
|
+
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
351
408
|
end
|
352
409
|
end
|
353
410
|
|
354
|
-
|
411
|
+
def walk_files(path, &block)
|
412
|
+
Dir.each_child(path, encoding: "UTF-8") do |item|
|
413
|
+
item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
|
414
|
+
begin
|
415
|
+
item_perhaps_nt_path = perhaps_nt_path item
|
416
|
+
|
417
|
+
unless File.symlink? item_perhaps_nt_path
|
418
|
+
if File.directory?(item_perhaps_nt_path)
|
419
|
+
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
420
|
+
walk_files(item, &block)
|
421
|
+
else
|
422
|
+
yield item
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
rescue => exception
|
427
|
+
@counters[:exceptions] += 1
|
428
|
+
report_file_exception exception, item
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
355
432
|
|
356
433
|
def process_file filename
|
357
434
|
perhaps_nt_filename = perhaps_nt_path filename
|
358
435
|
|
359
|
-
|
436
|
+
# this is checked in the walk_files
|
437
|
+
# return if File.symlink? perhaps_nt_filename
|
360
438
|
|
361
439
|
stat = File.stat perhaps_nt_filename
|
362
440
|
|
@@ -373,7 +451,7 @@ class FileDigests
|
|
373
451
|
return
|
374
452
|
end
|
375
453
|
|
376
|
-
normalized_filename = filename.delete_prefix("#{@files_path
|
454
|
+
normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
377
455
|
mtime_string = time_to_database stat.mtime
|
378
456
|
digest, new_digest = get_file_digest(perhaps_nt_filename)
|
379
457
|
|
@@ -443,6 +521,7 @@ class FileDigests
|
|
443
521
|
File.open(@missing_files_path, "a") do |f|
|
444
522
|
write_missing_files f
|
445
523
|
end
|
524
|
+
puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
|
446
525
|
end
|
447
526
|
end
|
448
527
|
|
@@ -466,6 +545,17 @@ class FileDigests
|
|
466
545
|
end
|
467
546
|
|
468
547
|
|
548
|
+
# Runtime state helpers
|
549
|
+
|
550
|
+
def any_exceptions?
|
551
|
+
@counters[:exceptions] > 0
|
552
|
+
end
|
553
|
+
|
554
|
+
def any_likely_damaged?
|
555
|
+
@counters[:likely_damaged] > 0
|
556
|
+
end
|
557
|
+
|
558
|
+
|
469
559
|
# Database helpers
|
470
560
|
|
471
561
|
def execute *args, &block
|
@@ -561,96 +651,6 @@ class FileDigests
|
|
561
651
|
end
|
562
652
|
|
563
653
|
|
564
|
-
# Filesystem-related helpers
|
565
|
-
|
566
|
-
def realpath_with_disk path
|
567
|
-
path = path.realpath
|
568
|
-
if Gem.win_platform? && path.to_s[0] == "/"
|
569
|
-
return Pathname(Dir.pwd[0, 2] + path.to_s)
|
570
|
-
end
|
571
|
-
path
|
572
|
-
end
|
573
|
-
|
574
|
-
def realdirpath_with_disk path
|
575
|
-
path = path.realdirpath
|
576
|
-
if Gem.win_platform? && path.to_s[0] == "/"
|
577
|
-
return Pathname(Dir.pwd[0, 2] + path.to_s)
|
578
|
-
end
|
579
|
-
path
|
580
|
-
end
|
581
|
-
|
582
|
-
def patch_path_string path
|
583
|
-
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
584
|
-
end
|
585
|
-
|
586
|
-
def cleanup_path path
|
587
|
-
Pathname.new(patch_path_string(path)).cleanpath
|
588
|
-
end
|
589
|
-
|
590
|
-
def ensure_dir_exist path
|
591
|
-
if File.exist?(path)
|
592
|
-
unless File.directory?(path)
|
593
|
-
raise "#{path} is not a directory"
|
594
|
-
end
|
595
|
-
else
|
596
|
-
FileUtils.mkdir_p path
|
597
|
-
end
|
598
|
-
end
|
599
|
-
|
600
|
-
def walk_files(path, &block)
|
601
|
-
Dir.each_child(path, encoding: "UTF-8") do |item|
|
602
|
-
item = "#{path}#{File::SEPARATOR}#{item}"
|
603
|
-
begin
|
604
|
-
item_perhaps_nt_path = perhaps_nt_path item
|
605
|
-
|
606
|
-
unless File.symlink? item_perhaps_nt_path
|
607
|
-
if File.directory?(item_perhaps_nt_path)
|
608
|
-
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
609
|
-
walk_files(item, &block)
|
610
|
-
else
|
611
|
-
yield item
|
612
|
-
end
|
613
|
-
end
|
614
|
-
rescue => exception
|
615
|
-
@counters[:exceptions] += 1
|
616
|
-
report_file_exception exception, item
|
617
|
-
end
|
618
|
-
end
|
619
|
-
end
|
620
|
-
|
621
|
-
def perhaps_nt_path path
|
622
|
-
if Gem.win_platform?
|
623
|
-
"\\??\\#{path.gsub(/\//,"\\")}"
|
624
|
-
else
|
625
|
-
path
|
626
|
-
end
|
627
|
-
end
|
628
|
-
|
629
|
-
def get_file_digest filename
|
630
|
-
File.open(filename, "rb") do |io|
|
631
|
-
digest = OpenSSL::Digest.new(@digest_algorithm)
|
632
|
-
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
633
|
-
|
634
|
-
buffer = ""
|
635
|
-
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
636
|
-
digest.update(buffer)
|
637
|
-
new_digest.update(buffer) if @new_digest_algorithm
|
638
|
-
end
|
639
|
-
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
640
|
-
end
|
641
|
-
end
|
642
|
-
|
643
|
-
|
644
|
-
# Runtime state helpers
|
645
|
-
|
646
|
-
def any_exceptions?
|
647
|
-
@counters[:exceptions] > 0
|
648
|
-
end
|
649
|
-
|
650
|
-
def any_likely_damaged?
|
651
|
-
@counters[:likely_damaged] > 0
|
652
|
-
end
|
653
|
-
|
654
654
|
# UI helpers
|
655
655
|
|
656
656
|
def confirm text
|