file-digests 0.0.40 → 0.0.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +163 -163
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
|
4
|
+
data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
|
7
|
+
data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
|
data/lib/file-digests.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
1
3
|
# Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
|
2
4
|
#
|
3
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -17,7 +19,6 @@ require "digest"
|
|
17
19
|
require "fileutils"
|
18
20
|
require "openssl"
|
19
21
|
require "optparse"
|
20
|
-
require "pathname"
|
21
22
|
require "set"
|
22
23
|
require "sqlite3"
|
23
24
|
|
@@ -138,29 +139,105 @@ class FileDigests
|
|
138
139
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
139
140
|
end
|
140
141
|
|
142
|
+
def perform_check
|
143
|
+
measure_time do
|
144
|
+
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
145
|
+
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
146
|
+
|
147
|
+
walk_files(@files_path) do |filename|
|
148
|
+
process_file filename
|
149
|
+
end
|
150
|
+
|
151
|
+
nested_transaction do
|
152
|
+
puts "Tracking renames..." if @options[:verbose]
|
153
|
+
track_renames
|
154
|
+
end
|
155
|
+
|
156
|
+
if any_missing_files?
|
157
|
+
if any_exceptions?
|
158
|
+
STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
|
159
|
+
else
|
160
|
+
report_missing_files
|
161
|
+
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
162
|
+
nested_transaction do
|
163
|
+
puts "Removing missing files..." if @options[:verbose]
|
164
|
+
remove_missing_files
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
if @new_digest_algorithm && !@options[:test_only]
|
171
|
+
if any_missing_files? || any_likely_damaged? || any_exceptions?
|
172
|
+
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
173
|
+
else
|
174
|
+
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
175
|
+
digests_update_digests_to_new_digests
|
176
|
+
set_metadata "digest_algorithm", @new_digest_algorithm
|
177
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
if any_likely_damaged? || any_exceptions?
|
182
|
+
STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
|
183
|
+
STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
|
184
|
+
end
|
185
|
+
|
186
|
+
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
187
|
+
|
188
|
+
print_counters
|
189
|
+
end
|
190
|
+
|
191
|
+
puts "Performing database maintenance..." if @options[:verbose]
|
192
|
+
execute "PRAGMA optimize"
|
193
|
+
execute "VACUUM"
|
194
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
195
|
+
|
196
|
+
hide_database_files
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
def show_duplicates
|
201
|
+
current_digest = nil
|
202
|
+
digests_select_duplicates.each do |found|
|
203
|
+
if current_digest != found["digest"]
|
204
|
+
puts "" if current_digest
|
205
|
+
current_digest = found["digest"]
|
206
|
+
puts "#{found["digest"]}:"
|
207
|
+
end
|
208
|
+
puts " #{found["filename"]}"
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
private
|
213
|
+
|
141
214
|
def initialize_paths files_path, digest_database_path
|
215
|
+
@files_path = realpath(files_path || ".")
|
216
|
+
|
217
|
+
unless File.directory?(@files_path) && File.readable?(@files_path)
|
218
|
+
raise "ERROR: Files path must be a readable directory"
|
219
|
+
end
|
220
|
+
|
142
221
|
@start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
|
143
|
-
|
144
|
-
|
145
|
-
@
|
222
|
+
|
223
|
+
@error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
|
224
|
+
@missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
|
146
225
|
|
147
|
-
@
|
148
|
-
|
226
|
+
@digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
|
227
|
+
|
228
|
+
if File.directory?(@digest_database_path)
|
229
|
+
@digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
|
230
|
+
end
|
149
231
|
|
150
|
-
@digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
|
151
|
-
@digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
|
152
|
-
ensure_dir_exist @digest_database_path.dirname
|
153
|
-
@digest_database_path = realdirpath_with_disk @digest_database_path
|
154
|
-
|
155
232
|
@digest_database_files = [
|
156
|
-
|
233
|
+
@digest_database_path,
|
157
234
|
"#{@digest_database_path}-wal",
|
158
235
|
"#{@digest_database_path}-shm"
|
159
236
|
]
|
160
237
|
|
161
238
|
@skip_files = @digest_database_files + [
|
162
|
-
@error_log_path
|
163
|
-
@missing_files_path
|
239
|
+
@error_log_path,
|
240
|
+
@missing_files_path
|
164
241
|
]
|
165
242
|
|
166
243
|
if @options[:verbose]
|
@@ -170,7 +247,7 @@ class FileDigests
|
|
170
247
|
end
|
171
248
|
|
172
249
|
def initialize_database
|
173
|
-
@db = SQLite3::Database.new @digest_database_path
|
250
|
+
@db = SQLite3::Database.new @digest_database_path
|
174
251
|
@db.results_as_hash = true
|
175
252
|
@db.busy_timeout = 5000
|
176
253
|
|
@@ -229,7 +306,7 @@ class FileDigests
|
|
229
306
|
# Convert database from 1st to 2nd version
|
230
307
|
unless get_metadata("digest_algorithm")
|
231
308
|
if get_metadata("database_version") == "1"
|
232
|
-
if File.exist?(@digest_database_path.
|
309
|
+
if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
|
233
310
|
set_metadata("digest_algorithm", "SHA512")
|
234
311
|
else
|
235
312
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -282,81 +359,82 @@ class FileDigests
|
|
282
359
|
prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
|
283
360
|
end
|
284
361
|
|
285
|
-
def perform_check
|
286
|
-
measure_time do
|
287
|
-
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
288
|
-
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
289
362
|
|
290
|
-
|
291
|
-
process_file filename
|
292
|
-
end
|
363
|
+
# Files
|
293
364
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
end
|
365
|
+
def realpath path
|
366
|
+
realxpath path, :realpath
|
367
|
+
end
|
298
368
|
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
else
|
303
|
-
report_missing_files
|
304
|
-
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
305
|
-
nested_transaction do
|
306
|
-
puts "Removing missing files..." if @options[:verbose]
|
307
|
-
remove_missing_files
|
308
|
-
end
|
309
|
-
end
|
310
|
-
end
|
311
|
-
end
|
369
|
+
def realdirpath path
|
370
|
+
realxpath path, :realdirpath
|
371
|
+
end
|
312
372
|
|
313
|
-
|
314
|
-
|
315
|
-
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
316
|
-
else
|
317
|
-
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
318
|
-
digests_update_digests_to_new_digests
|
319
|
-
set_metadata "digest_algorithm", @new_digest_algorithm
|
320
|
-
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
321
|
-
end
|
322
|
-
end
|
373
|
+
def realxpath path, method_name
|
374
|
+
path = path.encode("utf-8")
|
323
375
|
|
324
|
-
|
325
|
-
|
326
|
-
|
376
|
+
if Gem.win_platform?
|
377
|
+
path = path.gsub(/\\/, "/")
|
378
|
+
end
|
327
379
|
|
328
|
-
|
380
|
+
path = File.send(method_name, path).encode("utf-8")
|
329
381
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
puts "Performing database maintenance..." if @options[:verbose]
|
334
|
-
execute "PRAGMA optimize"
|
335
|
-
execute "VACUUM"
|
336
|
-
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
382
|
+
if Gem.win_platform? && path[0] == "/"
|
383
|
+
path = Dir.pwd[0, 2].encode("utf-8") + path
|
384
|
+
end
|
337
385
|
|
338
|
-
|
386
|
+
path
|
387
|
+
end
|
388
|
+
|
389
|
+
def perhaps_nt_path path
|
390
|
+
if Gem.win_platform?
|
391
|
+
"\\??\\#{path.gsub(/\//,"\\")}"
|
392
|
+
else
|
393
|
+
path
|
339
394
|
end
|
340
395
|
end
|
341
396
|
|
342
|
-
def
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
397
|
+
def get_file_digest filename
|
398
|
+
File.open(filename, "rb") do |io|
|
399
|
+
digest = OpenSSL::Digest.new(@digest_algorithm)
|
400
|
+
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
401
|
+
|
402
|
+
buffer = ""
|
403
|
+
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
404
|
+
digest.update(buffer)
|
405
|
+
new_digest.update(buffer) if @new_digest_algorithm
|
349
406
|
end
|
350
|
-
|
407
|
+
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
351
408
|
end
|
352
409
|
end
|
353
410
|
|
354
|
-
|
411
|
+
def walk_files(path, &block)
|
412
|
+
Dir.each_child(path, encoding: "UTF-8") do |item|
|
413
|
+
item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
|
414
|
+
begin
|
415
|
+
item_perhaps_nt_path = perhaps_nt_path item
|
416
|
+
|
417
|
+
unless File.symlink? item_perhaps_nt_path
|
418
|
+
if File.directory?(item_perhaps_nt_path)
|
419
|
+
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
420
|
+
walk_files(item, &block)
|
421
|
+
else
|
422
|
+
yield item
|
423
|
+
end
|
424
|
+
end
|
425
|
+
|
426
|
+
rescue => exception
|
427
|
+
@counters[:exceptions] += 1
|
428
|
+
report_file_exception exception, item
|
429
|
+
end
|
430
|
+
end
|
431
|
+
end
|
355
432
|
|
356
433
|
def process_file filename
|
357
434
|
perhaps_nt_filename = perhaps_nt_path filename
|
358
435
|
|
359
|
-
|
436
|
+
# this is checked in the walk_files
|
437
|
+
# return if File.symlink? perhaps_nt_filename
|
360
438
|
|
361
439
|
stat = File.stat perhaps_nt_filename
|
362
440
|
|
@@ -373,7 +451,7 @@ class FileDigests
|
|
373
451
|
return
|
374
452
|
end
|
375
453
|
|
376
|
-
normalized_filename = filename.delete_prefix("#{@files_path
|
454
|
+
normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
377
455
|
mtime_string = time_to_database stat.mtime
|
378
456
|
digest, new_digest = get_file_digest(perhaps_nt_filename)
|
379
457
|
|
@@ -443,6 +521,7 @@ class FileDigests
|
|
443
521
|
File.open(@missing_files_path, "a") do |f|
|
444
522
|
write_missing_files f
|
445
523
|
end
|
524
|
+
puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
|
446
525
|
end
|
447
526
|
end
|
448
527
|
|
@@ -466,6 +545,17 @@ class FileDigests
|
|
466
545
|
end
|
467
546
|
|
468
547
|
|
548
|
+
# Runtime state helpers
|
549
|
+
|
550
|
+
def any_exceptions?
|
551
|
+
@counters[:exceptions] > 0
|
552
|
+
end
|
553
|
+
|
554
|
+
def any_likely_damaged?
|
555
|
+
@counters[:likely_damaged] > 0
|
556
|
+
end
|
557
|
+
|
558
|
+
|
469
559
|
# Database helpers
|
470
560
|
|
471
561
|
def execute *args, &block
|
@@ -561,96 +651,6 @@ class FileDigests
|
|
561
651
|
end
|
562
652
|
|
563
653
|
|
564
|
-
# Filesystem-related helpers
|
565
|
-
|
566
|
-
def realpath_with_disk path
|
567
|
-
path = path.realpath
|
568
|
-
if Gem.win_platform? && path.to_s[0] == "/"
|
569
|
-
return Pathname(Dir.pwd[0, 2] + path.to_s)
|
570
|
-
end
|
571
|
-
path
|
572
|
-
end
|
573
|
-
|
574
|
-
def realdirpath_with_disk path
|
575
|
-
path = path.realdirpath
|
576
|
-
if Gem.win_platform? && path.to_s[0] == "/"
|
577
|
-
return Pathname(Dir.pwd[0, 2] + path.to_s)
|
578
|
-
end
|
579
|
-
path
|
580
|
-
end
|
581
|
-
|
582
|
-
def patch_path_string path
|
583
|
-
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
584
|
-
end
|
585
|
-
|
586
|
-
def cleanup_path path
|
587
|
-
Pathname.new(patch_path_string(path)).cleanpath
|
588
|
-
end
|
589
|
-
|
590
|
-
def ensure_dir_exist path
|
591
|
-
if File.exist?(path)
|
592
|
-
unless File.directory?(path)
|
593
|
-
raise "#{path} is not a directory"
|
594
|
-
end
|
595
|
-
else
|
596
|
-
FileUtils.mkdir_p path
|
597
|
-
end
|
598
|
-
end
|
599
|
-
|
600
|
-
def walk_files(path, &block)
|
601
|
-
Dir.each_child(path, encoding: "UTF-8") do |item|
|
602
|
-
item = "#{path}#{File::SEPARATOR}#{item}"
|
603
|
-
begin
|
604
|
-
item_perhaps_nt_path = perhaps_nt_path item
|
605
|
-
|
606
|
-
unless File.symlink? item_perhaps_nt_path
|
607
|
-
if File.directory?(item_perhaps_nt_path)
|
608
|
-
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
609
|
-
walk_files(item, &block)
|
610
|
-
else
|
611
|
-
yield item
|
612
|
-
end
|
613
|
-
end
|
614
|
-
rescue => exception
|
615
|
-
@counters[:exceptions] += 1
|
616
|
-
report_file_exception exception, item
|
617
|
-
end
|
618
|
-
end
|
619
|
-
end
|
620
|
-
|
621
|
-
def perhaps_nt_path path
|
622
|
-
if Gem.win_platform?
|
623
|
-
"\\??\\#{path.gsub(/\//,"\\")}"
|
624
|
-
else
|
625
|
-
path
|
626
|
-
end
|
627
|
-
end
|
628
|
-
|
629
|
-
def get_file_digest filename
|
630
|
-
File.open(filename, "rb") do |io|
|
631
|
-
digest = OpenSSL::Digest.new(@digest_algorithm)
|
632
|
-
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
633
|
-
|
634
|
-
buffer = ""
|
635
|
-
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
636
|
-
digest.update(buffer)
|
637
|
-
new_digest.update(buffer) if @new_digest_algorithm
|
638
|
-
end
|
639
|
-
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
640
|
-
end
|
641
|
-
end
|
642
|
-
|
643
|
-
|
644
|
-
# Runtime state helpers
|
645
|
-
|
646
|
-
def any_exceptions?
|
647
|
-
@counters[:exceptions] > 0
|
648
|
-
end
|
649
|
-
|
650
|
-
def any_likely_damaged?
|
651
|
-
@counters[:likely_damaged] > 0
|
652
|
-
end
|
653
|
-
|
654
654
|
# UI helpers
|
655
655
|
|
656
656
|
def confirm text
|