file-digests 0.0.40 → 0.0.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/file-digests.rb +175 -167
- metadata +10 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e74595cb920bd6b52f8f0a0efab118afae509dc5406389d5203518ce801f2811
|
4
|
+
data.tar.gz: ad4429b2d998dc82c441cb5ebd9b9f3640158f20abe0839ead0f0de57037854b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c78ceabcbba358c5be5222681ef45d25acef175f4840022997d652c1740ad1c66a461b8ff2e618bb48a80f97783f7afa57d4281c41d7251c3aa94b4641b5079
|
7
|
+
data.tar.gz: b83b60f4d66e5a57ed7f5f44a6f997c6c47e5df38686a63459fcf7943dd95cda8f9e7cb41a717e071ec5cfef14459b201afc9f5e18f990364f2a7654566af351
|
data/lib/file-digests.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
1
3
|
# Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
|
2
4
|
#
|
3
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -17,7 +19,6 @@ require "digest"
|
|
17
19
|
require "fileutils"
|
18
20
|
require "openssl"
|
19
21
|
require "optparse"
|
20
|
-
require "pathname"
|
21
22
|
require "set"
|
22
23
|
require "sqlite3"
|
23
24
|
|
@@ -112,6 +113,7 @@ class FileDigests
|
|
112
113
|
|
113
114
|
file_digests = self.new ARGV[0], ARGV[1], options
|
114
115
|
file_digests.send(options[:action] || :perform_check)
|
116
|
+
file_digests.close_database
|
115
117
|
end
|
116
118
|
|
117
119
|
def initialize files_path, digest_database_path, options = {}
|
@@ -138,29 +140,109 @@ class FileDigests
|
|
138
140
|
puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
|
139
141
|
end
|
140
142
|
|
143
|
+
def perform_check
|
144
|
+
measure_time do
|
145
|
+
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
146
|
+
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
147
|
+
|
148
|
+
walk_files(@files_path) do |filename|
|
149
|
+
process_file filename
|
150
|
+
end
|
151
|
+
|
152
|
+
nested_transaction do
|
153
|
+
puts "Tracking renames..." if @options[:verbose]
|
154
|
+
track_renames
|
155
|
+
end
|
156
|
+
|
157
|
+
if any_missing_files?
|
158
|
+
if any_exceptions?
|
159
|
+
STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
|
160
|
+
else
|
161
|
+
report_missing_files
|
162
|
+
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
163
|
+
nested_transaction do
|
164
|
+
puts "Removing missing files..." if @options[:verbose]
|
165
|
+
remove_missing_files
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
if @new_digest_algorithm && !@options[:test_only]
|
172
|
+
if any_missing_files? || any_likely_damaged? || any_exceptions?
|
173
|
+
STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
|
174
|
+
else
|
175
|
+
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
176
|
+
digests_update_digests_to_new_digests
|
177
|
+
set_metadata "digest_algorithm", @new_digest_algorithm
|
178
|
+
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
if any_likely_damaged? || any_exceptions?
|
183
|
+
STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
|
184
|
+
STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
|
185
|
+
end
|
186
|
+
|
187
|
+
set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
|
188
|
+
|
189
|
+
print_counters
|
190
|
+
end
|
191
|
+
|
192
|
+
puts "Performing database maintenance..." if @options[:verbose]
|
193
|
+
execute "PRAGMA optimize"
|
194
|
+
execute "VACUUM"
|
195
|
+
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
def show_duplicates
|
200
|
+
current_digest = nil
|
201
|
+
digests_select_duplicates.each do |found|
|
202
|
+
if current_digest != found["digest"]
|
203
|
+
puts "" if current_digest
|
204
|
+
current_digest = found["digest"]
|
205
|
+
puts "#{found["digest"]}:"
|
206
|
+
end
|
207
|
+
puts " #{found["filename"]}"
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
def close_database
|
212
|
+
@statements.each(&:close)
|
213
|
+
@db.close
|
214
|
+
hide_database_files
|
215
|
+
end
|
216
|
+
|
217
|
+
private
|
218
|
+
|
141
219
|
def initialize_paths files_path, digest_database_path
|
220
|
+
@files_path = realpath(files_path || ".")
|
221
|
+
|
222
|
+
unless File.directory?(@files_path) && File.readable?(@files_path)
|
223
|
+
raise "ERROR: Files path must be a readable directory"
|
224
|
+
end
|
225
|
+
|
142
226
|
@start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
|
143
|
-
|
144
|
-
|
145
|
-
@
|
146
|
-
|
147
|
-
@
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
@digest_database_path = realdirpath_with_disk @digest_database_path
|
154
|
-
|
227
|
+
|
228
|
+
@error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
|
229
|
+
@missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
|
230
|
+
|
231
|
+
@digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
|
232
|
+
|
233
|
+
if File.directory?(@digest_database_path)
|
234
|
+
@digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
|
235
|
+
end
|
236
|
+
|
155
237
|
@digest_database_files = [
|
156
|
-
|
238
|
+
@digest_database_path,
|
157
239
|
"#{@digest_database_path}-wal",
|
158
240
|
"#{@digest_database_path}-shm"
|
159
241
|
]
|
160
242
|
|
161
243
|
@skip_files = @digest_database_files + [
|
162
|
-
@error_log_path
|
163
|
-
@missing_files_path
|
244
|
+
@error_log_path,
|
245
|
+
@missing_files_path
|
164
246
|
]
|
165
247
|
|
166
248
|
if @options[:verbose]
|
@@ -170,9 +252,10 @@ class FileDigests
|
|
170
252
|
end
|
171
253
|
|
172
254
|
def initialize_database
|
173
|
-
@db = SQLite3::Database.new @digest_database_path
|
255
|
+
@db = SQLite3::Database.new @digest_database_path
|
174
256
|
@db.results_as_hash = true
|
175
257
|
@db.busy_timeout = 5000
|
258
|
+
@statements = []
|
176
259
|
|
177
260
|
execute "PRAGMA encoding = 'UTF-8'"
|
178
261
|
execute "PRAGMA locking_mode = 'EXCLUSIVE'"
|
@@ -229,7 +312,7 @@ class FileDigests
|
|
229
312
|
# Convert database from 1st to 2nd version
|
230
313
|
unless get_metadata("digest_algorithm")
|
231
314
|
if get_metadata("database_version") == "1"
|
232
|
-
if File.exist?(@digest_database_path.
|
315
|
+
if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
|
233
316
|
set_metadata("digest_algorithm", "SHA512")
|
234
317
|
else
|
235
318
|
set_metadata("digest_algorithm", "SHA256")
|
@@ -242,7 +325,7 @@ class FileDigests
|
|
242
325
|
execute "CREATE INDEX digests_digest ON digests(digest)"
|
243
326
|
set_metadata "database_version", "3"
|
244
327
|
end
|
245
|
-
|
328
|
+
|
246
329
|
check_if_database_is_at_certain_version "3"
|
247
330
|
|
248
331
|
create_temporary_tables
|
@@ -282,81 +365,81 @@ class FileDigests
|
|
282
365
|
prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
|
283
366
|
end
|
284
367
|
|
285
|
-
|
286
|
-
measure_time do
|
287
|
-
perhaps_transaction(@new_digest_algorithm, :exclusive) do
|
288
|
-
@counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
368
|
+
# Files
|
289
369
|
|
290
|
-
|
291
|
-
|
292
|
-
|
370
|
+
def realpath path
|
371
|
+
realxpath path, :realpath
|
372
|
+
end
|
293
373
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
end
|
374
|
+
def realdirpath path
|
375
|
+
realxpath path, :realdirpath
|
376
|
+
end
|
298
377
|
|
299
|
-
|
300
|
-
|
301
|
-
STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
|
302
|
-
else
|
303
|
-
report_missing_files
|
304
|
-
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
305
|
-
nested_transaction do
|
306
|
-
puts "Removing missing files..." if @options[:verbose]
|
307
|
-
remove_missing_files
|
308
|
-
end
|
309
|
-
end
|
310
|
-
end
|
311
|
-
end
|
378
|
+
def realxpath path, method_name
|
379
|
+
path = path.encode("utf-8")
|
312
380
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
else
|
317
|
-
puts "Updating database to a new digest algorithm..." if @options[:verbose]
|
318
|
-
digests_update_digests_to_new_digests
|
319
|
-
set_metadata "digest_algorithm", @new_digest_algorithm
|
320
|
-
puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
|
321
|
-
end
|
322
|
-
end
|
381
|
+
if Gem.win_platform?
|
382
|
+
path = path.gsub(/\\/, "/")
|
383
|
+
end
|
323
384
|
|
324
|
-
|
325
|
-
STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
|
326
|
-
end
|
385
|
+
path = File.send(method_name, path).encode("utf-8")
|
327
386
|
|
328
|
-
|
387
|
+
if Gem.win_platform? && path[0] == "/"
|
388
|
+
path = Dir.pwd[0, 2].encode("utf-8") + path
|
389
|
+
end
|
329
390
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
puts "Performing database maintenance..." if @options[:verbose]
|
334
|
-
execute "PRAGMA optimize"
|
335
|
-
execute "VACUUM"
|
336
|
-
execute "PRAGMA wal_checkpoint(TRUNCATE)"
|
391
|
+
path
|
392
|
+
end
|
337
393
|
|
338
|
-
|
394
|
+
def perhaps_nt_path path
|
395
|
+
if Gem.win_platform?
|
396
|
+
"\\??\\#{path.gsub(/\//,"\\")}"
|
397
|
+
else
|
398
|
+
path
|
339
399
|
end
|
340
400
|
end
|
341
401
|
|
342
|
-
def
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
402
|
+
def get_file_digest filename
|
403
|
+
File.open(filename, "rb") do |io|
|
404
|
+
digest = OpenSSL::Digest.new(@digest_algorithm)
|
405
|
+
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
406
|
+
|
407
|
+
buffer = ""
|
408
|
+
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
409
|
+
digest.update(buffer)
|
410
|
+
new_digest.update(buffer) if @new_digest_algorithm
|
349
411
|
end
|
350
|
-
|
412
|
+
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
351
413
|
end
|
352
414
|
end
|
353
415
|
|
354
|
-
|
416
|
+
def walk_files(path, &block)
|
417
|
+
Dir.each_child(path, encoding: "UTF-8") do |item|
|
418
|
+
item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
|
419
|
+
begin
|
420
|
+
item_perhaps_nt_path = perhaps_nt_path item
|
421
|
+
|
422
|
+
unless File.symlink? item_perhaps_nt_path
|
423
|
+
if File.directory?(item_perhaps_nt_path)
|
424
|
+
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
425
|
+
walk_files(item, &block)
|
426
|
+
else
|
427
|
+
yield item
|
428
|
+
end
|
429
|
+
end
|
430
|
+
|
431
|
+
rescue => exception
|
432
|
+
@counters[:exceptions] += 1
|
433
|
+
report_file_exception exception, item
|
434
|
+
end
|
435
|
+
end
|
436
|
+
end
|
355
437
|
|
356
438
|
def process_file filename
|
357
439
|
perhaps_nt_filename = perhaps_nt_path filename
|
358
440
|
|
359
|
-
|
441
|
+
# this is checked in the walk_files
|
442
|
+
# return if File.symlink? perhaps_nt_filename
|
360
443
|
|
361
444
|
stat = File.stat perhaps_nt_filename
|
362
445
|
|
@@ -373,7 +456,7 @@ class FileDigests
|
|
373
456
|
return
|
374
457
|
end
|
375
458
|
|
376
|
-
normalized_filename = filename.delete_prefix("#{@files_path
|
459
|
+
normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
|
377
460
|
mtime_string = time_to_database stat.mtime
|
378
461
|
digest, new_digest = get_file_digest(perhaps_nt_filename)
|
379
462
|
|
@@ -443,6 +526,7 @@ class FileDigests
|
|
443
526
|
File.open(@missing_files_path, "a") do |f|
|
444
527
|
write_missing_files f
|
445
528
|
end
|
529
|
+
puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
|
446
530
|
end
|
447
531
|
end
|
448
532
|
|
@@ -466,6 +550,17 @@ class FileDigests
|
|
466
550
|
end
|
467
551
|
|
468
552
|
|
553
|
+
# Runtime state helpers
|
554
|
+
|
555
|
+
def any_exceptions?
|
556
|
+
@counters[:exceptions] > 0
|
557
|
+
end
|
558
|
+
|
559
|
+
def any_likely_damaged?
|
560
|
+
@counters[:likely_damaged] > 0
|
561
|
+
end
|
562
|
+
|
563
|
+
|
469
564
|
# Database helpers
|
470
565
|
|
471
566
|
def execute *args, &block
|
@@ -506,7 +601,10 @@ class FileDigests
|
|
506
601
|
def prepare_method name, query
|
507
602
|
variable = "@#{name}"
|
508
603
|
|
509
|
-
|
604
|
+
statement = @db.prepare(query)
|
605
|
+
@statements.push(statement)
|
606
|
+
|
607
|
+
instance_variable_set(variable, statement)
|
510
608
|
|
511
609
|
define_singleton_method name do |*args, &block|
|
512
610
|
instance_variable_get(variable).execute(*args, &block)
|
@@ -561,96 +659,6 @@ class FileDigests
|
|
561
659
|
end
|
562
660
|
|
563
661
|
|
564
|
-
# Filesystem-related helpers
|
565
|
-
|
566
|
-
def realpath_with_disk path
|
567
|
-
path = path.realpath
|
568
|
-
if Gem.win_platform? && path.to_s[0] == "/"
|
569
|
-
return Pathname(Dir.pwd[0, 2] + path.to_s)
|
570
|
-
end
|
571
|
-
path
|
572
|
-
end
|
573
|
-
|
574
|
-
def realdirpath_with_disk path
|
575
|
-
path = path.realdirpath
|
576
|
-
if Gem.win_platform? && path.to_s[0] == "/"
|
577
|
-
return Pathname(Dir.pwd[0, 2] + path.to_s)
|
578
|
-
end
|
579
|
-
path
|
580
|
-
end
|
581
|
-
|
582
|
-
def patch_path_string path
|
583
|
-
Gem.win_platform? ? path.gsub(/\\/, "/") : path
|
584
|
-
end
|
585
|
-
|
586
|
-
def cleanup_path path
|
587
|
-
Pathname.new(patch_path_string(path)).cleanpath
|
588
|
-
end
|
589
|
-
|
590
|
-
def ensure_dir_exist path
|
591
|
-
if File.exist?(path)
|
592
|
-
unless File.directory?(path)
|
593
|
-
raise "#{path} is not a directory"
|
594
|
-
end
|
595
|
-
else
|
596
|
-
FileUtils.mkdir_p path
|
597
|
-
end
|
598
|
-
end
|
599
|
-
|
600
|
-
def walk_files(path, &block)
|
601
|
-
Dir.each_child(path, encoding: "UTF-8") do |item|
|
602
|
-
item = "#{path}#{File::SEPARATOR}#{item}"
|
603
|
-
begin
|
604
|
-
item_perhaps_nt_path = perhaps_nt_path item
|
605
|
-
|
606
|
-
unless File.symlink? item_perhaps_nt_path
|
607
|
-
if File.directory?(item_perhaps_nt_path)
|
608
|
-
raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
|
609
|
-
walk_files(item, &block)
|
610
|
-
else
|
611
|
-
yield item
|
612
|
-
end
|
613
|
-
end
|
614
|
-
rescue => exception
|
615
|
-
@counters[:exceptions] += 1
|
616
|
-
report_file_exception exception, item
|
617
|
-
end
|
618
|
-
end
|
619
|
-
end
|
620
|
-
|
621
|
-
def perhaps_nt_path path
|
622
|
-
if Gem.win_platform?
|
623
|
-
"\\??\\#{path.gsub(/\//,"\\")}"
|
624
|
-
else
|
625
|
-
path
|
626
|
-
end
|
627
|
-
end
|
628
|
-
|
629
|
-
def get_file_digest filename
|
630
|
-
File.open(filename, "rb") do |io|
|
631
|
-
digest = OpenSSL::Digest.new(@digest_algorithm)
|
632
|
-
new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
|
633
|
-
|
634
|
-
buffer = ""
|
635
|
-
while io.read(409600, buffer) # 409600 seems like a sweet spot
|
636
|
-
digest.update(buffer)
|
637
|
-
new_digest.update(buffer) if @new_digest_algorithm
|
638
|
-
end
|
639
|
-
return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
|
640
|
-
end
|
641
|
-
end
|
642
|
-
|
643
|
-
|
644
|
-
# Runtime state helpers
|
645
|
-
|
646
|
-
def any_exceptions?
|
647
|
-
@counters[:exceptions] > 0
|
648
|
-
end
|
649
|
-
|
650
|
-
def any_likely_damaged?
|
651
|
-
@counters[:likely_damaged] > 0
|
652
|
-
end
|
653
|
-
|
654
662
|
# UI helpers
|
655
663
|
|
656
664
|
def confirm text
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.43
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: openssl
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '3.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '3.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: sqlite3
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.
|
33
|
+
version: '1.4'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
40
|
+
version: '1.4'
|
41
41
|
description: Calculate file digests and check for the possible file corruption
|
42
42
|
email: stan@senotrusov.com
|
43
43
|
executables:
|
@@ -51,7 +51,7 @@ homepage: https://github.com/senotrusov/file-digests
|
|
51
51
|
licenses:
|
52
52
|
- Apache-2.0
|
53
53
|
metadata: {}
|
54
|
-
post_install_message:
|
54
|
+
post_install_message:
|
55
55
|
rdoc_options: []
|
56
56
|
require_paths:
|
57
57
|
- lib
|
@@ -66,8 +66,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
66
66
|
- !ruby/object:Gem::Version
|
67
67
|
version: '0'
|
68
68
|
requirements: []
|
69
|
-
rubygems_version: 3.
|
70
|
-
signing_key:
|
69
|
+
rubygems_version: 3.3.7
|
70
|
+
signing_key:
|
71
71
|
specification_version: 4
|
72
72
|
summary: file-digests
|
73
73
|
test_files: []
|