file-digests 0.0.40 → 0.0.41

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +163 -163
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9d040599aee9aeb62234557b2a92edec36265e4439965e4c9d00a2c9afa117a
4
- data.tar.gz: 992fe8a843afe761537a3c48b1ecde27ccdf8101be58dd4b456572c59232141b
3
+ metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
4
+ data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
5
5
  SHA512:
6
- metadata.gz: 1e15aa584690f8062a51cbb4785c9716e610258fba77859cb2b50642154701ffa1651312be1f6455921ee435ed7df5ed00bcc36f12def4009ac7edcc4aebb93e
7
- data.tar.gz: 5c94ae4165677af0be7b790a7a92ffc9af7751f0dd2661bf238d06b02569ef4ec746908721c7d16781d11a431048759e1490f9e180ea9c2e647f5e585a3ebbe0
6
+ metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
7
+ data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
data/lib/file-digests.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +19,6 @@ require "digest"
17
19
  require "fileutils"
18
20
  require "openssl"
19
21
  require "optparse"
20
- require "pathname"
21
22
  require "set"
22
23
  require "sqlite3"
23
24
 
@@ -138,29 +139,105 @@ class FileDigests
138
139
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
139
140
  end
140
141
 
142
+ def perform_check
143
+ measure_time do
144
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
145
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
146
+
147
+ walk_files(@files_path) do |filename|
148
+ process_file filename
149
+ end
150
+
151
+ nested_transaction do
152
+ puts "Tracking renames..." if @options[:verbose]
153
+ track_renames
154
+ end
155
+
156
+ if any_missing_files?
157
+ if any_exceptions?
158
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
159
+ else
160
+ report_missing_files
161
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
162
+ nested_transaction do
163
+ puts "Removing missing files..." if @options[:verbose]
164
+ remove_missing_files
165
+ end
166
+ end
167
+ end
168
+ end
169
+
170
+ if @new_digest_algorithm && !@options[:test_only]
171
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
172
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
173
+ else
174
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
175
+ digests_update_digests_to_new_digests
176
+ set_metadata "digest_algorithm", @new_digest_algorithm
177
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
178
+ end
179
+ end
180
+
181
+ if any_likely_damaged? || any_exceptions?
182
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
183
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
184
+ end
185
+
186
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
187
+
188
+ print_counters
189
+ end
190
+
191
+ puts "Performing database maintenance..." if @options[:verbose]
192
+ execute "PRAGMA optimize"
193
+ execute "VACUUM"
194
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
195
+
196
+ hide_database_files
197
+ end
198
+ end
199
+
200
+ def show_duplicates
201
+ current_digest = nil
202
+ digests_select_duplicates.each do |found|
203
+ if current_digest != found["digest"]
204
+ puts "" if current_digest
205
+ current_digest = found["digest"]
206
+ puts "#{found["digest"]}:"
207
+ end
208
+ puts " #{found["filename"]}"
209
+ end
210
+ end
211
+
212
+ private
213
+
141
214
  def initialize_paths files_path, digest_database_path
215
+ @files_path = realpath(files_path || ".")
216
+
217
+ unless File.directory?(@files_path) && File.readable?(@files_path)
218
+ raise "ERROR: Files path must be a readable directory"
219
+ end
220
+
142
221
  @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
143
- @files_path = cleanup_path(files_path || ".")
144
- raise "ERROR: Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
145
- @files_path = realpath_with_disk @files_path
222
+
223
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
224
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
146
225
 
147
- @error_log_path = @files_path + "file-digests errors #{@start_time_filename_string}.txt"
148
- @missing_files_path = @files_path + "file-digests missing files #{@start_time_filename_string}.txt"
226
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
227
+
228
+ if File.directory?(@digest_database_path)
229
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
230
+ end
149
231
 
150
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
151
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
152
- ensure_dir_exist @digest_database_path.dirname
153
- @digest_database_path = realdirpath_with_disk @digest_database_path
154
-
155
232
  @digest_database_files = [
156
- "#{@digest_database_path}",
233
+ @digest_database_path,
157
234
  "#{@digest_database_path}-wal",
158
235
  "#{@digest_database_path}-shm"
159
236
  ]
160
237
 
161
238
  @skip_files = @digest_database_files + [
162
- @error_log_path.to_s,
163
- @missing_files_path.to_s
239
+ @error_log_path,
240
+ @missing_files_path
164
241
  ]
165
242
 
166
243
  if @options[:verbose]
@@ -170,7 +247,7 @@ class FileDigests
170
247
  end
171
248
 
172
249
  def initialize_database
173
- @db = SQLite3::Database.new @digest_database_path.to_s
250
+ @db = SQLite3::Database.new @digest_database_path
174
251
  @db.results_as_hash = true
175
252
  @db.busy_timeout = 5000
176
253
 
@@ -229,7 +306,7 @@ class FileDigests
229
306
  # Convert database from 1st to 2nd version
230
307
  unless get_metadata("digest_algorithm")
231
308
  if get_metadata("database_version") == "1"
232
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
309
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
233
310
  set_metadata("digest_algorithm", "SHA512")
234
311
  else
235
312
  set_metadata("digest_algorithm", "SHA256")
@@ -282,81 +359,82 @@ class FileDigests
282
359
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
283
360
  end
284
361
 
285
- def perform_check
286
- measure_time do
287
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
288
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
289
362
 
290
- walk_files(@files_path.to_s) do |filename|
291
- process_file filename
292
- end
363
+ # Files
293
364
 
294
- nested_transaction do
295
- puts "Tracking renames..." if @options[:verbose]
296
- track_renames
297
- end
365
+ def realpath path
366
+ realxpath path, :realpath
367
+ end
298
368
 
299
- if any_missing_files?
300
- if any_exceptions?
301
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
302
- else
303
- report_missing_files
304
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
305
- nested_transaction do
306
- puts "Removing missing files..." if @options[:verbose]
307
- remove_missing_files
308
- end
309
- end
310
- end
311
- end
369
+ def realdirpath path
370
+ realxpath path, :realdirpath
371
+ end
312
372
 
313
- if @new_digest_algorithm && !@options[:test_only]
314
- if any_missing_files? || any_likely_damaged? || any_exceptions?
315
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
316
- else
317
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
318
- digests_update_digests_to_new_digests
319
- set_metadata "digest_algorithm", @new_digest_algorithm
320
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
321
- end
322
- end
373
+ def realxpath path, method_name
374
+ path = path.encode("utf-8")
323
375
 
324
- if any_likely_damaged? || any_exceptions?
325
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
326
- end
376
+ if Gem.win_platform?
377
+ path = path.gsub(/\\/, "/")
378
+ end
327
379
 
328
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
380
+ path = File.send(method_name, path).encode("utf-8")
329
381
 
330
- print_counters
331
- end
332
-
333
- puts "Performing database maintenance..." if @options[:verbose]
334
- execute "PRAGMA optimize"
335
- execute "VACUUM"
336
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
382
+ if Gem.win_platform? && path[0] == "/"
383
+ path = Dir.pwd[0, 2].encode("utf-8") + path
384
+ end
337
385
 
338
- hide_database_files
386
+ path
387
+ end
388
+
389
+ def perhaps_nt_path path
390
+ if Gem.win_platform?
391
+ "\\??\\#{path.gsub(/\//,"\\")}"
392
+ else
393
+ path
339
394
  end
340
395
  end
341
396
 
342
- def show_duplicates
343
- current_digest = nil
344
- digests_select_duplicates.each do |found|
345
- if current_digest != found["digest"]
346
- puts "" if current_digest
347
- current_digest = found["digest"]
348
- puts "#{found["digest"]}:"
397
+ def get_file_digest filename
398
+ File.open(filename, "rb") do |io|
399
+ digest = OpenSSL::Digest.new(@digest_algorithm)
400
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
401
+
402
+ buffer = ""
403
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
404
+ digest.update(buffer)
405
+ new_digest.update(buffer) if @new_digest_algorithm
349
406
  end
350
- puts " #{found["filename"]}"
407
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
351
408
  end
352
409
  end
353
410
 
354
- private
411
+ def walk_files(path, &block)
412
+ Dir.each_child(path, encoding: "UTF-8") do |item|
413
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
414
+ begin
415
+ item_perhaps_nt_path = perhaps_nt_path item
416
+
417
+ unless File.symlink? item_perhaps_nt_path
418
+ if File.directory?(item_perhaps_nt_path)
419
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
420
+ walk_files(item, &block)
421
+ else
422
+ yield item
423
+ end
424
+ end
425
+
426
+ rescue => exception
427
+ @counters[:exceptions] += 1
428
+ report_file_exception exception, item
429
+ end
430
+ end
431
+ end
355
432
 
356
433
  def process_file filename
357
434
  perhaps_nt_filename = perhaps_nt_path filename
358
435
 
359
- return if File.symlink? perhaps_nt_filename
436
+ # this is checked in the walk_files
437
+ # return if File.symlink? perhaps_nt_filename
360
438
 
361
439
  stat = File.stat perhaps_nt_filename
362
440
 
@@ -373,7 +451,7 @@ class FileDigests
373
451
  return
374
452
  end
375
453
 
376
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
454
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
377
455
  mtime_string = time_to_database stat.mtime
378
456
  digest, new_digest = get_file_digest(perhaps_nt_filename)
379
457
 
@@ -443,6 +521,7 @@ class FileDigests
443
521
  File.open(@missing_files_path, "a") do |f|
444
522
  write_missing_files f
445
523
  end
524
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
446
525
  end
447
526
  end
448
527
 
@@ -466,6 +545,17 @@ class FileDigests
466
545
  end
467
546
 
468
547
 
548
+ # Runtime state helpers
549
+
550
+ def any_exceptions?
551
+ @counters[:exceptions] > 0
552
+ end
553
+
554
+ def any_likely_damaged?
555
+ @counters[:likely_damaged] > 0
556
+ end
557
+
558
+
469
559
  # Database helpers
470
560
 
471
561
  def execute *args, &block
@@ -561,96 +651,6 @@ class FileDigests
561
651
  end
562
652
 
563
653
 
564
- # Filesystem-related helpers
565
-
566
- def realpath_with_disk path
567
- path = path.realpath
568
- if Gem.win_platform? && path.to_s[0] == "/"
569
- return Pathname(Dir.pwd[0, 2] + path.to_s)
570
- end
571
- path
572
- end
573
-
574
- def realdirpath_with_disk path
575
- path = path.realdirpath
576
- if Gem.win_platform? && path.to_s[0] == "/"
577
- return Pathname(Dir.pwd[0, 2] + path.to_s)
578
- end
579
- path
580
- end
581
-
582
- def patch_path_string path
583
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
584
- end
585
-
586
- def cleanup_path path
587
- Pathname.new(patch_path_string(path)).cleanpath
588
- end
589
-
590
- def ensure_dir_exist path
591
- if File.exist?(path)
592
- unless File.directory?(path)
593
- raise "#{path} is not a directory"
594
- end
595
- else
596
- FileUtils.mkdir_p path
597
- end
598
- end
599
-
600
- def walk_files(path, &block)
601
- Dir.each_child(path, encoding: "UTF-8") do |item|
602
- item = "#{path}#{File::SEPARATOR}#{item}"
603
- begin
604
- item_perhaps_nt_path = perhaps_nt_path item
605
-
606
- unless File.symlink? item_perhaps_nt_path
607
- if File.directory?(item_perhaps_nt_path)
608
- raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
609
- walk_files(item, &block)
610
- else
611
- yield item
612
- end
613
- end
614
- rescue => exception
615
- @counters[:exceptions] += 1
616
- report_file_exception exception, item
617
- end
618
- end
619
- end
620
-
621
- def perhaps_nt_path path
622
- if Gem.win_platform?
623
- "\\??\\#{path.gsub(/\//,"\\")}"
624
- else
625
- path
626
- end
627
- end
628
-
629
- def get_file_digest filename
630
- File.open(filename, "rb") do |io|
631
- digest = OpenSSL::Digest.new(@digest_algorithm)
632
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
633
-
634
- buffer = ""
635
- while io.read(409600, buffer) # 409600 seems like a sweet spot
636
- digest.update(buffer)
637
- new_digest.update(buffer) if @new_digest_algorithm
638
- end
639
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
640
- end
641
- end
642
-
643
-
644
- # Runtime state helpers
645
-
646
- def any_exceptions?
647
- @counters[:exceptions] > 0
648
- end
649
-
650
- def any_likely_damaged?
651
- @counters[:likely_damaged] > 0
652
- end
653
-
654
654
  # UI helpers
655
655
 
656
656
  def confirm text
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.40
4
+ version: 0.0.41
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov