file-digests 0.0.40 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +163 -163
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9d040599aee9aeb62234557b2a92edec36265e4439965e4c9d00a2c9afa117a
4
- data.tar.gz: 992fe8a843afe761537a3c48b1ecde27ccdf8101be58dd4b456572c59232141b
3
+ metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
4
+ data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
5
5
  SHA512:
6
- metadata.gz: 1e15aa584690f8062a51cbb4785c9716e610258fba77859cb2b50642154701ffa1651312be1f6455921ee435ed7df5ed00bcc36f12def4009ac7edcc4aebb93e
7
- data.tar.gz: 5c94ae4165677af0be7b790a7a92ffc9af7751f0dd2661bf238d06b02569ef4ec746908721c7d16781d11a431048759e1490f9e180ea9c2e647f5e585a3ebbe0
6
+ metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
7
+ data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +19,6 @@ require "digest"
17
19
  require "fileutils"
18
20
  require "openssl"
19
21
  require "optparse"
20
- require "pathname"
21
22
  require "set"
22
23
  require "sqlite3"
23
24
 
@@ -138,29 +139,105 @@ class FileDigests
138
139
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
139
140
  end
140
141
 
142
+ def perform_check
143
+ measure_time do
144
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
145
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
146
+
147
+ walk_files(@files_path) do |filename|
148
+ process_file filename
149
+ end
150
+
151
+ nested_transaction do
152
+ puts "Tracking renames..." if @options[:verbose]
153
+ track_renames
154
+ end
155
+
156
+ if any_missing_files?
157
+ if any_exceptions?
158
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
159
+ else
160
+ report_missing_files
161
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
162
+ nested_transaction do
163
+ puts "Removing missing files..." if @options[:verbose]
164
+ remove_missing_files
165
+ end
166
+ end
167
+ end
168
+ end
169
+
170
+ if @new_digest_algorithm && !@options[:test_only]
171
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
172
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
173
+ else
174
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
175
+ digests_update_digests_to_new_digests
176
+ set_metadata "digest_algorithm", @new_digest_algorithm
177
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
178
+ end
179
+ end
180
+
181
+ if any_likely_damaged? || any_exceptions?
182
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
183
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
184
+ end
185
+
186
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
187
+
188
+ print_counters
189
+ end
190
+
191
+ puts "Performing database maintenance..." if @options[:verbose]
192
+ execute "PRAGMA optimize"
193
+ execute "VACUUM"
194
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
195
+
196
+ hide_database_files
197
+ end
198
+ end
199
+
200
+ def show_duplicates
201
+ current_digest = nil
202
+ digests_select_duplicates.each do |found|
203
+ if current_digest != found["digest"]
204
+ puts "" if current_digest
205
+ current_digest = found["digest"]
206
+ puts "#{found["digest"]}:"
207
+ end
208
+ puts " #{found["filename"]}"
209
+ end
210
+ end
211
+
212
+ private
213
+
141
214
  def initialize_paths files_path, digest_database_path
215
+ @files_path = realpath(files_path || ".")
216
+
217
+ unless File.directory?(@files_path) && File.readable?(@files_path)
218
+ raise "ERROR: Files path must be a readable directory"
219
+ end
220
+
142
221
  @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
143
- @files_path = cleanup_path(files_path || ".")
144
- raise "ERROR: Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
145
- @files_path = realpath_with_disk @files_path
222
+
223
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
224
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
146
225
 
147
- @error_log_path = @files_path + "file-digests errors #{@start_time_filename_string}.txt"
148
- @missing_files_path = @files_path + "file-digests missing files #{@start_time_filename_string}.txt"
226
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
227
+
228
+ if File.directory?(@digest_database_path)
229
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
230
+ end
149
231
 
150
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
151
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
152
- ensure_dir_exist @digest_database_path.dirname
153
- @digest_database_path = realdirpath_with_disk @digest_database_path
154
-
155
232
  @digest_database_files = [
156
- "#{@digest_database_path}",
233
+ @digest_database_path,
157
234
  "#{@digest_database_path}-wal",
158
235
  "#{@digest_database_path}-shm"
159
236
  ]
160
237
 
161
238
  @skip_files = @digest_database_files + [
162
- @error_log_path.to_s,
163
- @missing_files_path.to_s
239
+ @error_log_path,
240
+ @missing_files_path
164
241
  ]
165
242
 
166
243
  if @options[:verbose]
@@ -170,7 +247,7 @@ class FileDigests
170
247
  end
171
248
 
172
249
  def initialize_database
173
- @db = SQLite3::Database.new @digest_database_path.to_s
250
+ @db = SQLite3::Database.new @digest_database_path
174
251
  @db.results_as_hash = true
175
252
  @db.busy_timeout = 5000
176
253
 
@@ -229,7 +306,7 @@ class FileDigests
229
306
  # Convert database from 1st to 2nd version
230
307
  unless get_metadata("digest_algorithm")
231
308
  if get_metadata("database_version") == "1"
232
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
309
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
233
310
  set_metadata("digest_algorithm", "SHA512")
234
311
  else
235
312
  set_metadata("digest_algorithm", "SHA256")
@@ -282,81 +359,82 @@ class FileDigests
282
359
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
283
360
  end
284
361
 
285
- def perform_check
286
- measure_time do
287
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
288
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
289
362
 
290
- walk_files(@files_path.to_s) do |filename|
291
- process_file filename
292
- end
363
+ # Files
293
364
 
294
- nested_transaction do
295
- puts "Tracking renames..." if @options[:verbose]
296
- track_renames
297
- end
365
+ def realpath path
366
+ realxpath path, :realpath
367
+ end
298
368
 
299
- if any_missing_files?
300
- if any_exceptions?
301
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
302
- else
303
- report_missing_files
304
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
305
- nested_transaction do
306
- puts "Removing missing files..." if @options[:verbose]
307
- remove_missing_files
308
- end
309
- end
310
- end
311
- end
369
+ def realdirpath path
370
+ realxpath path, :realdirpath
371
+ end
312
372
 
313
- if @new_digest_algorithm && !@options[:test_only]
314
- if any_missing_files? || any_likely_damaged? || any_exceptions?
315
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
316
- else
317
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
318
- digests_update_digests_to_new_digests
319
- set_metadata "digest_algorithm", @new_digest_algorithm
320
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
321
- end
322
- end
373
+ def realxpath path, method_name
374
+ path = path.encode("utf-8")
323
375
 
324
- if any_likely_damaged? || any_exceptions?
325
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
326
- end
376
+ if Gem.win_platform?
377
+ path = path.gsub(/\\/, "/")
378
+ end
327
379
 
328
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
380
+ path = File.send(method_name, path).encode("utf-8")
329
381
 
330
- print_counters
331
- end
332
-
333
- puts "Performing database maintenance..." if @options[:verbose]
334
- execute "PRAGMA optimize"
335
- execute "VACUUM"
336
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
382
+ if Gem.win_platform? && path[0] == "/"
383
+ path = Dir.pwd[0, 2].encode("utf-8") + path
384
+ end
337
385
 
338
- hide_database_files
386
+ path
387
+ end
388
+
389
+ def perhaps_nt_path path
390
+ if Gem.win_platform?
391
+ "\\??\\#{path.gsub(/\//,"\\")}"
392
+ else
393
+ path
339
394
  end
340
395
  end
341
396
 
342
- def show_duplicates
343
- current_digest = nil
344
- digests_select_duplicates.each do |found|
345
- if current_digest != found["digest"]
346
- puts "" if current_digest
347
- current_digest = found["digest"]
348
- puts "#{found["digest"]}:"
397
+ def get_file_digest filename
398
+ File.open(filename, "rb") do |io|
399
+ digest = OpenSSL::Digest.new(@digest_algorithm)
400
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
401
+
402
+ buffer = ""
403
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
404
+ digest.update(buffer)
405
+ new_digest.update(buffer) if @new_digest_algorithm
349
406
  end
350
- puts " #{found["filename"]}"
407
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
351
408
  end
352
409
  end
353
410
 
354
- private
411
+ def walk_files(path, &block)
412
+ Dir.each_child(path, encoding: "UTF-8") do |item|
413
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
414
+ begin
415
+ item_perhaps_nt_path = perhaps_nt_path item
416
+
417
+ unless File.symlink? item_perhaps_nt_path
418
+ if File.directory?(item_perhaps_nt_path)
419
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
420
+ walk_files(item, &block)
421
+ else
422
+ yield item
423
+ end
424
+ end
425
+
426
+ rescue => exception
427
+ @counters[:exceptions] += 1
428
+ report_file_exception exception, item
429
+ end
430
+ end
431
+ end
355
432
 
356
433
  def process_file filename
357
434
  perhaps_nt_filename = perhaps_nt_path filename
358
435
 
359
- return if File.symlink? perhaps_nt_filename
436
+ # this is checked in the walk_files
437
+ # return if File.symlink? perhaps_nt_filename
360
438
 
361
439
  stat = File.stat perhaps_nt_filename
362
440
 
@@ -373,7 +451,7 @@ class FileDigests
373
451
  return
374
452
  end
375
453
 
376
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
454
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
377
455
  mtime_string = time_to_database stat.mtime
378
456
  digest, new_digest = get_file_digest(perhaps_nt_filename)
379
457
 
@@ -443,6 +521,7 @@ class FileDigests
443
521
  File.open(@missing_files_path, "a") do |f|
444
522
  write_missing_files f
445
523
  end
524
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
446
525
  end
447
526
  end
448
527
 
@@ -466,6 +545,17 @@ class FileDigests
466
545
  end
467
546
 
468
547
 
548
+ # Runtime state helpers
549
+
550
+ def any_exceptions?
551
+ @counters[:exceptions] > 0
552
+ end
553
+
554
+ def any_likely_damaged?
555
+ @counters[:likely_damaged] > 0
556
+ end
557
+
558
+
469
559
  # Database helpers
470
560
 
471
561
  def execute *args, &block
@@ -561,96 +651,6 @@ class FileDigests
561
651
  end
562
652
 
563
653
 
564
- # Filesystem-related helpers
565
-
566
- def realpath_with_disk path
567
- path = path.realpath
568
- if Gem.win_platform? && path.to_s[0] == "/"
569
- return Pathname(Dir.pwd[0, 2] + path.to_s)
570
- end
571
- path
572
- end
573
-
574
- def realdirpath_with_disk path
575
- path = path.realdirpath
576
- if Gem.win_platform? && path.to_s[0] == "/"
577
- return Pathname(Dir.pwd[0, 2] + path.to_s)
578
- end
579
- path
580
- end
581
-
582
- def patch_path_string path
583
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
584
- end
585
-
586
- def cleanup_path path
587
- Pathname.new(patch_path_string(path)).cleanpath
588
- end
589
-
590
- def ensure_dir_exist path
591
- if File.exist?(path)
592
- unless File.directory?(path)
593
- raise "#{path} is not a directory"
594
- end
595
- else
596
- FileUtils.mkdir_p path
597
- end
598
- end
599
-
600
- def walk_files(path, &block)
601
- Dir.each_child(path, encoding: "UTF-8") do |item|
602
- item = "#{path}#{File::SEPARATOR}#{item}"
603
- begin
604
- item_perhaps_nt_path = perhaps_nt_path item
605
-
606
- unless File.symlink? item_perhaps_nt_path
607
- if File.directory?(item_perhaps_nt_path)
608
- raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
609
- walk_files(item, &block)
610
- else
611
- yield item
612
- end
613
- end
614
- rescue => exception
615
- @counters[:exceptions] += 1
616
- report_file_exception exception, item
617
- end
618
- end
619
- end
620
-
621
- def perhaps_nt_path path
622
- if Gem.win_platform?
623
- "\\??\\#{path.gsub(/\//,"\\")}"
624
- else
625
- path
626
- end
627
- end
628
-
629
- def get_file_digest filename
630
- File.open(filename, "rb") do |io|
631
- digest = OpenSSL::Digest.new(@digest_algorithm)
632
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
633
-
634
- buffer = ""
635
- while io.read(409600, buffer) # 409600 seems like a sweet spot
636
- digest.update(buffer)
637
- new_digest.update(buffer) if @new_digest_algorithm
638
- end
639
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
640
- end
641
- end
642
-
643
-
644
- # Runtime state helpers
645
-
646
- def any_exceptions?
647
- @counters[:exceptions] > 0
648
- end
649
-
650
- def any_likely_damaged?
651
- @counters[:likely_damaged] > 0
652
- end
653
-
654
654
  # UI helpers
655
655
 
656
656
  def confirm text
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.40
4
+ version: 0.0.41
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov