file-digests 0.0.36 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +211 -138
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 624f7ec80a0cb15be2bc07a3dd592c33c7a178c22fb7f59ef326760d17cc564f
4
- data.tar.gz: 92bb88c58bc152ad01649b0688861ea16af6801d2d6625c238b96e9ff00aaed4
3
+ metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
4
+ data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
5
5
  SHA512:
6
- metadata.gz: 9dbca39af63f07ddfe4d92618d0ff5e0813fa30561d42b629caa918f664f1149f8ad895e8555137b0f462eb4eac942adda07efed03859d17cb635a8d9bb25b57
7
- data.tar.gz: 788d37c5d82e8892dfcfb6f4a8532f484f7cf17c5f937e8a64b09af902fe3a904ed034fc88d403852bbd8819a9d11ab61a672e2d6f3114420ed7777ea9db2961
6
+ metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
7
+ data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,10 +17,8 @@
15
17
  require "date"
16
18
  require "digest"
17
19
  require "fileutils"
18
- require "find"
19
20
  require "openssl"
20
21
  require "optparse"
21
- require "pathname"
22
22
  require "set"
23
23
  require "sqlite3"
24
24
 
@@ -139,16 +139,106 @@ class FileDigests
139
139
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
140
140
  end
141
141
 
142
+ def perform_check
143
+ measure_time do
144
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
145
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
146
+
147
+ walk_files(@files_path) do |filename|
148
+ process_file filename
149
+ end
150
+
151
+ nested_transaction do
152
+ puts "Tracking renames..." if @options[:verbose]
153
+ track_renames
154
+ end
155
+
156
+ if any_missing_files?
157
+ if any_exceptions?
158
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
159
+ else
160
+ report_missing_files
161
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
162
+ nested_transaction do
163
+ puts "Removing missing files..." if @options[:verbose]
164
+ remove_missing_files
165
+ end
166
+ end
167
+ end
168
+ end
169
+
170
+ if @new_digest_algorithm && !@options[:test_only]
171
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
172
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
173
+ else
174
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
175
+ digests_update_digests_to_new_digests
176
+ set_metadata "digest_algorithm", @new_digest_algorithm
177
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
178
+ end
179
+ end
180
+
181
+ if any_likely_damaged? || any_exceptions?
182
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
183
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
184
+ end
185
+
186
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
187
+
188
+ print_counters
189
+ end
190
+
191
+ puts "Performing database maintenance..." if @options[:verbose]
192
+ execute "PRAGMA optimize"
193
+ execute "VACUUM"
194
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
195
+
196
+ hide_database_files
197
+ end
198
+ end
199
+
200
+ def show_duplicates
201
+ current_digest = nil
202
+ digests_select_duplicates.each do |found|
203
+ if current_digest != found["digest"]
204
+ puts "" if current_digest
205
+ current_digest = found["digest"]
206
+ puts "#{found["digest"]}:"
207
+ end
208
+ puts " #{found["filename"]}"
209
+ end
210
+ end
211
+
212
+ private
213
+
142
214
  def initialize_paths files_path, digest_database_path
143
- @files_path = cleanup_path(files_path || ".")
215
+ @files_path = realpath(files_path || ".")
216
+
217
+ unless File.directory?(@files_path) && File.readable?(@files_path)
218
+ raise "ERROR: Files path must be a readable directory"
219
+ end
144
220
 
145
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
221
+ @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
222
+
223
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
224
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
146
225
 
147
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
148
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
149
- ensure_dir_exist @digest_database_path.dirname
226
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
150
227
 
151
- @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
228
+ if File.directory?(@digest_database_path)
229
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
230
+ end
231
+
232
+ @digest_database_files = [
233
+ @digest_database_path,
234
+ "#{@digest_database_path}-wal",
235
+ "#{@digest_database_path}-shm"
236
+ ]
237
+
238
+ @skip_files = @digest_database_files + [
239
+ @error_log_path,
240
+ @missing_files_path
241
+ ]
152
242
 
153
243
  if @options[:verbose]
154
244
  puts "Target directory: #{@files_path}"
@@ -157,7 +247,7 @@ class FileDigests
157
247
  end
158
248
 
159
249
  def initialize_database
160
- @db = SQLite3::Database.new @digest_database_path.to_s
250
+ @db = SQLite3::Database.new @digest_database_path
161
251
  @db.results_as_hash = true
162
252
  @db.busy_timeout = 5000
163
253
 
@@ -216,7 +306,7 @@ class FileDigests
216
306
  # Convert database from 1st to 2nd version
217
307
  unless get_metadata("digest_algorithm")
218
308
  if get_metadata("database_version") == "1"
219
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
309
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
220
310
  set_metadata("digest_algorithm", "SHA512")
221
311
  else
222
312
  set_metadata("digest_algorithm", "SHA256")
@@ -269,81 +359,84 @@ class FileDigests
269
359
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
270
360
  end
271
361
 
272
- def perform_check
273
- measure_time do
274
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
275
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
276
362
 
277
- walk_files do |filename|
278
- process_file filename
279
- end
363
+ # Files
280
364
 
281
- nested_transaction do
282
- puts "Tracking renames..." if @options[:verbose]
283
- track_renames
284
- end
365
+ def realpath path
366
+ realxpath path, :realpath
367
+ end
285
368
 
286
- if any_missing_files?
287
- if any_exceptions?
288
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
289
- else
290
- print_missing_files
291
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
292
- nested_transaction do
293
- puts "Removing missing files..." if @options[:verbose]
294
- remove_missing_files
295
- end
296
- end
297
- end
298
- end
369
+ def realdirpath path
370
+ realxpath path, :realdirpath
371
+ end
299
372
 
300
- if @new_digest_algorithm && !@options[:test_only]
301
- if any_missing_files? || any_likely_damaged? || any_exceptions?
302
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
303
- else
304
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
305
- digests_update_digests_to_new_digests
306
- set_metadata "digest_algorithm", @new_digest_algorithm
307
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
308
- end
309
- end
373
+ def realxpath path, method_name
374
+ path = path.encode("utf-8")
310
375
 
311
- if any_likely_damaged? || any_exceptions?
312
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
313
- end
376
+ if Gem.win_platform?
377
+ path = path.gsub(/\\/, "/")
378
+ end
314
379
 
315
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
380
+ path = File.send(method_name, path).encode("utf-8")
316
381
 
317
- print_counters
318
- end
319
-
320
- puts "Performing database maintenance..." if @options[:verbose]
321
- execute "PRAGMA optimize"
322
- execute "VACUUM"
323
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
382
+ if Gem.win_platform? && path[0] == "/"
383
+ path = Dir.pwd[0, 2].encode("utf-8") + path
384
+ end
324
385
 
325
- hide_database_files
386
+ path
387
+ end
388
+
389
+ def perhaps_nt_path path
390
+ if Gem.win_platform?
391
+ "\\??\\#{path.gsub(/\//,"\\")}"
392
+ else
393
+ path
326
394
  end
327
395
  end
328
396
 
329
- def show_duplicates
330
- current_digest = nil
331
- digests_select_duplicates.each do |found|
332
- if current_digest != found["digest"]
333
- puts "" if current_digest
334
- current_digest = found["digest"]
335
- puts "#{found["digest"]}:"
397
+ def get_file_digest filename
398
+ File.open(filename, "rb") do |io|
399
+ digest = OpenSSL::Digest.new(@digest_algorithm)
400
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
401
+
402
+ buffer = ""
403
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
404
+ digest.update(buffer)
405
+ new_digest.update(buffer) if @new_digest_algorithm
336
406
  end
337
- puts " #{found["filename"]}"
407
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
338
408
  end
339
409
  end
340
410
 
341
- private
411
+ def walk_files(path, &block)
412
+ Dir.each_child(path, encoding: "UTF-8") do |item|
413
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
414
+ begin
415
+ item_perhaps_nt_path = perhaps_nt_path item
416
+
417
+ unless File.symlink? item_perhaps_nt_path
418
+ if File.directory?(item_perhaps_nt_path)
419
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
420
+ walk_files(item, &block)
421
+ else
422
+ yield item
423
+ end
424
+ end
425
+
426
+ rescue => exception
427
+ @counters[:exceptions] += 1
428
+ report_file_exception exception, item
429
+ end
430
+ end
431
+ end
342
432
 
343
433
  def process_file filename
344
- return if File.symlink? filename
434
+ perhaps_nt_filename = perhaps_nt_path filename
435
+
436
+ # this is checked in the walk_files
437
+ # return if File.symlink? perhaps_nt_filename
345
438
 
346
- stat = File.stat filename
439
+ stat = File.stat perhaps_nt_filename
347
440
 
348
441
  return if stat.blockdev?
349
442
  return if stat.chardev?
@@ -353,23 +446,19 @@ class FileDigests
353
446
 
354
447
  raise "File is not readable" unless stat.readable?
355
448
 
356
- if @digest_database_files.include?(filename)
357
- puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
449
+ if @skip_files.include?(filename)
450
+ puts "SKIPPING FILE: #{filename}" if @options[:verbose]
358
451
  return
359
452
  end
360
453
 
361
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
454
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
362
455
  mtime_string = time_to_database stat.mtime
363
- digest, new_digest = get_file_digest(filename)
456
+ digest, new_digest = get_file_digest(perhaps_nt_filename)
364
457
 
365
458
  nested_transaction do
366
459
  new_digests_insert(normalized_filename, new_digest) if new_digest
367
460
  process_file_indeed normalized_filename, mtime_string, digest
368
461
  end
369
-
370
- rescue => exception
371
- @counters[:exceptions] += 1
372
- print_file_exception exception, filename
373
462
  end
374
463
 
375
464
  def process_file_indeed filename, mtime, digest
@@ -395,7 +484,7 @@ class FileDigests
395
484
  else
396
485
  if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
397
486
  @counters[:likely_damaged] += 1
398
- STDERR.puts "LIKELY DAMAGED: #{filename}"
487
+ error_text "LIKELY DAMAGED: #{filename}"
399
488
  else
400
489
  @counters[:updated] += 1
401
490
  puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
@@ -425,10 +514,20 @@ class FileDigests
425
514
  @counters[:renamed] = @db.changes
426
515
  end
427
516
 
428
- def print_missing_files
517
+ def report_missing_files
429
518
  puts "\nMISSING FILES:"
519
+ write_missing_files STDOUT
520
+ if missing_files_count > 256
521
+ File.open(@missing_files_path, "a") do |f|
522
+ write_missing_files f
523
+ end
524
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
525
+ end
526
+ end
527
+
528
+ def write_missing_files dest
430
529
  missing_files_select_all_filenames.each do |record|
431
- puts record["filename"]
530
+ dest.puts record["filename"]
432
531
  end
433
532
  end
434
533
 
@@ -446,6 +545,17 @@ class FileDigests
446
545
  end
447
546
 
448
547
 
548
+ # Runtime state helpers
549
+
550
+ def any_exceptions?
551
+ @counters[:exceptions] > 0
552
+ end
553
+
554
+ def any_likely_damaged?
555
+ @counters[:likely_damaged] > 0
556
+ end
557
+
558
+
449
559
  # Database helpers
450
560
 
451
561
  def execute *args, &block
@@ -531,7 +641,7 @@ class FileDigests
531
641
  def check_if_database_is_at_certain_version target_version
532
642
  current_version = get_metadata("database_version")
533
643
  if current_version != target_version
534
- STDERR.puts "This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
644
+ STDERR.puts "ERROR: This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
535
645
  raise "Incompatible database version"
536
646
  end
537
647
  end
@@ -541,57 +651,6 @@ class FileDigests
541
651
  end
542
652
 
543
653
 
544
- # Filesystem-related helpers
545
-
546
- def patch_path_string path
547
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
548
- end
549
-
550
- def cleanup_path path
551
- Pathname.new(patch_path_string(path)).cleanpath
552
- end
553
-
554
- def ensure_dir_exist path
555
- if File.exist?(path)
556
- unless File.directory?(path)
557
- raise "#{path} is not a directory"
558
- end
559
- else
560
- FileUtils.mkdir_p path
561
- end
562
- end
563
-
564
- def walk_files
565
- Find.find(@files_path) do |path|
566
- yield path
567
- end
568
- end
569
-
570
- def get_file_digest filename
571
- File.open(filename, "rb") do |io|
572
- digest = OpenSSL::Digest.new(@digest_algorithm)
573
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
574
-
575
- buffer = ""
576
- while io.read(409600, buffer) # 409600 seems like a sweet spot
577
- digest.update(buffer)
578
- new_digest.update(buffer) if @new_digest_algorithm
579
- end
580
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
581
- end
582
- end
583
-
584
-
585
- # Runtime state helpers
586
-
587
- def any_exceptions?
588
- @counters[:exceptions] > 0
589
- end
590
-
591
- def any_likely_damaged?
592
- @counters[:likely_damaged] > 0
593
- end
594
-
595
654
  # UI helpers
596
655
 
597
656
  def confirm text
@@ -611,17 +670,31 @@ class FileDigests
611
670
  puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
612
671
  end
613
672
 
614
- def print_file_exception exception, filename
615
- STDERR.print "EXCEPTION: #{exception.message}, processing file: "
673
+ def report_file_exception exception, filename
674
+ write_file_exception STDERR, exception, filename
675
+ File.open(@error_log_path, "a") do |f|
676
+ write_file_exception f, exception, filename
677
+ end
678
+ end
679
+
680
+ def write_file_exception dest, exception, filename
681
+ dest.print "ERROR: #{exception.message}, processing file: "
616
682
  begin
617
- STDERR.print filename.encode("utf-8", universal_newline: true)
683
+ dest.print filename.encode("utf-8", universal_newline: true)
618
684
  rescue
619
- STDERR.print "(Unable to encode file name to utf-8) "
620
- STDERR.print filename
685
+ dest.print "(Unable to encode file name to utf-8) "
686
+ dest.print filename
687
+ end
688
+ dest.print "\n"
689
+ dest.flush
690
+ exception.backtrace.each { |line| dest.puts " " + line }
691
+ end
692
+
693
+ def error_text text
694
+ STDERR.puts text
695
+ File.open(@error_log_path, "a") do |f|
696
+ f.puts text
621
697
  end
622
- STDERR.print "\n"
623
- STDERR.flush
624
- exception.backtrace.each { |line| STDERR.puts " " + line }
625
698
  end
626
699
 
627
700
  def print_counters
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.36
4
+ version: 0.0.41
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-17 00:00:00.000000000 Z
11
+ date: 2020-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openssl