file-digests 0.0.36 → 0.0.41

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +211 -138
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 624f7ec80a0cb15be2bc07a3dd592c33c7a178c22fb7f59ef326760d17cc564f
4
- data.tar.gz: 92bb88c58bc152ad01649b0688861ea16af6801d2d6625c238b96e9ff00aaed4
3
+ metadata.gz: 911caad1e008b6365ad3f0043e992793a9d77ff13ce71b0da004608a8261398b
4
+ data.tar.gz: f98a696b52e1549694d6ff80f9b3efe894b8ad9dc79c7bc7da1a01a74800fb77
5
5
  SHA512:
6
- metadata.gz: 9dbca39af63f07ddfe4d92618d0ff5e0813fa30561d42b629caa918f664f1149f8ad895e8555137b0f462eb4eac942adda07efed03859d17cb635a8d9bb25b57
7
- data.tar.gz: 788d37c5d82e8892dfcfb6f4a8532f484f7cf17c5f937e8a64b09af902fe3a904ed034fc88d403852bbd8819a9d11ab61a672e2d6f3114420ed7777ea9db2961
6
+ metadata.gz: 9004f1053569a94f58a4f90279ae1c86597a72baa0451ea62bf0481650f3e049d5a4dff6c513e093abf808cf603ec6e9d4ad017a143c16d8ed16007912797b6c
7
+ data.tar.gz: fa65d38a9fe7dde9cc29f5a389d6f1ee52e4593637b0f67a2173821180f8aeeea9bddddf6bbf95ecc7a9ada47364869f4ef087e90b405ef1d7329d00f9c39375
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,10 +17,8 @@
15
17
  require "date"
16
18
  require "digest"
17
19
  require "fileutils"
18
- require "find"
19
20
  require "openssl"
20
21
  require "optparse"
21
- require "pathname"
22
22
  require "set"
23
23
  require "sqlite3"
24
24
 
@@ -139,16 +139,106 @@ class FileDigests
139
139
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
140
140
  end
141
141
 
142
+ def perform_check
143
+ measure_time do
144
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
145
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
146
+
147
+ walk_files(@files_path) do |filename|
148
+ process_file filename
149
+ end
150
+
151
+ nested_transaction do
152
+ puts "Tracking renames..." if @options[:verbose]
153
+ track_renames
154
+ end
155
+
156
+ if any_missing_files?
157
+ if any_exceptions?
158
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
159
+ else
160
+ report_missing_files
161
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
162
+ nested_transaction do
163
+ puts "Removing missing files..." if @options[:verbose]
164
+ remove_missing_files
165
+ end
166
+ end
167
+ end
168
+ end
169
+
170
+ if @new_digest_algorithm && !@options[:test_only]
171
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
172
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
173
+ else
174
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
175
+ digests_update_digests_to_new_digests
176
+ set_metadata "digest_algorithm", @new_digest_algorithm
177
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
178
+ end
179
+ end
180
+
181
+ if any_likely_damaged? || any_exceptions?
182
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
183
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
184
+ end
185
+
186
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
187
+
188
+ print_counters
189
+ end
190
+
191
+ puts "Performing database maintenance..." if @options[:verbose]
192
+ execute "PRAGMA optimize"
193
+ execute "VACUUM"
194
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
195
+
196
+ hide_database_files
197
+ end
198
+ end
199
+
200
+ def show_duplicates
201
+ current_digest = nil
202
+ digests_select_duplicates.each do |found|
203
+ if current_digest != found["digest"]
204
+ puts "" if current_digest
205
+ current_digest = found["digest"]
206
+ puts "#{found["digest"]}:"
207
+ end
208
+ puts " #{found["filename"]}"
209
+ end
210
+ end
211
+
212
+ private
213
+
142
214
  def initialize_paths files_path, digest_database_path
143
- @files_path = cleanup_path(files_path || ".")
215
+ @files_path = realpath(files_path || ".")
216
+
217
+ unless File.directory?(@files_path) && File.readable?(@files_path)
218
+ raise "ERROR: Files path must be a readable directory"
219
+ end
144
220
 
145
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
221
+ @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
222
+
223
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
224
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
146
225
 
147
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
148
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
149
- ensure_dir_exist @digest_database_path.dirname
226
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
150
227
 
151
- @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
228
+ if File.directory?(@digest_database_path)
229
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
230
+ end
231
+
232
+ @digest_database_files = [
233
+ @digest_database_path,
234
+ "#{@digest_database_path}-wal",
235
+ "#{@digest_database_path}-shm"
236
+ ]
237
+
238
+ @skip_files = @digest_database_files + [
239
+ @error_log_path,
240
+ @missing_files_path
241
+ ]
152
242
 
153
243
  if @options[:verbose]
154
244
  puts "Target directory: #{@files_path}"
@@ -157,7 +247,7 @@ class FileDigests
157
247
  end
158
248
 
159
249
  def initialize_database
160
- @db = SQLite3::Database.new @digest_database_path.to_s
250
+ @db = SQLite3::Database.new @digest_database_path
161
251
  @db.results_as_hash = true
162
252
  @db.busy_timeout = 5000
163
253
 
@@ -216,7 +306,7 @@ class FileDigests
216
306
  # Convert database from 1st to 2nd version
217
307
  unless get_metadata("digest_algorithm")
218
308
  if get_metadata("database_version") == "1"
219
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
309
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
220
310
  set_metadata("digest_algorithm", "SHA512")
221
311
  else
222
312
  set_metadata("digest_algorithm", "SHA256")
@@ -269,81 +359,84 @@ class FileDigests
269
359
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
270
360
  end
271
361
 
272
- def perform_check
273
- measure_time do
274
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
275
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
276
362
 
277
- walk_files do |filename|
278
- process_file filename
279
- end
363
+ # Files
280
364
 
281
- nested_transaction do
282
- puts "Tracking renames..." if @options[:verbose]
283
- track_renames
284
- end
365
+ def realpath path
366
+ realxpath path, :realpath
367
+ end
285
368
 
286
- if any_missing_files?
287
- if any_exceptions?
288
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
289
- else
290
- print_missing_files
291
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
292
- nested_transaction do
293
- puts "Removing missing files..." if @options[:verbose]
294
- remove_missing_files
295
- end
296
- end
297
- end
298
- end
369
+ def realdirpath path
370
+ realxpath path, :realdirpath
371
+ end
299
372
 
300
- if @new_digest_algorithm && !@options[:test_only]
301
- if any_missing_files? || any_likely_damaged? || any_exceptions?
302
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
303
- else
304
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
305
- digests_update_digests_to_new_digests
306
- set_metadata "digest_algorithm", @new_digest_algorithm
307
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
308
- end
309
- end
373
+ def realxpath path, method_name
374
+ path = path.encode("utf-8")
310
375
 
311
- if any_likely_damaged? || any_exceptions?
312
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
313
- end
376
+ if Gem.win_platform?
377
+ path = path.gsub(/\\/, "/")
378
+ end
314
379
 
315
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
380
+ path = File.send(method_name, path).encode("utf-8")
316
381
 
317
- print_counters
318
- end
319
-
320
- puts "Performing database maintenance..." if @options[:verbose]
321
- execute "PRAGMA optimize"
322
- execute "VACUUM"
323
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
382
+ if Gem.win_platform? && path[0] == "/"
383
+ path = Dir.pwd[0, 2].encode("utf-8") + path
384
+ end
324
385
 
325
- hide_database_files
386
+ path
387
+ end
388
+
389
+ def perhaps_nt_path path
390
+ if Gem.win_platform?
391
+ "\\??\\#{path.gsub(/\//,"\\")}"
392
+ else
393
+ path
326
394
  end
327
395
  end
328
396
 
329
- def show_duplicates
330
- current_digest = nil
331
- digests_select_duplicates.each do |found|
332
- if current_digest != found["digest"]
333
- puts "" if current_digest
334
- current_digest = found["digest"]
335
- puts "#{found["digest"]}:"
397
+ def get_file_digest filename
398
+ File.open(filename, "rb") do |io|
399
+ digest = OpenSSL::Digest.new(@digest_algorithm)
400
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
401
+
402
+ buffer = ""
403
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
404
+ digest.update(buffer)
405
+ new_digest.update(buffer) if @new_digest_algorithm
336
406
  end
337
- puts " #{found["filename"]}"
407
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
338
408
  end
339
409
  end
340
410
 
341
- private
411
+ def walk_files(path, &block)
412
+ Dir.each_child(path, encoding: "UTF-8") do |item|
413
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
414
+ begin
415
+ item_perhaps_nt_path = perhaps_nt_path item
416
+
417
+ unless File.symlink? item_perhaps_nt_path
418
+ if File.directory?(item_perhaps_nt_path)
419
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
420
+ walk_files(item, &block)
421
+ else
422
+ yield item
423
+ end
424
+ end
425
+
426
+ rescue => exception
427
+ @counters[:exceptions] += 1
428
+ report_file_exception exception, item
429
+ end
430
+ end
431
+ end
342
432
 
343
433
  def process_file filename
344
- return if File.symlink? filename
434
+ perhaps_nt_filename = perhaps_nt_path filename
435
+
436
+ # this is checked in the walk_files
437
+ # return if File.symlink? perhaps_nt_filename
345
438
 
346
- stat = File.stat filename
439
+ stat = File.stat perhaps_nt_filename
347
440
 
348
441
  return if stat.blockdev?
349
442
  return if stat.chardev?
@@ -353,23 +446,19 @@ class FileDigests
353
446
 
354
447
  raise "File is not readable" unless stat.readable?
355
448
 
356
- if @digest_database_files.include?(filename)
357
- puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
449
+ if @skip_files.include?(filename)
450
+ puts "SKIPPING FILE: #{filename}" if @options[:verbose]
358
451
  return
359
452
  end
360
453
 
361
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
454
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
362
455
  mtime_string = time_to_database stat.mtime
363
- digest, new_digest = get_file_digest(filename)
456
+ digest, new_digest = get_file_digest(perhaps_nt_filename)
364
457
 
365
458
  nested_transaction do
366
459
  new_digests_insert(normalized_filename, new_digest) if new_digest
367
460
  process_file_indeed normalized_filename, mtime_string, digest
368
461
  end
369
-
370
- rescue => exception
371
- @counters[:exceptions] += 1
372
- print_file_exception exception, filename
373
462
  end
374
463
 
375
464
  def process_file_indeed filename, mtime, digest
@@ -395,7 +484,7 @@ class FileDigests
395
484
  else
396
485
  if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
397
486
  @counters[:likely_damaged] += 1
398
- STDERR.puts "LIKELY DAMAGED: #{filename}"
487
+ error_text "LIKELY DAMAGED: #{filename}"
399
488
  else
400
489
  @counters[:updated] += 1
401
490
  puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
@@ -425,10 +514,20 @@ class FileDigests
425
514
  @counters[:renamed] = @db.changes
426
515
  end
427
516
 
428
- def print_missing_files
517
+ def report_missing_files
429
518
  puts "\nMISSING FILES:"
519
+ write_missing_files STDOUT
520
+ if missing_files_count > 256
521
+ File.open(@missing_files_path, "a") do |f|
522
+ write_missing_files f
523
+ end
524
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
525
+ end
526
+ end
527
+
528
+ def write_missing_files dest
430
529
  missing_files_select_all_filenames.each do |record|
431
- puts record["filename"]
530
+ dest.puts record["filename"]
432
531
  end
433
532
  end
434
533
 
@@ -446,6 +545,17 @@ class FileDigests
446
545
  end
447
546
 
448
547
 
548
+ # Runtime state helpers
549
+
550
+ def any_exceptions?
551
+ @counters[:exceptions] > 0
552
+ end
553
+
554
+ def any_likely_damaged?
555
+ @counters[:likely_damaged] > 0
556
+ end
557
+
558
+
449
559
  # Database helpers
450
560
 
451
561
  def execute *args, &block
@@ -531,7 +641,7 @@ class FileDigests
531
641
  def check_if_database_is_at_certain_version target_version
532
642
  current_version = get_metadata("database_version")
533
643
  if current_version != target_version
534
- STDERR.puts "This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
644
+ STDERR.puts "ERROR: This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
535
645
  raise "Incompatible database version"
536
646
  end
537
647
  end
@@ -541,57 +651,6 @@ class FileDigests
541
651
  end
542
652
 
543
653
 
544
- # Filesystem-related helpers
545
-
546
- def patch_path_string path
547
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
548
- end
549
-
550
- def cleanup_path path
551
- Pathname.new(patch_path_string(path)).cleanpath
552
- end
553
-
554
- def ensure_dir_exist path
555
- if File.exist?(path)
556
- unless File.directory?(path)
557
- raise "#{path} is not a directory"
558
- end
559
- else
560
- FileUtils.mkdir_p path
561
- end
562
- end
563
-
564
- def walk_files
565
- Find.find(@files_path) do |path|
566
- yield path
567
- end
568
- end
569
-
570
- def get_file_digest filename
571
- File.open(filename, "rb") do |io|
572
- digest = OpenSSL::Digest.new(@digest_algorithm)
573
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
574
-
575
- buffer = ""
576
- while io.read(409600, buffer) # 409600 seems like a sweet spot
577
- digest.update(buffer)
578
- new_digest.update(buffer) if @new_digest_algorithm
579
- end
580
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
581
- end
582
- end
583
-
584
-
585
- # Runtime state helpers
586
-
587
- def any_exceptions?
588
- @counters[:exceptions] > 0
589
- end
590
-
591
- def any_likely_damaged?
592
- @counters[:likely_damaged] > 0
593
- end
594
-
595
654
  # UI helpers
596
655
 
597
656
  def confirm text
@@ -611,17 +670,31 @@ class FileDigests
611
670
  puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
612
671
  end
613
672
 
614
- def print_file_exception exception, filename
615
- STDERR.print "EXCEPTION: #{exception.message}, processing file: "
673
+ def report_file_exception exception, filename
674
+ write_file_exception STDERR, exception, filename
675
+ File.open(@error_log_path, "a") do |f|
676
+ write_file_exception f, exception, filename
677
+ end
678
+ end
679
+
680
+ def write_file_exception dest, exception, filename
681
+ dest.print "ERROR: #{exception.message}, processing file: "
616
682
  begin
617
- STDERR.print filename.encode("utf-8", universal_newline: true)
683
+ dest.print filename.encode("utf-8", universal_newline: true)
618
684
  rescue
619
- STDERR.print "(Unable to encode file name to utf-8) "
620
- STDERR.print filename
685
+ dest.print "(Unable to encode file name to utf-8) "
686
+ dest.print filename
687
+ end
688
+ dest.print "\n"
689
+ dest.flush
690
+ exception.backtrace.each { |line| dest.puts " " + line }
691
+ end
692
+
693
+ def error_text text
694
+ STDERR.puts text
695
+ File.open(@error_log_path, "a") do |f|
696
+ f.puts text
621
697
  end
622
- STDERR.print "\n"
623
- STDERR.flush
624
- exception.backtrace.each { |line| STDERR.puts " " + line }
625
698
  end
626
699
 
627
700
  def print_counters
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.36
4
+ version: 0.0.41
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-17 00:00:00.000000000 Z
11
+ date: 2020-10-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openssl