file-digests 0.0.37 → 0.0.42

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/file-digests +0 -0
  3. data/lib/file-digests.rb +221 -140
  4. metadata +6 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 27e992f2a4849569d6c87e53807ebdc53676b3e47ca2c1efd2799927fd16d0c7
4
- data.tar.gz: 9e292709b7978d906b0423a980cc72b23f8a44c665f070d71f30953ccdc59256
3
+ metadata.gz: c914ef250d4e173e31498c81865f4b43da56f91f81f2f16325f573747758c20b
4
+ data.tar.gz: 55bec69638ec367ca346de5b1d72194daa0bab75972186e0dd3ca4da0a0ceb9e
5
5
  SHA512:
6
- metadata.gz: c80f844d16255d9437c8dd012eff010df23f5a14d87336bf987b34e913a12fa0932fd85e18760884c63d10f882041afecf77b0b354555cef62aadc98de5ba091
7
- data.tar.gz: f5f5c8309b8921f034edeb36643cab34dff52e4091cd5508462294a48e79002dfdbdf09a126988449d1775d207651579ca19ffb7cd169038fe71067dabb5b8af
6
+ metadata.gz: 3963d53565a261db7b7bad2d8d662aa8fe407b92ddca9264afd10f6f9451f6aa1cfeec2825a503fc6b01f18cc85a672e4cf031109a2279e3c2d74d5c74c7cdb2
7
+ data.tar.gz: e6be01d8d1887010387ba5206bcb5937dac395fa5ea05c546076f76998d4603030ba5e781f43c2414e99f20bc241fcc3446721e7b9dca668af0fe7e6cff7fe46
data/bin/file-digests CHANGED
File without changes
data/lib/file-digests.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +19,6 @@ require "digest"
17
19
  require "fileutils"
18
20
  require "openssl"
19
21
  require "optparse"
20
- require "pathname"
21
22
  require "set"
22
23
  require "sqlite3"
23
24
 
@@ -112,6 +113,7 @@ class FileDigests
112
113
 
113
114
  file_digests = self.new ARGV[0], ARGV[1], options
114
115
  file_digests.send(options[:action] || :perform_check)
116
+ file_digests.close_database
115
117
  end
116
118
 
117
119
  def initialize files_path, digest_database_path, options = {}
@@ -138,16 +140,110 @@ class FileDigests
138
140
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
139
141
  end
140
142
 
143
+ def perform_check
144
+ measure_time do
145
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
146
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
147
+
148
+ walk_files(@files_path) do |filename|
149
+ process_file filename
150
+ end
151
+
152
+ nested_transaction do
153
+ puts "Tracking renames..." if @options[:verbose]
154
+ track_renames
155
+ end
156
+
157
+ if any_missing_files?
158
+ if any_exceptions?
159
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
160
+ else
161
+ report_missing_files
162
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
163
+ nested_transaction do
164
+ puts "Removing missing files..." if @options[:verbose]
165
+ remove_missing_files
166
+ end
167
+ end
168
+ end
169
+ end
170
+
171
+ if @new_digest_algorithm && !@options[:test_only]
172
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
173
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
174
+ else
175
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
176
+ digests_update_digests_to_new_digests
177
+ set_metadata "digest_algorithm", @new_digest_algorithm
178
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
179
+ end
180
+ end
181
+
182
+ if any_likely_damaged? || any_exceptions?
183
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
184
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
185
+ end
186
+
187
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
188
+
189
+ print_counters
190
+ end
191
+
192
+ puts "Performing database maintenance..." if @options[:verbose]
193
+ execute "PRAGMA optimize"
194
+ execute "VACUUM"
195
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
196
+ end
197
+ end
198
+
199
+ def show_duplicates
200
+ current_digest = nil
201
+ digests_select_duplicates.each do |found|
202
+ if current_digest != found["digest"]
203
+ puts "" if current_digest
204
+ current_digest = found["digest"]
205
+ puts "#{found["digest"]}:"
206
+ end
207
+ puts " #{found["filename"]}"
208
+ end
209
+ end
210
+
211
+ def close_database
212
+ @statements.each(&:close)
213
+ @db.close
214
+ hide_database_files
215
+ end
216
+
217
+ private
218
+
141
219
  def initialize_paths files_path, digest_database_path
142
- @files_path = cleanup_path(files_path || ".")
220
+ @files_path = realpath(files_path || ".")
221
+
222
+ unless File.directory?(@files_path) && File.readable?(@files_path)
223
+ raise "ERROR: Files path must be a readable directory"
224
+ end
143
225
 
144
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
226
+ @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
145
227
 
146
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
147
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
148
- ensure_dir_exist @digest_database_path.dirname
228
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
229
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
230
+
231
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
232
+
233
+ if File.directory?(@digest_database_path)
234
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
235
+ end
149
236
 
150
- @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
237
+ @digest_database_files = [
238
+ @digest_database_path,
239
+ "#{@digest_database_path}-wal",
240
+ "#{@digest_database_path}-shm"
241
+ ]
242
+
243
+ @skip_files = @digest_database_files + [
244
+ @error_log_path,
245
+ @missing_files_path
246
+ ]
151
247
 
152
248
  if @options[:verbose]
153
249
  puts "Target directory: #{@files_path}"
@@ -156,9 +252,10 @@ class FileDigests
156
252
  end
157
253
 
158
254
  def initialize_database
159
- @db = SQLite3::Database.new @digest_database_path.to_s
255
+ @db = SQLite3::Database.new @digest_database_path
160
256
  @db.results_as_hash = true
161
257
  @db.busy_timeout = 5000
258
+ @statements = []
162
259
 
163
260
  execute "PRAGMA encoding = 'UTF-8'"
164
261
  execute "PRAGMA locking_mode = 'EXCLUSIVE'"
@@ -215,7 +312,7 @@ class FileDigests
215
312
  # Convert database from 1st to 2nd version
216
313
  unless get_metadata("digest_algorithm")
217
314
  if get_metadata("database_version") == "1"
218
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
315
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
219
316
  set_metadata("digest_algorithm", "SHA512")
220
317
  else
221
318
  set_metadata("digest_algorithm", "SHA256")
@@ -228,7 +325,7 @@ class FileDigests
228
325
  execute "CREATE INDEX digests_digest ON digests(digest)"
229
326
  set_metadata "database_version", "3"
230
327
  end
231
-
328
+
232
329
  check_if_database_is_at_certain_version "3"
233
330
 
234
331
  create_temporary_tables
@@ -268,81 +365,83 @@ class FileDigests
268
365
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
269
366
  end
270
367
 
271
- def perform_check
272
- measure_time do
273
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
274
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
368
+ # Files
275
369
 
276
- walk_files do |filename|
277
- process_file filename
278
- end
370
+ def realpath path
371
+ realxpath path, :realpath
372
+ end
279
373
 
280
- nested_transaction do
281
- puts "Tracking renames..." if @options[:verbose]
282
- track_renames
283
- end
374
+ def realdirpath path
375
+ realxpath path, :realdirpath
376
+ end
284
377
 
285
- if any_missing_files?
286
- if any_exceptions?
287
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
288
- else
289
- print_missing_files
290
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
291
- nested_transaction do
292
- puts "Removing missing files..." if @options[:verbose]
293
- remove_missing_files
294
- end
295
- end
296
- end
297
- end
378
+ def realxpath path, method_name
379
+ path = path.encode("utf-8")
298
380
 
299
- if @new_digest_algorithm && !@options[:test_only]
300
- if any_missing_files? || any_likely_damaged? || any_exceptions?
301
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
302
- else
303
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
304
- digests_update_digests_to_new_digests
305
- set_metadata "digest_algorithm", @new_digest_algorithm
306
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
307
- end
308
- end
381
+ if Gem.win_platform?
382
+ path = path.gsub(/\\/, "/")
383
+ end
309
384
 
310
- if any_likely_damaged? || any_exceptions?
311
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
312
- end
385
+ path = File.send(method_name, path).encode("utf-8")
313
386
 
314
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
387
+ if Gem.win_platform? && path[0] == "/"
388
+ path = Dir.pwd[0, 2].encode("utf-8") + path
389
+ end
315
390
 
316
- print_counters
317
- end
318
-
319
- puts "Performing database maintenance..." if @options[:verbose]
320
- execute "PRAGMA optimize"
321
- execute "VACUUM"
322
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
391
+ path
392
+ end
323
393
 
324
- hide_database_files
394
+ def perhaps_nt_path path
395
+ if Gem.win_platform?
396
+ "\\??\\#{path.gsub(/\//,"\\")}"
397
+ else
398
+ path
325
399
  end
326
400
  end
327
401
 
328
- def show_duplicates
329
- current_digest = nil
330
- digests_select_duplicates.each do |found|
331
- if current_digest != found["digest"]
332
- puts "" if current_digest
333
- current_digest = found["digest"]
334
- puts "#{found["digest"]}:"
402
+ def get_file_digest filename
403
+ File.open(filename, "rb") do |io|
404
+ digest = OpenSSL::Digest.new(@digest_algorithm)
405
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
406
+
407
+ buffer = ""
408
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
409
+ digest.update(buffer)
410
+ new_digest.update(buffer) if @new_digest_algorithm
335
411
  end
336
- puts " #{found["filename"]}"
412
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
337
413
  end
338
414
  end
339
415
 
340
- private
416
+ def walk_files(path, &block)
417
+ Dir.each_child(path, encoding: "UTF-8") do |item|
418
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
419
+ begin
420
+ item_perhaps_nt_path = perhaps_nt_path item
421
+
422
+ unless File.symlink? item_perhaps_nt_path
423
+ if File.directory?(item_perhaps_nt_path)
424
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
425
+ walk_files(item, &block)
426
+ else
427
+ yield item
428
+ end
429
+ end
430
+
431
+ rescue => exception
432
+ @counters[:exceptions] += 1
433
+ report_file_exception exception, item
434
+ end
435
+ end
436
+ end
341
437
 
342
438
  def process_file filename
343
- return if File.symlink? filename
439
+ perhaps_nt_filename = perhaps_nt_path filename
440
+
441
+ # Symlinks are already filtered out by walk_files, so the check below is disabled:
442
+ # return if File.symlink? perhaps_nt_filename
344
443
 
345
- stat = File.stat filename
444
+ stat = File.stat perhaps_nt_filename
346
445
 
347
446
  return if stat.blockdev?
348
447
  return if stat.chardev?
@@ -352,23 +451,19 @@ class FileDigests
352
451
 
353
452
  raise "File is not readable" unless stat.readable?
354
453
 
355
- if @digest_database_files.include?(filename)
356
- puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
454
+ if @skip_files.include?(filename)
455
+ puts "SKIPPING FILE: #{filename}" if @options[:verbose]
357
456
  return
358
457
  end
359
458
 
360
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
459
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
361
460
  mtime_string = time_to_database stat.mtime
362
- digest, new_digest = get_file_digest(filename)
461
+ digest, new_digest = get_file_digest(perhaps_nt_filename)
363
462
 
364
463
  nested_transaction do
365
464
  new_digests_insert(normalized_filename, new_digest) if new_digest
366
465
  process_file_indeed normalized_filename, mtime_string, digest
367
466
  end
368
-
369
- rescue => exception
370
- @counters[:exceptions] += 1
371
- print_file_exception exception, filename
372
467
  end
373
468
 
374
469
  def process_file_indeed filename, mtime, digest
@@ -394,7 +489,7 @@ class FileDigests
394
489
  else
395
490
  if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
396
491
  @counters[:likely_damaged] += 1
397
- STDERR.puts "LIKELY DAMAGED: #{filename}"
492
+ error_text "LIKELY DAMAGED: #{filename}"
398
493
  else
399
494
  @counters[:updated] += 1
400
495
  puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
@@ -424,10 +519,20 @@ class FileDigests
424
519
  @counters[:renamed] = @db.changes
425
520
  end
426
521
 
427
- def print_missing_files
522
+ def report_missing_files
428
523
  puts "\nMISSING FILES:"
524
+ write_missing_files STDOUT
525
+ if missing_files_count > 256
526
+ File.open(@missing_files_path, "a") do |f|
527
+ write_missing_files f
528
+ end
529
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
530
+ end
531
+ end
532
+
533
+ def write_missing_files dest
429
534
  missing_files_select_all_filenames.each do |record|
430
- puts record["filename"]
535
+ dest.puts record["filename"]
431
536
  end
432
537
  end
433
538
 
@@ -445,6 +550,17 @@ class FileDigests
445
550
  end
446
551
 
447
552
 
553
+ # Runtime state helpers
554
+
555
+ def any_exceptions?
556
+ @counters[:exceptions] > 0
557
+ end
558
+
559
+ def any_likely_damaged?
560
+ @counters[:likely_damaged] > 0
561
+ end
562
+
563
+
448
564
  # Database helpers
449
565
 
450
566
  def execute *args, &block
@@ -485,7 +601,10 @@ class FileDigests
485
601
  def prepare_method name, query
486
602
  variable = "@#{name}"
487
603
 
488
- instance_variable_set(variable, @db.prepare(query))
604
+ statement = @db.prepare(query)
605
+ @statements.push(statement)
606
+
607
+ instance_variable_set(variable, statement)
489
608
 
490
609
  define_singleton_method name do |*args, &block|
491
610
  instance_variable_get(variable).execute(*args, &block)
@@ -530,7 +649,7 @@ class FileDigests
530
649
  def check_if_database_is_at_certain_version target_version
531
650
  current_version = get_metadata("database_version")
532
651
  if current_version != target_version
533
- STDERR.puts "This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
652
+ STDERR.puts "ERROR: This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
534
653
  raise "Incompatible database version"
535
654
  end
536
655
  end
@@ -540,58 +659,6 @@ class FileDigests
540
659
  end
541
660
 
542
661
 
543
- # Filesystem-related helpers
544
-
545
- def patch_path_string path
546
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
547
- end
548
-
549
- def cleanup_path path
550
- Pathname.new(patch_path_string(path)).cleanpath
551
- end
552
-
553
- def ensure_dir_exist path
554
- if File.exist?(path)
555
- unless File.directory?(path)
556
- raise "#{path} is not a directory"
557
- end
558
- else
559
- FileUtils.mkdir_p path
560
- end
561
- end
562
-
563
- def walk_files
564
- puts "Gathering the list of files..." if @options[:verbose]
565
- Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
566
- yield filename
567
- end
568
- end
569
-
570
- def get_file_digest filename
571
- File.open(filename, "rb") do |io|
572
- digest = OpenSSL::Digest.new(@digest_algorithm)
573
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
574
-
575
- buffer = ""
576
- while io.read(409600, buffer) # 409600 seems like a sweet spot
577
- digest.update(buffer)
578
- new_digest.update(buffer) if @new_digest_algorithm
579
- end
580
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
581
- end
582
- end
583
-
584
-
585
- # Runtime state helpers
586
-
587
- def any_exceptions?
588
- @counters[:exceptions] > 0
589
- end
590
-
591
- def any_likely_damaged?
592
- @counters[:likely_damaged] > 0
593
- end
594
-
595
662
  # UI helpers
596
663
 
597
664
  def confirm text
@@ -611,17 +678,31 @@ class FileDigests
611
678
  puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
612
679
  end
613
680
 
614
- def print_file_exception exception, filename
615
- STDERR.print "EXCEPTION: #{exception.message}, processing file: "
681
+ def report_file_exception exception, filename
682
+ write_file_exception STDERR, exception, filename
683
+ File.open(@error_log_path, "a") do |f|
684
+ write_file_exception f, exception, filename
685
+ end
686
+ end
687
+
688
+ def write_file_exception dest, exception, filename
689
+ dest.print "ERROR: #{exception.message}, processing file: "
616
690
  begin
617
- STDERR.print filename.encode("utf-8", universal_newline: true)
691
+ dest.print filename.encode("utf-8", universal_newline: true)
618
692
  rescue
619
- STDERR.print "(Unable to encode file name to utf-8) "
620
- STDERR.print filename
693
+ dest.print "(Unable to encode file name to utf-8) "
694
+ dest.print filename
695
+ end
696
+ dest.print "\n"
697
+ dest.flush
698
+ exception.backtrace.each { |line| dest.puts " " + line }
699
+ end
700
+
701
+ def error_text text
702
+ STDERR.puts text
703
+ File.open(@error_log_path, "a") do |f|
704
+ f.puts text
621
705
  end
622
- STDERR.print "\n"
623
- STDERR.flush
624
- exception.backtrace.each { |line| STDERR.puts " " + line }
625
706
  end
626
707
 
627
708
  def print_counters
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.37
4
+ version: 0.0.42
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-17 00:00:00.000000000 Z
11
+ date: 2021-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openssl
@@ -51,7 +51,7 @@ homepage: https://github.com/senotrusov/file-digests
51
51
  licenses:
52
52
  - Apache-2.0
53
53
  metadata: {}
54
- post_install_message:
54
+ post_install_message:
55
55
  rdoc_options: []
56
56
  require_paths:
57
57
  - lib
@@ -66,8 +66,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  requirements: []
69
- rubygems_version: 3.1.2
70
- signing_key:
69
+ rubygems_version: 3.1.4
70
+ signing_key:
71
71
  specification_version: 4
72
72
  summary: file-digests
73
73
  test_files: []