file-digests 0.0.37 → 0.0.42

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/file-digests +0 -0
  3. data/lib/file-digests.rb +221 -140
  4. metadata +6 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 27e992f2a4849569d6c87e53807ebdc53676b3e47ca2c1efd2799927fd16d0c7
4
- data.tar.gz: 9e292709b7978d906b0423a980cc72b23f8a44c665f070d71f30953ccdc59256
3
+ metadata.gz: c914ef250d4e173e31498c81865f4b43da56f91f81f2f16325f573747758c20b
4
+ data.tar.gz: 55bec69638ec367ca346de5b1d72194daa0bab75972186e0dd3ca4da0a0ceb9e
5
5
  SHA512:
6
- metadata.gz: c80f844d16255d9437c8dd012eff010df23f5a14d87336bf987b34e913a12fa0932fd85e18760884c63d10f882041afecf77b0b354555cef62aadc98de5ba091
7
- data.tar.gz: f5f5c8309b8921f034edeb36643cab34dff52e4091cd5508462294a48e79002dfdbdf09a126988449d1775d207651579ca19ffb7cd169038fe71067dabb5b8af
6
+ metadata.gz: 3963d53565a261db7b7bad2d8d662aa8fe407b92ddca9264afd10f6f9451f6aa1cfeec2825a503fc6b01f18cc85a672e4cf031109a2279e3c2d74d5c74c7cdb2
7
+ data.tar.gz: e6be01d8d1887010387ba5206bcb5937dac395fa5ea05c546076f76998d4603030ba5e781f43c2414e99f20bc241fcc3446721e7b9dca668af0fe7e6cff7fe46
data/bin/file-digests CHANGED
File without changes
data/lib/file-digests.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +19,6 @@ require "digest"
17
19
  require "fileutils"
18
20
  require "openssl"
19
21
  require "optparse"
20
- require "pathname"
21
22
  require "set"
22
23
  require "sqlite3"
23
24
 
@@ -112,6 +113,7 @@ class FileDigests
112
113
 
113
114
  file_digests = self.new ARGV[0], ARGV[1], options
114
115
  file_digests.send(options[:action] || :perform_check)
116
+ file_digests.close_database
115
117
  end
116
118
 
117
119
  def initialize files_path, digest_database_path, options = {}
@@ -138,16 +140,110 @@ class FileDigests
138
140
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
139
141
  end
140
142
 
143
+ def perform_check
144
+ measure_time do
145
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
146
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
147
+
148
+ walk_files(@files_path) do |filename|
149
+ process_file filename
150
+ end
151
+
152
+ nested_transaction do
153
+ puts "Tracking renames..." if @options[:verbose]
154
+ track_renames
155
+ end
156
+
157
+ if any_missing_files?
158
+ if any_exceptions?
159
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
160
+ else
161
+ report_missing_files
162
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
163
+ nested_transaction do
164
+ puts "Removing missing files..." if @options[:verbose]
165
+ remove_missing_files
166
+ end
167
+ end
168
+ end
169
+ end
170
+
171
+ if @new_digest_algorithm && !@options[:test_only]
172
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
173
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
174
+ else
175
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
176
+ digests_update_digests_to_new_digests
177
+ set_metadata "digest_algorithm", @new_digest_algorithm
178
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
179
+ end
180
+ end
181
+
182
+ if any_likely_damaged? || any_exceptions?
183
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
184
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
185
+ end
186
+
187
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
188
+
189
+ print_counters
190
+ end
191
+
192
+ puts "Performing database maintenance..." if @options[:verbose]
193
+ execute "PRAGMA optimize"
194
+ execute "VACUUM"
195
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
196
+ end
197
+ end
198
+
199
+ def show_duplicates
200
+ current_digest = nil
201
+ digests_select_duplicates.each do |found|
202
+ if current_digest != found["digest"]
203
+ puts "" if current_digest
204
+ current_digest = found["digest"]
205
+ puts "#{found["digest"]}:"
206
+ end
207
+ puts " #{found["filename"]}"
208
+ end
209
+ end
210
+
211
+ def close_database
212
+ @statements.each(&:close)
213
+ @db.close
214
+ hide_database_files
215
+ end
216
+
217
+ private
218
+
141
219
  def initialize_paths files_path, digest_database_path
142
- @files_path = cleanup_path(files_path || ".")
220
+ @files_path = realpath(files_path || ".")
221
+
222
+ unless File.directory?(@files_path) && File.readable?(@files_path)
223
+ raise "ERROR: Files path must be a readable directory"
224
+ end
143
225
 
144
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
226
+ @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
145
227
 
146
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
147
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
148
- ensure_dir_exist @digest_database_path.dirname
228
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
229
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
230
+
231
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
232
+
233
+ if File.directory?(@digest_database_path)
234
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
235
+ end
149
236
 
150
- @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
237
+ @digest_database_files = [
238
+ @digest_database_path,
239
+ "#{@digest_database_path}-wal",
240
+ "#{@digest_database_path}-shm"
241
+ ]
242
+
243
+ @skip_files = @digest_database_files + [
244
+ @error_log_path,
245
+ @missing_files_path
246
+ ]
151
247
 
152
248
  if @options[:verbose]
153
249
  puts "Target directory: #{@files_path}"
@@ -156,9 +252,10 @@ class FileDigests
156
252
  end
157
253
 
158
254
  def initialize_database
159
- @db = SQLite3::Database.new @digest_database_path.to_s
255
+ @db = SQLite3::Database.new @digest_database_path
160
256
  @db.results_as_hash = true
161
257
  @db.busy_timeout = 5000
258
+ @statements = []
162
259
 
163
260
  execute "PRAGMA encoding = 'UTF-8'"
164
261
  execute "PRAGMA locking_mode = 'EXCLUSIVE'"
@@ -215,7 +312,7 @@ class FileDigests
215
312
  # Convert database from 1st to 2nd version
216
313
  unless get_metadata("digest_algorithm")
217
314
  if get_metadata("database_version") == "1"
218
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
315
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
219
316
  set_metadata("digest_algorithm", "SHA512")
220
317
  else
221
318
  set_metadata("digest_algorithm", "SHA256")
@@ -228,7 +325,7 @@ class FileDigests
228
325
  execute "CREATE INDEX digests_digest ON digests(digest)"
229
326
  set_metadata "database_version", "3"
230
327
  end
231
-
328
+
232
329
  check_if_database_is_at_certain_version "3"
233
330
 
234
331
  create_temporary_tables
@@ -268,81 +365,83 @@ class FileDigests
268
365
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
269
366
  end
270
367
 
271
- def perform_check
272
- measure_time do
273
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
274
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
368
+ # Files
275
369
 
276
- walk_files do |filename|
277
- process_file filename
278
- end
370
+ def realpath path
371
+ realxpath path, :realpath
372
+ end
279
373
 
280
- nested_transaction do
281
- puts "Tracking renames..." if @options[:verbose]
282
- track_renames
283
- end
374
+ def realdirpath path
375
+ realxpath path, :realdirpath
376
+ end
284
377
 
285
- if any_missing_files?
286
- if any_exceptions?
287
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
288
- else
289
- print_missing_files
290
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
291
- nested_transaction do
292
- puts "Removing missing files..." if @options[:verbose]
293
- remove_missing_files
294
- end
295
- end
296
- end
297
- end
378
+ def realxpath path, method_name
379
+ path = path.encode("utf-8")
298
380
 
299
- if @new_digest_algorithm && !@options[:test_only]
300
- if any_missing_files? || any_likely_damaged? || any_exceptions?
301
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
302
- else
303
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
304
- digests_update_digests_to_new_digests
305
- set_metadata "digest_algorithm", @new_digest_algorithm
306
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
307
- end
308
- end
381
+ if Gem.win_platform?
382
+ path = path.gsub(/\\/, "/")
383
+ end
309
384
 
310
- if any_likely_damaged? || any_exceptions?
311
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
312
- end
385
+ path = File.send(method_name, path).encode("utf-8")
313
386
 
314
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
387
+ if Gem.win_platform? && path[0] == "/"
388
+ path = Dir.pwd[0, 2].encode("utf-8") + path
389
+ end
315
390
 
316
- print_counters
317
- end
318
-
319
- puts "Performing database maintenance..." if @options[:verbose]
320
- execute "PRAGMA optimize"
321
- execute "VACUUM"
322
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
391
+ path
392
+ end
323
393
 
324
- hide_database_files
394
+ def perhaps_nt_path path
395
+ if Gem.win_platform?
396
+ "\\??\\#{path.gsub(/\//,"\\")}"
397
+ else
398
+ path
325
399
  end
326
400
  end
327
401
 
328
- def show_duplicates
329
- current_digest = nil
330
- digests_select_duplicates.each do |found|
331
- if current_digest != found["digest"]
332
- puts "" if current_digest
333
- current_digest = found["digest"]
334
- puts "#{found["digest"]}:"
402
+ def get_file_digest filename
403
+ File.open(filename, "rb") do |io|
404
+ digest = OpenSSL::Digest.new(@digest_algorithm)
405
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
406
+
407
+ buffer = ""
408
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
409
+ digest.update(buffer)
410
+ new_digest.update(buffer) if @new_digest_algorithm
335
411
  end
336
- puts " #{found["filename"]}"
412
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
337
413
  end
338
414
  end
339
415
 
340
- private
416
+ def walk_files(path, &block)
417
+ Dir.each_child(path, encoding: "UTF-8") do |item|
418
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
419
+ begin
420
+ item_perhaps_nt_path = perhaps_nt_path item
421
+
422
+ unless File.symlink? item_perhaps_nt_path
423
+ if File.directory?(item_perhaps_nt_path)
424
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
425
+ walk_files(item, &block)
426
+ else
427
+ yield item
428
+ end
429
+ end
430
+
431
+ rescue => exception
432
+ @counters[:exceptions] += 1
433
+ report_file_exception exception, item
434
+ end
435
+ end
436
+ end
341
437
 
342
438
  def process_file filename
343
- return if File.symlink? filename
439
+ perhaps_nt_filename = perhaps_nt_path filename
440
+
441
+ # this is checked in the walk_files
442
+ # return if File.symlink? perhaps_nt_filename
344
443
 
345
- stat = File.stat filename
444
+ stat = File.stat perhaps_nt_filename
346
445
 
347
446
  return if stat.blockdev?
348
447
  return if stat.chardev?
@@ -352,23 +451,19 @@ class FileDigests
352
451
 
353
452
  raise "File is not readable" unless stat.readable?
354
453
 
355
- if @digest_database_files.include?(filename)
356
- puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
454
+ if @skip_files.include?(filename)
455
+ puts "SKIPPING FILE: #{filename}" if @options[:verbose]
357
456
  return
358
457
  end
359
458
 
360
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
459
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
361
460
  mtime_string = time_to_database stat.mtime
362
- digest, new_digest = get_file_digest(filename)
461
+ digest, new_digest = get_file_digest(perhaps_nt_filename)
363
462
 
364
463
  nested_transaction do
365
464
  new_digests_insert(normalized_filename, new_digest) if new_digest
366
465
  process_file_indeed normalized_filename, mtime_string, digest
367
466
  end
368
-
369
- rescue => exception
370
- @counters[:exceptions] += 1
371
- print_file_exception exception, filename
372
467
  end
373
468
 
374
469
  def process_file_indeed filename, mtime, digest
@@ -394,7 +489,7 @@ class FileDigests
394
489
  else
395
490
  if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
396
491
  @counters[:likely_damaged] += 1
397
- STDERR.puts "LIKELY DAMAGED: #{filename}"
492
+ error_text "LIKELY DAMAGED: #{filename}"
398
493
  else
399
494
  @counters[:updated] += 1
400
495
  puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
@@ -424,10 +519,20 @@ class FileDigests
424
519
  @counters[:renamed] = @db.changes
425
520
  end
426
521
 
427
- def print_missing_files
522
+ def report_missing_files
428
523
  puts "\nMISSING FILES:"
524
+ write_missing_files STDOUT
525
+ if missing_files_count > 256
526
+ File.open(@missing_files_path, "a") do |f|
527
+ write_missing_files f
528
+ end
529
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
530
+ end
531
+ end
532
+
533
+ def write_missing_files dest
429
534
  missing_files_select_all_filenames.each do |record|
430
- puts record["filename"]
535
+ dest.puts record["filename"]
431
536
  end
432
537
  end
433
538
 
@@ -445,6 +550,17 @@ class FileDigests
445
550
  end
446
551
 
447
552
 
553
+ # Runtime state helpers
554
+
555
+ def any_exceptions?
556
+ @counters[:exceptions] > 0
557
+ end
558
+
559
+ def any_likely_damaged?
560
+ @counters[:likely_damaged] > 0
561
+ end
562
+
563
+
448
564
  # Database helpers
449
565
 
450
566
  def execute *args, &block
@@ -485,7 +601,10 @@ class FileDigests
485
601
  def prepare_method name, query
486
602
  variable = "@#{name}"
487
603
 
488
- instance_variable_set(variable, @db.prepare(query))
604
+ statement = @db.prepare(query)
605
+ @statements.push(statement)
606
+
607
+ instance_variable_set(variable, statement)
489
608
 
490
609
  define_singleton_method name do |*args, &block|
491
610
  instance_variable_get(variable).execute(*args, &block)
@@ -530,7 +649,7 @@ class FileDigests
530
649
  def check_if_database_is_at_certain_version target_version
531
650
  current_version = get_metadata("database_version")
532
651
  if current_version != target_version
533
- STDERR.puts "This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
652
+ STDERR.puts "ERROR: This version of file-digests (#{FileDigests::VERSION || "unknown"}) is only compartible with the database version #{target_version}. Current database version is #{current_version}. To use this database, please install appropriate version if file-digest."
534
653
  raise "Incompatible database version"
535
654
  end
536
655
  end
@@ -540,58 +659,6 @@ class FileDigests
540
659
  end
541
660
 
542
661
 
543
- # Filesystem-related helpers
544
-
545
- def patch_path_string path
546
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
547
- end
548
-
549
- def cleanup_path path
550
- Pathname.new(patch_path_string(path)).cleanpath
551
- end
552
-
553
- def ensure_dir_exist path
554
- if File.exist?(path)
555
- unless File.directory?(path)
556
- raise "#{path} is not a directory"
557
- end
558
- else
559
- FileUtils.mkdir_p path
560
- end
561
- end
562
-
563
- def walk_files
564
- puts "Gathering the list of files..." if @options[:verbose]
565
- Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
566
- yield filename
567
- end
568
- end
569
-
570
- def get_file_digest filename
571
- File.open(filename, "rb") do |io|
572
- digest = OpenSSL::Digest.new(@digest_algorithm)
573
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
574
-
575
- buffer = ""
576
- while io.read(409600, buffer) # 409600 seems like a sweet spot
577
- digest.update(buffer)
578
- new_digest.update(buffer) if @new_digest_algorithm
579
- end
580
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
581
- end
582
- end
583
-
584
-
585
- # Runtime state helpers
586
-
587
- def any_exceptions?
588
- @counters[:exceptions] > 0
589
- end
590
-
591
- def any_likely_damaged?
592
- @counters[:likely_damaged] > 0
593
- end
594
-
595
662
  # UI helpers
596
663
 
597
664
  def confirm text
@@ -611,17 +678,31 @@ class FileDigests
611
678
  puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
612
679
  end
613
680
 
614
- def print_file_exception exception, filename
615
- STDERR.print "EXCEPTION: #{exception.message}, processing file: "
681
+ def report_file_exception exception, filename
682
+ write_file_exception STDERR, exception, filename
683
+ File.open(@error_log_path, "a") do |f|
684
+ write_file_exception f, exception, filename
685
+ end
686
+ end
687
+
688
+ def write_file_exception dest, exception, filename
689
+ dest.print "ERROR: #{exception.message}, processing file: "
616
690
  begin
617
- STDERR.print filename.encode("utf-8", universal_newline: true)
691
+ dest.print filename.encode("utf-8", universal_newline: true)
618
692
  rescue
619
- STDERR.print "(Unable to encode file name to utf-8) "
620
- STDERR.print filename
693
+ dest.print "(Unable to encode file name to utf-8) "
694
+ dest.print filename
695
+ end
696
+ dest.print "\n"
697
+ dest.flush
698
+ exception.backtrace.each { |line| dest.puts " " + line }
699
+ end
700
+
701
+ def error_text text
702
+ STDERR.puts text
703
+ File.open(@error_log_path, "a") do |f|
704
+ f.puts text
621
705
  end
622
- STDERR.print "\n"
623
- STDERR.flush
624
- exception.backtrace.each { |line| STDERR.puts " " + line }
625
706
  end
626
707
 
627
708
  def print_counters
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.37
4
+ version: 0.0.42
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-17 00:00:00.000000000 Z
11
+ date: 2021-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openssl
@@ -51,7 +51,7 @@ homepage: https://github.com/senotrusov/file-digests
51
51
  licenses:
52
52
  - Apache-2.0
53
53
  metadata: {}
54
- post_install_message:
54
+ post_install_message:
55
55
  rdoc_options: []
56
56
  require_paths:
57
57
  - lib
@@ -66,8 +66,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  requirements: []
69
- rubygems_version: 3.1.2
70
- signing_key:
69
+ rubygems_version: 3.1.4
70
+ signing_key:
71
71
  specification_version: 4
72
72
  summary: file-digests
73
73
  test_files: []