file-digests 0.0.40 → 0.0.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +175 -167
  3. metadata +10 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9d040599aee9aeb62234557b2a92edec36265e4439965e4c9d00a2c9afa117a
4
- data.tar.gz: 992fe8a843afe761537a3c48b1ecde27ccdf8101be58dd4b456572c59232141b
3
+ metadata.gz: e74595cb920bd6b52f8f0a0efab118afae509dc5406389d5203518ce801f2811
4
+ data.tar.gz: ad4429b2d998dc82c441cb5ebd9b9f3640158f20abe0839ead0f0de57037854b
5
5
  SHA512:
6
- metadata.gz: 1e15aa584690f8062a51cbb4785c9716e610258fba77859cb2b50642154701ffa1651312be1f6455921ee435ed7df5ed00bcc36f12def4009ac7edcc4aebb93e
7
- data.tar.gz: 5c94ae4165677af0be7b790a7a92ffc9af7751f0dd2661bf238d06b02569ef4ec746908721c7d16781d11a431048759e1490f9e180ea9c2e647f5e585a3ebbe0
6
+ metadata.gz: 8c78ceabcbba358c5be5222681ef45d25acef175f4840022997d652c1740ad1c66a461b8ff2e618bb48a80f97783f7afa57d4281c41d7251c3aa94b4641b5079
7
+ data.tar.gz: b83b60f4d66e5a57ed7f5f44a6f997c6c47e5df38686a63459fcf7943dd95cda8f9e7cb41a717e071ec5cfef14459b201afc9f5e18f990364f2a7654566af351
data/lib/file-digests.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +19,6 @@ require "digest"
17
19
  require "fileutils"
18
20
  require "openssl"
19
21
  require "optparse"
20
- require "pathname"
21
22
  require "set"
22
23
  require "sqlite3"
23
24
 
@@ -112,6 +113,7 @@ class FileDigests
112
113
 
113
114
  file_digests = self.new ARGV[0], ARGV[1], options
114
115
  file_digests.send(options[:action] || :perform_check)
116
+ file_digests.close_database
115
117
  end
116
118
 
117
119
  def initialize files_path, digest_database_path, options = {}
@@ -138,29 +140,109 @@ class FileDigests
138
140
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
139
141
  end
140
142
 
143
+ def perform_check
144
+ measure_time do
145
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
146
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
147
+
148
+ walk_files(@files_path) do |filename|
149
+ process_file filename
150
+ end
151
+
152
+ nested_transaction do
153
+ puts "Tracking renames..." if @options[:verbose]
154
+ track_renames
155
+ end
156
+
157
+ if any_missing_files?
158
+ if any_exceptions?
159
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
160
+ else
161
+ report_missing_files
162
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
163
+ nested_transaction do
164
+ puts "Removing missing files..." if @options[:verbose]
165
+ remove_missing_files
166
+ end
167
+ end
168
+ end
169
+ end
170
+
171
+ if @new_digest_algorithm && !@options[:test_only]
172
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
173
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
174
+ else
175
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
176
+ digests_update_digests_to_new_digests
177
+ set_metadata "digest_algorithm", @new_digest_algorithm
178
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
179
+ end
180
+ end
181
+
182
+ if any_likely_damaged? || any_exceptions?
183
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
184
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
185
+ end
186
+
187
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
188
+
189
+ print_counters
190
+ end
191
+
192
+ puts "Performing database maintenance..." if @options[:verbose]
193
+ execute "PRAGMA optimize"
194
+ execute "VACUUM"
195
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
196
+ end
197
+ end
198
+
199
+ def show_duplicates
200
+ current_digest = nil
201
+ digests_select_duplicates.each do |found|
202
+ if current_digest != found["digest"]
203
+ puts "" if current_digest
204
+ current_digest = found["digest"]
205
+ puts "#{found["digest"]}:"
206
+ end
207
+ puts " #{found["filename"]}"
208
+ end
209
+ end
210
+
211
+ def close_database
212
+ @statements.each(&:close)
213
+ @db.close
214
+ hide_database_files
215
+ end
216
+
217
+ private
218
+
141
219
  def initialize_paths files_path, digest_database_path
220
+ @files_path = realpath(files_path || ".")
221
+
222
+ unless File.directory?(@files_path) && File.readable?(@files_path)
223
+ raise "ERROR: Files path must be a readable directory"
224
+ end
225
+
142
226
  @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
143
- @files_path = cleanup_path(files_path || ".")
144
- raise "ERROR: Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
145
- @files_path = realpath_with_disk @files_path
146
-
147
- @error_log_path = @files_path + "file-digests errors #{@start_time_filename_string}.txt"
148
- @missing_files_path = @files_path + "file-digests missing files #{@start_time_filename_string}.txt"
149
-
150
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
151
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
152
- ensure_dir_exist @digest_database_path.dirname
153
- @digest_database_path = realdirpath_with_disk @digest_database_path
154
-
227
+
228
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
229
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
230
+
231
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
232
+
233
+ if File.directory?(@digest_database_path)
234
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
235
+ end
236
+
155
237
  @digest_database_files = [
156
- "#{@digest_database_path}",
238
+ @digest_database_path,
157
239
  "#{@digest_database_path}-wal",
158
240
  "#{@digest_database_path}-shm"
159
241
  ]
160
242
 
161
243
  @skip_files = @digest_database_files + [
162
- @error_log_path.to_s,
163
- @missing_files_path.to_s
244
+ @error_log_path,
245
+ @missing_files_path
164
246
  ]
165
247
 
166
248
  if @options[:verbose]
@@ -170,9 +252,10 @@ class FileDigests
170
252
  end
171
253
 
172
254
  def initialize_database
173
- @db = SQLite3::Database.new @digest_database_path.to_s
255
+ @db = SQLite3::Database.new @digest_database_path
174
256
  @db.results_as_hash = true
175
257
  @db.busy_timeout = 5000
258
+ @statements = []
176
259
 
177
260
  execute "PRAGMA encoding = 'UTF-8'"
178
261
  execute "PRAGMA locking_mode = 'EXCLUSIVE'"
@@ -229,7 +312,7 @@ class FileDigests
229
312
  # Convert database from 1st to 2nd version
230
313
  unless get_metadata("digest_algorithm")
231
314
  if get_metadata("database_version") == "1"
232
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
315
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
233
316
  set_metadata("digest_algorithm", "SHA512")
234
317
  else
235
318
  set_metadata("digest_algorithm", "SHA256")
@@ -242,7 +325,7 @@ class FileDigests
242
325
  execute "CREATE INDEX digests_digest ON digests(digest)"
243
326
  set_metadata "database_version", "3"
244
327
  end
245
-
328
+
246
329
  check_if_database_is_at_certain_version "3"
247
330
 
248
331
  create_temporary_tables
@@ -282,81 +365,81 @@ class FileDigests
282
365
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
283
366
  end
284
367
 
285
- def perform_check
286
- measure_time do
287
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
288
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
368
+ # Files
289
369
 
290
- walk_files(@files_path.to_s) do |filename|
291
- process_file filename
292
- end
370
+ def realpath path
371
+ realxpath path, :realpath
372
+ end
293
373
 
294
- nested_transaction do
295
- puts "Tracking renames..." if @options[:verbose]
296
- track_renames
297
- end
374
+ def realdirpath path
375
+ realxpath path, :realdirpath
376
+ end
298
377
 
299
- if any_missing_files?
300
- if any_exceptions?
301
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
302
- else
303
- report_missing_files
304
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
305
- nested_transaction do
306
- puts "Removing missing files..." if @options[:verbose]
307
- remove_missing_files
308
- end
309
- end
310
- end
311
- end
378
+ def realxpath path, method_name
379
+ path = path.encode("utf-8")
312
380
 
313
- if @new_digest_algorithm && !@options[:test_only]
314
- if any_missing_files? || any_likely_damaged? || any_exceptions?
315
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
316
- else
317
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
318
- digests_update_digests_to_new_digests
319
- set_metadata "digest_algorithm", @new_digest_algorithm
320
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
321
- end
322
- end
381
+ if Gem.win_platform?
382
+ path = path.gsub(/\\/, "/")
383
+ end
323
384
 
324
- if any_likely_damaged? || any_exceptions?
325
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
326
- end
385
+ path = File.send(method_name, path).encode("utf-8")
327
386
 
328
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
387
+ if Gem.win_platform? && path[0] == "/"
388
+ path = Dir.pwd[0, 2].encode("utf-8") + path
389
+ end
329
390
 
330
- print_counters
331
- end
332
-
333
- puts "Performing database maintenance..." if @options[:verbose]
334
- execute "PRAGMA optimize"
335
- execute "VACUUM"
336
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
391
+ path
392
+ end
337
393
 
338
- hide_database_files
394
+ def perhaps_nt_path path
395
+ if Gem.win_platform?
396
+ "\\??\\#{path.gsub(/\//,"\\")}"
397
+ else
398
+ path
339
399
  end
340
400
  end
341
401
 
342
- def show_duplicates
343
- current_digest = nil
344
- digests_select_duplicates.each do |found|
345
- if current_digest != found["digest"]
346
- puts "" if current_digest
347
- current_digest = found["digest"]
348
- puts "#{found["digest"]}:"
402
+ def get_file_digest filename
403
+ File.open(filename, "rb") do |io|
404
+ digest = OpenSSL::Digest.new(@digest_algorithm)
405
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
406
+
407
+ buffer = ""
408
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
409
+ digest.update(buffer)
410
+ new_digest.update(buffer) if @new_digest_algorithm
349
411
  end
350
- puts " #{found["filename"]}"
412
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
351
413
  end
352
414
  end
353
415
 
354
- private
416
+ def walk_files(path, &block)
417
+ Dir.each_child(path, encoding: "UTF-8") do |item|
418
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
419
+ begin
420
+ item_perhaps_nt_path = perhaps_nt_path item
421
+
422
+ unless File.symlink? item_perhaps_nt_path
423
+ if File.directory?(item_perhaps_nt_path)
424
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
425
+ walk_files(item, &block)
426
+ else
427
+ yield item
428
+ end
429
+ end
430
+
431
+ rescue => exception
432
+ @counters[:exceptions] += 1
433
+ report_file_exception exception, item
434
+ end
435
+ end
436
+ end
355
437
 
356
438
  def process_file filename
357
439
  perhaps_nt_filename = perhaps_nt_path filename
358
440
 
359
- return if File.symlink? perhaps_nt_filename
441
+ # this is checked in the walk_files
442
+ # return if File.symlink? perhaps_nt_filename
360
443
 
361
444
  stat = File.stat perhaps_nt_filename
362
445
 
@@ -373,7 +456,7 @@ class FileDigests
373
456
  return
374
457
  end
375
458
 
376
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
459
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
377
460
  mtime_string = time_to_database stat.mtime
378
461
  digest, new_digest = get_file_digest(perhaps_nt_filename)
379
462
 
@@ -443,6 +526,7 @@ class FileDigests
443
526
  File.open(@missing_files_path, "a") do |f|
444
527
  write_missing_files f
445
528
  end
529
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
446
530
  end
447
531
  end
448
532
 
@@ -466,6 +550,17 @@ class FileDigests
466
550
  end
467
551
 
468
552
 
553
+ # Runtime state helpers
554
+
555
+ def any_exceptions?
556
+ @counters[:exceptions] > 0
557
+ end
558
+
559
+ def any_likely_damaged?
560
+ @counters[:likely_damaged] > 0
561
+ end
562
+
563
+
469
564
  # Database helpers
470
565
 
471
566
  def execute *args, &block
@@ -506,7 +601,10 @@ class FileDigests
506
601
  def prepare_method name, query
507
602
  variable = "@#{name}"
508
603
 
509
- instance_variable_set(variable, @db.prepare(query))
604
+ statement = @db.prepare(query)
605
+ @statements.push(statement)
606
+
607
+ instance_variable_set(variable, statement)
510
608
 
511
609
  define_singleton_method name do |*args, &block|
512
610
  instance_variable_get(variable).execute(*args, &block)
@@ -561,96 +659,6 @@ class FileDigests
561
659
  end
562
660
 
563
661
 
564
- # Filesystem-related helpers
565
-
566
- def realpath_with_disk path
567
- path = path.realpath
568
- if Gem.win_platform? && path.to_s[0] == "/"
569
- return Pathname(Dir.pwd[0, 2] + path.to_s)
570
- end
571
- path
572
- end
573
-
574
- def realdirpath_with_disk path
575
- path = path.realdirpath
576
- if Gem.win_platform? && path.to_s[0] == "/"
577
- return Pathname(Dir.pwd[0, 2] + path.to_s)
578
- end
579
- path
580
- end
581
-
582
- def patch_path_string path
583
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
584
- end
585
-
586
- def cleanup_path path
587
- Pathname.new(patch_path_string(path)).cleanpath
588
- end
589
-
590
- def ensure_dir_exist path
591
- if File.exist?(path)
592
- unless File.directory?(path)
593
- raise "#{path} is not a directory"
594
- end
595
- else
596
- FileUtils.mkdir_p path
597
- end
598
- end
599
-
600
- def walk_files(path, &block)
601
- Dir.each_child(path, encoding: "UTF-8") do |item|
602
- item = "#{path}#{File::SEPARATOR}#{item}"
603
- begin
604
- item_perhaps_nt_path = perhaps_nt_path item
605
-
606
- unless File.symlink? item_perhaps_nt_path
607
- if File.directory?(item_perhaps_nt_path)
608
- raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
609
- walk_files(item, &block)
610
- else
611
- yield item
612
- end
613
- end
614
- rescue => exception
615
- @counters[:exceptions] += 1
616
- report_file_exception exception, item
617
- end
618
- end
619
- end
620
-
621
- def perhaps_nt_path path
622
- if Gem.win_platform?
623
- "\\??\\#{path.gsub(/\//,"\\")}"
624
- else
625
- path
626
- end
627
- end
628
-
629
- def get_file_digest filename
630
- File.open(filename, "rb") do |io|
631
- digest = OpenSSL::Digest.new(@digest_algorithm)
632
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
633
-
634
- buffer = ""
635
- while io.read(409600, buffer) # 409600 seems like a sweet spot
636
- digest.update(buffer)
637
- new_digest.update(buffer) if @new_digest_algorithm
638
- end
639
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
640
- end
641
- end
642
-
643
-
644
- # Runtime state helpers
645
-
646
- def any_exceptions?
647
- @counters[:exceptions] > 0
648
- end
649
-
650
- def any_likely_damaged?
651
- @counters[:likely_damaged] > 0
652
- end
653
-
654
662
  # UI helpers
655
663
 
656
664
  def confirm text
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.40
4
+ version: 0.0.43
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-20 00:00:00.000000000 Z
11
+ date: 2022-07-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openssl
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.1'
19
+ version: '3.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.1'
26
+ version: '3.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.3'
33
+ version: '1.4'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.3'
40
+ version: '1.4'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables:
@@ -51,7 +51,7 @@ homepage: https://github.com/senotrusov/file-digests
51
51
  licenses:
52
52
  - Apache-2.0
53
53
  metadata: {}
54
- post_install_message:
54
+ post_install_message:
55
55
  rdoc_options: []
56
56
  require_paths:
57
57
  - lib
@@ -66,8 +66,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  requirements: []
69
- rubygems_version: 3.1.2
70
- signing_key:
69
+ rubygems_version: 3.3.7
70
+ signing_key:
71
71
  specification_version: 4
72
72
  summary: file-digests
73
73
  test_files: []