file-digests 0.0.40 → 0.0.43

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +175 -167
  3. metadata +10 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9d040599aee9aeb62234557b2a92edec36265e4439965e4c9d00a2c9afa117a
4
- data.tar.gz: 992fe8a843afe761537a3c48b1ecde27ccdf8101be58dd4b456572c59232141b
3
+ metadata.gz: e74595cb920bd6b52f8f0a0efab118afae509dc5406389d5203518ce801f2811
4
+ data.tar.gz: ad4429b2d998dc82c441cb5ebd9b9f3640158f20abe0839ead0f0de57037854b
5
5
  SHA512:
6
- metadata.gz: 1e15aa584690f8062a51cbb4785c9716e610258fba77859cb2b50642154701ffa1651312be1f6455921ee435ed7df5ed00bcc36f12def4009ac7edcc4aebb93e
7
- data.tar.gz: 5c94ae4165677af0be7b790a7a92ffc9af7751f0dd2661bf238d06b02569ef4ec746908721c7d16781d11a431048759e1490f9e180ea9c2e647f5e585a3ebbe0
6
+ metadata.gz: 8c78ceabcbba358c5be5222681ef45d25acef175f4840022997d652c1740ad1c66a461b8ff2e618bb48a80f97783f7afa57d4281c41d7251c3aa94b4641b5079
7
+ data.tar.gz: b83b60f4d66e5a57ed7f5f44a6f997c6c47e5df38686a63459fcf7943dd95cda8f9e7cb41a717e071ec5cfef14459b201afc9f5e18f990364f2a7654566af351
data/lib/file-digests.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # encoding: UTF-8
2
+
1
3
  # Copyright 2020 Stanislav Senotrusov <stan@senotrusov.com>
2
4
  #
3
5
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,7 +19,6 @@ require "digest"
17
19
  require "fileutils"
18
20
  require "openssl"
19
21
  require "optparse"
20
- require "pathname"
21
22
  require "set"
22
23
  require "sqlite3"
23
24
 
@@ -112,6 +113,7 @@ class FileDigests
112
113
 
113
114
  file_digests = self.new ARGV[0], ARGV[1], options
114
115
  file_digests.send(options[:action] || :perform_check)
116
+ file_digests.close_database
115
117
  end
116
118
 
117
119
  def initialize files_path, digest_database_path, options = {}
@@ -138,29 +140,109 @@ class FileDigests
138
140
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
139
141
  end
140
142
 
143
+ def perform_check
144
+ measure_time do
145
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
146
+ @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
147
+
148
+ walk_files(@files_path) do |filename|
149
+ process_file filename
150
+ end
151
+
152
+ nested_transaction do
153
+ puts "Tracking renames..." if @options[:verbose]
154
+ track_renames
155
+ end
156
+
157
+ if any_missing_files?
158
+ if any_exceptions?
159
+ STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
160
+ else
161
+ report_missing_files
162
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
163
+ nested_transaction do
164
+ puts "Removing missing files..." if @options[:verbose]
165
+ remove_missing_files
166
+ end
167
+ end
168
+ end
169
+ end
170
+
171
+ if @new_digest_algorithm && !@options[:test_only]
172
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
173
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
174
+ else
175
+ puts "Updating database to a new digest algorithm..." if @options[:verbose]
176
+ digests_update_digests_to_new_digests
177
+ set_metadata "digest_algorithm", @new_digest_algorithm
178
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
179
+ end
180
+ end
181
+
182
+ if any_likely_damaged? || any_exceptions?
183
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
184
+ STDERR.puts "A list of errors is also saved in a file: #{@error_log_path}"
185
+ end
186
+
187
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
188
+
189
+ print_counters
190
+ end
191
+
192
+ puts "Performing database maintenance..." if @options[:verbose]
193
+ execute "PRAGMA optimize"
194
+ execute "VACUUM"
195
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
196
+ end
197
+ end
198
+
199
+ def show_duplicates
200
+ current_digest = nil
201
+ digests_select_duplicates.each do |found|
202
+ if current_digest != found["digest"]
203
+ puts "" if current_digest
204
+ current_digest = found["digest"]
205
+ puts "#{found["digest"]}:"
206
+ end
207
+ puts " #{found["filename"]}"
208
+ end
209
+ end
210
+
211
+ def close_database
212
+ @statements.each(&:close)
213
+ @db.close
214
+ hide_database_files
215
+ end
216
+
217
+ private
218
+
141
219
  def initialize_paths files_path, digest_database_path
220
+ @files_path = realpath(files_path || ".")
221
+
222
+ unless File.directory?(@files_path) && File.readable?(@files_path)
223
+ raise "ERROR: Files path must be a readable directory"
224
+ end
225
+
142
226
  @start_time_filename_string = Time.now.strftime("%Y-%m-%d %H-%M-%S")
143
- @files_path = cleanup_path(files_path || ".")
144
- raise "ERROR: Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
145
- @files_path = realpath_with_disk @files_path
146
-
147
- @error_log_path = @files_path + "file-digests errors #{@start_time_filename_string}.txt"
148
- @missing_files_path = @files_path + "file-digests missing files #{@start_time_filename_string}.txt"
149
-
150
- @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
151
- @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
152
- ensure_dir_exist @digest_database_path.dirname
153
- @digest_database_path = realdirpath_with_disk @digest_database_path
154
-
227
+
228
+ @error_log_path = "#{@files_path}#{File::SEPARATOR}file-digests errors #{@start_time_filename_string}.txt"
229
+ @missing_files_path = "#{@files_path}#{File::SEPARATOR}file-digests missing files #{@start_time_filename_string}.txt"
230
+
231
+ @digest_database_path = digest_database_path ? realdirpath(digest_database_path) : @files_path
232
+
233
+ if File.directory?(@digest_database_path)
234
+ @digest_database_path += "#{File::SEPARATOR}.file-digests.sqlite"
235
+ end
236
+
155
237
  @digest_database_files = [
156
- "#{@digest_database_path}",
238
+ @digest_database_path,
157
239
  "#{@digest_database_path}-wal",
158
240
  "#{@digest_database_path}-shm"
159
241
  ]
160
242
 
161
243
  @skip_files = @digest_database_files + [
162
- @error_log_path.to_s,
163
- @missing_files_path.to_s
244
+ @error_log_path,
245
+ @missing_files_path
164
246
  ]
165
247
 
166
248
  if @options[:verbose]
@@ -170,9 +252,10 @@ class FileDigests
170
252
  end
171
253
 
172
254
  def initialize_database
173
- @db = SQLite3::Database.new @digest_database_path.to_s
255
+ @db = SQLite3::Database.new @digest_database_path
174
256
  @db.results_as_hash = true
175
257
  @db.busy_timeout = 5000
258
+ @statements = []
176
259
 
177
260
  execute "PRAGMA encoding = 'UTF-8'"
178
261
  execute "PRAGMA locking_mode = 'EXCLUSIVE'"
@@ -229,7 +312,7 @@ class FileDigests
229
312
  # Convert database from 1st to 2nd version
230
313
  unless get_metadata("digest_algorithm")
231
314
  if get_metadata("database_version") == "1"
232
- if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
315
+ if File.exist?("#{File.dirname(@digest_database_path)}#{File::SEPARATOR}.file-digests.sha512")
233
316
  set_metadata("digest_algorithm", "SHA512")
234
317
  else
235
318
  set_metadata("digest_algorithm", "SHA256")
@@ -242,7 +325,7 @@ class FileDigests
242
325
  execute "CREATE INDEX digests_digest ON digests(digest)"
243
326
  set_metadata "database_version", "3"
244
327
  end
245
-
328
+
246
329
  check_if_database_is_at_certain_version "3"
247
330
 
248
331
  create_temporary_tables
@@ -282,81 +365,81 @@ class FileDigests
282
365
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
283
366
  end
284
367
 
285
- def perform_check
286
- measure_time do
287
- perhaps_transaction(@new_digest_algorithm, :exclusive) do
288
- @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
368
+ # Files
289
369
 
290
- walk_files(@files_path.to_s) do |filename|
291
- process_file filename
292
- end
370
+ def realpath path
371
+ realxpath path, :realpath
372
+ end
293
373
 
294
- nested_transaction do
295
- puts "Tracking renames..." if @options[:verbose]
296
- track_renames
297
- end
374
+ def realdirpath path
375
+ realxpath path, :realdirpath
376
+ end
298
377
 
299
- if any_missing_files?
300
- if any_exceptions?
301
- STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
302
- else
303
- report_missing_files
304
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
305
- nested_transaction do
306
- puts "Removing missing files..." if @options[:verbose]
307
- remove_missing_files
308
- end
309
- end
310
- end
311
- end
378
+ def realxpath path, method_name
379
+ path = path.encode("utf-8")
312
380
 
313
- if @new_digest_algorithm && !@options[:test_only]
314
- if any_missing_files? || any_likely_damaged? || any_exceptions?
315
- STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
316
- else
317
- puts "Updating database to a new digest algorithm..." if @options[:verbose]
318
- digests_update_digests_to_new_digests
319
- set_metadata "digest_algorithm", @new_digest_algorithm
320
- puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
321
- end
322
- end
381
+ if Gem.win_platform?
382
+ path = path.gsub(/\\/, "/")
383
+ end
323
384
 
324
- if any_likely_damaged? || any_exceptions?
325
- STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
326
- end
385
+ path = File.send(method_name, path).encode("utf-8")
327
386
 
328
- set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
387
+ if Gem.win_platform? && path[0] == "/"
388
+ path = Dir.pwd[0, 2].encode("utf-8") + path
389
+ end
329
390
 
330
- print_counters
331
- end
332
-
333
- puts "Performing database maintenance..." if @options[:verbose]
334
- execute "PRAGMA optimize"
335
- execute "VACUUM"
336
- execute "PRAGMA wal_checkpoint(TRUNCATE)"
391
+ path
392
+ end
337
393
 
338
- hide_database_files
394
+ def perhaps_nt_path path
395
+ if Gem.win_platform?
396
+ "\\??\\#{path.gsub(/\//,"\\")}"
397
+ else
398
+ path
339
399
  end
340
400
  end
341
401
 
342
- def show_duplicates
343
- current_digest = nil
344
- digests_select_duplicates.each do |found|
345
- if current_digest != found["digest"]
346
- puts "" if current_digest
347
- current_digest = found["digest"]
348
- puts "#{found["digest"]}:"
402
+ def get_file_digest filename
403
+ File.open(filename, "rb") do |io|
404
+ digest = OpenSSL::Digest.new(@digest_algorithm)
405
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
406
+
407
+ buffer = ""
408
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
409
+ digest.update(buffer)
410
+ new_digest.update(buffer) if @new_digest_algorithm
349
411
  end
350
- puts " #{found["filename"]}"
412
+ return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
351
413
  end
352
414
  end
353
415
 
354
- private
416
+ def walk_files(path, &block)
417
+ Dir.each_child(path, encoding: "UTF-8") do |item|
418
+ item = "#{path}#{File::SEPARATOR}#{item.encode("utf-8")}"
419
+ begin
420
+ item_perhaps_nt_path = perhaps_nt_path item
421
+
422
+ unless File.symlink? item_perhaps_nt_path
423
+ if File.directory?(item_perhaps_nt_path)
424
+ raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
425
+ walk_files(item, &block)
426
+ else
427
+ yield item
428
+ end
429
+ end
430
+
431
+ rescue => exception
432
+ @counters[:exceptions] += 1
433
+ report_file_exception exception, item
434
+ end
435
+ end
436
+ end
355
437
 
356
438
  def process_file filename
357
439
  perhaps_nt_filename = perhaps_nt_path filename
358
440
 
359
- return if File.symlink? perhaps_nt_filename
441
+ # this is checked in the walk_files
442
+ # return if File.symlink? perhaps_nt_filename
360
443
 
361
444
  stat = File.stat perhaps_nt_filename
362
445
 
@@ -373,7 +456,7 @@ class FileDigests
373
456
  return
374
457
  end
375
458
 
376
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
459
+ normalized_filename = filename.delete_prefix("#{@files_path}#{File::SEPARATOR}").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
377
460
  mtime_string = time_to_database stat.mtime
378
461
  digest, new_digest = get_file_digest(perhaps_nt_filename)
379
462
 
@@ -443,6 +526,7 @@ class FileDigests
443
526
  File.open(@missing_files_path, "a") do |f|
444
527
  write_missing_files f
445
528
  end
529
+ puts "\n(A list of missing files is also saved in a file: #{@missing_files_path})"
446
530
  end
447
531
  end
448
532
 
@@ -466,6 +550,17 @@ class FileDigests
466
550
  end
467
551
 
468
552
 
553
+ # Runtime state helpers
554
+
555
+ def any_exceptions?
556
+ @counters[:exceptions] > 0
557
+ end
558
+
559
+ def any_likely_damaged?
560
+ @counters[:likely_damaged] > 0
561
+ end
562
+
563
+
469
564
  # Database helpers
470
565
 
471
566
  def execute *args, &block
@@ -506,7 +601,10 @@ class FileDigests
506
601
  def prepare_method name, query
507
602
  variable = "@#{name}"
508
603
 
509
- instance_variable_set(variable, @db.prepare(query))
604
+ statement = @db.prepare(query)
605
+ @statements.push(statement)
606
+
607
+ instance_variable_set(variable, statement)
510
608
 
511
609
  define_singleton_method name do |*args, &block|
512
610
  instance_variable_get(variable).execute(*args, &block)
@@ -561,96 +659,6 @@ class FileDigests
561
659
  end
562
660
 
563
661
 
564
- # Filesystem-related helpers
565
-
566
- def realpath_with_disk path
567
- path = path.realpath
568
- if Gem.win_platform? && path.to_s[0] == "/"
569
- return Pathname(Dir.pwd[0, 2] + path.to_s)
570
- end
571
- path
572
- end
573
-
574
- def realdirpath_with_disk path
575
- path = path.realdirpath
576
- if Gem.win_platform? && path.to_s[0] == "/"
577
- return Pathname(Dir.pwd[0, 2] + path.to_s)
578
- end
579
- path
580
- end
581
-
582
- def patch_path_string path
583
- Gem.win_platform? ? path.gsub(/\\/, "/") : path
584
- end
585
-
586
- def cleanup_path path
587
- Pathname.new(patch_path_string(path)).cleanpath
588
- end
589
-
590
- def ensure_dir_exist path
591
- if File.exist?(path)
592
- unless File.directory?(path)
593
- raise "#{path} is not a directory"
594
- end
595
- else
596
- FileUtils.mkdir_p path
597
- end
598
- end
599
-
600
- def walk_files(path, &block)
601
- Dir.each_child(path, encoding: "UTF-8") do |item|
602
- item = "#{path}#{File::SEPARATOR}#{item}"
603
- begin
604
- item_perhaps_nt_path = perhaps_nt_path item
605
-
606
- unless File.symlink? item_perhaps_nt_path
607
- if File.directory?(item_perhaps_nt_path)
608
- raise "Directory is not readable" unless File.readable?(item_perhaps_nt_path)
609
- walk_files(item, &block)
610
- else
611
- yield item
612
- end
613
- end
614
- rescue => exception
615
- @counters[:exceptions] += 1
616
- report_file_exception exception, item
617
- end
618
- end
619
- end
620
-
621
- def perhaps_nt_path path
622
- if Gem.win_platform?
623
- "\\??\\#{path.gsub(/\//,"\\")}"
624
- else
625
- path
626
- end
627
- end
628
-
629
- def get_file_digest filename
630
- File.open(filename, "rb") do |io|
631
- digest = OpenSSL::Digest.new(@digest_algorithm)
632
- new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
633
-
634
- buffer = ""
635
- while io.read(409600, buffer) # 409600 seems like a sweet spot
636
- digest.update(buffer)
637
- new_digest.update(buffer) if @new_digest_algorithm
638
- end
639
- return [digest.hexdigest, (new_digest.hexdigest if @new_digest_algorithm)]
640
- end
641
- end
642
-
643
-
644
- # Runtime state helpers
645
-
646
- def any_exceptions?
647
- @counters[:exceptions] > 0
648
- end
649
-
650
- def any_likely_damaged?
651
- @counters[:likely_damaged] > 0
652
- end
653
-
654
662
  # UI helpers
655
663
 
656
664
  def confirm text
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.40
4
+ version: 0.0.43
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-20 00:00:00.000000000 Z
11
+ date: 2022-07-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: openssl
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.1'
19
+ version: '3.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.1'
26
+ version: '3.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.3'
33
+ version: '1.4'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.3'
40
+ version: '1.4'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables:
@@ -51,7 +51,7 @@ homepage: https://github.com/senotrusov/file-digests
51
51
  licenses:
52
52
  - Apache-2.0
53
53
  metadata: {}
54
- post_install_message:
54
+ post_install_message:
55
55
  rdoc_options: []
56
56
  require_paths:
57
57
  - lib
@@ -66,8 +66,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0'
68
68
  requirements: []
69
- rubygems_version: 3.1.2
70
- signing_key:
69
+ rubygems_version: 3.3.7
70
+ signing_key:
71
71
  specification_version: 4
72
72
  summary: file-digests
73
73
  test_files: []