file-digests 0.0.25 → 0.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +110 -70
  3. metadata +8 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bae6b10fe6496192c178f9139003b8d5a19774c70d957fd83f83ee91540cff9
4
- data.tar.gz: 15d2965cb47eaeedec5dd56a0fe0a30e2f686e72d80f823416a351326d034ba8
3
+ metadata.gz: 99b58aede8267994cc69da0ac1fd2e35d6661fd666257e9f3dfcde5054b3b6f8
4
+ data.tar.gz: 4b733dcf4be1f14b4a08931d15d0531c18e161f3d6d3af3e3025b36266f67e9d
5
5
  SHA512:
6
- metadata.gz: 89b7fc7eea7a5b87f2cc23ea794ac1b2f85e9b367825694876afd91c9206c438a8df91487aaa4872e7b24480e0e5eb25b2750237318fc63305c605aa7fa372a4
7
- data.tar.gz: 8fedea5e7034824276ea1f315543d5248e09a06e2fb97cf714f242181012701209383ab6416fcc0bfa206ce5b278abcdef216e4d3e41bbb9463a265b537091f2
6
+ metadata.gz: 9cf4c3df2b8f206b54689f2506de474ca3b021c572869765caf6e1424dac73a7c6dca3b67918ba5cb88f712353febe9aebe5fc40f7da24f6dd4019300b052436
7
+ data.tar.gz: cb0904f141e8861c9923647214c92bb8ce022952c53e8a96cee9819a43f4192bf550b47c927cc0ea8f5a16f2c58c3013561555c2afde9c093dd6cd7997f25c94
data/lib/file-digests.rb CHANGED
@@ -1,11 +1,11 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
@@ -37,18 +37,18 @@ class FileDigests
37
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
38
38
  ].join "\n"
39
39
 
40
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
41
41
  options[:auto] = true
42
42
  end
43
43
 
44
44
  opts.on(
45
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
46
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
47
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
48
48
  "#{digest_algorithms_list_text}.",
49
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
50
- 'Any time later you could specify a new algorithm to change the current one.',
51
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
52
52
  ) do |value|
53
53
  digest_algorithm = canonical_digest_algorithm_name(value)
54
54
  unless DIGEST_ALGORITHMS.include?(digest_algorithm)
@@ -58,26 +58,31 @@ class FileDigests
58
58
  options[:digest_algorithm] = digest_algorithm
59
59
  end
60
60
 
61
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
62
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
63
63
  end
64
64
 
65
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
66
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
67
68
  end
68
69
 
69
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
72
+ end
73
+
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
70
75
  options[:quiet] = true
71
76
  end
72
77
 
73
- opts.on("-v", "--verbose", "More verbose output") do
74
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
75
80
  end
76
81
 
77
- opts.on("-h", "--help", "Prints this help") do
78
- puts opts
79
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
80
84
  end
85
+
81
86
  end.parse!
82
87
  options
83
88
  end
@@ -95,15 +100,20 @@ class FileDigests
95
100
  initialize_paths files_path, digest_database_path
96
101
  initialize_database
97
102
 
98
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
99
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
100
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if db_digest_algorithm = get_metadata("digest_algorithm")
105
+ if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
106
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
107
+ @new_digest_algorithm = @options[:digest_algorithm]
108
+ end
109
+ else
110
+ raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
111
+ end
112
+ else
113
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
114
+ set_metadata "digest_algorithm", @digest_algorithm
101
115
  end
102
- else
103
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
104
- set_metadata "digest_algorithm", @digest_algorithm
105
116
  end
106
-
107
117
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
108
118
  end
109
119
 
@@ -113,8 +123,10 @@ class FileDigests
113
123
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
114
124
 
115
125
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
116
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
117
- ensure_dir_exists @digest_database_path.dirname
126
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
127
+ ensure_dir_exist @digest_database_path.dirname
128
+
129
+ @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
118
130
 
119
131
  if @options[:verbose]
120
132
  puts "Target directory: #{@files_path}"
@@ -125,14 +137,17 @@ class FileDigests
125
137
  def initialize_database
126
138
  @db = SQLite3::Database.new @digest_database_path.to_s
127
139
  @db.results_as_hash = true
140
+ @db.busy_timeout = 5000
128
141
 
129
142
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
130
143
 
131
- execute 'PRAGMA encoding = "UTF-8"'
132
- execute 'PRAGMA journal_mode = "WAL"'
133
- execute 'PRAGMA synchronous = "NORMAL"'
134
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
135
- execute 'PRAGMA cache_size = "5000"'
144
+ execute "PRAGMA encoding = 'UTF-8'"
145
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
146
+ execute "PRAGMA journal_mode = 'WAL'"
147
+ execute "PRAGMA synchronous = 'NORMAL'"
148
+ execute "PRAGMA cache_size = '5000'"
149
+
150
+ integrity_check
136
151
 
137
152
  @db.transaction(:exclusive) do
138
153
  metadata_table_was_created = false
@@ -183,7 +198,7 @@ class FileDigests
183
198
  # Convert database from 1st to 2nd version
184
199
  unless get_metadata("digest_algorithm")
185
200
  if get_metadata("database_version") == "1"
186
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
201
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
187
202
  set_metadata("digest_algorithm", "SHA512")
188
203
  else
189
204
  set_metadata("digest_algorithm", "SHA256")
@@ -193,7 +208,7 @@ class FileDigests
193
208
  end
194
209
 
195
210
  if get_metadata("database_version") != "2"
196
- STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
211
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
197
212
  raise "Incompatible database version"
198
213
  end
199
214
  end
@@ -213,7 +228,9 @@ class FileDigests
213
228
  end
214
229
  end
215
230
 
216
- track_renames
231
+ nested_transaction do
232
+ track_renames
233
+ end
217
234
 
218
235
  if any_missing_files?
219
236
  if any_exceptions?
@@ -221,7 +238,9 @@ class FileDigests
221
238
  else
222
239
  print_missing_files
223
240
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
224
- remove_missing_files
241
+ nested_transaction do
242
+ remove_missing_files
243
+ end
225
244
  end
226
245
  end
227
246
  end
@@ -234,6 +253,7 @@ class FileDigests
234
253
  update_digest_to_new_digest new_digest, old_digest
235
254
  end
236
255
  set_metadata "digest_algorithm", @new_digest_algorithm
256
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
237
257
  end
238
258
  end
239
259
 
@@ -245,17 +265,22 @@ class FileDigests
245
265
 
246
266
  print_counters
247
267
  end
268
+ execute "PRAGMA optimize"
269
+ execute "VACUUM"
270
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
271
+
272
+ hide_database_files
248
273
  end
249
274
 
250
275
  def show_duplicates
251
276
  current_digest = nil
252
277
  query_duplicates.each do |found|
253
- if current_digest != found['digest']
278
+ if current_digest != found["digest"]
254
279
  puts "" if current_digest
255
- current_digest = found['digest']
256
- puts "#{found['digest']}:"
280
+ current_digest = found["digest"]
281
+ puts "#{found["digest"]}:"
257
282
  end
258
- puts " #{found['filename']}"
283
+ puts " #{found["filename"]}"
259
284
  end
260
285
  end
261
286
 
@@ -274,17 +299,18 @@ class FileDigests
274
299
 
275
300
  raise "File is not readable" unless stat.readable?
276
301
 
277
- if filename == "#{@digest_database_path}" ||
278
- filename == "#{@digest_database_path}-wal" ||
279
- filename == "#{@digest_database_path}-shm"
302
+ if @digest_database_files.include?(filename)
280
303
  puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
281
304
  return
282
305
  end
283
306
 
284
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
307
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
285
308
  mtime_string = time_to_database stat.mtime
309
+ digest = get_file_digest(filename)
286
310
 
287
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
311
+ nested_transaction do
312
+ process_file_indeed normalized_filename, mtime_string, digest
313
+ end
288
314
 
289
315
  rescue => exception
290
316
  @counters[:exceptions] += 1
@@ -301,25 +327,25 @@ class FileDigests
301
327
 
302
328
  def process_previously_seen_file found, filename, mtime, digest
303
329
  @missing_files.delete(filename)
304
- if found['digest'] == digest
330
+ if found["digest"] == digest
305
331
  @counters[:good] += 1
306
332
  puts "GOOD: #{filename}" if @options[:verbose]
307
333
  unless @options[:test_only]
308
- if found['mtime'] == mtime
309
- touch_digest_check_time found['id']
334
+ if found["mtime"] == mtime
335
+ touch_digest_check_time found["id"]
310
336
  else
311
- update_mtime mtime, found['id']
337
+ update_mtime mtime, found["id"]
312
338
  end
313
339
  end
314
340
  else
315
- if found['mtime'] == mtime # Digest is different and mtime is the same
341
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
316
342
  @counters[:likely_damaged] += 1
317
343
  STDERR.puts "LIKELY DAMAGED: #{filename}"
318
344
  else
319
345
  @counters[:updated] += 1
320
- puts "UPDATED: #{filename}" unless @options[:quiet]
346
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
321
347
  unless @options[:test_only]
322
- update_mtime_and_digest mtime, digest, found['id']
348
+ update_mtime_and_digest mtime, digest, found["id"]
323
349
  end
324
350
  end
325
351
  end
@@ -357,12 +383,10 @@ class FileDigests
357
383
  end
358
384
 
359
385
  def remove_missing_files
360
- nested_transaction do
361
- @missing_files.each do |filename, digest|
362
- delete_by_filename filename
363
- end
364
- @missing_files = {}
386
+ @missing_files.each do |filename, digest|
387
+ delete_by_filename filename
365
388
  end
389
+ @missing_files = {}
366
390
  end
367
391
 
368
392
 
@@ -372,6 +396,12 @@ class FileDigests
372
396
  @db.execute *args, &block
373
397
  end
374
398
 
399
+ def integrity_check
400
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
401
+ raise "Database integrity check failed"
402
+ end
403
+ end
404
+
375
405
  def nested_transaction(mode = :deferred)
376
406
  if @db.transaction_active?
377
407
  yield
@@ -393,7 +423,7 @@ class FileDigests
393
423
  end
394
424
 
395
425
  def table_exist? table_name
396
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
426
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
397
427
  end
398
428
 
399
429
  def prepare_method name, query
@@ -428,21 +458,31 @@ class FileDigests
428
458
  end
429
459
 
430
460
  def time_to_database time
431
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
461
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
462
+ end
463
+
464
+ def hide_database_files
465
+ if Gem.win_platform?
466
+ @digest_database_files.each do |file|
467
+ if File.exist?(file)
468
+ system "attrib", "+H", file, exception: true
469
+ end
470
+ end
471
+ end
432
472
  end
433
473
 
434
474
 
435
475
  # Filesystem-related helpers
436
476
 
437
477
  def patch_path_string path
438
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
478
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
439
479
  end
440
480
 
441
481
  def cleanup_path path
442
482
  Pathname.new(patch_path_string(path)).cleanpath
443
483
  end
444
484
 
445
- def ensure_dir_exists path
485
+ def ensure_dir_exist path
446
486
  if File.exist?(path)
447
487
  unless File.directory?(path)
448
488
  raise "#{path} is not a directory"
@@ -453,13 +493,13 @@ class FileDigests
453
493
  end
454
494
 
455
495
  def walk_files
456
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
496
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
457
497
  yield filename
458
498
  end
459
499
  end
460
500
 
461
501
  def get_file_digest filename
462
- File.open(filename, 'rb') do |io|
502
+ File.open(filename, "rb") do |io|
463
503
  digest = OpenSSL::Digest.new(@digest_algorithm)
464
504
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
465
505
 
@@ -501,13 +541,13 @@ class FileDigests
501
541
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
502
542
  yield
503
543
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
504
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
544
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
505
545
  end
506
546
 
507
547
  def print_file_exception exception, filename
508
548
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
509
549
  begin
510
- STDERR.print filename.encode('utf-8', universal_newline: true)
550
+ STDERR.print filename.encode("utf-8", universal_newline: true)
511
551
  rescue
512
552
  STDERR.print "(Unable to encode file name to utf-8) "
513
553
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.25
4
+ version: 0.0.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.1'
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.1'
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: