file-digests 0.0.26 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +89 -68
  3. metadata +8 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 912b66387453e26ebd7280d7f2dbd599838098a6bcc4da70d1eabe19737ba172
4
- data.tar.gz: 9c02352223aff8d6489892df533606bd512de3357dfb59a82513dc04aa1eddfe
3
+ metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
4
+ data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
5
5
  SHA512:
6
- metadata.gz: '0380c5acf3750632a3fe49f63d93dbc7533da1389f3e8cab41addc8909f71552a280aec7e3e783e9ae20071bc94af3d70021098b1fbfba96a3e1f78967479c34'
7
- data.tar.gz: 4b0a05a27dcb61ee405dc06688753d5eb162e3fdf513fa45aee9a1e2d946fd4697b4f2dbb45f1af57f33ee787c366665144a999c161976cd81992902bfe90fb5
6
+ metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
7
+ data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
data/lib/file-digests.rb CHANGED
@@ -1,11 +1,11 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
@@ -37,22 +37,18 @@ class FileDigests
37
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
38
38
  ].join "\n"
39
39
 
40
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
41
41
  options[:auto] = true
42
42
  end
43
43
 
44
- opts.on("--accept-fate", "Accept the current state of files that are likely damaged and update their digest data") do
45
- options[:accept_fate] = true
46
- end
47
-
48
44
  opts.on(
49
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
50
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
51
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
52
48
  "#{digest_algorithms_list_text}.",
53
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
54
- 'Any time later you could specify a new algorithm to change the current one.',
55
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
56
52
  ) do |value|
57
53
  digest_algorithm = canonical_digest_algorithm_name(value)
58
54
  unless DIGEST_ALGORITHMS.include?(digest_algorithm)
@@ -62,26 +58,31 @@ class FileDigests
62
58
  options[:digest_algorithm] = digest_algorithm
63
59
  end
64
60
 
65
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
66
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
67
63
  end
68
64
 
69
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
70
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
71
68
  end
72
69
 
73
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
72
+ end
73
+
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
74
75
  options[:quiet] = true
75
76
  end
76
77
 
77
- opts.on("-v", "--verbose", "More verbose output") do
78
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
79
80
  end
80
81
 
81
- opts.on("-h", "--help", "Prints this help") do
82
- puts opts
83
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
84
84
  end
85
+
85
86
  end.parse!
86
87
  options
87
88
  end
@@ -99,13 +100,15 @@ class FileDigests
99
100
  initialize_paths files_path, digest_database_path
100
101
  initialize_database
101
102
 
102
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
103
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
104
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
105
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
106
+ @new_digest_algorithm = @options[:digest_algorithm]
107
+ end
108
+ else
109
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
110
+ set_metadata "digest_algorithm", @digest_algorithm
105
111
  end
106
- else
107
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
108
- set_metadata "digest_algorithm", @digest_algorithm
109
112
  end
110
113
 
111
114
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
@@ -117,7 +120,7 @@ class FileDigests
117
120
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
118
121
 
119
122
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
120
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
123
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
121
124
  ensure_dir_exists @digest_database_path.dirname
122
125
 
123
126
  if @options[:verbose]
@@ -129,14 +132,17 @@ class FileDigests
129
132
  def initialize_database
130
133
  @db = SQLite3::Database.new @digest_database_path.to_s
131
134
  @db.results_as_hash = true
135
+ @db.busy_timeout = 5000
132
136
 
133
137
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
134
138
 
135
- execute 'PRAGMA encoding = "UTF-8"'
136
- execute 'PRAGMA journal_mode = "WAL"'
137
- execute 'PRAGMA synchronous = "NORMAL"'
138
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
139
- execute 'PRAGMA cache_size = "5000"'
139
+ execute "PRAGMA encoding = 'UTF-8'"
140
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
141
+ execute "PRAGMA journal_mode = 'WAL'"
142
+ execute "PRAGMA synchronous = 'NORMAL'"
143
+ execute "PRAGMA cache_size = '5000'"
144
+
145
+ integrity_check
140
146
 
141
147
  @db.transaction(:exclusive) do
142
148
  metadata_table_was_created = false
@@ -187,7 +193,7 @@ class FileDigests
187
193
  # Convert database from 1st to 2nd version
188
194
  unless get_metadata("digest_algorithm")
189
195
  if get_metadata("database_version") == "1"
190
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
196
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
191
197
  set_metadata("digest_algorithm", "SHA512")
192
198
  else
193
199
  set_metadata("digest_algorithm", "SHA256")
@@ -197,7 +203,7 @@ class FileDigests
197
203
  end
198
204
 
199
205
  if get_metadata("database_version") != "2"
200
- STDERR.puts "This version of file-digests (#{file_digests_gem_version || 'unknown'}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
206
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
201
207
  raise "Incompatible database version"
202
208
  end
203
209
  end
@@ -217,7 +223,9 @@ class FileDigests
217
223
  end
218
224
  end
219
225
 
220
- track_renames
226
+ nested_transaction do
227
+ track_renames
228
+ end
221
229
 
222
230
  if any_missing_files?
223
231
  if any_exceptions?
@@ -225,7 +233,9 @@ class FileDigests
225
233
  else
226
234
  print_missing_files
227
235
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
228
- remove_missing_files
236
+ nested_transaction do
237
+ remove_missing_files
238
+ end
229
239
  end
230
240
  end
231
241
  end
@@ -248,6 +258,10 @@ class FileDigests
248
258
 
249
259
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
250
260
 
261
+ execute "PRAGMA optimize"
262
+ execute "VACUUM"
263
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
264
+
251
265
  print_counters
252
266
  end
253
267
  end
@@ -255,12 +269,12 @@ class FileDigests
255
269
  def show_duplicates
256
270
  current_digest = nil
257
271
  query_duplicates.each do |found|
258
- if current_digest != found['digest']
272
+ if current_digest != found["digest"]
259
273
  puts "" if current_digest
260
- current_digest = found['digest']
261
- puts "#{found['digest']}:"
274
+ current_digest = found["digest"]
275
+ puts "#{found["digest"]}:"
262
276
  end
263
- puts " #{found['filename']}"
277
+ puts " #{found["filename"]}"
264
278
  end
265
279
  end
266
280
 
@@ -286,10 +300,13 @@ class FileDigests
286
300
  return
287
301
  end
288
302
 
289
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
303
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
290
304
  mtime_string = time_to_database stat.mtime
305
+ digest = get_file_digest(filename)
291
306
 
292
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
307
+ nested_transaction do
308
+ process_file_indeed normalized_filename, mtime_string, digest
309
+ end
293
310
 
294
311
  rescue => exception
295
312
  @counters[:exceptions] += 1
@@ -306,25 +323,25 @@ class FileDigests
306
323
 
307
324
  def process_previously_seen_file found, filename, mtime, digest
308
325
  @missing_files.delete(filename)
309
- if found['digest'] == digest
326
+ if found["digest"] == digest
310
327
  @counters[:good] += 1
311
328
  puts "GOOD: #{filename}" if @options[:verbose]
312
329
  unless @options[:test_only]
313
- if found['mtime'] == mtime
314
- touch_digest_check_time found['id']
330
+ if found["mtime"] == mtime
331
+ touch_digest_check_time found["id"]
315
332
  else
316
- update_mtime mtime, found['id']
333
+ update_mtime mtime, found["id"]
317
334
  end
318
335
  end
319
336
  else
320
- if found['mtime'] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
337
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
321
338
  @counters[:likely_damaged] += 1
322
339
  STDERR.puts "LIKELY DAMAGED: #{filename}"
323
340
  else
324
341
  @counters[:updated] += 1
325
- puts "UPDATED: #{filename}" unless @options[:quiet]
342
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
326
343
  unless @options[:test_only]
327
- update_mtime_and_digest mtime, digest, found['id']
344
+ update_mtime_and_digest mtime, digest, found["id"]
328
345
  end
329
346
  end
330
347
  end
@@ -362,12 +379,10 @@ class FileDigests
362
379
  end
363
380
 
364
381
  def remove_missing_files
365
- nested_transaction do
366
- @missing_files.each do |filename, digest|
367
- delete_by_filename filename
368
- end
369
- @missing_files = {}
382
+ @missing_files.each do |filename, digest|
383
+ delete_by_filename filename
370
384
  end
385
+ @missing_files = {}
371
386
  end
372
387
 
373
388
 
@@ -377,6 +392,12 @@ class FileDigests
377
392
  @db.execute *args, &block
378
393
  end
379
394
 
395
+ def integrity_check
396
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
397
+ raise "Database integrity check failed"
398
+ end
399
+ end
400
+
380
401
  def nested_transaction(mode = :deferred)
381
402
  if @db.transaction_active?
382
403
  yield
@@ -398,7 +419,7 @@ class FileDigests
398
419
  end
399
420
 
400
421
  def table_exist? table_name
401
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
422
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
402
423
  end
403
424
 
404
425
  def prepare_method name, query
@@ -433,14 +454,14 @@ class FileDigests
433
454
  end
434
455
 
435
456
  def time_to_database time
436
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
457
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
437
458
  end
438
459
 
439
460
 
440
461
  # Filesystem-related helpers
441
462
 
442
463
  def patch_path_string path
443
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
464
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
444
465
  end
445
466
 
446
467
  def cleanup_path path
@@ -458,13 +479,13 @@ class FileDigests
458
479
  end
459
480
 
460
481
  def walk_files
461
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
482
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
462
483
  yield filename
463
484
  end
464
485
  end
465
486
 
466
487
  def get_file_digest filename
467
- File.open(filename, 'rb') do |io|
488
+ File.open(filename, "rb") do |io|
468
489
  digest = OpenSSL::Digest.new(@digest_algorithm)
469
490
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
470
491
 
@@ -506,13 +527,13 @@ class FileDigests
506
527
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
507
528
  yield
508
529
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
509
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
530
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
510
531
  end
511
532
 
512
533
  def print_file_exception exception, filename
513
534
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
514
535
  begin
515
- STDERR.print filename.encode('utf-8', universal_newline: true)
536
+ STDERR.print filename.encode("utf-8", universal_newline: true)
516
537
  rescue
517
538
  STDERR.print "(Unable to encode file name to utf-8) "
518
539
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.26
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.1'
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.1'
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: