file-digests 0.0.26 → 0.0.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +89 -68
  3. metadata +8 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 912b66387453e26ebd7280d7f2dbd599838098a6bcc4da70d1eabe19737ba172
4
- data.tar.gz: 9c02352223aff8d6489892df533606bd512de3357dfb59a82513dc04aa1eddfe
3
+ metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
4
+ data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
5
5
  SHA512:
6
- metadata.gz: '0380c5acf3750632a3fe49f63d93dbc7533da1389f3e8cab41addc8909f71552a280aec7e3e783e9ae20071bc94af3d70021098b1fbfba96a3e1f78967479c34'
7
- data.tar.gz: 4b0a05a27dcb61ee405dc06688753d5eb162e3fdf513fa45aee9a1e2d946fd4697b4f2dbb45f1af57f33ee787c366665144a999c161976cd81992902bfe90fb5
6
+ metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
7
+ data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
@@ -1,11 +1,11 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
@@ -37,22 +37,18 @@ class FileDigests
37
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
38
38
  ].join "\n"
39
39
 
40
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
41
41
  options[:auto] = true
42
42
  end
43
43
 
44
- opts.on("--accept-fate", "Accept the current state of files that are likely damaged and update their digest data") do
45
- options[:accept_fate] = true
46
- end
47
-
48
44
  opts.on(
49
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
50
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
51
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
52
48
  "#{digest_algorithms_list_text}.",
53
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
54
- 'Any time later you could specify a new algorithm to change the current one.',
55
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
56
52
  ) do |value|
57
53
  digest_algorithm = canonical_digest_algorithm_name(value)
58
54
  unless DIGEST_ALGORITHMS.include?(digest_algorithm)
@@ -62,26 +58,31 @@ class FileDigests
62
58
  options[:digest_algorithm] = digest_algorithm
63
59
  end
64
60
 
65
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
66
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
67
63
  end
68
64
 
69
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
70
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
71
68
  end
72
69
 
73
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
72
+ end
73
+
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
74
75
  options[:quiet] = true
75
76
  end
76
77
 
77
- opts.on("-v", "--verbose", "More verbose output") do
78
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
79
80
  end
80
81
 
81
- opts.on("-h", "--help", "Prints this help") do
82
- puts opts
83
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
84
84
  end
85
+
85
86
  end.parse!
86
87
  options
87
88
  end
@@ -99,13 +100,15 @@ class FileDigests
99
100
  initialize_paths files_path, digest_database_path
100
101
  initialize_database
101
102
 
102
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
103
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
104
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
105
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
106
+ @new_digest_algorithm = @options[:digest_algorithm]
107
+ end
108
+ else
109
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
110
+ set_metadata "digest_algorithm", @digest_algorithm
105
111
  end
106
- else
107
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
108
- set_metadata "digest_algorithm", @digest_algorithm
109
112
  end
110
113
 
111
114
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
@@ -117,7 +120,7 @@ class FileDigests
117
120
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
118
121
 
119
122
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
120
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
123
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
121
124
  ensure_dir_exists @digest_database_path.dirname
122
125
 
123
126
  if @options[:verbose]
@@ -129,14 +132,17 @@ class FileDigests
129
132
  def initialize_database
130
133
  @db = SQLite3::Database.new @digest_database_path.to_s
131
134
  @db.results_as_hash = true
135
+ @db.busy_timeout = 5000
132
136
 
133
137
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
134
138
 
135
- execute 'PRAGMA encoding = "UTF-8"'
136
- execute 'PRAGMA journal_mode = "WAL"'
137
- execute 'PRAGMA synchronous = "NORMAL"'
138
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
139
- execute 'PRAGMA cache_size = "5000"'
139
+ execute "PRAGMA encoding = 'UTF-8'"
140
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
141
+ execute "PRAGMA journal_mode = 'WAL'"
142
+ execute "PRAGMA synchronous = 'NORMAL'"
143
+ execute "PRAGMA cache_size = '5000'"
144
+
145
+ integrity_check
140
146
 
141
147
  @db.transaction(:exclusive) do
142
148
  metadata_table_was_created = false
@@ -187,7 +193,7 @@ class FileDigests
187
193
  # Convert database from 1st to 2nd version
188
194
  unless get_metadata("digest_algorithm")
189
195
  if get_metadata("database_version") == "1"
190
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
196
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
191
197
  set_metadata("digest_algorithm", "SHA512")
192
198
  else
193
199
  set_metadata("digest_algorithm", "SHA256")
@@ -197,7 +203,7 @@ class FileDigests
197
203
  end
198
204
 
199
205
  if get_metadata("database_version") != "2"
200
- STDERR.puts "This version of file-digests (#{file_digests_gem_version || 'unknown'}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
206
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
201
207
  raise "Incompatible database version"
202
208
  end
203
209
  end
@@ -217,7 +223,9 @@ class FileDigests
217
223
  end
218
224
  end
219
225
 
220
- track_renames
226
+ nested_transaction do
227
+ track_renames
228
+ end
221
229
 
222
230
  if any_missing_files?
223
231
  if any_exceptions?
@@ -225,7 +233,9 @@ class FileDigests
225
233
  else
226
234
  print_missing_files
227
235
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
228
- remove_missing_files
236
+ nested_transaction do
237
+ remove_missing_files
238
+ end
229
239
  end
230
240
  end
231
241
  end
@@ -248,6 +258,10 @@ class FileDigests
248
258
 
249
259
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
250
260
 
261
+ execute "PRAGMA optimize"
262
+ execute "VACUUM"
263
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
264
+
251
265
  print_counters
252
266
  end
253
267
  end
@@ -255,12 +269,12 @@ class FileDigests
255
269
  def show_duplicates
256
270
  current_digest = nil
257
271
  query_duplicates.each do |found|
258
- if current_digest != found['digest']
272
+ if current_digest != found["digest"]
259
273
  puts "" if current_digest
260
- current_digest = found['digest']
261
- puts "#{found['digest']}:"
274
+ current_digest = found["digest"]
275
+ puts "#{found["digest"]}:"
262
276
  end
263
- puts " #{found['filename']}"
277
+ puts " #{found["filename"]}"
264
278
  end
265
279
  end
266
280
 
@@ -286,10 +300,13 @@ class FileDigests
286
300
  return
287
301
  end
288
302
 
289
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
303
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
290
304
  mtime_string = time_to_database stat.mtime
305
+ digest = get_file_digest(filename)
291
306
 
292
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
307
+ nested_transaction do
308
+ process_file_indeed normalized_filename, mtime_string, digest
309
+ end
293
310
 
294
311
  rescue => exception
295
312
  @counters[:exceptions] += 1
@@ -306,25 +323,25 @@ class FileDigests
306
323
 
307
324
  def process_previously_seen_file found, filename, mtime, digest
308
325
  @missing_files.delete(filename)
309
- if found['digest'] == digest
326
+ if found["digest"] == digest
310
327
  @counters[:good] += 1
311
328
  puts "GOOD: #{filename}" if @options[:verbose]
312
329
  unless @options[:test_only]
313
- if found['mtime'] == mtime
314
- touch_digest_check_time found['id']
330
+ if found["mtime"] == mtime
331
+ touch_digest_check_time found["id"]
315
332
  else
316
- update_mtime mtime, found['id']
333
+ update_mtime mtime, found["id"]
317
334
  end
318
335
  end
319
336
  else
320
- if found['mtime'] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
337
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
321
338
  @counters[:likely_damaged] += 1
322
339
  STDERR.puts "LIKELY DAMAGED: #{filename}"
323
340
  else
324
341
  @counters[:updated] += 1
325
- puts "UPDATED: #{filename}" unless @options[:quiet]
342
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
326
343
  unless @options[:test_only]
327
- update_mtime_and_digest mtime, digest, found['id']
344
+ update_mtime_and_digest mtime, digest, found["id"]
328
345
  end
329
346
  end
330
347
  end
@@ -362,12 +379,10 @@ class FileDigests
362
379
  end
363
380
 
364
381
  def remove_missing_files
365
- nested_transaction do
366
- @missing_files.each do |filename, digest|
367
- delete_by_filename filename
368
- end
369
- @missing_files = {}
382
+ @missing_files.each do |filename, digest|
383
+ delete_by_filename filename
370
384
  end
385
+ @missing_files = {}
371
386
  end
372
387
 
373
388
 
@@ -377,6 +392,12 @@ class FileDigests
377
392
  @db.execute *args, &block
378
393
  end
379
394
 
395
+ def integrity_check
396
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
397
+ raise "Database integrity check failed"
398
+ end
399
+ end
400
+
380
401
  def nested_transaction(mode = :deferred)
381
402
  if @db.transaction_active?
382
403
  yield
@@ -398,7 +419,7 @@ class FileDigests
398
419
  end
399
420
 
400
421
  def table_exist? table_name
401
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
422
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
402
423
  end
403
424
 
404
425
  def prepare_method name, query
@@ -433,14 +454,14 @@ class FileDigests
433
454
  end
434
455
 
435
456
  def time_to_database time
436
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
457
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
437
458
  end
438
459
 
439
460
 
440
461
  # Filesystem-related helpers
441
462
 
442
463
  def patch_path_string path
443
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
464
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
444
465
  end
445
466
 
446
467
  def cleanup_path path
@@ -458,13 +479,13 @@ class FileDigests
458
479
  end
459
480
 
460
481
  def walk_files
461
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
482
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
462
483
  yield filename
463
484
  end
464
485
  end
465
486
 
466
487
  def get_file_digest filename
467
- File.open(filename, 'rb') do |io|
488
+ File.open(filename, "rb") do |io|
468
489
  digest = OpenSSL::Digest.new(@digest_algorithm)
469
490
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
470
491
 
@@ -506,13 +527,13 @@ class FileDigests
506
527
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
507
528
  yield
508
529
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
509
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
530
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
510
531
  end
511
532
 
512
533
  def print_file_exception exception, filename
513
534
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
514
535
  begin
515
- STDERR.print filename.encode('utf-8', universal_newline: true)
536
+ STDERR.print filename.encode("utf-8", universal_newline: true)
516
537
  rescue
517
538
  STDERR.print "(Unable to encode file name to utf-8) "
518
539
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.26
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.1'
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.1'
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: