file-digests 0.0.25 → 0.0.30

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +110 -70
  3. metadata +8 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3bae6b10fe6496192c178f9139003b8d5a19774c70d957fd83f83ee91540cff9
4
- data.tar.gz: 15d2965cb47eaeedec5dd56a0fe0a30e2f686e72d80f823416a351326d034ba8
3
+ metadata.gz: 99b58aede8267994cc69da0ac1fd2e35d6661fd666257e9f3dfcde5054b3b6f8
4
+ data.tar.gz: 4b733dcf4be1f14b4a08931d15d0531c18e161f3d6d3af3e3025b36266f67e9d
5
5
  SHA512:
6
- metadata.gz: 89b7fc7eea7a5b87f2cc23ea794ac1b2f85e9b367825694876afd91c9206c438a8df91487aaa4872e7b24480e0e5eb25b2750237318fc63305c605aa7fa372a4
7
- data.tar.gz: 8fedea5e7034824276ea1f315543d5248e09a06e2fb97cf714f242181012701209383ab6416fcc0bfa206ce5b278abcdef216e4d3e41bbb9463a265b537091f2
6
+ metadata.gz: 9cf4c3df2b8f206b54689f2506de474ca3b021c572869765caf6e1424dac73a7c6dca3b67918ba5cb88f712353febe9aebe5fc40f7da24f6dd4019300b052436
7
+ data.tar.gz: cb0904f141e8861c9923647214c92bb8ce022952c53e8a96cee9819a43f4192bf550b47c927cc0ea8f5a16f2c58c3013561555c2afde9c093dd6cd7997f25c94
@@ -1,11 +1,11 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
@@ -37,18 +37,18 @@ class FileDigests
37
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
38
38
  ].join "\n"
39
39
 
40
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
41
41
  options[:auto] = true
42
42
  end
43
43
 
44
44
  opts.on(
45
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
46
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
47
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
48
48
  "#{digest_algorithms_list_text}.",
49
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
50
- 'Any time later you could specify a new algorithm to change the current one.',
51
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
52
52
  ) do |value|
53
53
  digest_algorithm = canonical_digest_algorithm_name(value)
54
54
  unless DIGEST_ALGORITHMS.include?(digest_algorithm)
@@ -58,26 +58,31 @@ class FileDigests
58
58
  options[:digest_algorithm] = digest_algorithm
59
59
  end
60
60
 
61
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
62
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
63
63
  end
64
64
 
65
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
66
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
67
68
  end
68
69
 
69
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
72
+ end
73
+
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
70
75
  options[:quiet] = true
71
76
  end
72
77
 
73
- opts.on("-v", "--verbose", "More verbose output") do
74
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
75
80
  end
76
81
 
77
- opts.on("-h", "--help", "Prints this help") do
78
- puts opts
79
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
80
84
  end
85
+
81
86
  end.parse!
82
87
  options
83
88
  end
@@ -95,15 +100,20 @@ class FileDigests
95
100
  initialize_paths files_path, digest_database_path
96
101
  initialize_database
97
102
 
98
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
99
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
100
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if db_digest_algorithm = get_metadata("digest_algorithm")
105
+ if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
106
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
107
+ @new_digest_algorithm = @options[:digest_algorithm]
108
+ end
109
+ else
110
+ raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
111
+ end
112
+ else
113
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
114
+ set_metadata "digest_algorithm", @digest_algorithm
101
115
  end
102
- else
103
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
104
- set_metadata "digest_algorithm", @digest_algorithm
105
116
  end
106
-
107
117
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
108
118
  end
109
119
 
@@ -113,8 +123,10 @@ class FileDigests
113
123
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
114
124
 
115
125
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
116
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
117
- ensure_dir_exists @digest_database_path.dirname
126
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
127
+ ensure_dir_exist @digest_database_path.dirname
128
+
129
+ @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
118
130
 
119
131
  if @options[:verbose]
120
132
  puts "Target directory: #{@files_path}"
@@ -125,14 +137,17 @@ class FileDigests
125
137
  def initialize_database
126
138
  @db = SQLite3::Database.new @digest_database_path.to_s
127
139
  @db.results_as_hash = true
140
+ @db.busy_timeout = 5000
128
141
 
129
142
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
130
143
 
131
- execute 'PRAGMA encoding = "UTF-8"'
132
- execute 'PRAGMA journal_mode = "WAL"'
133
- execute 'PRAGMA synchronous = "NORMAL"'
134
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
135
- execute 'PRAGMA cache_size = "5000"'
144
+ execute "PRAGMA encoding = 'UTF-8'"
145
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
146
+ execute "PRAGMA journal_mode = 'WAL'"
147
+ execute "PRAGMA synchronous = 'NORMAL'"
148
+ execute "PRAGMA cache_size = '5000'"
149
+
150
+ integrity_check
136
151
 
137
152
  @db.transaction(:exclusive) do
138
153
  metadata_table_was_created = false
@@ -183,7 +198,7 @@ class FileDigests
183
198
  # Convert database from 1st to 2nd version
184
199
  unless get_metadata("digest_algorithm")
185
200
  if get_metadata("database_version") == "1"
186
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
201
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
187
202
  set_metadata("digest_algorithm", "SHA512")
188
203
  else
189
204
  set_metadata("digest_algorithm", "SHA256")
@@ -193,7 +208,7 @@ class FileDigests
193
208
  end
194
209
 
195
210
  if get_metadata("database_version") != "2"
196
- STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
211
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
197
212
  raise "Incompatible database version"
198
213
  end
199
214
  end
@@ -213,7 +228,9 @@ class FileDigests
213
228
  end
214
229
  end
215
230
 
216
- track_renames
231
+ nested_transaction do
232
+ track_renames
233
+ end
217
234
 
218
235
  if any_missing_files?
219
236
  if any_exceptions?
@@ -221,7 +238,9 @@ class FileDigests
221
238
  else
222
239
  print_missing_files
223
240
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
224
- remove_missing_files
241
+ nested_transaction do
242
+ remove_missing_files
243
+ end
225
244
  end
226
245
  end
227
246
  end
@@ -234,6 +253,7 @@ class FileDigests
234
253
  update_digest_to_new_digest new_digest, old_digest
235
254
  end
236
255
  set_metadata "digest_algorithm", @new_digest_algorithm
256
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
237
257
  end
238
258
  end
239
259
 
@@ -245,17 +265,22 @@ class FileDigests
245
265
 
246
266
  print_counters
247
267
  end
268
+ execute "PRAGMA optimize"
269
+ execute "VACUUM"
270
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
271
+
272
+ hide_database_files
248
273
  end
249
274
 
250
275
  def show_duplicates
251
276
  current_digest = nil
252
277
  query_duplicates.each do |found|
253
- if current_digest != found['digest']
278
+ if current_digest != found["digest"]
254
279
  puts "" if current_digest
255
- current_digest = found['digest']
256
- puts "#{found['digest']}:"
280
+ current_digest = found["digest"]
281
+ puts "#{found["digest"]}:"
257
282
  end
258
- puts " #{found['filename']}"
283
+ puts " #{found["filename"]}"
259
284
  end
260
285
  end
261
286
 
@@ -274,17 +299,18 @@ class FileDigests
274
299
 
275
300
  raise "File is not readable" unless stat.readable?
276
301
 
277
- if filename == "#{@digest_database_path}" ||
278
- filename == "#{@digest_database_path}-wal" ||
279
- filename == "#{@digest_database_path}-shm"
302
+ if @digest_database_files.include?(filename)
280
303
  puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
281
304
  return
282
305
  end
283
306
 
284
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
307
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
285
308
  mtime_string = time_to_database stat.mtime
309
+ digest = get_file_digest(filename)
286
310
 
287
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
311
+ nested_transaction do
312
+ process_file_indeed normalized_filename, mtime_string, digest
313
+ end
288
314
 
289
315
  rescue => exception
290
316
  @counters[:exceptions] += 1
@@ -301,25 +327,25 @@ class FileDigests
301
327
 
302
328
  def process_previously_seen_file found, filename, mtime, digest
303
329
  @missing_files.delete(filename)
304
- if found['digest'] == digest
330
+ if found["digest"] == digest
305
331
  @counters[:good] += 1
306
332
  puts "GOOD: #{filename}" if @options[:verbose]
307
333
  unless @options[:test_only]
308
- if found['mtime'] == mtime
309
- touch_digest_check_time found['id']
334
+ if found["mtime"] == mtime
335
+ touch_digest_check_time found["id"]
310
336
  else
311
- update_mtime mtime, found['id']
337
+ update_mtime mtime, found["id"]
312
338
  end
313
339
  end
314
340
  else
315
- if found['mtime'] == mtime # Digest is different and mtime is the same
341
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
316
342
  @counters[:likely_damaged] += 1
317
343
  STDERR.puts "LIKELY DAMAGED: #{filename}"
318
344
  else
319
345
  @counters[:updated] += 1
320
- puts "UPDATED: #{filename}" unless @options[:quiet]
346
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
321
347
  unless @options[:test_only]
322
- update_mtime_and_digest mtime, digest, found['id']
348
+ update_mtime_and_digest mtime, digest, found["id"]
323
349
  end
324
350
  end
325
351
  end
@@ -357,12 +383,10 @@ class FileDigests
357
383
  end
358
384
 
359
385
  def remove_missing_files
360
- nested_transaction do
361
- @missing_files.each do |filename, digest|
362
- delete_by_filename filename
363
- end
364
- @missing_files = {}
386
+ @missing_files.each do |filename, digest|
387
+ delete_by_filename filename
365
388
  end
389
+ @missing_files = {}
366
390
  end
367
391
 
368
392
 
@@ -372,6 +396,12 @@ class FileDigests
372
396
  @db.execute *args, &block
373
397
  end
374
398
 
399
+ def integrity_check
400
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
401
+ raise "Database integrity check failed"
402
+ end
403
+ end
404
+
375
405
  def nested_transaction(mode = :deferred)
376
406
  if @db.transaction_active?
377
407
  yield
@@ -393,7 +423,7 @@ class FileDigests
393
423
  end
394
424
 
395
425
  def table_exist? table_name
396
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
426
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
397
427
  end
398
428
 
399
429
  def prepare_method name, query
@@ -428,21 +458,31 @@ class FileDigests
428
458
  end
429
459
 
430
460
  def time_to_database time
431
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
461
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
462
+ end
463
+
464
+ def hide_database_files
465
+ if Gem.win_platform?
466
+ @digest_database_files.each do |file|
467
+ if File.exist?(file)
468
+ system "attrib", "+H", file, exception: true
469
+ end
470
+ end
471
+ end
432
472
  end
433
473
 
434
474
 
435
475
  # Filesystem-related helpers
436
476
 
437
477
  def patch_path_string path
438
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
478
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
439
479
  end
440
480
 
441
481
  def cleanup_path path
442
482
  Pathname.new(patch_path_string(path)).cleanpath
443
483
  end
444
484
 
445
- def ensure_dir_exists path
485
+ def ensure_dir_exist path
446
486
  if File.exist?(path)
447
487
  unless File.directory?(path)
448
488
  raise "#{path} is not a directory"
@@ -453,13 +493,13 @@ class FileDigests
453
493
  end
454
494
 
455
495
  def walk_files
456
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
496
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
457
497
  yield filename
458
498
  end
459
499
  end
460
500
 
461
501
  def get_file_digest filename
462
- File.open(filename, 'rb') do |io|
502
+ File.open(filename, "rb") do |io|
463
503
  digest = OpenSSL::Digest.new(@digest_algorithm)
464
504
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
465
505
 
@@ -501,13 +541,13 @@ class FileDigests
501
541
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
502
542
  yield
503
543
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
504
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
544
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
505
545
  end
506
546
 
507
547
  def print_file_exception exception, filename
508
548
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
509
549
  begin
510
- STDERR.print filename.encode('utf-8', universal_newline: true)
550
+ STDERR.print filename.encode("utf-8", universal_newline: true)
511
551
  rescue
512
552
  STDERR.print "(Unable to encode file name to utf-8) "
513
553
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.25
4
+ version: 0.0.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.1'
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.1'
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: