file-digests 0.0.24 → 0.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +123 -80
  3. metadata +8 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1816d453ddc057c7a8eef4a0c81f63699f9ba10fdee404488fa8dc945bac791d
4
- data.tar.gz: ae5baf350ac64577f0807b81231f045d875eff6351c78e559df46379ad8d6b1e
3
+ metadata.gz: cab5bc80dc7949984501c3068bf99f44f82564774733e84b1cd6810fd0cb6a05
4
+ data.tar.gz: 97a4220b83a08408345b21a60256e55bbbe0a5fb30e6c97efa05a4faac42d356
5
5
  SHA512:
6
- metadata.gz: d76b3e25d709e17e7260c0ab7836f016107c7335461f7d8ab6a74b391067fbfb1691abd6286fa3894aa432b9af229ba417972ba08c5d1524dd06d06c8dffe653
7
- data.tar.gz: 9e39f4ae5d19268c986fac6d6d3bc47e2be66bb60857be608c033670ccd4b7ba3f0c5a8242a18e9ebaf161b8f5b230c040f931405e32a5a53d840e250c46fe9f
6
+ metadata.gz: 5eaae0823ccb95bb3db6245e0863ff37aaa6f14f599bd92d040c8085e753aaa8d3be7903d1039a433bded9011bdc73c6a0d42e3e10d1e2c52eb81b6b791a5bc1
7
+ data.tar.gz: 97f7fa2ac8605fc1775570accf44a93c73e67a11699b7e98bfa9687f9e2c0219ad1890c1a293f52ca901079e602f6cf6863b5f57de174dd39e36ec338d6597f7
data/lib/file-digests.rb CHANGED
@@ -1,19 +1,21 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+ LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
12
13
 
13
14
  def self.canonical_digest_algorithm_name(string)
14
15
  if string
15
- index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
- index && DIGEST_ALGORITHMS[index]
16
+ algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
17
+ index = algorithms.map(&:downcase).index(string.downcase)
18
+ index && algorithms[index]
17
19
  end
18
20
  end
19
21
 
@@ -27,55 +29,60 @@ class FileDigests
27
29
 
28
30
  def self.parse_cli_options
29
31
  options = {}
30
-
32
+
31
33
  OptionParser.new do |opts|
32
34
  opts.banner = [
33
35
  "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
36
  " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
38
  ].join "\n"
37
-
38
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
39
41
  options[:auto] = true
40
42
  end
41
43
 
42
44
  opts.on(
43
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
44
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
48
  "#{digest_algorithms_list_text}.",
47
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
- 'Any time later you could specify a new algorithm to change the current one.',
49
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
50
52
  ) do |value|
51
53
  digest_algorithm = canonical_digest_algorithm_name(value)
52
- unless digest_algorithm
53
- STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ unless DIGEST_ALGORITHMS.include?(digest_algorithm)
55
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
56
  exit 1
55
57
  end
56
58
  options[:digest_algorithm] = digest_algorithm
57
59
  end
58
60
 
59
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
61
63
  end
62
64
 
63
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
65
68
  end
66
69
 
67
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
72
+ end
73
+
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
68
75
  options[:quiet] = true
69
76
  end
70
77
 
71
- opts.on("-v", "--verbose", "More verbose output") do
72
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
73
80
  end
74
81
 
75
- opts.on("-h", "--help", "Prints this help") do
76
- puts opts
77
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
78
84
  end
85
+
79
86
  end.parse!
80
87
  options
81
88
  end
@@ -93,15 +100,20 @@ class FileDigests
93
100
  initialize_paths files_path, digest_database_path
94
101
  initialize_database
95
102
 
96
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if db_digest_algorithm = get_metadata("digest_algorithm")
105
+ if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
106
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
107
+ @new_digest_algorithm = @options[:digest_algorithm]
108
+ end
109
+ else
110
+ raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
111
+ end
112
+ else
113
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
114
+ set_metadata "digest_algorithm", @digest_algorithm
99
115
  end
100
- else
101
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
- set_metadata "digest_algorithm", @digest_algorithm
103
116
  end
104
-
105
117
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
106
118
  end
107
119
 
@@ -111,8 +123,10 @@ class FileDigests
111
123
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
124
 
113
125
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
115
- ensure_dir_exists @digest_database_path.dirname
126
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
127
+ ensure_dir_exist @digest_database_path.dirname
128
+
129
+ @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
116
130
 
117
131
  if @options[:verbose]
118
132
  puts "Target directory: #{@files_path}"
@@ -123,14 +137,17 @@ class FileDigests
123
137
  def initialize_database
124
138
  @db = SQLite3::Database.new @digest_database_path.to_s
125
139
  @db.results_as_hash = true
140
+ @db.busy_timeout = 5000
126
141
 
127
142
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
143
 
129
- execute 'PRAGMA encoding = "UTF-8"'
130
- execute 'PRAGMA journal_mode = "WAL"'
131
- execute 'PRAGMA synchronous = "NORMAL"'
132
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
- execute 'PRAGMA cache_size = "5000"'
144
+ execute "PRAGMA encoding = 'UTF-8'"
145
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
146
+ execute "PRAGMA journal_mode = 'WAL'"
147
+ execute "PRAGMA synchronous = 'NORMAL'"
148
+ execute "PRAGMA cache_size = '5000'"
149
+
150
+ integrity_check
134
151
 
135
152
  @db.transaction(:exclusive) do
136
153
  metadata_table_was_created = false
@@ -181,7 +198,7 @@ class FileDigests
181
198
  # Convert database from 1st to 2nd version
182
199
  unless get_metadata("digest_algorithm")
183
200
  if get_metadata("database_version") == "1"
184
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
201
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
185
202
  set_metadata("digest_algorithm", "SHA512")
186
203
  else
187
204
  set_metadata("digest_algorithm", "SHA256")
@@ -191,7 +208,7 @@ class FileDigests
191
208
  end
192
209
 
193
210
  if get_metadata("database_version") != "2"
194
- STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
211
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
195
212
  raise "Incompatible database version"
196
213
  end
197
214
  end
@@ -211,15 +228,19 @@ class FileDigests
211
228
  end
212
229
  end
213
230
 
214
- track_renames
215
-
231
+ nested_transaction do
232
+ track_renames
233
+ end
234
+
216
235
  if any_missing_files?
217
236
  if any_exceptions?
218
237
  STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
219
238
  else
220
239
  print_missing_files
221
240
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
222
- remove_missing_files
241
+ nested_transaction do
242
+ remove_missing_files
243
+ end
223
244
  end
224
245
  end
225
246
  end
@@ -232,6 +253,7 @@ class FileDigests
232
253
  update_digest_to_new_digest new_digest, old_digest
233
254
  end
234
255
  set_metadata "digest_algorithm", @new_digest_algorithm
256
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
235
257
  end
236
258
  end
237
259
 
@@ -241,6 +263,12 @@ class FileDigests
241
263
 
242
264
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
243
265
 
266
+ execute "PRAGMA optimize"
267
+ execute "VACUUM"
268
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
269
+
270
+ hide_database_files
271
+
244
272
  print_counters
245
273
  end
246
274
  end
@@ -248,12 +276,12 @@ class FileDigests
248
276
  def show_duplicates
249
277
  current_digest = nil
250
278
  query_duplicates.each do |found|
251
- if current_digest != found['digest']
279
+ if current_digest != found["digest"]
252
280
  puts "" if current_digest
253
- current_digest = found['digest']
254
- puts "#{found['digest']}:"
281
+ current_digest = found["digest"]
282
+ puts "#{found["digest"]}:"
255
283
  end
256
- puts " #{found['filename']}"
284
+ puts " #{found["filename"]}"
257
285
  end
258
286
  end
259
287
 
@@ -272,17 +300,18 @@ class FileDigests
272
300
 
273
301
  raise "File is not readable" unless stat.readable?
274
302
 
275
- if filename == "#{@digest_database_path}" ||
276
- filename == "#{@digest_database_path}-wal" ||
277
- filename == "#{@digest_database_path}-shm"
303
+ if @digest_database_files.include?(filename)
278
304
  puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
279
305
  return
280
306
  end
281
307
 
282
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
308
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
283
309
  mtime_string = time_to_database stat.mtime
310
+ digest = get_file_digest(filename)
284
311
 
285
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
312
+ nested_transaction do
313
+ process_file_indeed normalized_filename, mtime_string, digest
314
+ end
286
315
 
287
316
  rescue => exception
288
317
  @counters[:exceptions] += 1
@@ -299,25 +328,25 @@ class FileDigests
299
328
 
300
329
  def process_previously_seen_file found, filename, mtime, digest
301
330
  @missing_files.delete(filename)
302
- if found['digest'] == digest
331
+ if found["digest"] == digest
303
332
  @counters[:good] += 1
304
333
  puts "GOOD: #{filename}" if @options[:verbose]
305
334
  unless @options[:test_only]
306
- if found['mtime'] == mtime
307
- touch_digest_check_time found['id']
335
+ if found["mtime"] == mtime
336
+ touch_digest_check_time found["id"]
308
337
  else
309
- update_mtime mtime, found['id']
338
+ update_mtime mtime, found["id"]
310
339
  end
311
340
  end
312
341
  else
313
- if found['mtime'] == mtime # Digest is different and mtime is the same
342
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
314
343
  @counters[:likely_damaged] += 1
315
344
  STDERR.puts "LIKELY DAMAGED: #{filename}"
316
345
  else
317
346
  @counters[:updated] += 1
318
- puts "UPDATED: #{filename}" unless @options[:quiet]
347
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
319
348
  unless @options[:test_only]
320
- update_mtime_and_digest mtime, digest, found['id']
349
+ update_mtime_and_digest mtime, digest, found["id"]
321
350
  end
322
351
  end
323
352
  end
@@ -355,12 +384,10 @@ class FileDigests
355
384
  end
356
385
 
357
386
  def remove_missing_files
358
- nested_transaction do
359
- @missing_files.each do |filename, digest|
360
- delete_by_filename filename
361
- end
362
- @missing_files = {}
387
+ @missing_files.each do |filename, digest|
388
+ delete_by_filename filename
363
389
  end
390
+ @missing_files = {}
364
391
  end
365
392
 
366
393
 
@@ -370,7 +397,13 @@ class FileDigests
370
397
  @db.execute *args, &block
371
398
  end
372
399
 
373
- def nested_transaction(mode)
400
+ def integrity_check
401
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
402
+ raise "Database integrity check failed"
403
+ end
404
+ end
405
+
406
+ def nested_transaction(mode = :deferred)
374
407
  if @db.transaction_active?
375
408
  yield
376
409
  else
@@ -380,9 +413,9 @@ class FileDigests
380
413
  end
381
414
  end
382
415
 
383
- def perhaps_transaction(condition, mode)
416
+ def perhaps_transaction(condition, mode = :deferred)
384
417
  if condition
385
- @db.transaction(mode) do
418
+ nested_transaction(mode) do
386
419
  yield
387
420
  end
388
421
  else
@@ -391,7 +424,7 @@ class FileDigests
391
424
  end
392
425
 
393
426
  def table_exist? table_name
394
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
427
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
395
428
  end
396
429
 
397
430
  def prepare_method name, query
@@ -426,21 +459,31 @@ class FileDigests
426
459
  end
427
460
 
428
461
  def time_to_database time
429
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
462
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
463
+ end
464
+
465
+ def hide_database_files
466
+ if Gem.win_platform?
467
+ @digest_database_files.each do |file|
468
+ if File.exist?(file)
469
+ system "attrib", "+H", file, exception: true
470
+ end
471
+ end
472
+ end
430
473
  end
431
474
 
432
475
 
433
476
  # Filesystem-related helpers
434
477
 
435
478
  def patch_path_string path
436
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
479
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
437
480
  end
438
481
 
439
482
  def cleanup_path path
440
483
  Pathname.new(patch_path_string(path)).cleanpath
441
484
  end
442
485
 
443
- def ensure_dir_exists path
486
+ def ensure_dir_exist path
444
487
  if File.exist?(path)
445
488
  unless File.directory?(path)
446
489
  raise "#{path} is not a directory"
@@ -451,13 +494,13 @@ class FileDigests
451
494
  end
452
495
 
453
496
  def walk_files
454
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
497
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
455
498
  yield filename
456
499
  end
457
500
  end
458
501
 
459
502
  def get_file_digest filename
460
- File.open(filename, 'rb') do |io|
503
+ File.open(filename, "rb") do |io|
461
504
  digest = OpenSSL::Digest.new(@digest_algorithm)
462
505
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
463
506
 
@@ -499,13 +542,13 @@ class FileDigests
499
542
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
500
543
  yield
501
544
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
502
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
545
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
503
546
  end
504
547
 
505
548
  def print_file_exception exception, filename
506
549
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
507
550
  begin
508
- STDERR.print filename.encode('utf-8', universal_newline: true)
551
+ STDERR.print filename.encode("utf-8", universal_newline: true)
509
552
  rescue
510
553
  STDERR.print "(Unable to encode file name to utf-8) "
511
554
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.0.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.1'
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.1'
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: