file-digests 0.0.24 → 0.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +123 -80
  3. metadata +8 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1816d453ddc057c7a8eef4a0c81f63699f9ba10fdee404488fa8dc945bac791d
4
- data.tar.gz: ae5baf350ac64577f0807b81231f045d875eff6351c78e559df46379ad8d6b1e
3
+ metadata.gz: cab5bc80dc7949984501c3068bf99f44f82564774733e84b1cd6810fd0cb6a05
4
+ data.tar.gz: 97a4220b83a08408345b21a60256e55bbbe0a5fb30e6c97efa05a4faac42d356
5
5
  SHA512:
6
- metadata.gz: d76b3e25d709e17e7260c0ab7836f016107c7335461f7d8ab6a74b391067fbfb1691abd6286fa3894aa432b9af229ba417972ba08c5d1524dd06d06c8dffe653
7
- data.tar.gz: 9e39f4ae5d19268c986fac6d6d3bc47e2be66bb60857be608c033670ccd4b7ba3f0c5a8242a18e9ebaf161b8f5b230c040f931405e32a5a53d840e250c46fe9f
6
+ metadata.gz: 5eaae0823ccb95bb3db6245e0863ff37aaa6f14f599bd92d040c8085e753aaa8d3be7903d1039a433bded9011bdc73c6a0d42e3e10d1e2c52eb81b6b791a5bc1
7
+ data.tar.gz: 97f7fa2ac8605fc1775570accf44a93c73e67a11699b7e98bfa9687f9e2c0219ad1890c1a293f52ca901079e602f6cf6863b5f57de174dd39e36ec338d6597f7
data/lib/file-digests.rb CHANGED
@@ -1,19 +1,21 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+ LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
12
13
 
13
14
  def self.canonical_digest_algorithm_name(string)
14
15
  if string
15
- index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
- index && DIGEST_ALGORITHMS[index]
16
+ algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
17
+ index = algorithms.map(&:downcase).index(string.downcase)
18
+ index && algorithms[index]
17
19
  end
18
20
  end
19
21
 
@@ -27,55 +29,60 @@ class FileDigests
27
29
 
28
30
  def self.parse_cli_options
29
31
  options = {}
30
-
32
+
31
33
  OptionParser.new do |opts|
32
34
  opts.banner = [
33
35
  "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
36
  " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
38
  ].join "\n"
37
-
38
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
39
41
  options[:auto] = true
40
42
  end
41
43
 
42
44
  opts.on(
43
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
44
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
48
  "#{digest_algorithms_list_text}.",
47
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
- 'Any time later you could specify a new algorithm to change the current one.',
49
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
50
52
  ) do |value|
51
53
  digest_algorithm = canonical_digest_algorithm_name(value)
52
- unless digest_algorithm
53
- STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ unless DIGEST_ALGORITHMS.include?(digest_algorithm)
55
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
56
  exit 1
55
57
  end
56
58
  options[:digest_algorithm] = digest_algorithm
57
59
  end
58
60
 
59
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
61
63
  end
62
64
 
63
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
65
68
  end
66
69
 
67
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
72
+ end
73
+
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
68
75
  options[:quiet] = true
69
76
  end
70
77
 
71
- opts.on("-v", "--verbose", "More verbose output") do
72
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
73
80
  end
74
81
 
75
- opts.on("-h", "--help", "Prints this help") do
76
- puts opts
77
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
78
84
  end
85
+
79
86
  end.parse!
80
87
  options
81
88
  end
@@ -93,15 +100,20 @@ class FileDigests
93
100
  initialize_paths files_path, digest_database_path
94
101
  initialize_database
95
102
 
96
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if db_digest_algorithm = get_metadata("digest_algorithm")
105
+ if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
106
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
107
+ @new_digest_algorithm = @options[:digest_algorithm]
108
+ end
109
+ else
110
+ raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
111
+ end
112
+ else
113
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
114
+ set_metadata "digest_algorithm", @digest_algorithm
99
115
  end
100
- else
101
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
- set_metadata "digest_algorithm", @digest_algorithm
103
116
  end
104
-
105
117
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
106
118
  end
107
119
 
@@ -111,8 +123,10 @@ class FileDigests
111
123
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
124
 
113
125
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
115
- ensure_dir_exists @digest_database_path.dirname
126
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
127
+ ensure_dir_exist @digest_database_path.dirname
128
+
129
+ @digest_database_files = ["#{@digest_database_path}", "#{@digest_database_path}-wal", "#{@digest_database_path}-shm"]
116
130
 
117
131
  if @options[:verbose]
118
132
  puts "Target directory: #{@files_path}"
@@ -123,14 +137,17 @@ class FileDigests
123
137
  def initialize_database
124
138
  @db = SQLite3::Database.new @digest_database_path.to_s
125
139
  @db.results_as_hash = true
140
+ @db.busy_timeout = 5000
126
141
 
127
142
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
143
 
129
- execute 'PRAGMA encoding = "UTF-8"'
130
- execute 'PRAGMA journal_mode = "WAL"'
131
- execute 'PRAGMA synchronous = "NORMAL"'
132
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
- execute 'PRAGMA cache_size = "5000"'
144
+ execute "PRAGMA encoding = 'UTF-8'"
145
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
146
+ execute "PRAGMA journal_mode = 'WAL'"
147
+ execute "PRAGMA synchronous = 'NORMAL'"
148
+ execute "PRAGMA cache_size = '5000'"
149
+
150
+ integrity_check
134
151
 
135
152
  @db.transaction(:exclusive) do
136
153
  metadata_table_was_created = false
@@ -181,7 +198,7 @@ class FileDigests
181
198
  # Convert database from 1st to 2nd version
182
199
  unless get_metadata("digest_algorithm")
183
200
  if get_metadata("database_version") == "1"
184
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
201
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
185
202
  set_metadata("digest_algorithm", "SHA512")
186
203
  else
187
204
  set_metadata("digest_algorithm", "SHA256")
@@ -191,7 +208,7 @@ class FileDigests
191
208
  end
192
209
 
193
210
  if get_metadata("database_version") != "2"
194
- STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
211
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
195
212
  raise "Incompatible database version"
196
213
  end
197
214
  end
@@ -211,15 +228,19 @@ class FileDigests
211
228
  end
212
229
  end
213
230
 
214
- track_renames
215
-
231
+ nested_transaction do
232
+ track_renames
233
+ end
234
+
216
235
  if any_missing_files?
217
236
  if any_exceptions?
218
237
  STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
219
238
  else
220
239
  print_missing_files
221
240
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
222
- remove_missing_files
241
+ nested_transaction do
242
+ remove_missing_files
243
+ end
223
244
  end
224
245
  end
225
246
  end
@@ -232,6 +253,7 @@ class FileDigests
232
253
  update_digest_to_new_digest new_digest, old_digest
233
254
  end
234
255
  set_metadata "digest_algorithm", @new_digest_algorithm
256
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
235
257
  end
236
258
  end
237
259
 
@@ -241,6 +263,12 @@ class FileDigests
241
263
 
242
264
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
243
265
 
266
+ execute "PRAGMA optimize"
267
+ execute "VACUUM"
268
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
269
+
270
+ hide_database_files
271
+
244
272
  print_counters
245
273
  end
246
274
  end
@@ -248,12 +276,12 @@ class FileDigests
248
276
  def show_duplicates
249
277
  current_digest = nil
250
278
  query_duplicates.each do |found|
251
- if current_digest != found['digest']
279
+ if current_digest != found["digest"]
252
280
  puts "" if current_digest
253
- current_digest = found['digest']
254
- puts "#{found['digest']}:"
281
+ current_digest = found["digest"]
282
+ puts "#{found["digest"]}:"
255
283
  end
256
- puts " #{found['filename']}"
284
+ puts " #{found["filename"]}"
257
285
  end
258
286
  end
259
287
 
@@ -272,17 +300,18 @@ class FileDigests
272
300
 
273
301
  raise "File is not readable" unless stat.readable?
274
302
 
275
- if filename == "#{@digest_database_path}" ||
276
- filename == "#{@digest_database_path}-wal" ||
277
- filename == "#{@digest_database_path}-shm"
303
+ if @digest_database_files.include?(filename)
278
304
  puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
279
305
  return
280
306
  end
281
307
 
282
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
308
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
283
309
  mtime_string = time_to_database stat.mtime
310
+ digest = get_file_digest(filename)
284
311
 
285
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
312
+ nested_transaction do
313
+ process_file_indeed normalized_filename, mtime_string, digest
314
+ end
286
315
 
287
316
  rescue => exception
288
317
  @counters[:exceptions] += 1
@@ -299,25 +328,25 @@ class FileDigests
299
328
 
300
329
  def process_previously_seen_file found, filename, mtime, digest
301
330
  @missing_files.delete(filename)
302
- if found['digest'] == digest
331
+ if found["digest"] == digest
303
332
  @counters[:good] += 1
304
333
  puts "GOOD: #{filename}" if @options[:verbose]
305
334
  unless @options[:test_only]
306
- if found['mtime'] == mtime
307
- touch_digest_check_time found['id']
335
+ if found["mtime"] == mtime
336
+ touch_digest_check_time found["id"]
308
337
  else
309
- update_mtime mtime, found['id']
338
+ update_mtime mtime, found["id"]
310
339
  end
311
340
  end
312
341
  else
313
- if found['mtime'] == mtime # Digest is different and mtime is the same
342
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
314
343
  @counters[:likely_damaged] += 1
315
344
  STDERR.puts "LIKELY DAMAGED: #{filename}"
316
345
  else
317
346
  @counters[:updated] += 1
318
- puts "UPDATED: #{filename}" unless @options[:quiet]
347
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
319
348
  unless @options[:test_only]
320
- update_mtime_and_digest mtime, digest, found['id']
349
+ update_mtime_and_digest mtime, digest, found["id"]
321
350
  end
322
351
  end
323
352
  end
@@ -355,12 +384,10 @@ class FileDigests
355
384
  end
356
385
 
357
386
  def remove_missing_files
358
- nested_transaction do
359
- @missing_files.each do |filename, digest|
360
- delete_by_filename filename
361
- end
362
- @missing_files = {}
387
+ @missing_files.each do |filename, digest|
388
+ delete_by_filename filename
363
389
  end
390
+ @missing_files = {}
364
391
  end
365
392
 
366
393
 
@@ -370,7 +397,13 @@ class FileDigests
370
397
  @db.execute *args, &block
371
398
  end
372
399
 
373
- def nested_transaction(mode)
400
+ def integrity_check
401
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
402
+ raise "Database integrity check failed"
403
+ end
404
+ end
405
+
406
+ def nested_transaction(mode = :deferred)
374
407
  if @db.transaction_active?
375
408
  yield
376
409
  else
@@ -380,9 +413,9 @@ class FileDigests
380
413
  end
381
414
  end
382
415
 
383
- def perhaps_transaction(condition, mode)
416
+ def perhaps_transaction(condition, mode = :deferred)
384
417
  if condition
385
- @db.transaction(mode) do
418
+ nested_transaction(mode) do
386
419
  yield
387
420
  end
388
421
  else
@@ -391,7 +424,7 @@ class FileDigests
391
424
  end
392
425
 
393
426
  def table_exist? table_name
394
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
427
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
395
428
  end
396
429
 
397
430
  def prepare_method name, query
@@ -426,21 +459,31 @@ class FileDigests
426
459
  end
427
460
 
428
461
  def time_to_database time
429
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
462
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
463
+ end
464
+
465
+ def hide_database_files
466
+ if Gem.win_platform?
467
+ @digest_database_files.each do |file|
468
+ if File.exist?(file)
469
+ system "attrib", "+H", file, exception: true
470
+ end
471
+ end
472
+ end
430
473
  end
431
474
 
432
475
 
433
476
  # Filesystem-related helpers
434
477
 
435
478
  def patch_path_string path
436
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
479
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
437
480
  end
438
481
 
439
482
  def cleanup_path path
440
483
  Pathname.new(patch_path_string(path)).cleanpath
441
484
  end
442
485
 
443
- def ensure_dir_exists path
486
+ def ensure_dir_exist path
444
487
  if File.exist?(path)
445
488
  unless File.directory?(path)
446
489
  raise "#{path} is not a directory"
@@ -451,13 +494,13 @@ class FileDigests
451
494
  end
452
495
 
453
496
  def walk_files
454
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
497
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
455
498
  yield filename
456
499
  end
457
500
  end
458
501
 
459
502
  def get_file_digest filename
460
- File.open(filename, 'rb') do |io|
503
+ File.open(filename, "rb") do |io|
461
504
  digest = OpenSSL::Digest.new(@digest_algorithm)
462
505
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
463
506
 
@@ -499,13 +542,13 @@ class FileDigests
499
542
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
500
543
  yield
501
544
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
502
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
545
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
503
546
  end
504
547
 
505
548
  def print_file_exception exception, filename
506
549
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
507
550
  begin
508
- STDERR.print filename.encode('utf-8', universal_newline: true)
551
+ STDERR.print filename.encode("utf-8", universal_newline: true)
509
552
  rescue
510
553
  STDERR.print "(Unable to encode file name to utf-8) "
511
554
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.0.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.3'
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.3'
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.1'
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.1'
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: