file-digests 0.0.23 → 0.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +109 -74
  3. metadata +12 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2d9f3f948e0853b515442c1b480326373309320cb91eb6aa68a5844b0d2be4e
4
- data.tar.gz: 804b6f40e5475286a6cee0ea49adb646e3fd7367e2fc73d4a105b45137ce747a
3
+ metadata.gz: 406b02c22923ae98c45dc92f2836a99dffa6dc8b2343ff62b5b5957a4a154bdc
4
+ data.tar.gz: 5898cbc3826818da8c3fa5cf16a334bcb4627f95dc2635a297cc9c7d81d25dfe
5
5
  SHA512:
6
- metadata.gz: f785ab8fe3d91bee0a59bc33a46d6c1790f1095d9ea54eb2b1a8aafe43189ec1e7c6c1353312db547d36d21aed871f7bb985638499b20719f17112fecc8d8ac0
7
- data.tar.gz: bafc97b9a8e37b3dc4bcd3bfdf534849f3e9aa33e3a17c17da30044c51bf254f53694643b3fb9be94d4bac8800177549d454012ad2371d7e68f7d78b36e61fb1
6
+ metadata.gz: '021494ba0a65daada30e55c63e489c60c018ae3b189409be200635948132f0ed5118d217388e81d8097bc991cf10fea2ce09c4a3a7c148c1fdaa8f66e6b8e074'
7
+ data.tar.gz: 3a28808aa979157a2597b5ea56ad006576d28f35678cbaa6e13d62be20aa37066a9d4684d48f5ed2990b24c42acb2ee8cd2d0f14f4f9b12f893c5b3946d5b76e
data/lib/file-digests.rb CHANGED
@@ -1,19 +1,21 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+ LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
12
13
 
13
14
  def self.canonical_digest_algorithm_name(string)
14
15
  if string
15
- index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
- index && DIGEST_ALGORITHMS[index]
16
+ algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
17
+ index = algorithms.map(&:downcase).index(string.downcase)
18
+ index && algorithms[index]
17
19
  end
18
20
  end
19
21
 
@@ -27,55 +29,60 @@ class FileDigests
27
29
 
28
30
  def self.parse_cli_options
29
31
  options = {}
30
-
32
+
31
33
  OptionParser.new do |opts|
32
34
  opts.banner = [
33
35
  "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
36
  " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
38
  ].join "\n"
37
-
38
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
39
41
  options[:auto] = true
40
42
  end
41
43
 
42
44
  opts.on(
43
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
44
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
48
  "#{digest_algorithms_list_text}.",
47
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
- 'Any time later you could specify a new algorithm to change the current one.',
49
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
50
52
  ) do |value|
51
53
  digest_algorithm = canonical_digest_algorithm_name(value)
52
- unless digest_algorithm
53
- STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ unless DIGEST_ALGORITHMS.include?(digest_algorithm)
55
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
56
  exit 1
55
57
  end
56
58
  options[:digest_algorithm] = digest_algorithm
57
59
  end
58
60
 
59
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
61
63
  end
62
64
 
63
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
68
+ end
69
+
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
65
72
  end
66
73
 
67
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
68
75
  options[:quiet] = true
69
76
  end
70
77
 
71
- opts.on("-v", "--verbose", "More verbose output") do
72
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
73
80
  end
74
81
 
75
- opts.on("-h", "--help", "Prints this help") do
76
- puts opts
77
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
78
84
  end
85
+
79
86
  end.parse!
80
87
  options
81
88
  end
@@ -93,15 +100,20 @@ class FileDigests
93
100
  initialize_paths files_path, digest_database_path
94
101
  initialize_database
95
102
 
96
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if db_digest_algorithm = get_metadata("digest_algorithm")
105
+ if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
106
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
107
+ @new_digest_algorithm = @options[:digest_algorithm]
108
+ end
109
+ else
110
+ raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
111
+ end
112
+ else
113
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
114
+ set_metadata "digest_algorithm", @digest_algorithm
99
115
  end
100
- else
101
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
- set_metadata "digest_algorithm", @digest_algorithm
103
116
  end
104
-
105
117
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
106
118
  end
107
119
 
@@ -111,7 +123,7 @@ class FileDigests
111
123
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
124
 
113
125
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
126
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
115
127
  ensure_dir_exists @digest_database_path.dirname
116
128
 
117
129
  if @options[:verbose]
@@ -123,14 +135,17 @@ class FileDigests
123
135
  def initialize_database
124
136
  @db = SQLite3::Database.new @digest_database_path.to_s
125
137
  @db.results_as_hash = true
138
+ @db.busy_timeout = 5000
126
139
 
127
140
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
141
 
129
- execute 'PRAGMA encoding = "UTF-8"'
130
- execute 'PRAGMA journal_mode = "WAL"'
131
- execute 'PRAGMA synchronous = "NORMAL"'
132
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
- execute 'PRAGMA cache_size = "5000"'
142
+ execute "PRAGMA encoding = 'UTF-8'"
143
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
144
+ execute "PRAGMA journal_mode = 'WAL'"
145
+ execute "PRAGMA synchronous = 'NORMAL'"
146
+ execute "PRAGMA cache_size = '5000'"
147
+
148
+ integrity_check
134
149
 
135
150
  @db.transaction(:exclusive) do
136
151
  metadata_table_was_created = false
@@ -181,7 +196,7 @@ class FileDigests
181
196
  # Convert database from 1st to 2nd version
182
197
  unless get_metadata("digest_algorithm")
183
198
  if get_metadata("database_version") == "1"
184
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
199
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
185
200
  set_metadata("digest_algorithm", "SHA512")
186
201
  else
187
202
  set_metadata("digest_algorithm", "SHA256")
@@ -190,6 +205,10 @@ class FileDigests
190
205
  end
191
206
  end
192
207
 
208
+ if get_metadata("database_version") != "2"
209
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
210
+ raise "Incompatible database version"
211
+ end
193
212
  end
194
213
  end
195
214
 
@@ -207,15 +226,19 @@ class FileDigests
207
226
  end
208
227
  end
209
228
 
210
- track_renames
211
-
229
+ nested_transaction do
230
+ track_renames
231
+ end
232
+
212
233
  if any_missing_files?
213
234
  if any_exceptions?
214
235
  STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
215
236
  else
216
237
  print_missing_files
217
238
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
218
- remove_missing_files
239
+ nested_transaction do
240
+ remove_missing_files
241
+ end
219
242
  end
220
243
  end
221
244
  end
@@ -228,6 +251,7 @@ class FileDigests
228
251
  update_digest_to_new_digest new_digest, old_digest
229
252
  end
230
253
  set_metadata "digest_algorithm", @new_digest_algorithm
254
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
231
255
  end
232
256
  end
233
257
 
@@ -237,6 +261,10 @@ class FileDigests
237
261
 
238
262
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
239
263
 
264
+ execute "PRAGMA optimize"
265
+ execute "VACUUM"
266
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
267
+
240
268
  print_counters
241
269
  end
242
270
  end
@@ -244,12 +272,12 @@ class FileDigests
244
272
  def show_duplicates
245
273
  current_digest = nil
246
274
  query_duplicates.each do |found|
247
- if current_digest != found['digest']
275
+ if current_digest != found["digest"]
248
276
  puts "" if current_digest
249
- current_digest = found['digest']
250
- puts "#{found['digest']}:"
277
+ current_digest = found["digest"]
278
+ puts "#{found["digest"]}:"
251
279
  end
252
- puts " #{found['filename']}"
280
+ puts " #{found["filename"]}"
253
281
  end
254
282
  end
255
283
 
@@ -275,10 +303,13 @@ class FileDigests
275
303
  return
276
304
  end
277
305
 
278
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
306
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
279
307
  mtime_string = time_to_database stat.mtime
308
+ digest = get_file_digest(filename)
280
309
 
281
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
310
+ nested_transaction do
311
+ process_file_indeed normalized_filename, mtime_string, digest
312
+ end
282
313
 
283
314
  rescue => exception
284
315
  @counters[:exceptions] += 1
@@ -295,25 +326,25 @@ class FileDigests
295
326
 
296
327
  def process_previously_seen_file found, filename, mtime, digest
297
328
  @missing_files.delete(filename)
298
- if found['digest'] == digest
329
+ if found["digest"] == digest
299
330
  @counters[:good] += 1
300
331
  puts "GOOD: #{filename}" if @options[:verbose]
301
332
  unless @options[:test_only]
302
- if found['mtime'] == mtime
303
- touch_digest_check_time found['id']
333
+ if found["mtime"] == mtime
334
+ touch_digest_check_time found["id"]
304
335
  else
305
- update_mtime mtime, found['id']
336
+ update_mtime mtime, found["id"]
306
337
  end
307
338
  end
308
339
  else
309
- if found['mtime'] == mtime # Digest is different and mtime is the same
340
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
310
341
  @counters[:likely_damaged] += 1
311
342
  STDERR.puts "LIKELY DAMAGED: #{filename}"
312
343
  else
313
344
  @counters[:updated] += 1
314
- puts "UPDATED: #{filename}" unless @options[:quiet]
345
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
315
346
  unless @options[:test_only]
316
- update_mtime_and_digest mtime, digest, found['id']
347
+ update_mtime_and_digest mtime, digest, found["id"]
317
348
  end
318
349
  end
319
350
  end
@@ -351,12 +382,10 @@ class FileDigests
351
382
  end
352
383
 
353
384
  def remove_missing_files
354
- nested_transaction do
355
- @missing_files.each do |filename, digest|
356
- delete_by_filename filename
357
- end
358
- @missing_files = {}
385
+ @missing_files.each do |filename, digest|
386
+ delete_by_filename filename
359
387
  end
388
+ @missing_files = {}
360
389
  end
361
390
 
362
391
 
@@ -366,7 +395,13 @@ class FileDigests
366
395
  @db.execute *args, &block
367
396
  end
368
397
 
369
- def nested_transaction(mode)
398
+ def integrity_check
399
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
400
+ raise "Database integrity check failed"
401
+ end
402
+ end
403
+
404
+ def nested_transaction(mode = :deferred)
370
405
  if @db.transaction_active?
371
406
  yield
372
407
  else
@@ -376,9 +411,9 @@ class FileDigests
376
411
  end
377
412
  end
378
413
 
379
- def perhaps_transaction(condition, mode)
414
+ def perhaps_transaction(condition, mode = :deferred)
380
415
  if condition
381
- @db.transaction(mode) do
416
+ nested_transaction(mode) do
382
417
  yield
383
418
  end
384
419
  else
@@ -387,7 +422,7 @@ class FileDigests
387
422
  end
388
423
 
389
424
  def table_exist? table_name
390
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
425
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
391
426
  end
392
427
 
393
428
  def prepare_method name, query
@@ -422,14 +457,14 @@ class FileDigests
422
457
  end
423
458
 
424
459
  def time_to_database time
425
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
460
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
426
461
  end
427
462
 
428
463
 
429
464
  # Filesystem-related helpers
430
465
 
431
466
  def patch_path_string path
432
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
467
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
433
468
  end
434
469
 
435
470
  def cleanup_path path
@@ -447,13 +482,13 @@ class FileDigests
447
482
  end
448
483
 
449
484
  def walk_files
450
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
485
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
451
486
  yield filename
452
487
  end
453
488
  end
454
489
 
455
490
  def get_file_digest filename
456
- File.open(filename, 'rb') do |io|
491
+ File.open(filename, "rb") do |io|
457
492
  digest = OpenSSL::Digest.new(@digest_algorithm)
458
493
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
459
494
 
@@ -495,13 +530,13 @@ class FileDigests
495
530
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
496
531
  yield
497
532
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
498
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
533
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
499
534
  end
500
535
 
501
536
  def print_file_exception exception, filename
502
537
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
503
538
  begin
504
- STDERR.print filename.encode('utf-8', universal_newline: true)
539
+ STDERR.print filename.encode("utf-8", universal_newline: true)
505
540
  rescue
506
541
  STDERR.print "(Unable to encode file name to utf-8) "
507
542
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.23
4
+ version: 0.0.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.3.0
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.3.0
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.1.0
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.1.0
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: