file-digests 0.0.23 → 0.0.28

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +109 -74
  3. metadata +12 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2d9f3f948e0853b515442c1b480326373309320cb91eb6aa68a5844b0d2be4e
4
- data.tar.gz: 804b6f40e5475286a6cee0ea49adb646e3fd7367e2fc73d4a105b45137ce747a
3
+ metadata.gz: 406b02c22923ae98c45dc92f2836a99dffa6dc8b2343ff62b5b5957a4a154bdc
4
+ data.tar.gz: 5898cbc3826818da8c3fa5cf16a334bcb4627f95dc2635a297cc9c7d81d25dfe
5
5
  SHA512:
6
- metadata.gz: f785ab8fe3d91bee0a59bc33a46d6c1790f1095d9ea54eb2b1a8aafe43189ec1e7c6c1353312db547d36d21aed871f7bb985638499b20719f17112fecc8d8ac0
7
- data.tar.gz: bafc97b9a8e37b3dc4bcd3bfdf534849f3e9aa33e3a17c17da30044c51bf254f53694643b3fb9be94d4bac8800177549d454012ad2371d7e68f7d78b36e61fb1
6
+ metadata.gz: '021494ba0a65daada30e55c63e489c60c018ae3b189409be200635948132f0ed5118d217388e81d8097bc991cf10fea2ce09c4a3a7c148c1fdaa8f66e6b8e074'
7
+ data.tar.gz: 3a28808aa979157a2597b5ea56ad006576d28f35678cbaa6e13d62be20aa37066a9d4684d48f5ed2990b24c42acb2ee8cd2d0f14f4f9b12f893c5b3946d5b76e
@@ -1,19 +1,21 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+ LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
12
13
 
13
14
  def self.canonical_digest_algorithm_name(string)
14
15
  if string
15
- index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
- index && DIGEST_ALGORITHMS[index]
16
+ algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
17
+ index = algorithms.map(&:downcase).index(string.downcase)
18
+ index && algorithms[index]
17
19
  end
18
20
  end
19
21
 
@@ -27,55 +29,60 @@ class FileDigests
27
29
 
28
30
  def self.parse_cli_options
29
31
  options = {}
30
-
32
+
31
33
  OptionParser.new do |opts|
32
34
  opts.banner = [
33
35
  "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
36
  " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
38
  ].join "\n"
37
-
38
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
39
41
  options[:auto] = true
40
42
  end
41
43
 
42
44
  opts.on(
43
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
44
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
48
  "#{digest_algorithms_list_text}.",
47
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
- 'Any time later you could specify a new algorithm to change the current one.',
49
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
50
52
  ) do |value|
51
53
  digest_algorithm = canonical_digest_algorithm_name(value)
52
- unless digest_algorithm
53
- STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ unless DIGEST_ALGORITHMS.include?(digest_algorithm)
55
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
56
  exit 1
55
57
  end
56
58
  options[:digest_algorithm] = digest_algorithm
57
59
  end
58
60
 
59
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
61
63
  end
62
64
 
63
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
68
+ end
69
+
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
65
72
  end
66
73
 
67
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
68
75
  options[:quiet] = true
69
76
  end
70
77
 
71
- opts.on("-v", "--verbose", "More verbose output") do
72
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
73
80
  end
74
81
 
75
- opts.on("-h", "--help", "Prints this help") do
76
- puts opts
77
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
78
84
  end
85
+
79
86
  end.parse!
80
87
  options
81
88
  end
@@ -93,15 +100,20 @@ class FileDigests
93
100
  initialize_paths files_path, digest_database_path
94
101
  initialize_database
95
102
 
96
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if db_digest_algorithm = get_metadata("digest_algorithm")
105
+ if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
106
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
107
+ @new_digest_algorithm = @options[:digest_algorithm]
108
+ end
109
+ else
110
+ raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
111
+ end
112
+ else
113
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
114
+ set_metadata "digest_algorithm", @digest_algorithm
99
115
  end
100
- else
101
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
- set_metadata "digest_algorithm", @digest_algorithm
103
116
  end
104
-
105
117
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
106
118
  end
107
119
 
@@ -111,7 +123,7 @@ class FileDigests
111
123
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
124
 
113
125
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
126
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
115
127
  ensure_dir_exists @digest_database_path.dirname
116
128
 
117
129
  if @options[:verbose]
@@ -123,14 +135,17 @@ class FileDigests
123
135
  def initialize_database
124
136
  @db = SQLite3::Database.new @digest_database_path.to_s
125
137
  @db.results_as_hash = true
138
+ @db.busy_timeout = 5000
126
139
 
127
140
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
141
 
129
- execute 'PRAGMA encoding = "UTF-8"'
130
- execute 'PRAGMA journal_mode = "WAL"'
131
- execute 'PRAGMA synchronous = "NORMAL"'
132
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
- execute 'PRAGMA cache_size = "5000"'
142
+ execute "PRAGMA encoding = 'UTF-8'"
143
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
144
+ execute "PRAGMA journal_mode = 'WAL'"
145
+ execute "PRAGMA synchronous = 'NORMAL'"
146
+ execute "PRAGMA cache_size = '5000'"
147
+
148
+ integrity_check
134
149
 
135
150
  @db.transaction(:exclusive) do
136
151
  metadata_table_was_created = false
@@ -181,7 +196,7 @@ class FileDigests
181
196
  # Convert database from 1st to 2nd version
182
197
  unless get_metadata("digest_algorithm")
183
198
  if get_metadata("database_version") == "1"
184
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
199
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
185
200
  set_metadata("digest_algorithm", "SHA512")
186
201
  else
187
202
  set_metadata("digest_algorithm", "SHA256")
@@ -190,6 +205,10 @@ class FileDigests
190
205
  end
191
206
  end
192
207
 
208
+ if get_metadata("database_version") != "2"
209
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
210
+ raise "Incompatible database version"
211
+ end
193
212
  end
194
213
  end
195
214
 
@@ -207,15 +226,19 @@ class FileDigests
207
226
  end
208
227
  end
209
228
 
210
- track_renames
211
-
229
+ nested_transaction do
230
+ track_renames
231
+ end
232
+
212
233
  if any_missing_files?
213
234
  if any_exceptions?
214
235
  STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
215
236
  else
216
237
  print_missing_files
217
238
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
218
- remove_missing_files
239
+ nested_transaction do
240
+ remove_missing_files
241
+ end
219
242
  end
220
243
  end
221
244
  end
@@ -228,6 +251,7 @@ class FileDigests
228
251
  update_digest_to_new_digest new_digest, old_digest
229
252
  end
230
253
  set_metadata "digest_algorithm", @new_digest_algorithm
254
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
231
255
  end
232
256
  end
233
257
 
@@ -237,6 +261,10 @@ class FileDigests
237
261
 
238
262
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
239
263
 
264
+ execute "PRAGMA optimize"
265
+ execute "VACUUM"
266
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
267
+
240
268
  print_counters
241
269
  end
242
270
  end
@@ -244,12 +272,12 @@ class FileDigests
244
272
  def show_duplicates
245
273
  current_digest = nil
246
274
  query_duplicates.each do |found|
247
- if current_digest != found['digest']
275
+ if current_digest != found["digest"]
248
276
  puts "" if current_digest
249
- current_digest = found['digest']
250
- puts "#{found['digest']}:"
277
+ current_digest = found["digest"]
278
+ puts "#{found["digest"]}:"
251
279
  end
252
- puts " #{found['filename']}"
280
+ puts " #{found["filename"]}"
253
281
  end
254
282
  end
255
283
 
@@ -275,10 +303,13 @@ class FileDigests
275
303
  return
276
304
  end
277
305
 
278
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
306
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
279
307
  mtime_string = time_to_database stat.mtime
308
+ digest = get_file_digest(filename)
280
309
 
281
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
310
+ nested_transaction do
311
+ process_file_indeed normalized_filename, mtime_string, digest
312
+ end
282
313
 
283
314
  rescue => exception
284
315
  @counters[:exceptions] += 1
@@ -295,25 +326,25 @@ class FileDigests
295
326
 
296
327
  def process_previously_seen_file found, filename, mtime, digest
297
328
  @missing_files.delete(filename)
298
- if found['digest'] == digest
329
+ if found["digest"] == digest
299
330
  @counters[:good] += 1
300
331
  puts "GOOD: #{filename}" if @options[:verbose]
301
332
  unless @options[:test_only]
302
- if found['mtime'] == mtime
303
- touch_digest_check_time found['id']
333
+ if found["mtime"] == mtime
334
+ touch_digest_check_time found["id"]
304
335
  else
305
- update_mtime mtime, found['id']
336
+ update_mtime mtime, found["id"]
306
337
  end
307
338
  end
308
339
  else
309
- if found['mtime'] == mtime # Digest is different and mtime is the same
340
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
310
341
  @counters[:likely_damaged] += 1
311
342
  STDERR.puts "LIKELY DAMAGED: #{filename}"
312
343
  else
313
344
  @counters[:updated] += 1
314
- puts "UPDATED: #{filename}" unless @options[:quiet]
345
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
315
346
  unless @options[:test_only]
316
- update_mtime_and_digest mtime, digest, found['id']
347
+ update_mtime_and_digest mtime, digest, found["id"]
317
348
  end
318
349
  end
319
350
  end
@@ -351,12 +382,10 @@ class FileDigests
351
382
  end
352
383
 
353
384
  def remove_missing_files
354
- nested_transaction do
355
- @missing_files.each do |filename, digest|
356
- delete_by_filename filename
357
- end
358
- @missing_files = {}
385
+ @missing_files.each do |filename, digest|
386
+ delete_by_filename filename
359
387
  end
388
+ @missing_files = {}
360
389
  end
361
390
 
362
391
 
@@ -366,7 +395,13 @@ class FileDigests
366
395
  @db.execute *args, &block
367
396
  end
368
397
 
369
- def nested_transaction(mode)
398
+ def integrity_check
399
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
400
+ raise "Database integrity check failed"
401
+ end
402
+ end
403
+
404
+ def nested_transaction(mode = :deferred)
370
405
  if @db.transaction_active?
371
406
  yield
372
407
  else
@@ -376,9 +411,9 @@ class FileDigests
376
411
  end
377
412
  end
378
413
 
379
- def perhaps_transaction(condition, mode)
414
+ def perhaps_transaction(condition, mode = :deferred)
380
415
  if condition
381
- @db.transaction(mode) do
416
+ nested_transaction(mode) do
382
417
  yield
383
418
  end
384
419
  else
@@ -387,7 +422,7 @@ class FileDigests
387
422
  end
388
423
 
389
424
  def table_exist? table_name
390
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
425
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
391
426
  end
392
427
 
393
428
  def prepare_method name, query
@@ -422,14 +457,14 @@ class FileDigests
422
457
  end
423
458
 
424
459
  def time_to_database time
425
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
460
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
426
461
  end
427
462
 
428
463
 
429
464
  # Filesystem-related helpers
430
465
 
431
466
  def patch_path_string path
432
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
467
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
433
468
  end
434
469
 
435
470
  def cleanup_path path
@@ -447,13 +482,13 @@ class FileDigests
447
482
  end
448
483
 
449
484
  def walk_files
450
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
485
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
451
486
  yield filename
452
487
  end
453
488
  end
454
489
 
455
490
  def get_file_digest filename
456
- File.open(filename, 'rb') do |io|
491
+ File.open(filename, "rb") do |io|
457
492
  digest = OpenSSL::Digest.new(@digest_algorithm)
458
493
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
459
494
 
@@ -495,13 +530,13 @@ class FileDigests
495
530
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
496
531
  yield
497
532
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
498
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
533
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
499
534
  end
500
535
 
501
536
  def print_file_exception exception, filename
502
537
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
503
538
  begin
504
- STDERR.print filename.encode('utf-8', universal_newline: true)
539
+ STDERR.print filename.encode("utf-8", universal_newline: true)
505
540
  rescue
506
541
  STDERR.print "(Unable to encode file name to utf-8) "
507
542
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.23
4
+ version: 0.0.28
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.3.0
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.3.0
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.1.0
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.1.0
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: