file-digests 0.0.22 → 0.0.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +109 -74
  3. metadata +12 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f08714e86e275eb74108da01c667d403134366e6d7e9c5e8b08278d8c8842ba8
4
- data.tar.gz: 57802538edae099807ede5460a07d6fd7ea6a5d0810f40d306db8f330435631d
3
+ metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
4
+ data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
5
5
  SHA512:
6
- metadata.gz: 4c642c5aaf06d903114aabc65ec13ae3c5eb04c807b167c7eb1f3c63e8f56144ff3d93964fdc55cbcb33fad743cad635cfa61284171c94ed3b7a9a42c3efbca6
7
- data.tar.gz: eebb6b444c6d0921f638200a27f97af5e9026ab0cf21b7b43b3e58f6f13ccc6323f482f93594ae993637a18b7611514469848307defe3796f412dd453c08b932
6
+ metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
7
+ data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
data/lib/file-digests.rb CHANGED
@@ -1,19 +1,21 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+ LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
12
13
 
13
14
  def self.canonical_digest_algorithm_name(string)
14
15
  if string
15
- index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
- index && DIGEST_ALGORITHMS[index]
16
+ algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
17
+ index = algorithms.map(&:downcase).index(string.downcase)
18
+ index && algorithms[index]
17
19
  end
18
20
  end
19
21
 
@@ -27,55 +29,60 @@ class FileDigests
27
29
 
28
30
  def self.parse_cli_options
29
31
  options = {}
30
-
32
+
31
33
  OptionParser.new do |opts|
32
34
  opts.banner = [
33
35
  "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
36
  " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
38
  ].join "\n"
37
-
38
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
39
41
  options[:auto] = true
40
42
  end
41
43
 
42
44
  opts.on(
43
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
44
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
48
  "#{digest_algorithms_list_text}.",
47
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
- 'Any time later you could specify a new algorithm to change the current one.',
49
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
50
52
  ) do |value|
51
53
  digest_algorithm = canonical_digest_algorithm_name(value)
52
- unless digest_algorithm
53
- STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ unless DIGEST_ALGORITHMS.include?(digest_algorithm)
55
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
56
  exit 1
55
57
  end
56
58
  options[:digest_algorithm] = digest_algorithm
57
59
  end
58
60
 
59
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
61
63
  end
62
64
 
63
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
68
+ end
69
+
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
65
72
  end
66
73
 
67
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
68
75
  options[:quiet] = true
69
76
  end
70
77
 
71
- opts.on("-v", "--verbose", "More verbose output") do
72
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
73
80
  end
74
81
 
75
- opts.on("-h", "--help", "Prints this help") do
76
- puts opts
77
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
78
84
  end
85
+
79
86
  end.parse!
80
87
  options
81
88
  end
@@ -93,13 +100,15 @@ class FileDigests
93
100
  initialize_paths files_path, digest_database_path
94
101
  initialize_database
95
102
 
96
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
105
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
106
+ @new_digest_algorithm = @options[:digest_algorithm]
107
+ end
108
+ else
109
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
110
+ set_metadata "digest_algorithm", @digest_algorithm
99
111
  end
100
- else
101
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
- set_metadata "digest_algorithm", @digest_algorithm
103
112
  end
104
113
 
105
114
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
@@ -111,7 +120,7 @@ class FileDigests
111
120
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
121
 
113
122
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
123
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
115
124
  ensure_dir_exists @digest_database_path.dirname
116
125
 
117
126
  if @options[:verbose]
@@ -123,27 +132,33 @@ class FileDigests
123
132
  def initialize_database
124
133
  @db = SQLite3::Database.new @digest_database_path.to_s
125
134
  @db.results_as_hash = true
135
+ @db.busy_timeout = 5000
126
136
 
127
137
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
138
 
129
- execute 'PRAGMA encoding = "UTF-8"'
130
- execute 'PRAGMA journal_mode = "WAL"'
131
- execute 'PRAGMA synchronous = "NORMAL"'
132
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
- execute 'PRAGMA cache_size = "5000"'
139
+ execute "PRAGMA encoding = 'UTF-8'"
140
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
141
+ execute "PRAGMA journal_mode = 'WAL'"
142
+ execute "PRAGMA synchronous = 'NORMAL'"
143
+ execute "PRAGMA cache_size = '5000'"
144
+
145
+ integrity_check
134
146
 
135
147
  @db.transaction(:exclusive) do
148
+ metadata_table_was_created = false
136
149
  unless table_exist?("metadata")
137
150
  execute "CREATE TABLE metadata (
138
151
  key TEXT NOT NULL PRIMARY KEY,
139
152
  value TEXT)"
140
153
  execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
141
- set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
154
+ metadata_table_was_created = true
142
155
  end
143
156
 
144
157
  prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
145
158
  prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"
146
159
 
160
+ set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created
161
+
147
162
  # Heuristic to detect database version 1 (metadata was not stored back then)
148
163
  unless get_metadata("database_version")
149
164
  if table_exist?("digests")
@@ -178,7 +193,7 @@ class FileDigests
178
193
  # Convert database from 1st to 2nd version
179
194
  unless get_metadata("digest_algorithm")
180
195
  if get_metadata("database_version") == "1"
181
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
196
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
182
197
  set_metadata("digest_algorithm", "SHA512")
183
198
  else
184
199
  set_metadata("digest_algorithm", "SHA256")
@@ -187,6 +202,10 @@ class FileDigests
187
202
  end
188
203
  end
189
204
 
205
+ if get_metadata("database_version") != "2"
206
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
207
+ raise "Incompatible database version"
208
+ end
190
209
  end
191
210
  end
192
211
 
@@ -204,15 +223,19 @@ class FileDigests
204
223
  end
205
224
  end
206
225
 
207
- track_renames
208
-
226
+ nested_transaction do
227
+ track_renames
228
+ end
229
+
209
230
  if any_missing_files?
210
231
  if any_exceptions?
211
232
  STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
212
233
  else
213
234
  print_missing_files
214
235
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
215
- remove_missing_files
236
+ nested_transaction do
237
+ remove_missing_files
238
+ end
216
239
  end
217
240
  end
218
241
  end
@@ -225,6 +248,7 @@ class FileDigests
225
248
  update_digest_to_new_digest new_digest, old_digest
226
249
  end
227
250
  set_metadata "digest_algorithm", @new_digest_algorithm
251
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
228
252
  end
229
253
  end
230
254
 
@@ -234,6 +258,10 @@ class FileDigests
234
258
 
235
259
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
236
260
 
261
+ execute "PRAGMA optimize"
262
+ execute "VACUUM"
263
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
264
+
237
265
  print_counters
238
266
  end
239
267
  end
@@ -241,12 +269,12 @@ class FileDigests
241
269
  def show_duplicates
242
270
  current_digest = nil
243
271
  query_duplicates.each do |found|
244
- if current_digest != found['digest']
272
+ if current_digest != found["digest"]
245
273
  puts "" if current_digest
246
- current_digest = found['digest']
247
- puts "#{found['digest']}:"
274
+ current_digest = found["digest"]
275
+ puts "#{found["digest"]}:"
248
276
  end
249
- puts " #{found['filename']}"
277
+ puts " #{found["filename"]}"
250
278
  end
251
279
  end
252
280
 
@@ -272,10 +300,13 @@ class FileDigests
272
300
  return
273
301
  end
274
302
 
275
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
303
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
276
304
  mtime_string = time_to_database stat.mtime
305
+ digest = get_file_digest(filename)
277
306
 
278
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
307
+ nested_transaction do
308
+ process_file_indeed normalized_filename, mtime_string, digest
309
+ end
279
310
 
280
311
  rescue => exception
281
312
  @counters[:exceptions] += 1
@@ -292,25 +323,25 @@ class FileDigests
292
323
 
293
324
  def process_previously_seen_file found, filename, mtime, digest
294
325
  @missing_files.delete(filename)
295
- if found['digest'] == digest
326
+ if found["digest"] == digest
296
327
  @counters[:good] += 1
297
328
  puts "GOOD: #{filename}" if @options[:verbose]
298
329
  unless @options[:test_only]
299
- if found['mtime'] == mtime
300
- touch_digest_check_time found['id']
330
+ if found["mtime"] == mtime
331
+ touch_digest_check_time found["id"]
301
332
  else
302
- update_mtime mtime, found['id']
333
+ update_mtime mtime, found["id"]
303
334
  end
304
335
  end
305
336
  else
306
- if found['mtime'] == mtime # Digest is different and mtime is the same
337
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
307
338
  @counters[:likely_damaged] += 1
308
339
  STDERR.puts "LIKELY DAMAGED: #{filename}"
309
340
  else
310
341
  @counters[:updated] += 1
311
- puts "UPDATED: #{filename}" unless @options[:quiet]
342
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
312
343
  unless @options[:test_only]
313
- update_mtime_and_digest mtime, digest, found['id']
344
+ update_mtime_and_digest mtime, digest, found["id"]
314
345
  end
315
346
  end
316
347
  end
@@ -348,12 +379,10 @@ class FileDigests
348
379
  end
349
380
 
350
381
  def remove_missing_files
351
- nested_transaction do
352
- @missing_files.each do |filename, digest|
353
- delete_by_filename filename
354
- end
355
- @missing_files = {}
382
+ @missing_files.each do |filename, digest|
383
+ delete_by_filename filename
356
384
  end
385
+ @missing_files = {}
357
386
  end
358
387
 
359
388
 
@@ -363,7 +392,13 @@ class FileDigests
363
392
  @db.execute *args, &block
364
393
  end
365
394
 
366
- def nested_transaction(mode)
395
+ def integrity_check
396
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
397
+ raise "Database integrity check failed"
398
+ end
399
+ end
400
+
401
+ def nested_transaction(mode = :deferred)
367
402
  if @db.transaction_active?
368
403
  yield
369
404
  else
@@ -373,9 +408,9 @@ class FileDigests
373
408
  end
374
409
  end
375
410
 
376
- def perhaps_transaction(condition, mode)
411
+ def perhaps_transaction(condition, mode = :deferred)
377
412
  if condition
378
- @db.transaction(mode) do
413
+ nested_transaction(mode) do
379
414
  yield
380
415
  end
381
416
  else
@@ -384,7 +419,7 @@ class FileDigests
384
419
  end
385
420
 
386
421
  def table_exist? table_name
387
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
422
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
388
423
  end
389
424
 
390
425
  def prepare_method name, query
@@ -419,14 +454,14 @@ class FileDigests
419
454
  end
420
455
 
421
456
  def time_to_database time
422
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
457
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
423
458
  end
424
459
 
425
460
 
426
461
  # Filesystem-related helpers
427
462
 
428
463
  def patch_path_string path
429
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
464
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
430
465
  end
431
466
 
432
467
  def cleanup_path path
@@ -444,13 +479,13 @@ class FileDigests
444
479
  end
445
480
 
446
481
  def walk_files
447
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
482
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
448
483
  yield filename
449
484
  end
450
485
  end
451
486
 
452
487
  def get_file_digest filename
453
- File.open(filename, 'rb') do |io|
488
+ File.open(filename, "rb") do |io|
454
489
  digest = OpenSSL::Digest.new(@digest_algorithm)
455
490
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
456
491
 
@@ -492,13 +527,13 @@ class FileDigests
492
527
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
493
528
  yield
494
529
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
495
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
530
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
496
531
  end
497
532
 
498
533
  def print_file_exception exception, filename
499
534
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
500
535
  begin
501
- STDERR.print filename.encode('utf-8', universal_newline: true)
536
+ STDERR.print filename.encode("utf-8", universal_newline: true)
502
537
  rescue
503
538
  STDERR.print "(Unable to encode file name to utf-8) "
504
539
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.22
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.3.0
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.3.0
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.1.0
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.1.0
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: