file-digests 0.0.22 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/file-digests.rb +109 -74
  3. metadata +12 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f08714e86e275eb74108da01c667d403134366e6d7e9c5e8b08278d8c8842ba8
4
- data.tar.gz: 57802538edae099807ede5460a07d6fd7ea6a5d0810f40d306db8f330435631d
3
+ metadata.gz: 63e300c17abcf4035c957c9e9c45b8d677b2f47172919efd758467ff4da7f51e
4
+ data.tar.gz: dbee998de8f9957d8b69a4afbbca54ca39ce8ca2cd4d9ee743998ee7bdd5f3c2
5
5
  SHA512:
6
- metadata.gz: 4c642c5aaf06d903114aabc65ec13ae3c5eb04c807b167c7eb1f3c63e8f56144ff3d93964fdc55cbcb33fad743cad635cfa61284171c94ed3b7a9a42c3efbca6
7
- data.tar.gz: eebb6b444c6d0921f638200a27f97af5e9026ab0cf21b7b43b3e58f6f13ccc6323f482f93594ae993637a18b7611514469848307defe3796f412dd453c08b932
6
+ metadata.gz: 66e5d0eb877617acf92b6c7bdada2c77a262d1484933dc44b7e3df548a3fd58fb0a0aa4460c368aaac785360375a650badeb148253d919a77d9882daa5b31201
7
+ data.tar.gz: 4444e166dbe2d71ac240cebf69992c57f846648c046696c89f575e174b7b92e3be92256d85ed8538f80581877a6e5e5e23a30ffc47a0d141762abc5d09a67e4b
@@ -1,19 +1,21 @@
1
- require 'date'
2
- require 'digest'
3
- require 'fileutils'
4
- require 'openssl'
5
- require 'optparse'
6
- require 'pathname'
7
- require 'set'
8
- require 'sqlite3'
1
+ require "date"
2
+ require "digest"
3
+ require "fileutils"
4
+ require "openssl"
5
+ require "optparse"
6
+ require "pathname"
7
+ require "set"
8
+ require "sqlite3"
9
9
 
10
10
  class FileDigests
11
11
  DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+ LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"]
12
13
 
13
14
  def self.canonical_digest_algorithm_name(string)
14
15
  if string
15
- index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
- index && DIGEST_ALGORITHMS[index]
16
+ algorithms = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
17
+ index = algorithms.map(&:downcase).index(string.downcase)
18
+ index && algorithms[index]
17
19
  end
18
20
  end
19
21
 
@@ -27,55 +29,60 @@ class FileDigests
27
29
 
28
30
  def self.parse_cli_options
29
31
  options = {}
30
-
32
+
31
33
  OptionParser.new do |opts|
32
34
  opts.banner = [
33
35
  "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
36
  " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
37
  " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
38
  ].join "\n"
37
-
38
- opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+
40
+ opts.on("-a", "--auto", "Do not ask for any confirmation.") do
39
41
  options[:auto] = true
40
42
  end
41
43
 
42
44
  opts.on(
43
- '--digest=DIGEST',
45
+ "-d", "--digest DIGEST",
44
46
  'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
47
  'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
48
  "#{digest_algorithms_list_text}.",
47
- 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
- 'Any time later you could specify a new algorithm to change the current one.',
49
- 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
49
+ "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
50
+ "Any time later you could specify a new algorithm to change the current one.",
51
+ "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
50
52
  ) do |value|
51
53
  digest_algorithm = canonical_digest_algorithm_name(value)
52
- unless digest_algorithm
53
- STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ unless DIGEST_ALGORITHMS.include?(digest_algorithm)
55
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
56
  exit 1
55
57
  end
56
58
  options[:digest_algorithm] = digest_algorithm
57
59
  end
58
60
 
59
- opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
- options[:action] = :show_duplicates
61
+ opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
62
+ options[:accept_fate] = true
61
63
  end
62
64
 
63
- opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
- options[:test_only] = true
65
+ opts.on("-h", "--help", "Prints this help.") do
66
+ puts opts
67
+ exit
68
+ end
69
+
70
+ opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
71
+ options[:action] = :show_duplicates
65
72
  end
66
73
 
67
- opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
74
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
68
75
  options[:quiet] = true
69
76
  end
70
77
 
71
- opts.on("-v", "--verbose", "More verbose output") do
72
- options[:verbose] = true
78
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
79
+ options[:test_only] = true
73
80
  end
74
81
 
75
- opts.on("-h", "--help", "Prints this help") do
76
- puts opts
77
- exit
82
+ opts.on("-v", "--verbose", "More verbose output.") do
83
+ options[:verbose] = true
78
84
  end
85
+
79
86
  end.parse!
80
87
  options
81
88
  end
@@ -93,13 +100,15 @@ class FileDigests
93
100
  initialize_paths files_path, digest_database_path
94
101
  initialize_database
95
102
 
96
- if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
- if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
- @new_digest_algorithm = @options[:digest_algorithm]
103
+ @db.transaction(:exclusive) do
104
+ if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
105
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
106
+ @new_digest_algorithm = @options[:digest_algorithm]
107
+ end
108
+ else
109
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
110
+ set_metadata "digest_algorithm", @digest_algorithm
99
111
  end
100
- else
101
- @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
- set_metadata "digest_algorithm", @digest_algorithm
103
112
  end
104
113
 
105
114
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
@@ -111,7 +120,7 @@ class FileDigests
111
120
  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
121
 
113
122
  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
- @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
123
+ @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
115
124
  ensure_dir_exists @digest_database_path.dirname
116
125
 
117
126
  if @options[:verbose]
@@ -123,27 +132,33 @@ class FileDigests
123
132
  def initialize_database
124
133
  @db = SQLite3::Database.new @digest_database_path.to_s
125
134
  @db.results_as_hash = true
135
+ @db.busy_timeout = 5000
126
136
 
127
137
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
138
 
129
- execute 'PRAGMA encoding = "UTF-8"'
130
- execute 'PRAGMA journal_mode = "WAL"'
131
- execute 'PRAGMA synchronous = "NORMAL"'
132
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
- execute 'PRAGMA cache_size = "5000"'
139
+ execute "PRAGMA encoding = 'UTF-8'"
140
+ execute "PRAGMA locking_mode = 'EXCLUSIVE'"
141
+ execute "PRAGMA journal_mode = 'WAL'"
142
+ execute "PRAGMA synchronous = 'NORMAL'"
143
+ execute "PRAGMA cache_size = '5000'"
144
+
145
+ integrity_check
134
146
 
135
147
  @db.transaction(:exclusive) do
148
+ metadata_table_was_created = false
136
149
  unless table_exist?("metadata")
137
150
  execute "CREATE TABLE metadata (
138
151
  key TEXT NOT NULL PRIMARY KEY,
139
152
  value TEXT)"
140
153
  execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
141
- set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
154
+ metadata_table_was_created = true
142
155
  end
143
156
 
144
157
  prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
145
158
  prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"
146
159
 
160
+ set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created
161
+
147
162
  # Heuristic to detect database version 1 (metadata was not stored back then)
148
163
  unless get_metadata("database_version")
149
164
  if table_exist?("digests")
@@ -178,7 +193,7 @@ class FileDigests
178
193
  # Convert database from 1st to 2nd version
179
194
  unless get_metadata("digest_algorithm")
180
195
  if get_metadata("database_version") == "1"
181
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
196
+ if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
182
197
  set_metadata("digest_algorithm", "SHA512")
183
198
  else
184
199
  set_metadata("digest_algorithm", "SHA256")
@@ -187,6 +202,10 @@ class FileDigests
187
202
  end
188
203
  end
189
204
 
205
+ if get_metadata("database_version") != "2"
206
+ STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
207
+ raise "Incompatible database version"
208
+ end
190
209
  end
191
210
  end
192
211
 
@@ -204,15 +223,19 @@ class FileDigests
204
223
  end
205
224
  end
206
225
 
207
- track_renames
208
-
226
+ nested_transaction do
227
+ track_renames
228
+ end
229
+
209
230
  if any_missing_files?
210
231
  if any_exceptions?
211
232
  STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
212
233
  else
213
234
  print_missing_files
214
235
  if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
215
- remove_missing_files
236
+ nested_transaction do
237
+ remove_missing_files
238
+ end
216
239
  end
217
240
  end
218
241
  end
@@ -225,6 +248,7 @@ class FileDigests
225
248
  update_digest_to_new_digest new_digest, old_digest
226
249
  end
227
250
  set_metadata "digest_algorithm", @new_digest_algorithm
251
+ puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
228
252
  end
229
253
  end
230
254
 
@@ -234,6 +258,10 @@ class FileDigests
234
258
 
235
259
  set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
236
260
 
261
+ execute "PRAGMA optimize"
262
+ execute "VACUUM"
263
+ execute "PRAGMA wal_checkpoint(TRUNCATE)"
264
+
237
265
  print_counters
238
266
  end
239
267
  end
@@ -241,12 +269,12 @@ class FileDigests
241
269
  def show_duplicates
242
270
  current_digest = nil
243
271
  query_duplicates.each do |found|
244
- if current_digest != found['digest']
272
+ if current_digest != found["digest"]
245
273
  puts "" if current_digest
246
- current_digest = found['digest']
247
- puts "#{found['digest']}:"
274
+ current_digest = found["digest"]
275
+ puts "#{found["digest"]}:"
248
276
  end
249
- puts " #{found['filename']}"
277
+ puts " #{found["filename"]}"
250
278
  end
251
279
  end
252
280
 
@@ -272,10 +300,13 @@ class FileDigests
272
300
  return
273
301
  end
274
302
 
275
- normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
303
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode("utf-8", universal_newline: true).unicode_normalize(:nfkc)
276
304
  mtime_string = time_to_database stat.mtime
305
+ digest = get_file_digest(filename)
277
306
 
278
- process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
307
+ nested_transaction do
308
+ process_file_indeed normalized_filename, mtime_string, digest
309
+ end
279
310
 
280
311
  rescue => exception
281
312
  @counters[:exceptions] += 1
@@ -292,25 +323,25 @@ class FileDigests
292
323
 
293
324
  def process_previously_seen_file found, filename, mtime, digest
294
325
  @missing_files.delete(filename)
295
- if found['digest'] == digest
326
+ if found["digest"] == digest
296
327
  @counters[:good] += 1
297
328
  puts "GOOD: #{filename}" if @options[:verbose]
298
329
  unless @options[:test_only]
299
- if found['mtime'] == mtime
300
- touch_digest_check_time found['id']
330
+ if found["mtime"] == mtime
331
+ touch_digest_check_time found["id"]
301
332
  else
302
- update_mtime mtime, found['id']
333
+ update_mtime mtime, found["id"]
303
334
  end
304
335
  end
305
336
  else
306
- if found['mtime'] == mtime # Digest is different and mtime is the same
337
+ if found["mtime"] == mtime && !@options[:accept_fate] # Digest is different and mtime is the same
307
338
  @counters[:likely_damaged] += 1
308
339
  STDERR.puts "LIKELY DAMAGED: #{filename}"
309
340
  else
310
341
  @counters[:updated] += 1
311
- puts "UPDATED: #{filename}" unless @options[:quiet]
342
+ puts "UPDATED#{" (FATE ACCEPTED)" if found["mtime"] == mtime && @options[:accept_fate]}: #{filename}" unless @options[:quiet]
312
343
  unless @options[:test_only]
313
- update_mtime_and_digest mtime, digest, found['id']
344
+ update_mtime_and_digest mtime, digest, found["id"]
314
345
  end
315
346
  end
316
347
  end
@@ -348,12 +379,10 @@ class FileDigests
348
379
  end
349
380
 
350
381
  def remove_missing_files
351
- nested_transaction do
352
- @missing_files.each do |filename, digest|
353
- delete_by_filename filename
354
- end
355
- @missing_files = {}
382
+ @missing_files.each do |filename, digest|
383
+ delete_by_filename filename
356
384
  end
385
+ @missing_files = {}
357
386
  end
358
387
 
359
388
 
@@ -363,7 +392,13 @@ class FileDigests
363
392
  @db.execute *args, &block
364
393
  end
365
394
 
366
- def nested_transaction(mode)
395
+ def integrity_check
396
+ if execute("PRAGMA integrity_check")&.first&.fetch("integrity_check") != "ok"
397
+ raise "Database integrity check failed"
398
+ end
399
+ end
400
+
401
+ def nested_transaction(mode = :deferred)
367
402
  if @db.transaction_active?
368
403
  yield
369
404
  else
@@ -373,9 +408,9 @@ class FileDigests
373
408
  end
374
409
  end
375
410
 
376
- def perhaps_transaction(condition, mode)
411
+ def perhaps_transaction(condition, mode = :deferred)
377
412
  if condition
378
- @db.transaction(mode) do
413
+ nested_transaction(mode) do
379
414
  yield
380
415
  end
381
416
  else
@@ -384,7 +419,7 @@ class FileDigests
384
419
  end
385
420
 
386
421
  def table_exist? table_name
387
- execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
422
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", table_name).length == 1
388
423
  end
389
424
 
390
425
  def prepare_method name, query
@@ -419,14 +454,14 @@ class FileDigests
419
454
  end
420
455
 
421
456
  def time_to_database time
422
- time.utc.strftime('%Y-%m-%d %H:%M:%S')
457
+ time.utc.strftime("%Y-%m-%d %H:%M:%S")
423
458
  end
424
459
 
425
460
 
426
461
  # Filesystem-related helpers
427
462
 
428
463
  def patch_path_string path
429
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
464
+ Gem.win_platform? ? path.gsub(/\\/, "/") : path
430
465
  end
431
466
 
432
467
  def cleanup_path path
@@ -444,13 +479,13 @@ class FileDigests
444
479
  end
445
480
 
446
481
  def walk_files
447
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
482
+ Dir.glob(@files_path + "**" + "*", File::FNM_DOTMATCH) do |filename|
448
483
  yield filename
449
484
  end
450
485
  end
451
486
 
452
487
  def get_file_digest filename
453
- File.open(filename, 'rb') do |io|
488
+ File.open(filename, "rb") do |io|
454
489
  digest = OpenSSL::Digest.new(@digest_algorithm)
455
490
  new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
456
491
 
@@ -492,13 +527,13 @@ class FileDigests
492
527
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
493
528
  yield
494
529
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
495
- puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
530
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{"%.3f" % (elapsed % 60)}s" unless @options[:quiet]
496
531
  end
497
532
 
498
533
  def print_file_exception exception, filename
499
534
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
500
535
  begin
501
- STDERR.print filename.encode('utf-8', universal_newline: true)
536
+ STDERR.print filename.encode("utf-8", universal_newline: true)
502
537
  rescue
503
538
  STDERR.print "(Unable to encode file name to utf-8) "
504
539
  STDERR.print filename
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.22
4
+ version: 0.0.27
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-12 00:00:00.000000000 Z
11
+ date: 2020-10-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: sqlite3
14
+ name: openssl
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.3.0
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.3.0
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
- name: openssl
28
+ name: sqlite3
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.1.0
33
+ version: '1.3'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.1.0
40
+ version: '1.3'
41
41
  description: Calculate file digests and check for the possible file corruption
42
42
  email: stan@senotrusov.com
43
43
  executables: