file-digests 0.0.18 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d69e75a52c05cbc2caf912491be9fbaaadb5136a2dfc723920a9010d3e4c2592
4
- data.tar.gz: f90deae82f2581d301d1cb7ba7c730e684b24f5c9582d6deb2581b9f2e6fa557
3
+ metadata.gz: a2d9f3f948e0853b515442c1b480326373309320cb91eb6aa68a5844b0d2be4e
4
+ data.tar.gz: 804b6f40e5475286a6cee0ea49adb646e3fd7367e2fc73d4a105b45137ce747a
5
5
  SHA512:
6
- metadata.gz: 5ad20e936d21d42f56ed20250728eb0b7c2b9a034877e764f528c6e297853bb7724d6780a56b7c0f11a3df975c1d51fed04852ddb3f3ff9848f4c546167902e8
7
- data.tar.gz: 27f86166310f420ac5858fa86c0233af666bbb2af40f53501a9f8e02205a3753151db5cfd6777558bbf0040e2472f9bc296d9382afebb8dd45f65fb981fc173b
6
+ metadata.gz: f785ab8fe3d91bee0a59bc33a46d6c1790f1095d9ea54eb2b1a8aafe43189ec1e7c6c1353312db547d36d21aed871f7bb985638499b20719f17112fecc8d8ac0
7
+ data.tar.gz: bafc97b9a8e37b3dc4bcd3bfdf534849f3e9aa33e3a17c17da30044c51bf254f53694643b3fb9be94d4bac8800177549d454012ad2371d7e68f7d78b36e61fb1
@@ -2,4 +2,4 @@
2
2
 
3
3
  require 'file-digests'
4
4
 
5
- FileDigests.perform_check
5
+ FileDigests.run_cli_utility
@@ -1,283 +1,523 @@
1
1
  require 'date'
2
- require 'set'
3
2
  require 'digest'
4
3
  require 'fileutils'
4
+ require 'openssl'
5
+ require 'optparse'
5
6
  require 'pathname'
7
+ require 'set'
6
8
  require 'sqlite3'
7
9
 
8
- module FileDigests
10
+ class FileDigests
11
+ DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+
13
+ def self.canonical_digest_algorithm_name(string)
14
+ if string
15
+ index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
+ index && DIGEST_ALGORITHMS[index]
17
+ end
18
+ end
9
19
 
10
- def self.perform_check
11
- options = {
12
- auto: (ENV["AUTO"] == "true"),
13
- quiet: (ENV["QUIET"] == "true"),
14
- test_only: (ENV["TEST_ONLY"] == "true")
15
- }
16
- checker = Checker.new ARGV[0], ARGV[1], options
17
- checker.perform_check
20
+ def canonical_digest_algorithm_name string
21
+ self.class.canonical_digest_algorithm_name string
18
22
  end
19
23
 
20
- class DigestDatabase
21
- def initialize path, options = {}
22
- @options = options
24
+ def self.digest_algorithms_list_text
25
+ "Digest algorithm should be one of the following: #{DIGEST_ALGORITHMS.join ", "}"
26
+ end
27
+
28
+ def self.parse_cli_options
29
+ options = {}
30
+
31
+ OptionParser.new do |opts|
32
+ opts.banner = [
33
+ "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
+ " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
+ " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
+ ].join "\n"
37
+
38
+ opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+ options[:auto] = true
40
+ end
41
+
42
+ opts.on(
43
+ '--digest=DIGEST',
44
+ 'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
+ 'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
+ "#{digest_algorithms_list_text}.",
47
+ 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
+ 'Any time later you could specify a new algorithm to change the current one.',
49
+ 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
50
+ ) do |value|
51
+ digest_algorithm = canonical_digest_algorithm_name(value)
52
+ unless digest_algorithm
53
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ exit 1
55
+ end
56
+ options[:digest_algorithm] = digest_algorithm
57
+ end
58
+
59
+ opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
+ options[:action] = :show_duplicates
61
+ end
62
+
63
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
+ options[:test_only] = true
65
+ end
66
+
67
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
68
+ options[:quiet] = true
69
+ end
70
+
71
+ opts.on("-v", "--verbose", "More verbose output") do
72
+ options[:verbose] = true
73
+ end
74
+
75
+ opts.on("-h", "--help", "Prints this help") do
76
+ puts opts
77
+ exit
78
+ end
79
+ end.parse!
80
+ options
81
+ end
82
+
83
+ def self.run_cli_utility
84
+ options = parse_cli_options
85
+
86
+ file_digests = self.new ARGV[0], ARGV[1], options
87
+ file_digests.send(options[:action] || :perform_check)
88
+ end
89
+
90
+ def initialize files_path, digest_database_path, options = {}
91
+ @options = options
92
+
93
+ initialize_paths files_path, digest_database_path
94
+ initialize_database
95
+
96
+ if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
+ @new_digest_algorithm = @options[:digest_algorithm]
99
+ end
100
+ else
101
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
+ set_metadata "digest_algorithm", @digest_algorithm
103
+ end
104
+
105
+ puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
106
+ end
107
+
108
+ def initialize_paths files_path, digest_database_path
109
+ @files_path = cleanup_path(files_path || ".")
110
+
111
+ raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
+
113
+ @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
+ @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
115
+ ensure_dir_exists @digest_database_path.dirname
116
+
117
+ if @options[:verbose]
118
+ puts "Target directory: #{@files_path}"
119
+ puts "Database location: #{@digest_database_path}"
120
+ end
121
+ end
122
+
123
+ def initialize_database
124
+ @db = SQLite3::Database.new @digest_database_path.to_s
125
+ @db.results_as_hash = true
126
+
127
+ file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
+
129
+ execute 'PRAGMA encoding = "UTF-8"'
130
+ execute 'PRAGMA journal_mode = "WAL"'
131
+ execute 'PRAGMA synchronous = "NORMAL"'
132
+ execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
+ execute 'PRAGMA cache_size = "5000"'
134
+
135
+ @db.transaction(:exclusive) do
136
+ metadata_table_was_created = false
137
+ unless table_exist?("metadata")
138
+ execute "CREATE TABLE metadata (
139
+ key TEXT NOT NULL PRIMARY KEY,
140
+ value TEXT)"
141
+ execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
142
+ metadata_table_was_created = true
143
+ end
144
+
145
+ prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
146
+ prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"
23
147
 
24
- @db = SQLite3::Database.new path.to_s
25
- @db.results_as_hash = true
148
+ set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created
26
149
 
27
- execute 'PRAGMA journal_mode = "WAL"'
28
- execute 'PRAGMA synchronous = "NORMAL"'
29
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
30
- execute 'PRAGMA cache_size = "5000"'
150
+ # Heuristic to detect database version 1 (metadata was not stored back then)
151
+ unless get_metadata("database_version")
152
+ if table_exist?("digests")
153
+ set_metadata "database_version", "1"
154
+ end
155
+ end
31
156
 
32
- unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
33
- execute 'PRAGMA encoding = "UTF-8"'
157
+ unless table_exist?("digests")
34
158
  execute "CREATE TABLE digests (
35
- id INTEGER PRIMARY KEY,
36
- filename TEXT,
159
+ id INTEGER NOT NULL PRIMARY KEY,
160
+ filename TEXT NOT NULL,
37
161
  mtime TEXT,
38
- digest TEXT,
39
- digest_check_time TEXT)"
162
+ digest TEXT NOT NULL,
163
+ digest_check_time TEXT NOT NULL)"
40
164
  execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
165
+ set_metadata("digests_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
41
166
  end
42
167
 
43
- @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
44
- @new_files = {}
45
-
46
168
  prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
47
- prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
169
+ prepare_method :find_by_filename_query, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
48
170
  prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
49
171
  prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
50
172
  prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
51
173
  prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
52
- end
174
+ prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
175
+ prepare_method :update_digest_to_new_digest, "UPDATE digests SET digest = ? WHERE digest = ?"
53
176
 
54
- def insert_or_update file_path, mtime, digest, counters
55
- result = find_by_filename file_path
56
-
57
- if found = result.next_hash
58
- raise "Multiple records found" if result.next
59
-
60
- @missing_files.delete(file_path)
177
+ unless get_metadata("database_version")
178
+ set_metadata "database_version", "2"
179
+ end
61
180
 
62
- if found['digest'] == digest
63
- counters[:good] += 1
64
- # puts "GOOD: #{file_path}" unless @options[:quiet]
65
- unless @options[:test_only]
66
- if found['mtime'] == mtime
67
- touch_digest_check_time found['id']
68
- else
69
- update_mtime mtime, found['id']
70
- end
71
- end
72
- else
73
- if found['mtime'] == mtime # Digest is different and mtime is the same
74
- counters[:likely_damaged] += 1
75
- STDERR.puts "LIKELY DAMAGED: #{file_path}"
181
+ # Convert database from 1st to 2nd version
182
+ unless get_metadata("digest_algorithm")
183
+ if get_metadata("database_version") == "1"
184
+ if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
185
+ set_metadata("digest_algorithm", "SHA512")
76
186
  else
77
- counters[:updated] += 1
78
- puts "UPDATED: #{file_path}" unless @options[:quiet]
79
- unless @options[:test_only]
80
- update_mtime_and_digest mtime, digest, found['id']
81
- end
187
+ set_metadata("digest_algorithm", "SHA256")
82
188
  end
83
- end
84
- else
85
- counters[:new] += 1
86
- puts "NEW: #{file_path}" unless @options[:quiet]
87
- unless @options[:test_only]
88
- @new_files[file_path] = digest
89
- insert file_path, mtime, digest
189
+ set_metadata "database_version", "2"
90
190
  end
91
191
  end
192
+
92
193
  end
194
+ end
93
195
 
94
- def track_renames counters
95
- @missing_files.delete_if do |filename, digest|
96
- if @new_files.value?(digest)
97
- counters[:renamed] += 1
98
- unless @options[:test_only]
99
- delete_by_filename filename
100
- end
101
- true
196
+ def perform_check
197
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
198
+ @counters = {good: 0, updated: 0, new: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
199
+ @new_files = {}
200
+ @new_digests = {}
201
+
202
+ @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
203
+
204
+ measure_time do
205
+ walk_files do |filename|
206
+ process_file filename
102
207
  end
103
208
  end
104
- counters[:missing] = @missing_files.length
105
- end
106
-
107
- def any_missing_files?
108
- @missing_files.length > 0
109
- end
110
209
 
111
- def print_missing_files
112
- puts "\nMISSING FILES:"
113
- @missing_files.sort.to_h.each do |filename, digest|
114
- puts filename
210
+ track_renames
211
+
212
+ if any_missing_files?
213
+ if any_exceptions?
214
+ STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
215
+ else
216
+ print_missing_files
217
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
218
+ remove_missing_files
219
+ end
220
+ end
115
221
  end
116
- end
117
222
 
118
- def remove_missing_files
119
- @db.transaction do
120
- @missing_files.each do |filename, digest|
121
- delete_by_filename filename
223
+ if @new_digest_algorithm && !@options[:test_only]
224
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
225
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
226
+ else
227
+ @new_digests.each do |old_digest, new_digest|
228
+ update_digest_to_new_digest new_digest, old_digest
229
+ end
230
+ set_metadata "digest_algorithm", @new_digest_algorithm
122
231
  end
123
232
  end
124
- end
125
233
 
126
- private
234
+ if any_likely_damaged? || any_exceptions?
235
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
236
+ end
237
+
238
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
127
239
 
128
- def execute *args, &block
129
- @db.execute *args, &block
240
+ print_counters
130
241
  end
242
+ end
131
243
 
132
- def prepare_method name, query
133
- variable = "@#{name}"
134
- instance_variable_set(variable, @db.prepare(query))
135
- define_singleton_method name do |*args, &block|
136
- instance_variable_get(variable).execute(*args, &block)
244
+ def show_duplicates
245
+ current_digest = nil
246
+ query_duplicates.each do |found|
247
+ if current_digest != found['digest']
248
+ puts "" if current_digest
249
+ current_digest = found['digest']
250
+ puts "#{found['digest']}:"
137
251
  end
252
+ puts " #{found['filename']}"
138
253
  end
139
254
  end
140
255
 
141
- class Checker
142
- def initialize files_path, digest_database_path, options = {}
143
- @options = options
144
- @files_path = cleanup_path(files_path || ".")
145
- @prefix_to_remove = @files_path.to_s + '/'
256
+ private
146
257
 
147
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
258
+ def process_file filename
259
+ return if File.symlink? filename
148
260
 
149
- @digest_database_path = if digest_database_path
150
- cleanup_path(digest_database_path)
151
- else
152
- @files_path + '.file-digests.sqlite'
153
- end
261
+ stat = File.stat filename
154
262
 
155
- if File.directory?(@digest_database_path)
156
- @digest_database_path = @digest_database_path + '.file-digests.sqlite'
157
- end
263
+ return if stat.blockdev?
264
+ return if stat.chardev?
265
+ return if stat.directory?
266
+ return if stat.pipe?
267
+ return if stat.socket?
158
268
 
159
- if @files_path == @digest_database_path.dirname
160
- @skip_file_digests_sqlite = true
161
- end
269
+ raise "File is not readable" unless stat.readable?
162
270
 
163
- ensure_dir_exists @digest_database_path.dirname
271
+ if filename == "#{@digest_database_path}" ||
272
+ filename == "#{@digest_database_path}-wal" ||
273
+ filename == "#{@digest_database_path}-shm"
274
+ puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
275
+ return
276
+ end
164
277
 
165
- # Please do not use this flag, support for sha512 is here for backward compatibility, and one day it will be removed.
166
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
167
- @use_sha512 = true
168
- end
278
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
279
+ mtime_string = time_to_database stat.mtime
280
+
281
+ process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
282
+
283
+ rescue => exception
284
+ @counters[:exceptions] += 1
285
+ print_file_exception exception, filename
286
+ end
169
287
 
170
- @digest_database = DigestDatabase.new @digest_database_path, @options
171
- @counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
288
+ def process_file_indeed filename, mtime, digest
289
+ if found = find_by_filename(filename)
290
+ process_previously_seen_file found, filename, mtime, digest
291
+ else
292
+ process_new_file filename, mtime, digest
172
293
  end
294
+ end
173
295
 
174
- def perform_check
175
- measure_time do
176
- walk_files do |filename|
177
- process_file filename
296
+ def process_previously_seen_file found, filename, mtime, digest
297
+ @missing_files.delete(filename)
298
+ if found['digest'] == digest
299
+ @counters[:good] += 1
300
+ puts "GOOD: #{filename}" if @options[:verbose]
301
+ unless @options[:test_only]
302
+ if found['mtime'] == mtime
303
+ touch_digest_check_time found['id']
304
+ else
305
+ update_mtime mtime, found['id']
178
306
  end
179
307
  end
308
+ else
309
+ if found['mtime'] == mtime # Digest is different and mtime is the same
310
+ @counters[:likely_damaged] += 1
311
+ STDERR.puts "LIKELY DAMAGED: #{filename}"
312
+ else
313
+ @counters[:updated] += 1
314
+ puts "UPDATED: #{filename}" unless @options[:quiet]
315
+ unless @options[:test_only]
316
+ update_mtime_and_digest mtime, digest, found['id']
317
+ end
318
+ end
319
+ end
320
+ end
321
+
322
+ def process_new_file filename, mtime, digest
323
+ @counters[:new] += 1
324
+ puts "NEW: #{filename}" unless @options[:quiet]
325
+ unless @options[:test_only]
326
+ @new_files[filename] = digest
327
+ insert filename, mtime, digest
328
+ end
329
+ end
330
+
180
331
 
181
- @digest_database.track_renames @counters
332
+ # Renames and missing files
182
333
 
183
- if @digest_database.any_missing_files?
184
- @digest_database.print_missing_files
185
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
186
- @digest_database.remove_missing_files
334
+ def track_renames
335
+ @missing_files.delete_if do |filename, digest|
336
+ if @new_files.value?(digest)
337
+ @counters[:renamed] += 1
338
+ unless @options[:test_only]
339
+ delete_by_filename filename
187
340
  end
341
+ true
188
342
  end
343
+ end
344
+ end
189
345
 
190
- if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
191
- STDERR.puts "ERRORS WERE OCCURRED"
192
- end
346
+ def print_missing_files
347
+ puts "\nMISSING FILES:"
348
+ @missing_files.sort.to_h.each do |filename, digest|
349
+ puts filename
350
+ end
351
+ end
193
352
 
194
- puts @counters.inspect
353
+ def remove_missing_files
354
+ nested_transaction do
355
+ @missing_files.each do |filename, digest|
356
+ delete_by_filename filename
357
+ end
358
+ @missing_files = {}
195
359
  end
360
+ end
361
+
196
362
 
197
- private
363
+ # Database helpers
198
364
 
365
+ def execute *args, &block
366
+ @db.execute *args, &block
367
+ end
199
368
 
200
- def confirm text
201
- if STDIN.tty? && STDOUT.tty?
202
- puts "#{text} (y/n)?"
203
- STDIN.gets.strip.downcase == "y"
369
+ def nested_transaction(mode)
370
+ if @db.transaction_active?
371
+ yield
372
+ else
373
+ @db.transaction(mode) do
374
+ yield
204
375
  end
205
376
  end
377
+ end
206
378
 
207
- def process_file filename
208
- return if File.symlink? filename
379
+ def perhaps_transaction(condition, mode)
380
+ if condition
381
+ @db.transaction(mode) do
382
+ yield
383
+ end
384
+ else
385
+ yield
386
+ end
387
+ end
209
388
 
210
- stat = File.stat filename
389
+ def table_exist? table_name
390
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
391
+ end
211
392
 
212
- return if stat.blockdev?
213
- return if stat.chardev?
214
- return if stat.directory?
215
- return if stat.pipe?
216
- unless stat.readable?
217
- raise "File is not readable"
218
- end
219
- return if stat.socket?
220
-
221
- if @skip_file_digests_sqlite
222
- basename = File.basename(filename)
223
- return if basename == '.file-digests.sha512'
224
- return if basename == '.file-digests.sqlite'
225
- return if basename == '.file-digests.sqlite-wal'
226
- return if basename == '.file-digests.sqlite-shm'
227
- end
393
+ def prepare_method name, query
394
+ variable = "@#{name}"
228
395
 
229
- @digest_database.insert_or_update(
230
- filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
231
- stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
232
- get_file_digest(filename),
233
- @counters
234
- )
235
- rescue => exception
236
- @counters[:exceptions] += 1
237
- STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
238
- end
396
+ instance_variable_set(variable, @db.prepare(query))
239
397
 
240
- def patch_path_string path
241
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
398
+ define_singleton_method name do |*args, &block|
399
+ instance_variable_get(variable).execute(*args, &block)
242
400
  end
243
401
 
244
- def cleanup_path path
245
- Pathname.new(patch_path_string(path)).cleanpath
402
+ define_singleton_method "#{name}!" do |*args, &block|
403
+ instance_variable_get(variable).execute!(*args, &block)
246
404
  end
405
+ end
247
406
 
248
- def ensure_dir_exists path
249
- if File.exist?(path)
250
- unless File.directory?(path)
251
- raise "#{path} is not a directory"
252
- end
253
- else
254
- FileUtils.mkdir_p path
407
+ def set_metadata key, value
408
+ set_metadata_query key, value
409
+ puts "#{key} set to: #{value}" if @options[:verbose]
410
+ value
411
+ end
412
+
413
+ def get_metadata key
414
+ get_metadata_query!(key)&.first&.first
415
+ end
416
+
417
+ def find_by_filename filename
418
+ result = find_by_filename_query filename
419
+ found = result.next
420
+ raise "Multiple records found" if result.next
421
+ found
422
+ end
423
+
424
+ def time_to_database time
425
+ time.utc.strftime('%Y-%m-%d %H:%M:%S')
426
+ end
427
+
428
+
429
+ # Filesystem-related helpers
430
+
431
+ def patch_path_string path
432
+ Gem.win_platform? ? path.gsub(/\\/, '/') : path
433
+ end
434
+
435
+ def cleanup_path path
436
+ Pathname.new(patch_path_string(path)).cleanpath
437
+ end
438
+
439
+ def ensure_dir_exists path
440
+ if File.exist?(path)
441
+ unless File.directory?(path)
442
+ raise "#{path} is not a directory"
255
443
  end
444
+ else
445
+ FileUtils.mkdir_p path
256
446
  end
447
+ end
257
448
 
258
- def walk_files
259
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
260
- yield filename
261
- end
449
+ def walk_files
450
+ Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
451
+ yield filename
262
452
  end
453
+ end
263
454
 
264
- def get_file_digest filename
265
- File.open(filename, 'rb') do |io|
266
- digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
267
- buffer = ""
268
- while io.read(40960, buffer)
269
- digest.update(buffer)
270
- end
271
- return digest.hexdigest
455
+ def get_file_digest filename
456
+ File.open(filename, 'rb') do |io|
457
+ digest = OpenSSL::Digest.new(@digest_algorithm)
458
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
459
+
460
+ buffer = ""
461
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
462
+ digest.update(buffer)
463
+ new_digest.update(buffer) if @new_digest_algorithm
272
464
  end
465
+ @new_digests[digest.hexdigest] = new_digest.hexdigest if @new_digest_algorithm
466
+ return digest.hexdigest
273
467
  end
468
+ end
274
469
 
275
- def measure_time
276
- start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
277
- yield
278
- elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
279
- puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless @options[:quiet]
470
+
471
+ # Runtime state helpers
472
+
473
+ def any_missing_files?
474
+ @missing_files.length > 0
475
+ end
476
+
477
+ def any_exceptions?
478
+ @counters[:exceptions] > 0
479
+ end
480
+
481
+ def any_likely_damaged?
482
+ @counters[:likely_damaged] > 0
483
+ end
484
+
485
+ # UI helpers
486
+
487
+ def confirm text
488
+ if STDIN.tty? && STDOUT.tty?
489
+ puts "#{text} (y/n)?"
490
+ STDIN.gets.strip.downcase == "y"
491
+ end
492
+ end
493
+
494
+ def measure_time
495
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
496
+ yield
497
+ elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
498
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
499
+ end
500
+
501
+ def print_file_exception exception, filename
502
+ STDERR.print "EXCEPTION: #{exception.message}, processing file: "
503
+ begin
504
+ STDERR.print filename.encode('utf-8', universal_newline: true)
505
+ rescue
506
+ STDERR.print "(Unable to encode file name to utf-8) "
507
+ STDERR.print filename
280
508
  end
509
+ STDERR.print "\n"
510
+ STDERR.flush
511
+ exception.backtrace.each { |line| STDERR.puts " " + line }
512
+ end
281
513
 
514
+ def print_counters
515
+ puts "#{@counters[:good]} file(s) passes digest check" if @counters[:good] > 0
516
+ puts "#{@counters[:updated]} file(s) are updated" if @counters[:updated] > 0
517
+ puts "#{@counters[:new]} file(s) are new" if @counters[:new] > 0
518
+ puts "#{@counters[:renamed]} file(s) are renamed" if @counters[:renamed] > 0
519
+ puts "#{@missing_files.length} file(s) are missing" if @missing_files.length > 0
520
+ puts "#{@counters[:likely_damaged]} file(s) are likely damaged (!)" if @counters[:likely_damaged] > 0
521
+ puts "#{@counters[:exceptions]} file(s) had exceptions occured during processing (!)" if @counters[:exceptions] > 0
282
522
  end
283
523
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.0.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-08 00:00:00.000000000 Z
11
+ date: 2020-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sqlite3
@@ -24,24 +24,34 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.3.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: openssl
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 2.1.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 2.1.0
27
41
  description: Calculate file digests and check for the possible file corruption
28
42
  email: stan@senotrusov.com
29
43
  executables:
30
44
  - file-digests
31
- - file-digests-auto
32
- - file-digests-test
33
45
  extensions: []
34
46
  extra_rdoc_files: []
35
47
  files:
36
48
  - bin/file-digests
37
- - bin/file-digests-auto
38
- - bin/file-digests-test
39
49
  - lib/file-digests.rb
40
50
  homepage: https://github.com/senotrusov/file-digests
41
51
  licenses:
42
52
  - Apache-2.0
43
53
  metadata: {}
44
- post_install_message:
54
+ post_install_message:
45
55
  rdoc_options: []
46
56
  require_paths:
47
57
  - lib
@@ -57,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
67
  version: '0'
58
68
  requirements: []
59
69
  rubygems_version: 3.1.2
60
- signing_key:
70
+ signing_key:
61
71
  specification_version: 4
62
72
  summary: file-digests
63
73
  test_files: []
@@ -1,7 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ENV["AUTO"] = "true"
4
-
5
- require 'file-digests'
6
-
7
- FileDigests.perform_check
@@ -1,7 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ENV["TEST_ONLY"] = "true"
4
-
5
- require 'file-digests'
6
-
7
- FileDigests.perform_check