file-digests 0.0.17 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 27ef5c07b544bb7e63a8b9ca9d8b99d0b46588b45311ef940d7431691178b99c
4
- data.tar.gz: b1febf1fbdabab014eca65a86e3beee4cfaf4478ead18a5da071b045ae0ab56a
3
+ metadata.gz: f08714e86e275eb74108da01c667d403134366e6d7e9c5e8b08278d8c8842ba8
4
+ data.tar.gz: 57802538edae099807ede5460a07d6fd7ea6a5d0810f40d306db8f330435631d
5
5
  SHA512:
6
- metadata.gz: f4d0db5b5ca2a29adaac1fa9aaefd276977f39d0c6e7e17e5432586d63a91a5bb20bd4af8c955e4e5c625460102ae1431b33947e5c40cc9351c5bb858f28974b
7
- data.tar.gz: 3dc1ec4ac2224a84d1cc914b81314ccfba5580a11ca179ac9b4ca9201a599f57a52edb997af8adc369861124a733d1030f5dcb2a234dbf3c10a68b03690a5b3a
6
+ metadata.gz: 4c642c5aaf06d903114aabc65ec13ae3c5eb04c807b167c7eb1f3c63e8f56144ff3d93964fdc55cbcb33fad743cad635cfa61284171c94ed3b7a9a42c3efbca6
7
+ data.tar.gz: eebb6b444c6d0921f638200a27f97af5e9026ab0cf21b7b43b3e58f6f13ccc6323f482f93594ae993637a18b7611514469848307defe3796f412dd453c08b932
@@ -2,4 +2,4 @@
2
2
 
3
3
  require 'file-digests'
4
4
 
5
- FileDigests.perform_check
5
+ FileDigests.run_cli_utility
@@ -1,283 +1,520 @@
1
1
  require 'date'
2
- require 'set'
3
2
  require 'digest'
4
3
  require 'fileutils'
4
+ require 'openssl'
5
+ require 'optparse'
5
6
  require 'pathname'
7
+ require 'set'
6
8
  require 'sqlite3'
7
9
 
8
- module FileDigests
10
+ class FileDigests
11
+ DIGEST_ALGORITHMS=["BLAKE2b512", "SHA3-256", "SHA512-256"]
12
+
13
+ def self.canonical_digest_algorithm_name(string)
14
+ if string
15
+ index = DIGEST_ALGORITHMS.map(&:downcase).index(string.downcase)
16
+ index && DIGEST_ALGORITHMS[index]
17
+ end
18
+ end
9
19
 
10
- def self.perform_check
11
- options = {
12
- auto: (ENV["AUTO"] == "true"),
13
- quiet: (ENV["QUIET"] == "true"),
14
- test_only: (ENV["TEST_ONLY"] == "true")
15
- }
16
- checker = Checker.new ARGV[0], ARGV[1], options
17
- checker.perform_check
20
+ def canonical_digest_algorithm_name string
21
+ self.class.canonical_digest_algorithm_name string
18
22
  end
19
23
 
20
- class DigestDatabase
21
- def initialize path, options = {}
22
- @options = options
24
+ def self.digest_algorithms_list_text
25
+ "Digest algorithm should be one of the following: #{DIGEST_ALGORITHMS.join ", "}"
26
+ end
27
+
28
+ def self.parse_cli_options
29
+ options = {}
30
+
31
+ OptionParser.new do |opts|
32
+ opts.banner = [
33
+ "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
34
+ " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
35
+ " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
36
+ ].join "\n"
37
+
38
+ opts.on("-a", "--auto", "Do not ask for any confirmation") do
39
+ options[:auto] = true
40
+ end
41
+
42
+ opts.on(
43
+ '--digest=DIGEST',
44
+ 'Select a digest algorithm to use. Default is "BLAKE2b512".',
45
+ 'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
46
+ "#{digest_algorithms_list_text}.",
47
+ 'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
48
+ 'Any time later you could specify a new algorithm to change the current one.',
49
+ 'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
50
+ ) do |value|
51
+ digest_algorithm = canonical_digest_algorithm_name(value)
52
+ unless digest_algorithm
53
+ STDERR.puts "ERROR: #{digest_algorithms_list_text}"
54
+ exit 1
55
+ end
56
+ options[:digest_algorithm] = digest_algorithm
57
+ end
23
58
 
24
- @db = SQLite3::Database.new path.to_s
25
- @db.results_as_hash = true
59
+ opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
60
+ options[:action] = :show_duplicates
61
+ end
62
+
63
+ opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
64
+ options[:test_only] = true
65
+ end
66
+
67
+ opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
68
+ options[:quiet] = true
69
+ end
70
+
71
+ opts.on("-v", "--verbose", "More verbose output") do
72
+ options[:verbose] = true
73
+ end
74
+
75
+ opts.on("-h", "--help", "Prints this help") do
76
+ puts opts
77
+ exit
78
+ end
79
+ end.parse!
80
+ options
81
+ end
26
82
 
27
- execute 'PRAGMA journal_mode = "WAL"'
28
- execute 'PRAGMA synchronous = "NORMAL"'
29
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
30
- execute 'PRAGMA cache_size = "5000"'
83
+ def self.run_cli_utility
84
+ options = parse_cli_options
31
85
 
32
- unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
33
- execute 'PRAGMA encoding = "UTF-8"'
86
+ file_digests = self.new ARGV[0], ARGV[1], options
87
+ file_digests.send(options[:action] || :perform_check)
88
+ end
89
+
90
+ def initialize files_path, digest_database_path, options = {}
91
+ @options = options
92
+
93
+ initialize_paths files_path, digest_database_path
94
+ initialize_database
95
+
96
+ if @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
97
+ if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
98
+ @new_digest_algorithm = @options[:digest_algorithm]
99
+ end
100
+ else
101
+ @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
102
+ set_metadata "digest_algorithm", @digest_algorithm
103
+ end
104
+
105
+ puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
106
+ end
107
+
108
+ def initialize_paths files_path, digest_database_path
109
+ @files_path = cleanup_path(files_path || ".")
110
+
111
+ raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
112
+
113
+ @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
114
+ @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
115
+ ensure_dir_exists @digest_database_path.dirname
116
+
117
+ if @options[:verbose]
118
+ puts "Target directory: #{@files_path}"
119
+ puts "Database location: #{@digest_database_path}"
120
+ end
121
+ end
122
+
123
+ def initialize_database
124
+ @db = SQLite3::Database.new @digest_database_path.to_s
125
+ @db.results_as_hash = true
126
+
127
+ file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s
128
+
129
+ execute 'PRAGMA encoding = "UTF-8"'
130
+ execute 'PRAGMA journal_mode = "WAL"'
131
+ execute 'PRAGMA synchronous = "NORMAL"'
132
+ execute 'PRAGMA locking_mode = "EXCLUSIVE"'
133
+ execute 'PRAGMA cache_size = "5000"'
134
+
135
+ @db.transaction(:exclusive) do
136
+ unless table_exist?("metadata")
137
+ execute "CREATE TABLE metadata (
138
+ key TEXT NOT NULL PRIMARY KEY,
139
+ value TEXT)"
140
+ execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
141
+ set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
142
+ end
143
+
144
+ prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
145
+ prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"
146
+
147
+ # Heuristic to detect database version 1 (metadata was not stored back then)
148
+ unless get_metadata("database_version")
149
+ if table_exist?("digests")
150
+ set_metadata "database_version", "1"
151
+ end
152
+ end
153
+
154
+ unless table_exist?("digests")
34
155
  execute "CREATE TABLE digests (
35
- id INTEGER PRIMARY KEY,
36
- filename TEXT,
156
+ id INTEGER NOT NULL PRIMARY KEY,
157
+ filename TEXT NOT NULL,
37
158
  mtime TEXT,
38
- digest TEXT,
39
- digest_check_time TEXT)"
159
+ digest TEXT NOT NULL,
160
+ digest_check_time TEXT NOT NULL)"
40
161
  execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
162
+ set_metadata("digests_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
41
163
  end
42
164
 
43
- @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
44
- @new_files = {}
45
-
46
165
  prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
47
- prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
166
+ prepare_method :find_by_filename_query, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
48
167
  prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
49
168
  prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
50
169
  prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
51
170
  prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
52
- end
171
+ prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
172
+ prepare_method :update_digest_to_new_digest, "UPDATE digests SET digest = ? WHERE digest = ?"
53
173
 
54
- def insert_or_update file_path, mtime, digest, counters
55
- result = find_by_filename file_path
56
-
57
- if found = result.next_hash
58
- raise "Multiple records found" if result.next
59
-
60
- @missing_files.delete(file_path)
174
+ unless get_metadata("database_version")
175
+ set_metadata "database_version", "2"
176
+ end
61
177
 
62
- if found['digest'] == digest
63
- counters[:good] += 1
64
- # puts "GOOD: #{file_path}" unless @options[:quiet]
65
- unless @options[:test_only]
66
- if found['mtime'] == mtime
67
- touch_digest_check_time found['id']
68
- else
69
- update_mtime mtime, found['id']
70
- end
71
- end
72
- else
73
- if found['mtime'] == mtime # Digest is different and mtime is the same
74
- counters[:likely_damaged] += 1
75
- STDERR.puts "LIKELY DAMAGED: #{file_path}"
178
+ # Convert database from 1st to 2nd version
179
+ unless get_metadata("digest_algorithm")
180
+ if get_metadata("database_version") == "1"
181
+ if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
182
+ set_metadata("digest_algorithm", "SHA512")
76
183
  else
77
- counters[:updated] += 1
78
- puts "UPDATED: #{file_path}" unless @options[:quiet]
79
- unless @options[:test_only]
80
- update_mtime_and_digest mtime, digest, found['id']
81
- end
184
+ set_metadata("digest_algorithm", "SHA256")
82
185
  end
83
- end
84
- else
85
- counters[:new] += 1
86
- puts "NEW: #{file_path}" unless @options[:quiet]
87
- unless @options[:test_only]
88
- @new_files[file_path] = digest
89
- insert file_path, mtime, digest
186
+ set_metadata "database_version", "2"
90
187
  end
91
188
  end
189
+
92
190
  end
191
+ end
93
192
 
94
- def track_renames counters
95
- @missing_files.delete_if do |filename, digest|
96
- if @new_files.value?(digest)
97
- counters[:renamed] += 1
98
- unless @options[:test_only]
99
- delete_by_filename filename
100
- end
101
- true
193
+ def perform_check
194
+ perhaps_transaction(@new_digest_algorithm, :exclusive) do
195
+ @counters = {good: 0, updated: 0, new: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
196
+ @new_files = {}
197
+ @new_digests = {}
198
+
199
+ @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
200
+
201
+ measure_time do
202
+ walk_files do |filename|
203
+ process_file filename
102
204
  end
103
205
  end
104
- counters[:missing] = @missing_files.length
105
- end
106
-
107
- def any_missing_files?
108
- @missing_files.length > 0
109
- end
110
206
 
111
- def print_missing_files
112
- puts "\nMISSING FILES:"
113
- @missing_files.sort.to_h.each do |filename, digest|
114
- puts filename
207
+ track_renames
208
+
209
+ if any_missing_files?
210
+ if any_exceptions?
211
+ STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
212
+ else
213
+ print_missing_files
214
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
215
+ remove_missing_files
216
+ end
217
+ end
115
218
  end
116
- end
117
219
 
118
- def remove_missing_files
119
- @db.transaction do
120
- @missing_files.each do |filename, digest|
121
- delete_by_filename filename
220
+ if @new_digest_algorithm && !@options[:test_only]
221
+ if any_missing_files? || any_likely_damaged? || any_exceptions?
222
+ STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
223
+ else
224
+ @new_digests.each do |old_digest, new_digest|
225
+ update_digest_to_new_digest new_digest, old_digest
226
+ end
227
+ set_metadata "digest_algorithm", @new_digest_algorithm
122
228
  end
123
229
  end
124
- end
125
230
 
126
- private
231
+ if any_likely_damaged? || any_exceptions?
232
+ STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
233
+ end
234
+
235
+ set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))
127
236
 
128
- def execute *args, &block
129
- @db.execute *args, &block
237
+ print_counters
130
238
  end
239
+ end
131
240
 
132
- def prepare_method name, query
133
- variable = "@#{name}"
134
- instance_variable_set(variable, @db.prepare(query))
135
- define_singleton_method name do |*args, &block|
136
- instance_variable_get(variable).execute(*args, &block)
241
+ def show_duplicates
242
+ current_digest = nil
243
+ query_duplicates.each do |found|
244
+ if current_digest != found['digest']
245
+ puts "" if current_digest
246
+ current_digest = found['digest']
247
+ puts "#{found['digest']}:"
137
248
  end
249
+ puts " #{found['filename']}"
138
250
  end
139
251
  end
140
252
 
141
- class Checker
142
- def initialize files_path, digest_database_path, options = {}
143
- @options = options
144
- @files_path = cleanup_path(files_path || ".")
145
- @prefix_to_remove = @files_path.to_s + '/'
253
+ private
146
254
 
147
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
255
+ def process_file filename
256
+ return if File.symlink? filename
148
257
 
149
- @digest_database_path = if digest_database_path
150
- cleanup_path(digest_database_path)
151
- else
152
- @files_path + '.file-digests.sqlite'
153
- end
258
+ stat = File.stat filename
154
259
 
155
- if File.directory?(@digest_database_path)
156
- @digest_database_path = @digest_database_path + '.file-digests.sqlite'
157
- end
260
+ return if stat.blockdev?
261
+ return if stat.chardev?
262
+ return if stat.directory?
263
+ return if stat.pipe?
264
+ return if stat.socket?
158
265
 
159
- if @files_path == @digest_database_path.dirname
160
- @skip_file_digests_sqlite = true
161
- end
266
+ raise "File is not readable" unless stat.readable?
162
267
 
163
- ensure_dir_exists @digest_database_path.dirname
268
+ if filename == "#{@digest_database_path}" ||
269
+ filename == "#{@digest_database_path}-wal" ||
270
+ filename == "#{@digest_database_path}-shm"
271
+ puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
272
+ return
273
+ end
164
274
 
165
- # Please do not use this flag, support for sha512 is here for backward compatibility, and one day it will be removed.
166
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
167
- @use_sha512 = true
168
- end
275
+ normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
276
+ mtime_string = time_to_database stat.mtime
277
+
278
+ process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
279
+
280
+ rescue => exception
281
+ @counters[:exceptions] += 1
282
+ print_file_exception exception, filename
283
+ end
169
284
 
170
- @digest_database = DigestDatabase.new @digest_database_path, @options
171
- @counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
285
+ def process_file_indeed filename, mtime, digest
286
+ if found = find_by_filename(filename)
287
+ process_previously_seen_file found, filename, mtime, digest
288
+ else
289
+ process_new_file filename, mtime, digest
172
290
  end
291
+ end
173
292
 
174
- def perform_check
175
- measure_time do
176
- walk_files do |filename|
177
- process_file filename
293
+ def process_previously_seen_file found, filename, mtime, digest
294
+ @missing_files.delete(filename)
295
+ if found['digest'] == digest
296
+ @counters[:good] += 1
297
+ puts "GOOD: #{filename}" if @options[:verbose]
298
+ unless @options[:test_only]
299
+ if found['mtime'] == mtime
300
+ touch_digest_check_time found['id']
301
+ else
302
+ update_mtime mtime, found['id']
178
303
  end
179
304
  end
305
+ else
306
+ if found['mtime'] == mtime # Digest is different and mtime is the same
307
+ @counters[:likely_damaged] += 1
308
+ STDERR.puts "LIKELY DAMAGED: #{filename}"
309
+ else
310
+ @counters[:updated] += 1
311
+ puts "UPDATED: #{filename}" unless @options[:quiet]
312
+ unless @options[:test_only]
313
+ update_mtime_and_digest mtime, digest, found['id']
314
+ end
315
+ end
316
+ end
317
+ end
318
+
319
+ def process_new_file filename, mtime, digest
320
+ @counters[:new] += 1
321
+ puts "NEW: #{filename}" unless @options[:quiet]
322
+ unless @options[:test_only]
323
+ @new_files[filename] = digest
324
+ insert filename, mtime, digest
325
+ end
326
+ end
327
+
180
328
 
181
- @digest_database.track_renames @counters
329
+ # Renames and missing files
182
330
 
183
- if @digest_database.any_missing_files?
184
- @digest_database.print_missing_files
185
- if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
186
- @digest_database.remove_missing_files
331
+ def track_renames
332
+ @missing_files.delete_if do |filename, digest|
333
+ if @new_files.value?(digest)
334
+ @counters[:renamed] += 1
335
+ unless @options[:test_only]
336
+ delete_by_filename filename
187
337
  end
338
+ true
188
339
  end
340
+ end
341
+ end
189
342
 
190
- if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
191
- STDERR.puts "ERRORS WERE OCCURRED"
192
- end
343
+ def print_missing_files
344
+ puts "\nMISSING FILES:"
345
+ @missing_files.sort.to_h.each do |filename, digest|
346
+ puts filename
347
+ end
348
+ end
193
349
 
194
- puts @counters.inspect
350
+ def remove_missing_files
351
+ nested_transaction do
352
+ @missing_files.each do |filename, digest|
353
+ delete_by_filename filename
354
+ end
355
+ @missing_files = {}
195
356
  end
357
+ end
358
+
196
359
 
197
- private
360
+ # Database helpers
198
361
 
362
+ def execute *args, &block
363
+ @db.execute *args, &block
364
+ end
199
365
 
200
- def confirm text
201
- if STDIN.tty? && STDOUT.tty?
202
- puts "#{text} (y/n)?"
203
- STDIN.gets.strip.downcase == "y"
366
+ def nested_transaction(mode)
367
+ if @db.transaction_active?
368
+ yield
369
+ else
370
+ @db.transaction(mode) do
371
+ yield
204
372
  end
205
373
  end
374
+ end
206
375
 
207
- def process_file filename
208
- return if File.symlink? filename
376
+ def perhaps_transaction(condition, mode)
377
+ if condition
378
+ @db.transaction(mode) do
379
+ yield
380
+ end
381
+ else
382
+ yield
383
+ end
384
+ end
209
385
 
210
- stat = File.stat filename
386
+ def table_exist? table_name
387
+ execute("SELECT name FROM sqlite_master WHERE type='table' AND name = '#{table_name}'").length == 1
388
+ end
211
389
 
212
- return if stat.blockdev?
213
- return if stat.chardev?
214
- return if stat.directory?
215
- return if stat.pipe?
216
- unless stat.readable?
217
- raise "File is not readable"
218
- end
219
- return if stat.socket?
220
-
221
- if @skip_file_digests_sqlite
222
- basename = File.basename(filename)
223
- return if basename == '.file-digests.sha512'
224
- return if basename == '.file-digests.sqlite'
225
- return if basename == '.file-digests.sqlite-wal'
226
- return if basename == '.file-digests.sqlite-shm'
227
- end
390
+ def prepare_method name, query
391
+ variable = "@#{name}"
228
392
 
229
- @digest_database.insert_or_update(
230
- filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
231
- stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
232
- get_file_digest(filename),
233
- @counters
234
- )
235
- rescue => exception
236
- @counters[:exceptions] += 1
237
- STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
238
- end
393
+ instance_variable_set(variable, @db.prepare(query))
239
394
 
240
- def patch_path_string path
241
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
395
+ define_singleton_method name do |*args, &block|
396
+ instance_variable_get(variable).execute(*args, &block)
242
397
  end
243
398
 
244
- def cleanup_path path
245
- Pathname.new(patch_path_string(path)).cleanpath
399
+ define_singleton_method "#{name}!" do |*args, &block|
400
+ instance_variable_get(variable).execute!(*args, &block)
246
401
  end
402
+ end
247
403
 
248
- def ensure_dir_exists path
249
- if File.exist?(path)
250
- unless File.directory?(path)
251
- raise "#{path} is not a directory"
252
- end
253
- else
254
- FileUtils.mkdir_p path
404
+ def set_metadata key, value
405
+ set_metadata_query key, value
406
+ puts "#{key} set to: #{value}" if @options[:verbose]
407
+ value
408
+ end
409
+
410
+ def get_metadata key
411
+ get_metadata_query!(key)&.first&.first
412
+ end
413
+
414
+ def find_by_filename filename
415
+ result = find_by_filename_query filename
416
+ found = result.next
417
+ raise "Multiple records found" if result.next
418
+ found
419
+ end
420
+
421
+ def time_to_database time
422
+ time.utc.strftime('%Y-%m-%d %H:%M:%S')
423
+ end
424
+
425
+
426
+ # Filesystem-related helpers
427
+
428
+ def patch_path_string path
429
+ Gem.win_platform? ? path.gsub(/\\/, '/') : path
430
+ end
431
+
432
+ def cleanup_path path
433
+ Pathname.new(patch_path_string(path)).cleanpath
434
+ end
435
+
436
+ def ensure_dir_exists path
437
+ if File.exist?(path)
438
+ unless File.directory?(path)
439
+ raise "#{path} is not a directory"
255
440
  end
441
+ else
442
+ FileUtils.mkdir_p path
256
443
  end
444
+ end
257
445
 
258
- def walk_files
259
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
260
- yield filename
261
- end
446
+ def walk_files
447
+ Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
448
+ yield filename
262
449
  end
450
+ end
263
451
 
264
- def get_file_digest filename
265
- File.open(filename, 'rb') do |io|
266
- digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
267
- buffer = ""
268
- while io.read(40960, buffer)
269
- digest.update(buffer)
270
- end
271
- return digest.hexdigest
452
+ def get_file_digest filename
453
+ File.open(filename, 'rb') do |io|
454
+ digest = OpenSSL::Digest.new(@digest_algorithm)
455
+ new_digest = OpenSSL::Digest.new(@new_digest_algorithm) if @new_digest_algorithm
456
+
457
+ buffer = ""
458
+ while io.read(409600, buffer) # 409600 seems like a sweet spot
459
+ digest.update(buffer)
460
+ new_digest.update(buffer) if @new_digest_algorithm
272
461
  end
462
+ @new_digests[digest.hexdigest] = new_digest.hexdigest if @new_digest_algorithm
463
+ return digest.hexdigest
273
464
  end
465
+ end
274
466
 
275
- def measure_time
276
- start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
277
- yield
278
- elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
279
- puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless @options[:quiet]
467
+
468
+ # Runtime state helpers
469
+
470
+ def any_missing_files?
471
+ @missing_files.length > 0
472
+ end
473
+
474
+ def any_exceptions?
475
+ @counters[:exceptions] > 0
476
+ end
477
+
478
+ def any_likely_damaged?
479
+ @counters[:likely_damaged] > 0
480
+ end
481
+
482
+ # UI helpers
483
+
484
+ def confirm text
485
+ if STDIN.tty? && STDOUT.tty?
486
+ puts "#{text} (y/n)?"
487
+ STDIN.gets.strip.downcase == "y"
488
+ end
489
+ end
490
+
491
+ def measure_time
492
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
493
+ yield
494
+ elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
495
+ puts "Elapsed time: #{elapsed.to_i / 3600}h #{(elapsed.to_i % 3600) / 60}m #{'%.3f' % (elapsed % 60)}s" unless @options[:quiet]
496
+ end
497
+
498
+ def print_file_exception exception, filename
499
+ STDERR.print "EXCEPTION: #{exception.message}, processing file: "
500
+ begin
501
+ STDERR.print filename.encode('utf-8', universal_newline: true)
502
+ rescue
503
+ STDERR.print "(Unable to encode file name to utf-8) "
504
+ STDERR.print filename
280
505
  end
506
+ STDERR.print "\n"
507
+ STDERR.flush
508
+ exception.backtrace.each { |line| STDERR.puts " " + line }
509
+ end
281
510
 
511
+ def print_counters
512
+ puts "#{@counters[:good]} file(s) passes digest check" if @counters[:good] > 0
513
+ puts "#{@counters[:updated]} file(s) are updated" if @counters[:updated] > 0
514
+ puts "#{@counters[:new]} file(s) are new" if @counters[:new] > 0
515
+ puts "#{@counters[:renamed]} file(s) are renamed" if @counters[:renamed] > 0
516
+ puts "#{@missing_files.length} file(s) are missing" if @missing_files.length > 0
517
+ puts "#{@counters[:likely_damaged]} file(s) are likely damaged (!)" if @counters[:likely_damaged] > 0
518
+ puts "#{@counters[:exceptions]} file(s) had exceptions occured during processing (!)" if @counters[:exceptions] > 0
282
519
  end
283
520
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-08 00:00:00.000000000 Z
11
+ date: 2020-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sqlite3
@@ -24,22 +24,34 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.3.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: openssl
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 2.1.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 2.1.0
27
41
  description: Calculate file digests and check for the possible file corruption
28
42
  email: stan@senotrusov.com
29
43
  executables:
30
44
  - file-digests
31
- - file-digests-test
32
45
  extensions: []
33
46
  extra_rdoc_files: []
34
47
  files:
35
48
  - bin/file-digests
36
- - bin/file-digests-test
37
49
  - lib/file-digests.rb
38
50
  homepage: https://github.com/senotrusov/file-digests
39
51
  licenses:
40
52
  - Apache-2.0
41
53
  metadata: {}
42
- post_install_message:
54
+ post_install_message:
43
55
  rdoc_options: []
44
56
  require_paths:
45
57
  - lib
@@ -55,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
55
67
  version: '0'
56
68
  requirements: []
57
69
  rubygems_version: 3.1.2
58
- signing_key:
70
+ signing_key:
59
71
  specification_version: 4
60
72
  summary: file-digests
61
73
  test_files: []
@@ -1,7 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- ENV["TEST_ONLY"] = "true"
4
-
5
- require 'file-digests'
6
-
7
- FileDigests.perform_check