file-digests 0.0.21 → 0.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/file-digests +1 -1
- data/lib/file-digests.rb +407 -166
- metadata +23 -15
- data/bin/file-digests-auto +0 -7
- data/bin/file-digests-show-duplicates +0 -5
- data/bin/file-digests-test +0 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 912b66387453e26ebd7280d7f2dbd599838098a6bcc4da70d1eabe19737ba172
|
|
4
|
+
data.tar.gz: 9c02352223aff8d6489892df533606bd512de3357dfb59a82513dc04aa1eddfe
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0380c5acf3750632a3fe49f63d93dbc7533da1389f3e8cab41addc8909f71552a280aec7e3e783e9ae20071bc94af3d70021098b1fbfba96a3e1f78967479c34'
|
|
7
|
+
data.tar.gz: 4b0a05a27dcb61ee405dc06688753d5eb162e3fdf513fa45aee9a1e2d946fd4697b4f2dbb45f1af57f33ee787c366665144a999c161976cd81992902bfe90fb5
|
data/bin/file-digests
CHANGED
data/lib/file-digests.rb
CHANGED
|
@@ -1,120 +1,260 @@
|
|
|
1
1
|
require 'date'
|
|
2
|
-
require 'set'
|
|
3
2
|
require 'digest'
|
|
4
3
|
require 'fileutils'
|
|
4
|
+
require 'openssl'
|
|
5
|
+
require 'optparse'
|
|
5
6
|
require 'pathname'
|
|
7
|
+
require 'set'
|
|
6
8
|
require 'sqlite3'
|
|
7
9
|
|
|
8
10
|
class FileDigests
|
|
11
|
+
# Digest algorithms that may be selected with the --digest option.
# Frozen so that the shared list cannot be modified by accident at runtime.
DIGEST_ALGORITHMS = ["BLAKE2b512", "SHA3-256", "SHA512-256"].freeze
# Algorithms that are still recognized by name but are not offered for selection.
LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"].freeze
|
|
13
|
+
|
|
14
|
+
# Maps an algorithm name given in any letter case to its canonical spelling.
#
# Both DIGEST_ALGORITHMS and LEGACY_DIGEST_ALGORITHMS are searched.
# Returns the canonical name, or nil when the argument is nil/false or does
# not name a known algorithm.
def self.canonical_digest_algorithm_name(string)
  return unless string

  wanted = string.downcase
  known = DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS
  known.find { |candidate| candidate.downcase == wanted }
end
|
|
9
21
|
|
|
10
|
-
def
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
}
|
|
16
|
-
file_digests = self.new ARGV[0], ARGV[1], options
|
|
17
|
-
file_digests.perform_check
|
|
22
|
+
# Instance-level shortcut that forwards to the class method of the same name.
def canonical_digest_algorithm_name(string)
  owner = self.class
  owner.canonical_digest_algorithm_name(string)
end
|
|
25
|
+
|
|
26
|
+
# Builds the sentence that lists every selectable digest algorithm.
# It is used both in the --digest help text and in the related error message.
def self.digest_algorithms_list_text
  supported = DIGEST_ALGORITHMS.join(", ")
  "Digest algorithm should be one of the following: #{supported}"
end
|
|
19
29
|
|
|
20
|
-
def self.
|
|
21
|
-
|
|
22
|
-
|
|
30
|
+
# Parses command-line switches from ARGV into an options hash.
#
# Recognized switches are removed from ARGV (OptionParser#parse!), so the
# positional arguments remain for the caller. Keys that may be set:
# :auto, :accept_fate, :digest_algorithm, :action, :test_only, :quiet, :verbose.
#
# A --digest value that is not in DIGEST_ALGORITHMS prints an error to STDERR
# and exits with status 1; --help prints the usage text and exits.
def self.parse_cli_options
  options = {}

  parser = OptionParser.new do |opts|
    opts.banner = [
      "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
      " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
      " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
    ].join "\n"

    opts.on("-a", "--auto", "Do not ask for any confirmation") do
      options[:auto] = true
    end

    opts.on("--accept-fate", "Accept the current state of files that are likely damaged and update their digest data") do
      options[:accept_fate] = true
    end

    opts.on(
      '--digest=DIGEST',
      'Select a digest algorithm to use. Default is "BLAKE2b512".',
      'You might also consider using slower "SHA512-256" or even slower "SHA3-256".',
      "#{digest_algorithms_list_text}.",
      'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
      'Any time later you could specify a new algorithm to change the current one.',
      'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
    ) do |value|
      digest_algorithm = canonical_digest_algorithm_name(value)
      # Legacy algorithms resolve to a canonical name but are not selectable.
      unless DIGEST_ALGORITHMS.include?(digest_algorithm)
        STDERR.puts "ERROR: #{digest_algorithms_list_text}"
        exit 1
      end
      options[:digest_algorithm] = digest_algorithm
    end

    opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
      options[:action] = :show_duplicates
    end

    opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
      options[:test_only] = true
    end

    opts.on("-q", "--quiet", "Less verbose output, still report any found issues") do
      options[:quiet] = true
    end

    opts.on("-v", "--verbose", "More verbose output") do
      options[:verbose] = true
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end

  parser.parse!
  options
end
|
|
88
|
+
|
|
89
|
+
# Command-line entry point: parses the switches, builds an instance from the
# two remaining positional arguments and runs the requested action
# (:perform_check unless the options select another one).
def self.run_cli_utility
  options = parse_cli_options
  action = options[:action] || :perform_check

  instance = self.new(ARGV[0], ARGV[1], options)
  instance.send(action)
end
|
|
24
95
|
|
|
25
96
|
# Sets up paths and the database, then decides which digest algorithm to use.
#
# When the database already records a known algorithm, it stays in effect and a
# different algorithm requested through options[:digest_algorithm] is only
# remembered in @new_digest_algorithm. Otherwise the requested algorithm
# (or "BLAKE2b512") is adopted and stored in the database metadata.
def initialize files_path, digest_database_path, options = {}
  @options = options

  initialize_paths files_path, digest_database_path
  initialize_database

  stored_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))
  @digest_algorithm = stored_algorithm

  if stored_algorithm
    requested = @options[:digest_algorithm]
    @new_digest_algorithm = requested if requested && requested != stored_algorithm
  else
    @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
    set_metadata "digest_algorithm", @digest_algorithm
  end

  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
end
|
|
42
113
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
end
|
|
114
|
+
# Resolves the directory to scan and the location of the database file.
#
# files_path defaults to "."; it must be a readable directory, otherwise a
# RuntimeError is raised. When digest_database_path is omitted or points to a
# directory, the file name ".file-digests.sqlite" inside that directory is used.
# The directory that will hold the database is created when missing.
def initialize_paths files_path, digest_database_path
  @files_path = cleanup_path(files_path || ".")
  unless File.directory?(@files_path) && File.readable?(@files_path)
    raise "Files path must be a readable directory"
  end

  @digest_database_path =
    if digest_database_path
      cleanup_path(digest_database_path)
    else
      @files_path
    end
  if File.directory?(@digest_database_path)
    @digest_database_path += '.file-digests.sqlite'
  end
  ensure_dir_exists @digest_database_path.dirname

  return unless @options[:verbose]

  puts "Target directory: #{@files_path}"
  puts "Database location: #{@digest_database_path}"
end
|
|
56
128
|
|
|
57
|
-
def initialize_database
|
|
58
|
-
@db = SQLite3::Database.new
|
|
129
|
+
# Opens the SQLite database, creates missing tables, prepares the statements
# used by the rest of the class and upgrades a version 1 database to version 2.
#
# Raises a RuntimeError ("Incompatible database version") when the database
# version recorded in the metadata table is anything other than "2".
def initialize_database
  @db = SQLite3::Database.new @digest_database_path.to_s
  @db.results_as_hash = true

  # nil when the code is not running from an installed file-digests gem.
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s

  execute 'PRAGMA encoding = "UTF-8"'
  execute 'PRAGMA journal_mode = "WAL"'
  execute 'PRAGMA synchronous = "NORMAL"'
  execute 'PRAGMA locking_mode = "EXCLUSIVE"'
  execute 'PRAGMA cache_size = "5000"'

  @db.transaction(:exclusive) do
    metadata_table_was_created = false
    unless table_exist?("metadata")
      execute "CREATE TABLE metadata (
        key TEXT NOT NULL PRIMARY KEY,
        value TEXT)"
      execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
      metadata_table_was_created = true
    end

    # The metadata statements must exist before set_metadata/get_metadata are called below.
    prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
    prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"

    set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created

    # Heuristic to detect database version 1 (metadata was not stored back then)
    unless get_metadata("database_version")
      if table_exist?("digests")
        set_metadata "database_version", "1"
      end
    end

    unless table_exist?("digests")
      execute "CREATE TABLE digests (
        id INTEGER NOT NULL PRIMARY KEY,
        filename TEXT NOT NULL,
        mtime TEXT,
        digest TEXT NOT NULL,
        digest_check_time TEXT NOT NULL)"
      execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
      set_metadata("digests_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
    end

    prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
    prepare_method :find_by_filename_query, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
    prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
    prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
    prepare_method :update_digest_to_new_digest, "UPDATE digests SET digest = ? WHERE digest = ?"

    # Still no version at this point means the database was just created.
    unless get_metadata("database_version")
      set_metadata "database_version", "2"
    end

    # Convert database from 1st to 2nd version
    unless get_metadata("digest_algorithm")
      if get_metadata("database_version") == "1"
        # A ".file-digests.sha512" file next to the database selects SHA512; otherwise SHA256 is recorded.
        if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
          set_metadata("digest_algorithm", "SHA512")
        else
          set_metadata("digest_algorithm", "SHA256")
        end
        set_metadata "database_version", "2"
      end
    end

    if get_metadata("database_version") != "2"
      STDERR.puts "This version of file-digests (#{file_digests_gem_version || 'unknown'}) is only compatible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version of file-digests."
      raise "Incompatible database version"
    end
  end
end
|
|
85
205
|
|
|
86
206
|
# Runs a full check of the target directory against the digest database.
#
# Steps: load every known file as "missing", walk the directory (each file seen
# is processed and removed from the missing list), detect renames, optionally
# remove records of files that are really gone, optionally switch to a newly
# requested digest algorithm, record the check time and print the counters.
#
# The whole run is wrapped in an exclusive transaction only when a digest
# algorithm change was requested.
def perform_check
  perhaps_transaction(@new_digest_algorithm, :exclusive) do
    @counters = {good: 0, updated: 0, new: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
    @new_files = {}
    @new_digests = {}

    # Block form of prepare closes the one-off statement once the rows are read.
    @missing_files = @db.prepare("SELECT filename, digest FROM digests") do |statement|
      Hash[statement.execute!]
    end

    measure_time do
      walk_files do |filename|
        process_file filename
      end
    end

    track_renames

    if any_missing_files?
      if any_exceptions?
        # A file that failed to process would also look "missing", so nothing is deleted.
        STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
      else
        print_missing_files
        if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
          remove_missing_files
        end
      end
    end

    if @new_digest_algorithm && !@options[:test_only]
      if any_missing_files? || any_likely_damaged? || any_exceptions?
        STDERR.puts "ERROR: New digest algorithm will not be in effect while there are files that are missing, likely damaged, or processed with an exception."
      else
        @new_digests.each do |old_digest, new_digest|
          update_digest_to_new_digest new_digest, old_digest
        end
        set_metadata "digest_algorithm", @new_digest_algorithm
        puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
      end
    end

    if any_likely_damaged? || any_exceptions?
      STDERR.puts "PLEASE REVIEW ERRORS THAT OCCURRED!"
    end

    set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))

    print_counters
  end
end
|
|
112
254
|
|
|
113
255
|
def show_duplicates
|
|
114
256
|
current_digest = nil
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
while found = result.next_hash do
|
|
257
|
+
query_duplicates.each do |found|
|
|
118
258
|
if current_digest != found['digest']
|
|
119
259
|
puts "" if current_digest
|
|
120
260
|
current_digest = found['digest']
|
|
@@ -135,117 +275,72 @@ class FileDigests
|
|
|
135
275
|
return if stat.chardev?
|
|
136
276
|
return if stat.directory?
|
|
137
277
|
return if stat.pipe?
|
|
138
|
-
unless stat.readable?
|
|
139
|
-
raise "File is not readable"
|
|
140
|
-
end
|
|
141
278
|
return if stat.socket?
|
|
142
279
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
280
|
+
raise "File is not readable" unless stat.readable?
|
|
281
|
+
|
|
282
|
+
if filename == "#{@digest_database_path}" ||
|
|
283
|
+
filename == "#{@digest_database_path}-wal" ||
|
|
284
|
+
filename == "#{@digest_database_path}-shm"
|
|
285
|
+
puts "SKIPPING DATABASE FILE: #{filename}" if @options[:verbose]
|
|
286
|
+
return
|
|
149
287
|
end
|
|
150
288
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
|
|
154
|
-
get_file_digest(filename)
|
|
155
|
-
)
|
|
156
|
-
rescue => exception
|
|
157
|
-
@counters[:exceptions] += 1
|
|
158
|
-
STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
|
|
159
|
-
end
|
|
289
|
+
normalized_filename = filename.delete_prefix("#{@files_path.to_s}/").encode('utf-8', universal_newline: true).unicode_normalize(:nfkc)
|
|
290
|
+
mtime_string = time_to_database stat.mtime
|
|
160
291
|
|
|
161
|
-
|
|
162
|
-
Gem.win_platform? ? path.gsub(/\\/, '/') : path
|
|
163
|
-
end
|
|
292
|
+
process_file_indeed normalized_filename, mtime_string, get_file_digest(filename)
|
|
164
293
|
|
|
165
|
-
|
|
166
|
-
|
|
294
|
+
rescue => exception
|
|
295
|
+
@counters[:exceptions] += 1
|
|
296
|
+
print_file_exception exception, filename
|
|
167
297
|
end
|
|
168
298
|
|
|
169
|
-
def
|
|
170
|
-
if
|
|
171
|
-
|
|
172
|
-
raise "#{path} is not a directory"
|
|
173
|
-
end
|
|
299
|
+
# Routes a file to the handler for already-known files or to the handler for
# new files, depending on whether the database has a record for its name.
def process_file_indeed filename, mtime, digest
  known_record = find_by_filename(filename)
  return process_new_file(filename, mtime, digest) unless known_record

  process_previously_seen_file known_record, filename, mtime, digest
end
|
|
178
306
|
|
|
179
|
-
def
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
307
|
+
# Compares a file that already has a database record with its current state.
#
# * Same digest: counted as good; the check time (and the mtime when it
#   changed) is refreshed.
# * Different digest with unchanged mtime: reported as likely damaged, unless
#   the :accept_fate option is set.
# * Different digest otherwise: counted as updated and the record is rewritten.
#
# Nothing is written to the database when the :test_only option is set.
def process_previously_seen_file found, filename, mtime, digest
  @missing_files.delete(filename)

  same_mtime = found['mtime'] == mtime
  may_write = !@options[:test_only]

  if found['digest'] == digest
    @counters[:good] += 1
    puts "GOOD: #{filename}" if @options[:verbose]
    if may_write
      same_mtime ? touch_digest_check_time(found['id']) : update_mtime(mtime, found['id'])
    end
  elsif same_mtime && !@options[:accept_fate]
    # Content changed although the modification time did not.
    @counters[:likely_damaged] += 1
    STDERR.puts "LIKELY DAMAGED: #{filename}"
  else
    @counters[:updated] += 1
    puts "UPDATED: #{filename}" unless @options[:quiet]
    update_mtime_and_digest(mtime, digest, found['id']) if may_write
  end
end
|
|
195
332
|
|
|
196
|
-
def
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
333
|
+
# Handles a file that has no record in the database yet: counts and reports it
# and, unless the :test_only option is set, remembers it in @new_files and
# inserts a record for it.
def process_new_file filename, mtime, digest
  @counters[:new] += 1
  puts "NEW: #{filename}" unless @options[:quiet]
  return if @options[:test_only]

  @new_files[filename] = digest
  insert filename, mtime, digest
end
|
|
202
341
|
|
|
203
|
-
def measure_time
|
|
204
|
-
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
205
|
-
yield
|
|
206
|
-
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
|
|
207
|
-
puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless @options[:quiet]
|
|
208
|
-
end
|
|
209
|
-
|
|
210
|
-
def insert_or_update file_path, mtime, digest
|
|
211
|
-
result = find_by_filename file_path
|
|
212
342
|
|
|
213
|
-
|
|
214
|
-
raise "Multiple records found" if result.next
|
|
215
|
-
|
|
216
|
-
@missing_files.delete(file_path)
|
|
217
|
-
|
|
218
|
-
if found['digest'] == digest
|
|
219
|
-
@counters[:good] += 1
|
|
220
|
-
# puts "GOOD: #{file_path}" unless @options[:quiet]
|
|
221
|
-
unless @options[:test_only]
|
|
222
|
-
if found['mtime'] == mtime
|
|
223
|
-
touch_digest_check_time found['id']
|
|
224
|
-
else
|
|
225
|
-
update_mtime mtime, found['id']
|
|
226
|
-
end
|
|
227
|
-
end
|
|
228
|
-
else
|
|
229
|
-
if found['mtime'] == mtime # Digest is different and mtime is the same
|
|
230
|
-
@counters[:likely_damaged] += 1
|
|
231
|
-
STDERR.puts "LIKELY DAMAGED: #{file_path}"
|
|
232
|
-
else
|
|
233
|
-
@counters[:updated] += 1
|
|
234
|
-
puts "UPDATED: #{file_path}" unless @options[:quiet]
|
|
235
|
-
unless @options[:test_only]
|
|
236
|
-
update_mtime_and_digest mtime, digest, found['id']
|
|
237
|
-
end
|
|
238
|
-
end
|
|
239
|
-
end
|
|
240
|
-
else
|
|
241
|
-
@counters[:new] += 1
|
|
242
|
-
puts "NEW: #{file_path}" unless @options[:quiet]
|
|
243
|
-
unless @options[:test_only]
|
|
244
|
-
@new_files[file_path] = digest
|
|
245
|
-
insert file_path, mtime, digest
|
|
246
|
-
end
|
|
247
|
-
end
|
|
248
|
-
end
|
|
343
|
+
# Renames and missing files
|
|
249
344
|
|
|
250
345
|
def track_renames
|
|
251
346
|
@missing_files.delete_if do |filename, digest|
|
|
@@ -257,11 +352,6 @@ class FileDigests
|
|
|
257
352
|
true
|
|
258
353
|
end
|
|
259
354
|
end
|
|
260
|
-
@counters[:missing] = @missing_files.length
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
def any_missing_files?
|
|
264
|
-
@missing_files.length > 0
|
|
265
355
|
end
|
|
266
356
|
|
|
267
357
|
def print_missing_files
|
|
@@ -272,22 +362,173 @@ class FileDigests
|
|
|
272
362
|
end
|
|
273
363
|
|
|
274
364
|
# Deletes the database record of every file still listed as missing, inside a
# transaction, and then empties the missing-files list.
def remove_missing_files
  nested_transaction do
    @missing_files.each_key { |filename| delete_by_filename filename }
    @missing_files = {}
  end
end
|
|
281
372
|
|
|
373
|
+
|
|
374
|
+
# Database helpers
|
|
375
|
+
|
|
282
376
|
# Forwards all arguments and the optional block to the database handle's
# execute method and returns its result.
def execute(*args, &block)
  @db.execute(*args, &block)
end
|
|
285
379
|
|
|
380
|
+
# Runs the block inside a database transaction of the given mode, unless a
# transaction is already active, in which case the block simply runs within it.
def nested_transaction(mode = :deferred)
  return yield if @db.transaction_active?

  @db.transaction(mode) { yield }
end
|
|
389
|
+
|
|
390
|
+
# Runs the block inside nested_transaction when condition is truthy, and
# without any transaction handling otherwise.
def perhaps_transaction(condition, mode = :deferred)
  return yield unless condition

  nested_transaction(mode) { yield }
end
|
|
399
|
+
|
|
400
|
+
# Tells whether a table with the given name exists in the database.
#
# The name is passed as a bound parameter rather than interpolated into the
# SQL text, so quotes or other special characters in it cannot alter the query.
def table_exist? table_name
  execute("SELECT name FROM sqlite_master WHERE type='table' AND name = ?", [table_name]).length == 1
end
|
|
403
|
+
|
|
286
404
|
# Prepares the SQL statement once, stores it in the instance variable @<name>
# and defines two singleton methods on this object:
#   <name>(*args)  - runs the statement through Statement#execute
#   <name>!(*args) - runs the statement through Statement#execute!
def prepare_method name, query
  statement_ivar = "@#{name}"
  instance_variable_set(statement_ivar, @db.prepare(query))

  define_singleton_method(name) do |*args, &block|
    prepared = instance_variable_get(statement_ivar)
    prepared.execute(*args, &block)
  end

  define_singleton_method("#{name}!") do |*args, &block|
    prepared = instance_variable_get(statement_ivar)
    prepared.execute!(*args, &block)
  end
end
|
|
417
|
+
|
|
418
|
+
# Stores a key/value pair in the metadata table, announces it in verbose mode
# and returns the value.
def set_metadata key, value
  set_metadata_query(key, value)
  if @options[:verbose]
    puts "#{key} set to: #{value}"
  end
  value
end
|
|
423
|
+
|
|
424
|
+
# Reads the value stored under the given key in the metadata table.
# Returns nil when there is no row for the key.
def get_metadata key
  rows = get_metadata_query!(key)
  return nil if rows.nil?

  first_row = rows.first
  first_row.nil? ? nil : first_row.first
end
|
|
427
|
+
|
|
428
|
+
# Looks up the database record for a file name.
# Returns the first row of the result, or nil when there is none; raises a
# RuntimeError when the query yields more than one row.
def find_by_filename filename
  rows = find_by_filename_query(filename)
  record = rows.next
  extra = rows.next
  raise "Multiple records found" if extra

  record
end
|
|
434
|
+
|
|
435
|
+
# Formats a Time as the UTC "YYYY-MM-DD HH:MM:SS" text stored in the database.
#
# Time#getutc returns a converted copy, so the caller's Time object keeps its
# original time zone (Time#utc would convert the receiver in place).
def time_to_database time
  time.getutc.strftime('%Y-%m-%d %H:%M:%S')
end
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
# Filesystem-related helpers
|
|
441
|
+
|
|
442
|
+
# On Windows, returns a copy of the path with every backslash replaced by a
# forward slash; on other platforms returns the path unchanged.
def patch_path_string path
  return path unless Gem.win_platform?

  path.tr('\\', '/')
end
|
|
445
|
+
|
|
446
|
+
# Converts a path string into a Pathname with separators patched for the
# platform and redundant "." / ".." components removed (Pathname#cleanpath).
def cleanup_path path
  patched = patch_path_string(path)
  Pathname.new(patched).cleanpath
end
|
|
449
|
+
|
|
450
|
+
# Makes sure the given path is a directory: creates it (with parents) when
# nothing exists there, and raises a RuntimeError when something other than a
# directory is already present.
def ensure_dir_exists path
  return FileUtils.mkdir_p(path) unless File.exist?(path)

  raise "#{path} is not a directory" unless File.directory?(path)
end
|
|
459
|
+
|
|
460
|
+
# Yields the path of every entry below the target directory, including
# dot-files, as a String prefixed with the target directory.
#
# The target directory is handed to Dir.glob through `base:` instead of being
# embedded in the pattern, so glob metacharacters in its name ("[", "{", "*",
# "?") are treated literally and cannot break the traversal.
def walk_files
  Dir.glob(File.join('**', '*'), File::FNM_DOTMATCH, base: @files_path.to_s) do |relative_name|
    # Pathname#+ keeps paths under "." free of a "./" prefix.
    yield (@files_path + relative_name).to_s
  end
end
|
|
465
|
+
|
|
466
|
+
# Reads the file in binary mode, chunk by chunk, and returns its hex digest
# computed with the current algorithm.
#
# While a change of algorithm is pending (@new_digest_algorithm is set), the
# digest for the new algorithm is computed in the same pass and remembered in
# @new_digests, keyed by the current-algorithm digest.
def get_file_digest filename
  File.open(filename, 'rb') do |io|
    migrating = @new_digest_algorithm
    current = OpenSSL::Digest.new(@digest_algorithm)
    upcoming = OpenSSL::Digest.new(@new_digest_algorithm) if migrating

    chunk = ""
    while io.read(409600, chunk) # 409600 seems like a sweet spot
      current.update(chunk)
      upcoming.update(chunk) if migrating
    end

    current_hex = current.hexdigest
    @new_digests[current_hex] = upcoming.hexdigest if migrating
    current_hex
  end
end
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
# Runtime state helpers
|
|
483
|
+
|
|
484
|
+
# True when at least one file known to the database is still listed as missing.
def any_missing_files?
  !@missing_files.empty?
end
|
|
487
|
+
|
|
488
|
+
# True when at least one exception has been counted during this run.
def any_exceptions?
  @counters[:exceptions].positive?
end
|
|
491
|
+
|
|
492
|
+
# True when at least one file has been counted as likely damaged during this run.
def any_likely_damaged?
  @counters[:likely_damaged].positive?
end
|
|
495
|
+
|
|
496
|
+
# UI helpers
|
|
497
|
+
|
|
498
|
+
# Asks a yes/no question on the terminal.
#
# Returns true only when the answer is "y" (any case, surrounding whitespace
# ignored). Returns nil without asking when STDIN or STDOUT is not a terminal.
# End of input (STDIN.gets returning nil) is treated as a negative answer
# instead of raising NoMethodError.
def confirm text
  if STDIN.tty? && STDOUT.tty?
    puts "#{text} (y/n)?"
    STDIN.gets.to_s.strip.downcase == "y"
  end
end
|
|
504
|
+
|
|
505
|
+
# Runs the block and, unless the :quiet option is set, prints how long it took
# as hours, minutes and seconds (seconds with millisecond precision).
# A monotonic clock is used for the measurement.
def measure_time
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  return if @options[:quiet]

  whole_seconds = elapsed.to_i
  hours = whole_seconds / 3600
  minutes = (whole_seconds % 3600) / 60
  seconds = format('%.3f', elapsed % 60)
  puts "Elapsed time: #{hours}h #{minutes}m #{seconds}s"
end
|
|
511
|
+
|
|
512
|
+
# Reports an exception that happened while a file was being processed.
#
# Writes the message, the file name and the backtrace to STDERR. The file name
# is re-encoded to UTF-8 when possible and printed as-is otherwise. An
# exception without a backtrace (one that was never raised) is tolerated.
def print_file_exception exception, filename
  STDERR.print "EXCEPTION: #{exception.message}, processing file: "
  begin
    STDERR.print filename.encode('utf-8', universal_newline: true)
  rescue
    STDERR.print "(Unable to encode file name to utf-8) "
    STDERR.print filename
  end
  STDERR.print "\n"
  STDERR.flush
  # Exception#backtrace is nil until the exception has been raised.
  Array(exception.backtrace).each { |line| STDERR.puts " " + line }
end
|
|
524
|
+
|
|
525
|
+
# Prints a one-line summary for every category that has at least one file:
# good, updated, new, renamed, missing, likely damaged and failed with an
# exception. Categories with a zero count are omitted.
def print_counters
  summary = [
    [@counters[:good], "file(s) passes digest check"],
    [@counters[:updated], "file(s) are updated"],
    [@counters[:new], "file(s) are new"],
    [@counters[:renamed], "file(s) are renamed"],
    [@missing_files.length, "file(s) are missing"],
    [@counters[:likely_damaged], "file(s) are likely damaged (!)"],
    [@counters[:exceptions], "file(s) had exceptions occured during processing (!)"]
  ]

  summary.each do |count, description|
    puts "#{count} #{description}" if count > 0
  end
  nil
end
|
|
293
534
|
end
|
metadata
CHANGED
|
@@ -1,49 +1,57 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: file-digests
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.26
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Stanislav Senotrusov
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-10-
|
|
11
|
+
date: 2020-10-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: sqlite3
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- - "
|
|
17
|
+
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 1.3
|
|
19
|
+
version: '1.3'
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- - "
|
|
24
|
+
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 1.3
|
|
26
|
+
version: '1.3'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: openssl
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '2.1'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '2.1'
|
|
27
41
|
description: Calculate file digests and check for the possible file corruption
|
|
28
42
|
email: stan@senotrusov.com
|
|
29
43
|
executables:
|
|
30
44
|
- file-digests
|
|
31
|
-
- file-digests-auto
|
|
32
|
-
- file-digests-show-duplicates
|
|
33
|
-
- file-digests-test
|
|
34
45
|
extensions: []
|
|
35
46
|
extra_rdoc_files: []
|
|
36
47
|
files:
|
|
37
48
|
- bin/file-digests
|
|
38
|
-
- bin/file-digests-auto
|
|
39
|
-
- bin/file-digests-show-duplicates
|
|
40
|
-
- bin/file-digests-test
|
|
41
49
|
- lib/file-digests.rb
|
|
42
50
|
homepage: https://github.com/senotrusov/file-digests
|
|
43
51
|
licenses:
|
|
44
52
|
- Apache-2.0
|
|
45
53
|
metadata: {}
|
|
46
|
-
post_install_message:
|
|
54
|
+
post_install_message:
|
|
47
55
|
rdoc_options: []
|
|
48
56
|
require_paths:
|
|
49
57
|
- lib
|
|
@@ -59,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
59
67
|
version: '0'
|
|
60
68
|
requirements: []
|
|
61
69
|
rubygems_version: 3.1.2
|
|
62
|
-
signing_key:
|
|
70
|
+
signing_key:
|
|
63
71
|
specification_version: 4
|
|
64
72
|
summary: file-digests
|
|
65
73
|
test_files: []
|
data/bin/file-digests-auto
DELETED