file-digests 0.0.15 → 0.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/file-digests +0 -3
- data/bin/file-digests-auto +7 -0
- data/bin/file-digests-test +1 -2
- data/lib/file-digests.rb +226 -190
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 053daa0145db845876fb37b63656102744c524034b87cb91e85f0bce4658aec9
|
4
|
+
data.tar.gz: eac730f10cbe5812078e3461b74b19a740611e9375dd7c34684b2a896e881fc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ce4894b25eb9ecbcbee2ae6c32a9e7bc09fce12286a31896c5d6093ed1ca0f0618e2eaf01facd464c3ebb52f1f9e327198d6a4abffae4f011f9c9d589e6c2d3c
|
7
|
+
data.tar.gz: bdd07cc7c958095b11b57174d8d98cd065d9a83289aeac5ce62dc604c9526f46ab512a7b229e1620c3ba0a49139cae341ac51b11af372e8395eb5a9bfbece1c6
|
data/bin/file-digests
CHANGED
data/bin/file-digests-test
CHANGED
data/lib/file-digests.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'date'
|
3
2
|
require 'set'
|
4
3
|
require 'digest'
|
@@ -6,252 +5,289 @@ require 'fileutils'
|
|
6
5
|
require 'pathname'
|
7
6
|
require 'sqlite3'
|
8
7
|
|
9
|
-
|
8
|
+
class FileDigests
|
10
9
|
|
11
10
|
def self.perform_check
|
12
|
-
|
13
|
-
|
11
|
+
options = {
|
12
|
+
auto: (ENV["AUTO"] == "true"),
|
13
|
+
quiet: (ENV["QUIET"] == "true"),
|
14
|
+
test_only: (ENV["TEST_ONLY"] == "true")
|
15
|
+
}
|
16
|
+
file_digests = self.new ARGV[0], ARGV[1], options
|
17
|
+
file_digests.perform_check
|
14
18
|
end
|
15
19
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
execute 'PRAGMA journal_mode = "WAL"'
|
22
|
-
execute 'PRAGMA synchronous = "NORMAL"'
|
23
|
-
execute 'PRAGMA locking_mode = "EXCLUSIVE"'
|
24
|
-
execute 'PRAGMA cache_size = "5000"'
|
25
|
-
|
26
|
-
unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
|
27
|
-
execute 'PRAGMA encoding = "UTF-8"'
|
28
|
-
execute "CREATE TABLE digests (
|
29
|
-
id INTEGER PRIMARY KEY,
|
30
|
-
filename TEXT,
|
31
|
-
mtime TEXT,
|
32
|
-
digest TEXT,
|
33
|
-
digest_check_time TEXT)"
|
34
|
-
execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
|
35
|
-
end
|
20
|
+
def self.show_duplicates
|
21
|
+
file_digests = self.new ARGV[0], ARGV[1]
|
22
|
+
file_digests.show_duplicates
|
23
|
+
end
|
36
24
|
|
37
|
-
|
38
|
-
|
25
|
+
def initialize files_path, digest_database_path, options = {}
|
26
|
+
@options = options
|
39
27
|
|
40
|
-
|
41
|
-
|
42
|
-
prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
|
43
|
-
prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
|
44
|
-
prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
|
45
|
-
prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
|
46
|
-
end
|
28
|
+
@files_path = cleanup_path(files_path || ".")
|
29
|
+
@prefix_to_remove = @files_path.to_s + '/'
|
47
30
|
|
48
|
-
|
49
|
-
result = find_by_filename file_path
|
31
|
+
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
50
32
|
|
51
|
-
|
52
|
-
|
33
|
+
@digest_database_path = if digest_database_path
|
34
|
+
cleanup_path(digest_database_path)
|
35
|
+
else
|
36
|
+
@files_path + '.file-digests.sqlite'
|
37
|
+
end
|
53
38
|
|
54
|
-
|
39
|
+
if File.directory?(@digest_database_path)
|
40
|
+
@digest_database_path = @digest_database_path + '.file-digests.sqlite'
|
41
|
+
end
|
55
42
|
|
56
|
-
|
57
|
-
|
58
|
-
# puts "GOOD: #{file_path}" unless QUIET
|
59
|
-
unless TEST_ONLY
|
60
|
-
if found['mtime'] == mtime
|
61
|
-
touch_digest_check_time found['id']
|
62
|
-
else
|
63
|
-
update_mtime mtime, found['id']
|
64
|
-
end
|
65
|
-
end
|
66
|
-
else
|
67
|
-
if found['mtime'] == mtime # Digest is different and mtime is the same
|
68
|
-
counters[:likely_damaged] += 1
|
69
|
-
STDERR.puts "LIKELY DAMAGED: #{file_path}"
|
70
|
-
else
|
71
|
-
counters[:updated] += 1
|
72
|
-
puts "UPDATED: #{file_path}" unless QUIET
|
73
|
-
unless TEST_ONLY
|
74
|
-
update_mtime_and_digest mtime, digest, found['id']
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
else
|
79
|
-
counters[:new] += 1
|
80
|
-
puts "NEW: #{file_path}" unless QUIET
|
81
|
-
unless TEST_ONLY
|
82
|
-
@new_files[file_path] = digest
|
83
|
-
insert file_path, mtime, digest
|
84
|
-
end
|
85
|
-
end
|
43
|
+
if @files_path == @digest_database_path.dirname
|
44
|
+
@skip_file_digests_sqlite = true
|
86
45
|
end
|
87
46
|
|
88
|
-
|
89
|
-
@missing_files.delete_if do |filename, digest|
|
90
|
-
if @new_files.value?(digest)
|
91
|
-
counters[:renamed] += 1
|
92
|
-
unless TEST_ONLY
|
93
|
-
delete_by_filename filename
|
94
|
-
end
|
95
|
-
true
|
96
|
-
end
|
97
|
-
end
|
47
|
+
ensure_dir_exists @digest_database_path.dirname
|
98
48
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
puts filename
|
103
|
-
end
|
104
|
-
unless TEST_ONLY
|
105
|
-
puts "Remove missing files from the database (y/n)?"
|
106
|
-
if STDIN.gets.strip.downcase == "y"
|
107
|
-
@db.transaction do
|
108
|
-
@missing_files.each do |filename, digest|
|
109
|
-
delete_by_filename filename
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
49
|
+
# Please do not use this flag, support for sha512 is here for backward compatibility, and one day it will be removed.
|
50
|
+
if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
|
51
|
+
@use_sha512 = true
|
115
52
|
end
|
116
53
|
|
117
|
-
|
54
|
+
initialize_database @digest_database_path
|
55
|
+
end
|
118
56
|
|
119
|
-
|
120
|
-
|
57
|
+
def initialize_database path
|
58
|
+
@db = SQLite3::Database.new path.to_s
|
59
|
+
@db.results_as_hash = true
|
60
|
+
|
61
|
+
execute 'PRAGMA journal_mode = "WAL"'
|
62
|
+
execute 'PRAGMA synchronous = "NORMAL"'
|
63
|
+
execute 'PRAGMA locking_mode = "EXCLUSIVE"'
|
64
|
+
execute 'PRAGMA cache_size = "5000"'
|
65
|
+
|
66
|
+
unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
|
67
|
+
execute 'PRAGMA encoding = "UTF-8"'
|
68
|
+
execute "CREATE TABLE digests (
|
69
|
+
id INTEGER PRIMARY KEY,
|
70
|
+
filename TEXT,
|
71
|
+
mtime TEXT,
|
72
|
+
digest TEXT,
|
73
|
+
digest_check_time TEXT)"
|
74
|
+
execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
|
121
75
|
end
|
122
76
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
77
|
+
prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
|
78
|
+
prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
|
79
|
+
prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
|
80
|
+
prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
|
81
|
+
prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
|
82
|
+
prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
|
83
|
+
prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
|
130
84
|
end
|
131
85
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
86
|
+
def perform_check
|
87
|
+
@counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
88
|
+
@missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
|
89
|
+
@new_files = {}
|
136
90
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
cleanup_path(digest_database_path)
|
141
|
-
else
|
142
|
-
@files_path + '.file-digests.sqlite'
|
91
|
+
measure_time do
|
92
|
+
walk_files do |filename|
|
93
|
+
process_file filename
|
143
94
|
end
|
95
|
+
end
|
144
96
|
|
145
|
-
|
146
|
-
@digest_database_path = @digest_database_path + '.file-digests.sqlite'
|
147
|
-
end
|
97
|
+
track_renames
|
148
98
|
|
149
|
-
|
150
|
-
|
99
|
+
if any_missing_files?
|
100
|
+
print_missing_files
|
101
|
+
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
102
|
+
remove_missing_files
|
151
103
|
end
|
104
|
+
end
|
152
105
|
|
153
|
-
|
106
|
+
if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
|
107
|
+
STDERR.puts "ERRORS WERE OCCURRED"
|
108
|
+
end
|
154
109
|
|
155
|
-
|
156
|
-
|
157
|
-
end
|
110
|
+
puts @counters.inspect
|
111
|
+
end
|
158
112
|
|
159
|
-
|
160
|
-
|
161
|
-
|
113
|
+
def show_duplicates
|
114
|
+
current_digest = nil
|
115
|
+
result = query_duplicates
|
162
116
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
117
|
+
while found = result.next_hash do
|
118
|
+
if current_digest != found['digest']
|
119
|
+
puts "" if current_digest
|
120
|
+
current_digest = found['digest']
|
121
|
+
puts "#{found['digest']}:"
|
168
122
|
end
|
123
|
+
puts " #{found['filename']}"
|
124
|
+
end
|
125
|
+
end
|
169
126
|
|
170
|
-
|
127
|
+
private
|
171
128
|
|
172
|
-
|
173
|
-
|
174
|
-
end
|
129
|
+
def process_file filename
|
130
|
+
return if File.symlink? filename
|
175
131
|
|
176
|
-
|
132
|
+
stat = File.stat filename
|
133
|
+
|
134
|
+
return if stat.blockdev?
|
135
|
+
return if stat.chardev?
|
136
|
+
return if stat.directory?
|
137
|
+
return if stat.pipe?
|
138
|
+
unless stat.readable?
|
139
|
+
raise "File is not readable"
|
140
|
+
end
|
141
|
+
return if stat.socket?
|
142
|
+
|
143
|
+
if @skip_file_digests_sqlite
|
144
|
+
basename = File.basename(filename)
|
145
|
+
return if basename == '.file-digests.sha512'
|
146
|
+
return if basename == '.file-digests.sqlite'
|
147
|
+
return if basename == '.file-digests.sqlite-wal'
|
148
|
+
return if basename == '.file-digests.sqlite-shm'
|
177
149
|
end
|
178
150
|
|
179
|
-
|
151
|
+
insert_or_update(
|
152
|
+
filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
|
153
|
+
stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
|
154
|
+
get_file_digest(filename)
|
155
|
+
)
|
156
|
+
rescue => exception
|
157
|
+
@counters[:exceptions] += 1
|
158
|
+
STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
|
159
|
+
end
|
180
160
|
|
181
|
-
|
182
|
-
|
161
|
+
def patch_path_string path
|
162
|
+
Gem.win_platform? ? path.gsub(/\\/, '/') : path
|
163
|
+
end
|
183
164
|
|
184
|
-
|
165
|
+
def cleanup_path path
|
166
|
+
Pathname.new(patch_path_string(path)).cleanpath
|
167
|
+
end
|
185
168
|
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
unless stat.readable?
|
191
|
-
raise "File is not readable"
|
192
|
-
end
|
193
|
-
return if stat.socket?
|
194
|
-
|
195
|
-
if @skip_file_digests_sqlite
|
196
|
-
basename = File.basename(filename)
|
197
|
-
return if basename == '.file-digests.sha512'
|
198
|
-
return if basename == '.file-digests.sqlite'
|
199
|
-
return if basename == '.file-digests.sqlite-wal'
|
200
|
-
return if basename == '.file-digests.sqlite-shm'
|
169
|
+
def ensure_dir_exists path
|
170
|
+
if File.exist?(path)
|
171
|
+
unless File.directory?(path)
|
172
|
+
raise "#{path} is not a directory"
|
201
173
|
end
|
174
|
+
else
|
175
|
+
FileUtils.mkdir_p path
|
176
|
+
end
|
177
|
+
end
|
202
178
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
get_file_digest(filename),
|
207
|
-
@counters
|
208
|
-
)
|
209
|
-
rescue => exception
|
210
|
-
@counters[:exceptions] += 1
|
211
|
-
STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
|
179
|
+
def walk_files
|
180
|
+
Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
|
181
|
+
yield filename
|
212
182
|
end
|
183
|
+
end
|
213
184
|
|
214
|
-
|
215
|
-
|
185
|
+
def get_file_digest filename
|
186
|
+
File.open(filename, 'rb') do |io|
|
187
|
+
digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
|
188
|
+
buffer = ""
|
189
|
+
while io.read(40960, buffer)
|
190
|
+
digest.update(buffer)
|
191
|
+
end
|
192
|
+
return digest.hexdigest
|
216
193
|
end
|
194
|
+
end
|
217
195
|
|
218
|
-
|
219
|
-
|
196
|
+
def confirm text
|
197
|
+
if STDIN.tty? && STDOUT.tty?
|
198
|
+
puts "#{text} (y/n)?"
|
199
|
+
STDIN.gets.strip.downcase == "y"
|
220
200
|
end
|
201
|
+
end
|
202
|
+
|
203
|
+
def measure_time
|
204
|
+
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
205
|
+
yield
|
206
|
+
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
|
207
|
+
puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless @options[:quiet]
|
208
|
+
end
|
221
209
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
210
|
+
def insert_or_update file_path, mtime, digest
|
211
|
+
result = find_by_filename file_path
|
212
|
+
|
213
|
+
if found = result.next_hash
|
214
|
+
raise "Multiple records found" if result.next
|
215
|
+
|
216
|
+
@missing_files.delete(file_path)
|
217
|
+
|
218
|
+
if found['digest'] == digest
|
219
|
+
@counters[:good] += 1
|
220
|
+
# puts "GOOD: #{file_path}" unless @options[:quiet]
|
221
|
+
unless @options[:test_only]
|
222
|
+
if found['mtime'] == mtime
|
223
|
+
touch_digest_check_time found['id']
|
224
|
+
else
|
225
|
+
update_mtime mtime, found['id']
|
226
|
+
end
|
226
227
|
end
|
227
228
|
else
|
228
|
-
|
229
|
+
if found['mtime'] == mtime # Digest is different and mtime is the same
|
230
|
+
@counters[:likely_damaged] += 1
|
231
|
+
STDERR.puts "LIKELY DAMAGED: #{file_path}"
|
232
|
+
else
|
233
|
+
@counters[:updated] += 1
|
234
|
+
puts "UPDATED: #{file_path}" unless @options[:quiet]
|
235
|
+
unless @options[:test_only]
|
236
|
+
update_mtime_and_digest mtime, digest, found['id']
|
237
|
+
end
|
238
|
+
end
|
229
239
|
end
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
240
|
+
else
|
241
|
+
@counters[:new] += 1
|
242
|
+
puts "NEW: #{file_path}" unless @options[:quiet]
|
243
|
+
unless @options[:test_only]
|
244
|
+
@new_files[file_path] = digest
|
245
|
+
insert file_path, mtime, digest
|
235
246
|
end
|
236
247
|
end
|
248
|
+
end
|
237
249
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
250
|
+
def track_renames
|
251
|
+
@missing_files.delete_if do |filename, digest|
|
252
|
+
if @new_files.value?(digest)
|
253
|
+
@counters[:renamed] += 1
|
254
|
+
unless @options[:test_only]
|
255
|
+
delete_by_filename filename
|
244
256
|
end
|
245
|
-
|
257
|
+
true
|
246
258
|
end
|
247
259
|
end
|
260
|
+
@counters[:missing] = @missing_files.length
|
261
|
+
end
|
262
|
+
|
263
|
+
def any_missing_files?
|
264
|
+
@missing_files.length > 0
|
265
|
+
end
|
266
|
+
|
267
|
+
def print_missing_files
|
268
|
+
puts "\nMISSING FILES:"
|
269
|
+
@missing_files.sort.to_h.each do |filename, digest|
|
270
|
+
puts filename
|
271
|
+
end
|
272
|
+
end
|
248
273
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
274
|
+
def remove_missing_files
|
275
|
+
@db.transaction do
|
276
|
+
@missing_files.each do |filename, digest|
|
277
|
+
delete_by_filename filename
|
278
|
+
end
|
254
279
|
end
|
280
|
+
end
|
281
|
+
|
282
|
+
def execute *args, &block
|
283
|
+
@db.execute *args, &block
|
284
|
+
end
|
255
285
|
|
286
|
+
def prepare_method name, query
|
287
|
+
variable = "@#{name}"
|
288
|
+
instance_variable_set(variable, @db.prepare(query))
|
289
|
+
define_singleton_method name do |*args, &block|
|
290
|
+
instance_variable_get(variable).execute(*args, &block)
|
291
|
+
end
|
256
292
|
end
|
257
293
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
@@ -28,11 +28,13 @@ description: Calculate file digests and check for the possible file corruption
|
|
28
28
|
email: stan@senotrusov.com
|
29
29
|
executables:
|
30
30
|
- file-digests
|
31
|
+
- file-digests-auto
|
31
32
|
- file-digests-test
|
32
33
|
extensions: []
|
33
34
|
extra_rdoc_files: []
|
34
35
|
files:
|
35
36
|
- bin/file-digests
|
37
|
+
- bin/file-digests-auto
|
36
38
|
- bin/file-digests-test
|
37
39
|
- lib/file-digests.rb
|
38
40
|
homepage: https://github.com/senotrusov/file-digests
|