file-digests 0.0.14 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/file-digests +0 -3
- data/bin/file-digests-auto +7 -0
- data/bin/file-digests-test +1 -2
- data/lib/file-digests.rb +217 -194
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7aeebbfa688dc871d736ca71e740b3bdd4804701d29e4b960da16d1f75bdb04f
|
4
|
+
data.tar.gz: 136b2f908e41f56ba2ef243130d4a8a13b156f49b1c179a37225d3d19b0eff1c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9dd356eb305cd87874c3c8bc84905dec4388172e14db4b6a0d78004cf9bf183dc6c508537aa6f2806cc8d0f7eaeec68ccb6d05ea7cc5cf18db95fe86db9f4a15
|
7
|
+
data.tar.gz: 8a2d67681bc07b46fa4e93c280c8355eb639168e7bc1f244d3dae6cfd57e58fde6ded1a4d25aa2f63364bc639a55038936bedf990cc0608b267e2facd60bf556
|
data/bin/file-digests
CHANGED
data/bin/file-digests-test
CHANGED
data/lib/file-digests.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'date'
|
3
2
|
require 'set'
|
4
3
|
require 'digest'
|
@@ -6,246 +5,270 @@ require 'fileutils'
|
|
6
5
|
require 'pathname'
|
7
6
|
require 'sqlite3'
|
8
7
|
|
9
|
-
|
8
|
+
class FileDigests
|
10
9
|
|
11
|
-
def self.
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
10
|
+
def self.perform_check
|
11
|
+
options = {
|
12
|
+
auto: (ENV["AUTO"] == "true"),
|
13
|
+
quiet: (ENV["QUIET"] == "true"),
|
14
|
+
test_only: (ENV["TEST_ONLY"] == "true")
|
15
|
+
}
|
16
|
+
file_digests = self.new ARGV[0], ARGV[1], options
|
17
|
+
file_digests.perform_check
|
19
18
|
end
|
20
19
|
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless QUIET
|
26
|
-
end
|
20
|
+
def initialize files_path, digest_database_path, options = {}
|
21
|
+
@options = options
|
22
|
+
@files_path = cleanup_path(files_path || ".")
|
23
|
+
@prefix_to_remove = @files_path.to_s + '/'
|
27
24
|
|
28
|
-
|
29
|
-
Gem.win_platform? ? path.gsub(/\\/, '/') : path
|
30
|
-
end
|
25
|
+
raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
|
31
26
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
class DigestDatabase
|
38
|
-
def initialize path
|
39
|
-
@db = SQLite3::Database.new path.to_s
|
40
|
-
@db.results_as_hash = true
|
41
|
-
|
42
|
-
execute 'PRAGMA journal_mode = "WAL"'
|
43
|
-
execute 'PRAGMA synchronous = "NORMAL"'
|
44
|
-
execute 'PRAGMA locking_mode = "EXCLUSIVE"'
|
45
|
-
execute 'PRAGMA cache_size = "5000"'
|
46
|
-
|
47
|
-
unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
|
48
|
-
execute 'PRAGMA encoding = "UTF-8"'
|
49
|
-
execute "CREATE TABLE digests (
|
50
|
-
id INTEGER PRIMARY KEY,
|
51
|
-
filename TEXT,
|
52
|
-
mtime TEXT,
|
53
|
-
digest TEXT,
|
54
|
-
digest_check_time TEXT)"
|
55
|
-
execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
|
56
|
-
end
|
27
|
+
@digest_database_path = if digest_database_path
|
28
|
+
cleanup_path(digest_database_path)
|
29
|
+
else
|
30
|
+
@files_path + '.file-digests.sqlite'
|
31
|
+
end
|
57
32
|
|
58
|
-
|
59
|
-
@
|
33
|
+
if File.directory?(@digest_database_path)
|
34
|
+
@digest_database_path = @digest_database_path + '.file-digests.sqlite'
|
35
|
+
end
|
60
36
|
|
61
|
-
|
62
|
-
|
63
|
-
prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
|
64
|
-
prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
|
65
|
-
prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
|
66
|
-
prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
|
37
|
+
if @files_path == @digest_database_path.dirname
|
38
|
+
@skip_file_digests_sqlite = true
|
67
39
|
end
|
68
40
|
|
69
|
-
|
70
|
-
result = find_by_filename file_path
|
41
|
+
ensure_dir_exists @digest_database_path.dirname
|
71
42
|
|
72
|
-
|
73
|
-
|
43
|
+
# Please do not use this flag, support for sha512 is here for backward compatibility, and one day it will be removed.
|
44
|
+
if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
|
45
|
+
@use_sha512 = true
|
46
|
+
end
|
74
47
|
|
75
|
-
|
48
|
+
initialize_database @digest_database_path
|
76
49
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
end
|
99
|
-
else
|
100
|
-
counters[:new] += 1
|
101
|
-
puts "NEW: #{file_path}" unless QUIET
|
102
|
-
unless TEST_ONLY
|
103
|
-
@new_files[file_path] = digest
|
104
|
-
insert file_path, mtime, digest
|
105
|
-
end
|
106
|
-
end
|
50
|
+
@counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
51
|
+
end
|
52
|
+
|
53
|
+
def initialize_database path
|
54
|
+
@db = SQLite3::Database.new path.to_s
|
55
|
+
@db.results_as_hash = true
|
56
|
+
|
57
|
+
execute 'PRAGMA journal_mode = "WAL"'
|
58
|
+
execute 'PRAGMA synchronous = "NORMAL"'
|
59
|
+
execute 'PRAGMA locking_mode = "EXCLUSIVE"'
|
60
|
+
execute 'PRAGMA cache_size = "5000"'
|
61
|
+
|
62
|
+
unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
|
63
|
+
execute 'PRAGMA encoding = "UTF-8"'
|
64
|
+
execute "CREATE TABLE digests (
|
65
|
+
id INTEGER PRIMARY KEY,
|
66
|
+
filename TEXT,
|
67
|
+
mtime TEXT,
|
68
|
+
digest TEXT,
|
69
|
+
digest_check_time TEXT)"
|
70
|
+
execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
|
107
71
|
end
|
108
72
|
|
109
|
-
|
110
|
-
|
111
|
-
if @new_files.value?(digest)
|
112
|
-
counters[:renamed] += 1
|
113
|
-
unless TEST_ONLY
|
114
|
-
delete_by_filename filename
|
115
|
-
end
|
116
|
-
true
|
117
|
-
end
|
118
|
-
end
|
73
|
+
@missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
|
74
|
+
@new_files = {}
|
119
75
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
end
|
133
|
-
end
|
134
|
-
end
|
76
|
+
prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
|
77
|
+
prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
|
78
|
+
prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
|
79
|
+
prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
|
80
|
+
prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
|
81
|
+
prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
|
82
|
+
end
|
83
|
+
|
84
|
+
def perform_check
|
85
|
+
measure_time do
|
86
|
+
walk_files do |filename|
|
87
|
+
process_file filename
|
135
88
|
end
|
136
89
|
end
|
137
90
|
|
138
|
-
|
91
|
+
track_renames
|
139
92
|
|
140
|
-
|
141
|
-
|
93
|
+
if any_missing_files?
|
94
|
+
print_missing_files
|
95
|
+
if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
|
96
|
+
remove_missing_files
|
97
|
+
end
|
142
98
|
end
|
143
99
|
|
144
|
-
|
145
|
-
|
146
|
-
instance_variable_set(variable, @db.prepare(query))
|
147
|
-
define_singleton_method name do |*args, &block|
|
148
|
-
instance_variable_get(variable).execute(*args, &block)
|
149
|
-
end
|
100
|
+
if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
|
101
|
+
STDERR.puts "ERRORS WERE OCCURRED"
|
150
102
|
end
|
151
|
-
end
|
152
103
|
|
153
|
-
|
154
|
-
|
155
|
-
@files_path = Pathname.new(FileDigests::patch_path_string(files_path || ".")).cleanpath
|
156
|
-
@prefix_to_remove = @files_path.to_s + '/'
|
104
|
+
puts @counters.inspect
|
105
|
+
end
|
157
106
|
|
158
|
-
|
107
|
+
private
|
159
108
|
|
160
|
-
|
161
|
-
|
162
|
-
else
|
163
|
-
@files_path + '.file-digests.sqlite'
|
164
|
-
end
|
109
|
+
def process_file filename
|
110
|
+
return if File.symlink? filename
|
165
111
|
|
166
|
-
|
167
|
-
@digest_database_path = @digest_database_path + '.file-digests.sqlite'
|
168
|
-
end
|
112
|
+
stat = File.stat filename
|
169
113
|
|
170
|
-
|
171
|
-
|
172
|
-
|
114
|
+
return if stat.blockdev?
|
115
|
+
return if stat.chardev?
|
116
|
+
return if stat.directory?
|
117
|
+
return if stat.pipe?
|
118
|
+
unless stat.readable?
|
119
|
+
raise "File is not readable"
|
120
|
+
end
|
121
|
+
return if stat.socket?
|
122
|
+
|
123
|
+
if @skip_file_digests_sqlite
|
124
|
+
basename = File.basename(filename)
|
125
|
+
return if basename == '.file-digests.sha512'
|
126
|
+
return if basename == '.file-digests.sqlite'
|
127
|
+
return if basename == '.file-digests.sqlite-wal'
|
128
|
+
return if basename == '.file-digests.sqlite-shm'
|
129
|
+
end
|
173
130
|
|
174
|
-
|
131
|
+
insert_or_update(
|
132
|
+
filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
|
133
|
+
stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
|
134
|
+
get_file_digest(filename)
|
135
|
+
)
|
136
|
+
rescue => exception
|
137
|
+
@counters[:exceptions] += 1
|
138
|
+
STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
|
139
|
+
end
|
175
140
|
|
176
|
-
|
177
|
-
|
178
|
-
|
141
|
+
def patch_path_string path
|
142
|
+
Gem.win_platform? ? path.gsub(/\\/, '/') : path
|
143
|
+
end
|
179
144
|
|
180
|
-
|
181
|
-
|
182
|
-
|
145
|
+
def cleanup_path path
|
146
|
+
Pathname.new(patch_path_string(path)).cleanpath
|
147
|
+
end
|
183
148
|
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
end
|
149
|
+
def ensure_dir_exists path
|
150
|
+
if File.exist?(path)
|
151
|
+
unless File.directory?(path)
|
152
|
+
raise "#{path} is not a directory"
|
189
153
|
end
|
154
|
+
else
|
155
|
+
FileUtils.mkdir_p path
|
156
|
+
end
|
157
|
+
end
|
190
158
|
|
191
|
-
|
159
|
+
def walk_files
|
160
|
+
Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
|
161
|
+
yield filename
|
162
|
+
end
|
163
|
+
end
|
192
164
|
|
193
|
-
|
194
|
-
|
165
|
+
def get_file_digest filename
|
166
|
+
File.open(filename, 'rb') do |io|
|
167
|
+
digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
|
168
|
+
buffer = ""
|
169
|
+
while io.read(40960, buffer)
|
170
|
+
digest.update(buffer)
|
195
171
|
end
|
196
|
-
|
197
|
-
puts @counters.inspect
|
172
|
+
return digest.hexdigest
|
198
173
|
end
|
174
|
+
end
|
199
175
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
176
|
+
def confirm text
|
177
|
+
if STDIN.tty? && STDOUT.tty?
|
178
|
+
puts "#{text} (y/n)?"
|
179
|
+
STDIN.gets.strip.downcase == "y"
|
204
180
|
end
|
181
|
+
end
|
182
|
+
|
183
|
+
def measure_time
|
184
|
+
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
185
|
+
yield
|
186
|
+
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
|
187
|
+
puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless @options[:quiet]
|
188
|
+
end
|
189
|
+
|
190
|
+
def insert_or_update file_path, mtime, digest
|
191
|
+
result = find_by_filename file_path
|
205
192
|
|
206
|
-
|
207
|
-
|
193
|
+
if found = result.next_hash
|
194
|
+
raise "Multiple records found" if result.next
|
208
195
|
|
209
|
-
|
196
|
+
@missing_files.delete(file_path)
|
210
197
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
198
|
+
if found['digest'] == digest
|
199
|
+
@counters[:good] += 1
|
200
|
+
# puts "GOOD: #{file_path}" unless @options[:quiet]
|
201
|
+
unless @options[:test_only]
|
202
|
+
if found['mtime'] == mtime
|
203
|
+
touch_digest_check_time found['id']
|
204
|
+
else
|
205
|
+
update_mtime mtime, found['id']
|
206
|
+
end
|
207
|
+
end
|
208
|
+
else
|
209
|
+
if found['mtime'] == mtime # Digest is different and mtime is the same
|
210
|
+
@counters[:likely_damaged] += 1
|
211
|
+
STDERR.puts "LIKELY DAMAGED: #{file_path}"
|
212
|
+
else
|
213
|
+
@counters[:updated] += 1
|
214
|
+
puts "UPDATED: #{file_path}" unless @options[:quiet]
|
215
|
+
unless @options[:test_only]
|
216
|
+
update_mtime_and_digest mtime, digest, found['id']
|
217
|
+
end
|
218
|
+
end
|
217
219
|
end
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
return if basename == '.file-digests.sqlite-wal'
|
225
|
-
return if basename == '.file-digests.sqlite-shm'
|
220
|
+
else
|
221
|
+
@counters[:new] += 1
|
222
|
+
puts "NEW: #{file_path}" unless @options[:quiet]
|
223
|
+
unless @options[:test_only]
|
224
|
+
@new_files[file_path] = digest
|
225
|
+
insert file_path, mtime, digest
|
226
226
|
end
|
227
|
+
end
|
228
|
+
end
|
227
229
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
@
|
233
|
-
|
234
|
-
rescue => exception
|
235
|
-
@counters[:exceptions] += 1
|
236
|
-
STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
|
237
|
-
end
|
238
|
-
|
239
|
-
def get_file_digest filename
|
240
|
-
File.open(filename, 'rb') do |io|
|
241
|
-
digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
|
242
|
-
buffer = ""
|
243
|
-
while io.read(40960, buffer)
|
244
|
-
digest.update(buffer)
|
230
|
+
def track_renames
|
231
|
+
@missing_files.delete_if do |filename, digest|
|
232
|
+
if @new_files.value?(digest)
|
233
|
+
@counters[:renamed] += 1
|
234
|
+
unless @options[:test_only]
|
235
|
+
delete_by_filename filename
|
245
236
|
end
|
246
|
-
|
237
|
+
true
|
238
|
+
end
|
239
|
+
end
|
240
|
+
@counters[:missing] = @missing_files.length
|
241
|
+
end
|
242
|
+
|
243
|
+
def any_missing_files?
|
244
|
+
@missing_files.length > 0
|
245
|
+
end
|
246
|
+
|
247
|
+
def print_missing_files
|
248
|
+
puts "\nMISSING FILES:"
|
249
|
+
@missing_files.sort.to_h.each do |filename, digest|
|
250
|
+
puts filename
|
251
|
+
end
|
252
|
+
end
|
253
|
+
|
254
|
+
def remove_missing_files
|
255
|
+
@db.transaction do
|
256
|
+
@missing_files.each do |filename, digest|
|
257
|
+
delete_by_filename filename
|
247
258
|
end
|
248
259
|
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def execute *args, &block
|
263
|
+
@db.execute *args, &block
|
264
|
+
end
|
249
265
|
|
266
|
+
def prepare_method name, query
|
267
|
+
variable = "@#{name}"
|
268
|
+
instance_variable_set(variable, @db.prepare(query))
|
269
|
+
define_singleton_method name do |*args, &block|
|
270
|
+
instance_variable_get(variable).execute(*args, &block)
|
271
|
+
end
|
250
272
|
end
|
273
|
+
|
251
274
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
@@ -28,11 +28,13 @@ description: Calculate file digests and check for the possible file corruption
|
|
28
28
|
email: stan@senotrusov.com
|
29
29
|
executables:
|
30
30
|
- file-digests
|
31
|
+
- file-digests-auto
|
31
32
|
- file-digests-test
|
32
33
|
extensions: []
|
33
34
|
extra_rdoc_files: []
|
34
35
|
files:
|
35
36
|
- bin/file-digests
|
37
|
+
- bin/file-digests-auto
|
36
38
|
- bin/file-digests-test
|
37
39
|
- lib/file-digests.rb
|
38
40
|
homepage: https://github.com/senotrusov/file-digests
|