file-digests 0.0.2 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/file-digests +15 -9
- data/bin/file-digests-test +219 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 182ed181e50c1af8f9c9d395a4ba2536b78b69c6996e7d5852277b5bd19f3d88
|
4
|
+
data.tar.gz: 772b324b0821699e451db68b2859506b56d975843e925601afc3889fe94f0631
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 448f9c73ea520b6a68165c6b94f3509ef561085c069613d8bdc5e34c0f40f61073cb2b105d0bb488c7a4dba00cd8c87a693ec3fe59db74f2f6ed54784cbe4c8e
|
7
|
+
data.tar.gz: cf10a1d008765c3f0296401e6c2d3f0d90acb24aa94664be313657e5f698e40ea457b7e38e65cc74815689c3e1764708ed579953ba28ccdb894163914d614c3e
|
data/bin/file-digests
CHANGED
@@ -21,7 +21,7 @@ def measure_time
|
|
21
21
|
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
22
22
|
yield
|
23
23
|
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
|
24
|
-
puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s"
|
24
|
+
puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless QUIET
|
25
25
|
end
|
26
26
|
|
27
27
|
def patch_path_string path
|
@@ -43,6 +43,11 @@ class DigestDatabase
|
|
43
43
|
@db.execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
|
44
44
|
end
|
45
45
|
|
46
|
+
@db.execute 'PRAGMA journal_mode = "WAL"'
|
47
|
+
@db.execute 'PRAGMA synchronous = "NORMAL"'
|
48
|
+
@db.execute 'PRAGMA locking_mode = "EXCLUSIVE"'
|
49
|
+
@db.execute 'PRAGMA cache_size = "5000"'
|
50
|
+
|
46
51
|
@db.results_as_hash = true
|
47
52
|
@missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
|
48
53
|
@new_files = {}
|
@@ -66,7 +71,7 @@ class DigestDatabase
|
|
66
71
|
|
67
72
|
if found['digest'] == digest
|
68
73
|
COUNTS[:good] += 1
|
69
|
-
puts "GOOD: #{file_path}"
|
74
|
+
# puts "GOOD: #{file_path}" unless QUIET
|
70
75
|
unless TEST_ONLY
|
71
76
|
if found['mtime'] == mtime
|
72
77
|
@touch_digest_check_time.execute found['id']
|
@@ -77,10 +82,10 @@ class DigestDatabase
|
|
77
82
|
else
|
78
83
|
if found['mtime'] == mtime # Digest is different and mtime is the same
|
79
84
|
COUNTS[:likely_damaged] += 1
|
80
|
-
puts "LIKELY DAMAGED: #{file_path}"
|
85
|
+
STDERR.puts "LIKELY DAMAGED: #{file_path}"
|
81
86
|
else
|
82
87
|
COUNTS[:updated] += 1
|
83
|
-
puts "UPDATED: #{file_path}"
|
88
|
+
puts "UPDATED: #{file_path}" unless QUIET
|
84
89
|
unless TEST_ONLY
|
85
90
|
@update_mtime_and_digest.execute mtime, digest, found['id']
|
86
91
|
end
|
@@ -88,7 +93,7 @@ class DigestDatabase
|
|
88
93
|
end
|
89
94
|
else
|
90
95
|
COUNTS[:new] += 1
|
91
|
-
puts "NEW: #{file_path}"
|
96
|
+
puts "NEW: #{file_path}" unless QUIET
|
92
97
|
unless TEST_ONLY
|
93
98
|
@new_files[file_path] = digest
|
94
99
|
@insert.execute! file_path, mtime, digest
|
@@ -133,7 +138,7 @@ class Checker
|
|
133
138
|
@digest_database_path = digest_database_path
|
134
139
|
ensure_dir_exists @digest_database_path.dirname
|
135
140
|
else
|
136
|
-
@digest_database_path = @files_path + 'file-digests.sqlite'
|
141
|
+
@digest_database_path = @files_path + '.file-digests.sqlite'
|
137
142
|
@skip_file_digests_sqlite = true
|
138
143
|
end
|
139
144
|
|
@@ -156,7 +161,8 @@ class Checker
|
|
156
161
|
def walk_files
|
157
162
|
Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
|
158
163
|
next unless File.file? filename
|
159
|
-
next if @skip_file_digests_sqlite && filename == 'file-digests.sqlite'
|
164
|
+
next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite'
|
165
|
+
next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite-wal'
|
160
166
|
yield filename
|
161
167
|
end
|
162
168
|
end
|
@@ -182,7 +188,7 @@ class Checker
|
|
182
188
|
|
183
189
|
end
|
184
190
|
|
185
|
-
|
191
|
+
QUIET = (ENV["QUIET"] == "true")
|
186
192
|
TEST_ONLY = (ENV["TEST_ONLY"] == "true")
|
187
193
|
|
188
194
|
COUNTS = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
@@ -202,7 +208,7 @@ begin
|
|
202
208
|
end
|
203
209
|
|
204
210
|
if COUNTS[:likely_damaged] > 0 || COUNTS[:exceptions] > 0
|
205
|
-
puts "ERRORS WERE OCCURRED
|
211
|
+
STDERR.puts "ERRORS WERE OCCURRED"
|
206
212
|
end
|
207
213
|
|
208
214
|
puts COUNTS.inspect
|
@@ -0,0 +1,219 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
require 'set'
|
5
|
+
require 'digest'
|
6
|
+
require 'fileutils'
|
7
|
+
require 'pathname'
|
8
|
+
require 'sqlite3'
|
9
|
+
|
10
|
+
def ensure_dir_exists path
|
11
|
+
if File.exist?(path)
|
12
|
+
unless File.directory?(path)
|
13
|
+
raise "#{path} is not a directory"
|
14
|
+
end
|
15
|
+
else
|
16
|
+
FileUtils.mkdir_p path
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def measure_time
|
21
|
+
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
22
|
+
yield
|
23
|
+
elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
|
24
|
+
puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless QUIET
|
25
|
+
end
|
26
|
+
|
27
|
+
def patch_path_string path
|
28
|
+
Gem.win_platform? ? path.gsub(/\\/, '/') : path
|
29
|
+
end
|
30
|
+
|
31
|
+
class DigestDatabase
|
32
|
+
def initialize path
|
33
|
+
@db = SQLite3::Database.new(path.to_s)
|
34
|
+
|
35
|
+
unless @db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
|
36
|
+
@db.execute 'PRAGMA encoding = "UTF-8"'
|
37
|
+
@db.execute "CREATE TABLE digests (
|
38
|
+
id INTEGER PRIMARY KEY,
|
39
|
+
filename TEXT,
|
40
|
+
mtime TEXT,
|
41
|
+
digest TEXT,
|
42
|
+
digest_check_time TEXT)"
|
43
|
+
@db.execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
|
44
|
+
end
|
45
|
+
|
46
|
+
@db.execute 'PRAGMA journal_mode = "WAL"'
|
47
|
+
@db.execute 'PRAGMA synchronous = "NORMAL"'
|
48
|
+
@db.execute 'PRAGMA locking_mode = "EXCLUSIVE"'
|
49
|
+
@db.execute 'PRAGMA cache_size = "5000"'
|
50
|
+
|
51
|
+
@db.results_as_hash = true
|
52
|
+
@missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
|
53
|
+
@new_files = {}
|
54
|
+
|
55
|
+
|
56
|
+
@insert = @db.prepare("INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))")
|
57
|
+
@find_by_filename = @db.prepare("SELECT id, mtime, digest FROM digests WHERE filename = ?")
|
58
|
+
@touch_digest_check_time = @db.prepare("UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?")
|
59
|
+
@update_mtime_and_digest = @db.prepare("UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?")
|
60
|
+
@update_mtime = @db.prepare("UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?")
|
61
|
+
@delete_by_filename = @db.prepare("DELETE FROM digests WHERE filename = ?")
|
62
|
+
end
|
63
|
+
|
64
|
+
def insert_or_update file_path, mtime, digest
|
65
|
+
result = @find_by_filename.execute file_path
|
66
|
+
|
67
|
+
if found = result.next_hash
|
68
|
+
raise "Multiple records found" if result.next
|
69
|
+
|
70
|
+
@missing_files.delete(file_path)
|
71
|
+
|
72
|
+
if found['digest'] == digest
|
73
|
+
COUNTS[:good] += 1
|
74
|
+
# puts "GOOD: #{file_path}" unless QUIET
|
75
|
+
unless TEST_ONLY
|
76
|
+
if found['mtime'] == mtime
|
77
|
+
@touch_digest_check_time.execute found['id']
|
78
|
+
else
|
79
|
+
@update_mtime.execute mtime, found['id']
|
80
|
+
end
|
81
|
+
end
|
82
|
+
else
|
83
|
+
if found['mtime'] == mtime # Digest is different and mtime is the same
|
84
|
+
COUNTS[:likely_damaged] += 1
|
85
|
+
STDERR.puts "LIKELY DAMAGED: #{file_path}"
|
86
|
+
else
|
87
|
+
COUNTS[:updated] += 1
|
88
|
+
puts "UPDATED: #{file_path}" unless QUIET
|
89
|
+
unless TEST_ONLY
|
90
|
+
@update_mtime_and_digest.execute mtime, digest, found['id']
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
else
|
95
|
+
COUNTS[:new] += 1
|
96
|
+
puts "NEW: #{file_path}" unless QUIET
|
97
|
+
unless TEST_ONLY
|
98
|
+
@new_files[file_path] = digest
|
99
|
+
@insert.execute! file_path, mtime, digest
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def process_missing_files
|
105
|
+
@missing_files.delete_if do |filename, digest|
|
106
|
+
if @new_files.value?(digest)
|
107
|
+
COUNTS[:renamed] += 1
|
108
|
+
unless TEST_ONLY
|
109
|
+
@delete_by_filename.execute filename
|
110
|
+
end
|
111
|
+
true
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
if (COUNTS[:missing] = @missing_files.length) > 0
|
116
|
+
puts "MISSING FILES:"
|
117
|
+
@missing_files.sort.to_h.each do |filename, digest|
|
118
|
+
puts filename
|
119
|
+
end
|
120
|
+
unless TEST_ONLY
|
121
|
+
puts "Remove missing files from the database (y/n)?"
|
122
|
+
if STDIN.gets.strip == "y"
|
123
|
+
@missing_files.each do |filename, digest|
|
124
|
+
@delete_by_filename.execute filename
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
class Checker
|
133
|
+
def initialize files_path, digest_database_path
|
134
|
+
@files_path = files_path
|
135
|
+
ensure_dir_exists @files_path
|
136
|
+
|
137
|
+
if digest_database_path
|
138
|
+
@digest_database_path = digest_database_path
|
139
|
+
ensure_dir_exists @digest_database_path.dirname
|
140
|
+
else
|
141
|
+
@digest_database_path = @files_path + '.file-digests.sqlite'
|
142
|
+
@skip_file_digests_sqlite = true
|
143
|
+
end
|
144
|
+
|
145
|
+
@digest_database = DigestDatabase.new @digest_database_path
|
146
|
+
end
|
147
|
+
|
148
|
+
def check
|
149
|
+
walk_files do |filename|
|
150
|
+
begin
|
151
|
+
process_file filename
|
152
|
+
rescue => exception
|
153
|
+
COUNTS[:exceptions] += 1
|
154
|
+
STDERR.puts "EXCEPTION: #{filename}: #{exception.message}"
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
@digest_database.process_missing_files
|
159
|
+
end
|
160
|
+
|
161
|
+
def walk_files
|
162
|
+
Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
|
163
|
+
next unless File.file? filename
|
164
|
+
next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite'
|
165
|
+
next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite-wal'
|
166
|
+
yield filename
|
167
|
+
end
|
168
|
+
end
|
169
|
+
|
170
|
+
def process_file filename
|
171
|
+
@digest_database.insert_or_update(
|
172
|
+
filename.delete_prefix(@files_path.to_s + '/'),
|
173
|
+
File.mtime(filename).utc.strftime('%Y-%m-%d %H:%M:%S'),
|
174
|
+
get_file_digest(filename)
|
175
|
+
)
|
176
|
+
end
|
177
|
+
|
178
|
+
def get_file_digest filename
|
179
|
+
File.open(filename, 'rb') do |io|
|
180
|
+
digest = Digest::SHA512.new
|
181
|
+
buffer = ""
|
182
|
+
while io.read(40960, buffer)
|
183
|
+
digest.update(buffer)
|
184
|
+
end
|
185
|
+
return digest.hexdigest
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
189
|
+
end
|
190
|
+
|
191
|
+
QUIET = (ENV["QUIET"] == "true")
|
192
|
+
TEST_ONLY = true
|
193
|
+
|
194
|
+
COUNTS = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
|
195
|
+
|
196
|
+
begin
|
197
|
+
if ARGV[0]
|
198
|
+
files_path = Pathname.new patch_path_string(ARGV[0])
|
199
|
+
else
|
200
|
+
files_path = Pathname.new patch_path_string(".")
|
201
|
+
end
|
202
|
+
|
203
|
+
digest_database_path = Pathname.new patch_path_string(ARGV[1]) if ARGV[1]
|
204
|
+
|
205
|
+
measure_time do
|
206
|
+
checker = Checker.new files_path, digest_database_path
|
207
|
+
checker.check
|
208
|
+
end
|
209
|
+
|
210
|
+
if COUNTS[:likely_damaged] > 0 || COUNTS[:exceptions] > 0
|
211
|
+
STDERR.puts "ERRORS WERE OCCURRED"
|
212
|
+
end
|
213
|
+
|
214
|
+
puts COUNTS.inspect
|
215
|
+
|
216
|
+
rescue => exception
|
217
|
+
STDERR.puts "EXCEPTION: #{exception.message}"
|
218
|
+
raise exception
|
219
|
+
end
|
metadata
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file-digests
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stanislav Senotrusov
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-10-08 00:00:00.000000000 Z
|
@@ -28,15 +28,17 @@ description: Calculate file digests and check for the possible file corruption
|
|
28
28
|
email: stan@senotrusov.com
|
29
29
|
executables:
|
30
30
|
- file-digests
|
31
|
+
- file-digests-test
|
31
32
|
extensions: []
|
32
33
|
extra_rdoc_files: []
|
33
34
|
files:
|
34
35
|
- bin/file-digests
|
36
|
+
- bin/file-digests-test
|
35
37
|
homepage: https://github.com/senotrusov/file-digests
|
36
38
|
licenses:
|
37
39
|
- Apache-2.0
|
38
40
|
metadata: {}
|
39
|
-
post_install_message:
|
41
|
+
post_install_message:
|
40
42
|
rdoc_options: []
|
41
43
|
require_paths:
|
42
44
|
- lib
|
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
52
54
|
version: '0'
|
53
55
|
requirements: []
|
54
56
|
rubygems_version: 3.1.2
|
55
|
-
signing_key:
|
57
|
+
signing_key:
|
56
58
|
specification_version: 4
|
57
59
|
summary: file-digests
|
58
60
|
test_files: []
|