file-digests 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/bin/file-digests-test +213 -0
  3. metadata +3 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 527ba8a1621dc40e2ee802f7db825aac55b3341d8bc85def9c9c655c25dbca0a
4
- data.tar.gz: 576ac2b79f62f9da9976500e919e7e54a90f718bd1a7c9f95d2f0ae7acca524e
3
+ metadata.gz: 4b996011c4d3ae950a0081174ea86d0fe52136f58fd09c3e4543bfbdf3240098
4
+ data.tar.gz: 5055ba39b9d1e2cccd1cb89603e088ecf8052cb48bc36443be7b520ffc585e83
5
5
  SHA512:
6
- metadata.gz: 4fbb07312ab1579633d42602ac6443ac694e4d19fb28a66ba9017c33dd36327cfc51fe015f784a8aa6b1df2488271dcf60971ad59280e1754ea0230f12533997
7
- data.tar.gz: ea821a75f4090ff7c9b3416bfa3b29e9ba6732a21bf337d0023b1f93569b751af7f63413caf13e8c5d7609ecab723e0dfe5e96f2e62ec2eb443c7f01216f219b
6
+ metadata.gz: e4325a69e293cdf3b3007bbc100f163bf26ef0ed06da33e0fb58476fd59b2d8c57a6ee9984acd57eaab778b38e334d9d78cd2fa71bd5a63971804b060b0fa167
7
+ data.tar.gz: 02263b3b793bcdd3caf8cccd9b07a5232b001bd25d45709ead019be0a002ed5f6eb94ebbed3f3719bc5b7fceeb8b32a918e8ed59f15fdf345c78d0437eab7999
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'date'
4
+ require 'set'
5
+ require 'digest'
6
+ require 'fileutils'
7
+ require 'pathname'
8
+ require 'sqlite3'
9
+
10
# Makes sure +path+ is a directory, creating it (with any missing parents)
# when nothing exists at that location.
#
# @param path [String, Pathname] directory that must exist
# @raise [RuntimeError] when +path+ exists but is not a directory
def ensure_dir_exists(path)
  # Nothing there yet: create the whole chain of directories.
  return FileUtils.mkdir_p(path) unless File.exist?(path)

  raise "#{path} is not a directory" unless File.directory?(path)
end
19
+
20
# Runs the given block and, when the VERBOSE constant is truthy, prints how
# long it took as hours, minutes and seconds.
#
# The duration is taken from the monotonic clock and truncated to whole
# seconds before formatting.
#
# @yield the work to be timed
# @return [nil]
def measure_time
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  total_seconds = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at).to_i
  return unless VERBOSE

  total_minutes, seconds = total_seconds.divmod(60)
  hours, minutes = total_minutes.divmod(60)
  puts "Elapsed time: #{hours}h #{minutes}m #{seconds}s"
end
26
+
27
# Normalizes a path given on the command line: on Windows every backslash is
# replaced with a forward slash; on other platforms the path is returned
# untouched.
#
# @param path [String] path as typed by the user
# @return [String] the path with forward slashes on Windows, otherwise +path+
def patch_path_string(path)
  return path unless Gem.win_platform?

  path.gsub('\\', '/')
end
30
+
31
# Stores one record per file (name, mtime, digest, time of last check) in an
# SQLite database and classifies every file passed to #insert_or_update as
# good, updated, likely damaged or new. Files that were in the database but
# were never passed in are handled by #process_missing_files.
#
# Relies on the top-level constants COUNTS (running totals), VERBOSE and
# TEST_ONLY (when truthy, nothing is written to the database).
class DigestDatabase
  # Opens the database at +path+, creating the +digests+ table and its unique
  # filename index when the table is not there yet.
  #
  # @param path [String, Pathname] location of the SQLite file
  def initialize(path)
    @db = SQLite3::Database.new(path.to_s)
    create_schema unless schema_exists?

    # Every known file starts out as "missing"; entries are removed as files
    # are seen. Loaded before results_as_hash is switched on so that rows come
    # back as [filename, digest] pairs, and through Database#execute so that no
    # prepared statement is left open.
    @missing_files = @db.execute("SELECT filename, digest FROM digests").to_h
    @db.results_as_hash = true

    # Digests of files first seen during this run, used to recognise renames.
    @new_digests = Set.new

    @insert = @db.prepare("INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))")
    @find_by_filename = @db.prepare("SELECT id, mtime, digest FROM digests WHERE filename = ?")
    @touch_digest_check_time = @db.prepare("UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?")
    @update_mtime_and_digest = @db.prepare("UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?")
    @update_mtime = @db.prepare("UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?")
    @delete_by_filename = @db.prepare("DELETE FROM digests WHERE filename = ?")
  end

  # Compares a file's current state with its stored record and updates COUNTS
  # (and, unless TEST_ONLY, the database) accordingly.
  #
  # @param file_path [String] file name as stored in the database
  # @param mtime [String] modification time, formatted by the caller
  # @param digest [String] hex digest of the file contents
  # @raise [RuntimeError] when more than one record matches +file_path+
  def insert_or_update(file_path, mtime, digest)
    result = @find_by_filename.execute(file_path)
    found = result.next_hash
    return record_new(file_path, mtime, digest) unless found

    raise "Multiple records found" if result.next

    @missing_files.delete(file_path)

    if found['digest'] == digest
      record_good(file_path, mtime, found)
    else
      record_changed(file_path, mtime, digest, found)
    end
  end

  # Handles records whose files were not seen during this run. A record whose
  # digest equals that of a newly seen file is counted as renamed and dropped;
  # the rest are listed and, unless TEST_ONLY, removed after the user answers
  # "y" on standard input.
  def process_missing_files
    @missing_files.delete_if do |filename, digest|
      next false unless @new_digests.include?(digest)

      COUNTS[:renamed] += 1
      @delete_by_filename.execute(filename) unless TEST_ONLY
      true
    end

    COUNTS[:missing] = @missing_files.length
    return if @missing_files.empty?

    puts "MISSING FILES:"
    @missing_files.keys.sort.each { |filename| puts filename }
    return if TEST_ONLY

    puts "Remove missing files from the database (y/n)?"
    # gets returns nil when standard input is at EOF; treat that as "no".
    return unless STDIN.gets.to_s.strip == "y"

    @missing_files.each_key { |filename| @delete_by_filename.execute(filename) }
  end

  private

  # True when the +digests+ table already exists.
  def schema_exists?
    @db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
  end

  # Creates the +digests+ table and the unique index on +filename+.
  def create_schema
    @db.execute 'PRAGMA encoding = "UTF-8"'
    @db.execute "CREATE TABLE digests (
      id INTEGER PRIMARY KEY,
      filename TEXT,
      mtime TEXT,
      digest TEXT,
      digest_check_time TEXT)"
    @db.execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
  end

  # Digest matches the record: refresh the check time (and mtime if it moved).
  def record_good(file_path, mtime, found)
    COUNTS[:good] += 1
    puts "GOOD: #{file_path}" if VERBOSE
    return if TEST_ONLY

    if found['mtime'] == mtime
      @touch_digest_check_time.execute found['id']
    else
      @update_mtime.execute mtime, found['id']
    end
  end

  # Digest differs from the record: an unchanged mtime is reported as likely
  # damage, a changed mtime as a regular update.
  def record_changed(file_path, mtime, digest, found)
    if found['mtime'] == mtime
      COUNTS[:likely_damaged] += 1
      puts "LIKELY DAMAGED: #{file_path}"
    else
      COUNTS[:updated] += 1
      puts "UPDATED: #{file_path}" if VERBOSE || TEST_ONLY
      @update_mtime_and_digest.execute(mtime, digest, found['id']) unless TEST_ONLY
    end
  end

  # No record for this file yet: count it and, unless TEST_ONLY, insert it.
  def record_new(file_path, mtime, digest)
    COUNTS[:new] += 1
    puts "NEW: #{file_path}" if VERBOSE || TEST_ONLY
    # Remembered in memory even in test-only mode; otherwise a renamed file
    # would be reported as both new and missing.
    @new_digests << digest
    @insert.execute!(file_path, mtime, digest) unless TEST_ONLY
  end
end
126
+
127
# Walks a directory tree, computes a SHA-512 digest for every regular file
# and hands the result to a DigestDatabase for comparison with earlier runs.
class Checker
  # @param files_path [Pathname] root of the tree to check; created if absent
  # @param digest_database_path [Pathname, nil] where the database lives; when
  #   nil the database is kept as +file-digests.sqlite+ inside +files_path+
  #   and is excluded from the walk
  def initialize(files_path, digest_database_path)
    @files_path = files_path
    ensure_dir_exists @files_path

    if digest_database_path
      @digest_database_path = digest_database_path
      ensure_dir_exists @digest_database_path.dirname
    else
      @digest_database_path = @files_path + 'file-digests.sqlite'
      @skip_file_digests_sqlite = true
    end

    @digest_database = DigestDatabase.new @digest_database_path
  end

  # Processes every file under the root, then lets the database deal with
  # records whose files were not encountered. A failure on one file is
  # counted in COUNTS[:exceptions] and reported on STDERR without stopping
  # the walk.
  def check
    walk_files do |filename|
      begin
        process_file filename
      rescue => exception
        COUNTS[:exceptions] += 1
        STDERR.puts "EXCEPTION: #{filename}: #{exception.message}"
      end
    end

    @digest_database.process_missing_files
  end

  # Yields the path of every regular file under the root, dotfiles included.
  #
  # @yieldparam filename [String] path as returned by Dir.glob
  def walk_files
    # Glob results carry the root as a prefix, so the database file has to be
    # recognised by its full path; comparing against the bare file name only
    # matched when the root was ".".
    skipped = File.expand_path(@digest_database_path.to_s) if @skip_file_digests_sqlite

    Dir.glob((@files_path + '**' + '*').to_s, File::FNM_DOTMATCH) do |filename|
      next unless File.file? filename
      next if skipped && File.expand_path(filename) == skipped

      yield filename
    end
  end

  # Records one file: its name with the root prefix removed, its mtime in UTC
  # formatted as "YYYY-MM-DD HH:MM:SS", and its digest.
  #
  # @param filename [String] path as yielded by #walk_files
  def process_file(filename)
    @digest_database.insert_or_update(
      filename.delete_prefix(@files_path.to_s + '/'),
      File.mtime(filename).utc.strftime('%Y-%m-%d %H:%M:%S'),
      get_file_digest(filename)
    )
  end

  # @param filename [String] file to read
  # @return [String] hex-encoded SHA-512 digest of the file contents
  def get_file_digest(filename)
    Digest::SHA512.file(filename).hexdigest
  end
end
184
+
185
# Extra per-file output is enabled by setting the VERBOSE environment
# variable to "true".
VERBOSE = (ENV["VERBOSE"] == "true")

# This executable only reports; code guarded by TEST_ONLY never writes to the
# database.
TEST_ONLY = true

# Running totals shared by DigestDatabase and Checker.
COUNTS = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}

# Usage: file-digests-test [files_path] [digest_database_path]
# files_path defaults to the current directory.
begin
  files_path = Pathname.new(patch_path_string(ARGV[0] || "."))
  digest_database_path = ARGV[1] && Pathname.new(patch_path_string(ARGV[1]))

  measure_time do
    Checker.new(files_path, digest_database_path).check
  end

  if COUNTS.values_at(:likely_damaged, :exceptions).any?(&:positive?)
    puts "ERRORS WERE OCCURRED, PLEASE CHECK FOR THEM"
  end

  puts COUNTS.inspect
rescue => exception
  # Print a short message first, then let the error propagate unchanged.
  STDERR.puts "EXCEPTION: #{exception.message}"
  raise
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
@@ -28,10 +28,12 @@ description: Calculate file digests and check for the possible file corruption
28
28
  email: stan@senotrusov.com
29
29
  executables:
30
30
  - file-digests
31
+ - file-digests-test
31
32
  extensions: []
32
33
  extra_rdoc_files: []
33
34
  files:
34
35
  - bin/file-digests
36
+ - bin/file-digests-test
35
37
  homepage: https://github.com/senotrusov/file-digests
36
38
  licenses:
37
39
  - Apache-2.0