file-digests 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/bin/file-digests +213 -0
  3. metadata +58 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 37a0004a1a028f687a5377117734ff57b25c5be9a6788a26adba72a188da5342
4
+ data.tar.gz: 357a8e73ad41eb05e3e05655e067c45bdd09b2c907a31847d4f087145d5b6f6f
5
+ SHA512:
6
+ metadata.gz: bcd30f2bc61982f0c5e84a80efa559ccb4d045dae8490e4ecc845af2cc524988abb1bbd9492803b0cbbcfe5a8c29e47db99ef222cc615f392cd9f795d7fdcdbe
7
+ data.tar.gz: 2a71ac2f2a6c42414c05885bfa902d3c7037021bc030e7b06559105e4e4665512a214e9298ae4d8ae5fd32fe530e466fb3f03d03648d1c5b7072c5733b106cb7
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'date'
4
+ require 'set'
5
+ require 'digest'
6
+ require 'fileutils'
7
+ require 'pathname'
8
+ require 'sqlite3'
9
+
10
# Makes sure +path+ refers to a directory, creating it (and any missing
# parents) when nothing exists there yet.
#
# @param path [String, Pathname] location that must end up being a directory
# @raise [RuntimeError] when +path+ exists but is not a directory
def ensure_dir_exists(path)
  # Nothing on disk yet: create the whole chain of directories.
  return FileUtils.mkdir_p(path) unless File.exist?(path)

  raise "#{path} is not a directory" unless File.directory?(path)
end
19
+
20
# Runs the given block and, when VERBOSE is set, prints how long it took
# as whole hours, minutes and seconds.
#
# Uses the monotonic clock so wall-clock adjustments do not skew the result.
#
# @yield the work to be timed
def measure_time
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  total_seconds = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at).to_i

  hours, remainder = total_seconds.divmod(3600)
  minutes, seconds = remainder.divmod(60)
  puts "Elapsed time: #{hours}h #{minutes}m #{seconds}s" if VERBOSE
end
26
+
27
# Normalizes a path given on the command line: on Windows every backslash
# is turned into a forward slash; on other platforms the string is returned
# untouched.
#
# @param path [String] path as typed by the user
# @return [String] path using forward slashes as separators on Windows
def patch_path_string(path)
  if Gem.win_platform?
    path.gsub('\\', '/')
  else
    path
  end
end
30
+
31
# Stores one digest record per file in an SQLite database and classifies
# every scanned file as good, updated, new, renamed, missing or likely
# damaged.
#
# Results are tallied in the top-level COUNTS hash; VERBOSE controls extra
# output and TEST_ONLY suppresses every write to the database.
class DigestDatabase
  # Opens the database at +path+, creating the +digests+ table on first use,
  # and prepares the statements used while scanning.
  #
  # @param path [String, Pathname] location of the SQLite database file
  def initialize(path)
    @db = SQLite3::Database.new(path.to_s)
    create_schema unless schema_exists?

    @db.results_as_hash = true

    # Every known file starts out as "missing"; entries are removed as the
    # corresponding files are seen by #insert_or_update.
    @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
    @new_files = {}

    prepare_statements
  end

  # Compares a file found on disk with its stored record and updates the
  # counters (and, unless TEST_ONLY, the database) accordingly.
  #
  # @param file_path [String] key under which the file is stored
  # @param mtime [String] modification time, formatted by the caller
  # @param digest [String] hex digest of the current file contents
  # @raise [RuntimeError] when more than one record exists for +file_path+
  def insert_or_update(file_path, mtime, digest)
    result = @find_by_filename.execute file_path
    found = result.next_hash

    unless found
      record_new(file_path, mtime, digest)
      return
    end

    raise "Multiple records found" if result.next

    @missing_files.delete(file_path)

    if found['digest'] == digest
      record_unchanged(file_path, mtime, found)
    else
      record_changed(file_path, mtime, digest, found)
    end
  end

  # Reconciles records whose files were not seen during the scan.
  #
  # A missing record whose digest equals that of a newly inserted file is
  # counted as renamed and (unless TEST_ONLY) deleted. The remaining records
  # are listed, and unless TEST_ONLY the user is asked whether to delete them.
  # Anything other than "y" (including end-of-input on stdin) keeps them.
  def process_missing_files
    # Set lookup keeps rename detection linear instead of scanning all new
    # files once per missing file.
    new_digests = Set.new(@new_files.values)

    @missing_files.delete_if do |filename, digest|
      next false unless new_digests.include?(digest)

      COUNTS[:renamed] += 1
      @delete_by_filename.execute filename unless TEST_ONLY
      true
    end

    COUNTS[:missing] = @missing_files.length
    return if @missing_files.empty?

    puts "MISSING FILES:"
    @missing_files.keys.sort.each { |filename| puts filename }
    return if TEST_ONLY

    puts "Remove missing files from the database (y/n)?"
    # gets returns nil when stdin is closed or at EOF (e.g. under cron);
    # to_s turns that into "no" instead of a NoMethodError.
    return unless $stdin.gets.to_s.strip == "y"

    @missing_files.each_key { |filename| @delete_by_filename.execute filename }
  end

  private

  # True when the +digests+ table is already present in the database.
  def schema_exists?
    @db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
  end

  # Creates the +digests+ table and its unique filename index.
  def create_schema
    @db.execute 'PRAGMA encoding = "UTF-8"'
    @db.execute <<~SQL
      CREATE TABLE digests (
        id INTEGER PRIMARY KEY,
        filename TEXT,
        mtime TEXT,
        digest TEXT,
        digest_check_time TEXT)
    SQL
    @db.execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
  end

  # Prepares the statements that are executed once per scanned file.
  def prepare_statements
    @insert = @db.prepare("INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))")
    @find_by_filename = @db.prepare("SELECT id, mtime, digest FROM digests WHERE filename = ?")
    @touch_digest_check_time = @db.prepare("UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?")
    @update_mtime_and_digest = @db.prepare("UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?")
    @update_mtime = @db.prepare("UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?")
    @delete_by_filename = @db.prepare("DELETE FROM digests WHERE filename = ?")
  end

  # Digest matches the stored one: refresh the check time, and the mtime if it moved.
  def record_unchanged(file_path, mtime, found)
    COUNTS[:good] += 1
    puts "GOOD: #{file_path}" if VERBOSE
    return if TEST_ONLY

    if found['mtime'] == mtime
      @touch_digest_check_time.execute found['id']
    else
      @update_mtime.execute mtime, found['id']
    end
  end

  # Digest differs from the stored one: an unchanged mtime is reported as
  # likely damage (and the record is left alone); otherwise the record is updated.
  def record_changed(file_path, mtime, digest, found)
    if found['mtime'] == mtime
      COUNTS[:likely_damaged] += 1
      puts "LIKELY DAMAGED: #{file_path}"
    else
      COUNTS[:updated] += 1
      puts "UPDATED: #{file_path}" if VERBOSE || TEST_ONLY
      @update_mtime_and_digest.execute mtime, digest, found['id'] unless TEST_ONLY
    end
  end

  # No stored record: insert one and remember the digest for rename detection.
  def record_new(file_path, mtime, digest)
    COUNTS[:new] += 1
    puts "NEW: #{file_path}" if VERBOSE || TEST_ONLY
    return if TEST_ONLY

    @new_files[file_path] = digest
    @insert.execute! file_path, mtime, digest
  end
end
126
+
127
# Walks a directory tree, computes a SHA-512 digest for every regular file
# and hands the result to a DigestDatabase for comparison with stored records.
class Checker
  # @param files_path [Pathname] root of the tree to scan (created if absent)
  # @param digest_database_path [Pathname, nil] database location; when nil
  #   the database lives in +files_path+ as "file-digests.sqlite" and is
  #   excluded from the scan
  def initialize(files_path, digest_database_path)
    @files_path = files_path
    ensure_dir_exists @files_path

    if digest_database_path
      @digest_database_path = digest_database_path
      ensure_dir_exists @digest_database_path.dirname
    else
      @digest_database_path = @files_path + 'file-digests.sqlite'
      @skip_file_digests_sqlite = true
    end

    @digest_database = DigestDatabase.new @digest_database_path
  end

  # Processes every file in the tree, then reconciles records whose files
  # were not seen. A failure on one file is counted and reported on stderr
  # without aborting the scan.
  def check
    walk_files do |filename|
      begin
        process_file filename
      rescue => exception
        COUNTS[:exceptions] += 1
        STDERR.puts "EXCEPTION: #{filename}: #{exception.message}"
      end
    end

    @digest_database.process_missing_files
  end

  # Yields the path (root joined with the relative name) of each regular
  # file below the root.
  #
  # The root is passed as the glob +base+ so that glob metacharacters in the
  # directory name are taken literally. When the database lives inside the
  # tree by default it is skipped by comparing full paths, which works for
  # any root rather than only for ".".
  #
  # @yieldparam filename [String]
  def walk_files
    skipped_path = @digest_database_path.to_s if @skip_file_digests_sqlite

    Dir.glob('**/*', base: @files_path.to_s) do |relative_name|
      filename = (@files_path + relative_name).to_s
      next unless File.file? filename
      next if filename == skipped_path

      yield filename
    end
  end

  # Sends one file's root-relative name, UTC mtime and digest to the database.
  #
  # @param filename [String] path as yielded by #walk_files
  def process_file(filename)
    @digest_database.insert_or_update(
      # relative_path_from normalizes both sides, so a root given with a
      # trailing slash still yields "sub/file" rather than the full path.
      Pathname.new(filename).relative_path_from(@files_path).to_s,
      File.mtime(filename).utc.strftime('%Y-%m-%d %H:%M:%S'),
      get_file_digest(filename)
    )
  end

  # @param filename [String] file to read
  # @return [String] hex-encoded SHA-512 digest of the file contents
  def get_file_digest(filename)
    Digest::SHA512.file(filename).hexdigest
  end
end
184
+
185
# Runtime switches, both enabled only by the exact value "true".
VERBOSE = ENV["VERBOSE"] == "true"
TEST_ONLY = ENV["TEST_ONLY"] == "true"

# Tally of how each scanned file was classified during this run.
COUNTS = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}

# Usage: file-digests [files_path] [digest_database_path]
# files_path defaults to the current directory; the database path is optional.
begin
  files_path = Pathname.new(patch_path_string(ARGV[0] || "."))
  digest_database_path = ARGV[1] ? Pathname.new(patch_path_string(ARGV[1])) : nil

  measure_time { Checker.new(files_path, digest_database_path).check }

  had_errors = COUNTS[:likely_damaged] > 0 || COUNTS[:exceptions] > 0
  puts "ERRORS WERE OCCURRED, PLEASE CHECK FOR THEM" if had_errors

  puts COUNTS.inspect
rescue => exception
  # Report the failure in the same format as per-file errors, then let it propagate.
  STDERR.puts "EXCEPTION: #{exception.message}"
  raise exception
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: file-digests
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Stanislav Senotrusov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-10-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sqlite3
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.3.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.3.0
27
+ description: Calculate file digests and check for the possible file corruption
28
+ email: stan@senotrusov.com
29
+ executables:
30
+ - file-digests
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - bin/file-digests
35
+ homepage: https://github.com/senotrusov/file-digests
36
+ licenses:
37
+ - Apache-2.0
38
+ metadata: {}
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubygems_version: 3.1.2
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: file-digests
58
+ test_files: []