file-digests 0.0.2 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/file-digests +15 -9
  3. data/bin/file-digests-test +219 -0
  4. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 527ba8a1621dc40e2ee802f7db825aac55b3341d8bc85def9c9c655c25dbca0a
4
- data.tar.gz: 576ac2b79f62f9da9976500e919e7e54a90f718bd1a7c9f95d2f0ae7acca524e
3
+ metadata.gz: 182ed181e50c1af8f9c9d395a4ba2536b78b69c6996e7d5852277b5bd19f3d88
4
+ data.tar.gz: 772b324b0821699e451db68b2859506b56d975843e925601afc3889fe94f0631
5
5
  SHA512:
6
- metadata.gz: 4fbb07312ab1579633d42602ac6443ac694e4d19fb28a66ba9017c33dd36327cfc51fe015f784a8aa6b1df2488271dcf60971ad59280e1754ea0230f12533997
7
- data.tar.gz: ea821a75f4090ff7c9b3416bfa3b29e9ba6732a21bf337d0023b1f93569b751af7f63413caf13e8c5d7609ecab723e0dfe5e96f2e62ec2eb443c7f01216f219b
6
+ metadata.gz: 448f9c73ea520b6a68165c6b94f3509ef561085c069613d8bdc5e34c0f40f61073cb2b105d0bb488c7a4dba00cd8c87a693ec3fe59db74f2f6ed54784cbe4c8e
7
+ data.tar.gz: cf10a1d008765c3f0296401e6c2d3f0d90acb24aa94664be313657e5f698e40ea457b7e38e65cc74815689c3e1764708ed579953ba28ccdb894163914d614c3e
data/bin/file-digests CHANGED
@@ -21,7 +21,7 @@ def measure_time
21
21
  start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
22
22
  yield
23
23
  elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
24
- puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" if VERBOSE
24
+ puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless QUIET
25
25
  end
26
26
 
27
27
  def patch_path_string path
@@ -43,6 +43,11 @@ class DigestDatabase
43
43
  @db.execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
44
44
  end
45
45
 
46
+ @db.execute 'PRAGMA journal_mode = "WAL"'
47
+ @db.execute 'PRAGMA synchronous = "NORMAL"'
48
+ @db.execute 'PRAGMA locking_mode = "EXCLUSIVE"'
49
+ @db.execute 'PRAGMA cache_size = "5000"'
50
+
46
51
  @db.results_as_hash = true
47
52
  @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
48
53
  @new_files = {}
@@ -66,7 +71,7 @@ class DigestDatabase
66
71
 
67
72
  if found['digest'] == digest
68
73
  COUNTS[:good] += 1
69
- puts "GOOD: #{file_path}" if VERBOSE
74
+ # puts "GOOD: #{file_path}" unless QUIET
70
75
  unless TEST_ONLY
71
76
  if found['mtime'] == mtime
72
77
  @touch_digest_check_time.execute found['id']
@@ -77,10 +82,10 @@ class DigestDatabase
77
82
  else
78
83
  if found['mtime'] == mtime # Digest is different and mtime is the same
79
84
  COUNTS[:likely_damaged] += 1
80
- puts "LIKELY DAMAGED: #{file_path}"
85
+ STDERR.puts "LIKELY DAMAGED: #{file_path}"
81
86
  else
82
87
  COUNTS[:updated] += 1
83
- puts "UPDATED: #{file_path}" if VERBOSE || TEST_ONLY
88
+ puts "UPDATED: #{file_path}" unless QUIET
84
89
  unless TEST_ONLY
85
90
  @update_mtime_and_digest.execute mtime, digest, found['id']
86
91
  end
@@ -88,7 +93,7 @@ class DigestDatabase
88
93
  end
89
94
  else
90
95
  COUNTS[:new] += 1
91
- puts "NEW: #{file_path}" if VERBOSE || TEST_ONLY
96
+ puts "NEW: #{file_path}" unless QUIET
92
97
  unless TEST_ONLY
93
98
  @new_files[file_path] = digest
94
99
  @insert.execute! file_path, mtime, digest
@@ -133,7 +138,7 @@ class Checker
133
138
  @digest_database_path = digest_database_path
134
139
  ensure_dir_exists @digest_database_path.dirname
135
140
  else
136
- @digest_database_path = @files_path + 'file-digests.sqlite'
141
+ @digest_database_path = @files_path + '.file-digests.sqlite'
137
142
  @skip_file_digests_sqlite = true
138
143
  end
139
144
 
@@ -156,7 +161,8 @@ class Checker
156
161
  def walk_files
157
162
  Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
158
163
  next unless File.file? filename
159
- next if @skip_file_digests_sqlite && filename == 'file-digests.sqlite'
164
+ next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite'
165
+ next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite-wal'
160
166
  yield filename
161
167
  end
162
168
  end
@@ -182,7 +188,7 @@ class Checker
182
188
 
183
189
  end
184
190
 
185
- VERBOSE = (ENV["VERBOSE"] == "true")
191
+ QUIET = (ENV["QUIET"] == "true")
186
192
  TEST_ONLY = (ENV["TEST_ONLY"] == "true")
187
193
 
188
194
  COUNTS = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
@@ -202,7 +208,7 @@ begin
202
208
  end
203
209
 
204
210
  if COUNTS[:likely_damaged] > 0 || COUNTS[:exceptions] > 0
205
- puts "ERRORS WERE OCCURRED, PLEASE CHECK FOR THEM"
211
+ STDERR.puts "ERRORS WERE OCCURRED"
206
212
  end
207
213
 
208
214
  puts COUNTS.inspect
data/bin/file-digests-test ADDED
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'date'
4
+ require 'set'
5
+ require 'digest'
6
+ require 'fileutils'
7
+ require 'pathname'
8
+ require 'sqlite3'
9
+
10
+ def ensure_dir_exists path
11
+ if File.exist?(path)
12
+ unless File.directory?(path)
13
+ raise "#{path} is not a directory"
14
+ end
15
+ else
16
+ FileUtils.mkdir_p path
17
+ end
18
+ end
19
+
20
+ def measure_time
21
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
22
+ yield
23
+ elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
24
+ puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless QUIET
25
+ end
26
+
27
+ def patch_path_string path
28
+ Gem.win_platform? ? path.gsub(/\\/, '/') : path
29
+ end
30
+
31
+ class DigestDatabase
32
+ def initialize path
33
+ @db = SQLite3::Database.new(path.to_s)
34
+
35
+ unless @db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
36
+ @db.execute 'PRAGMA encoding = "UTF-8"'
37
+ @db.execute "CREATE TABLE digests (
38
+ id INTEGER PRIMARY KEY,
39
+ filename TEXT,
40
+ mtime TEXT,
41
+ digest TEXT,
42
+ digest_check_time TEXT)"
43
+ @db.execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
44
+ end
45
+
46
+ @db.execute 'PRAGMA journal_mode = "WAL"'
47
+ @db.execute 'PRAGMA synchronous = "NORMAL"'
48
+ @db.execute 'PRAGMA locking_mode = "EXCLUSIVE"'
49
+ @db.execute 'PRAGMA cache_size = "5000"'
50
+
51
+ @db.results_as_hash = true
52
+ @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
53
+ @new_files = {}
54
+
55
+
56
+ @insert = @db.prepare("INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))")
57
+ @find_by_filename = @db.prepare("SELECT id, mtime, digest FROM digests WHERE filename = ?")
58
+ @touch_digest_check_time = @db.prepare("UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?")
59
+ @update_mtime_and_digest = @db.prepare("UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?")
60
+ @update_mtime = @db.prepare("UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?")
61
+ @delete_by_filename = @db.prepare("DELETE FROM digests WHERE filename = ?")
62
+ end
63
+
64
+ def insert_or_update file_path, mtime, digest
65
+ result = @find_by_filename.execute file_path
66
+
67
+ if found = result.next_hash
68
+ raise "Multiple records found" if result.next
69
+
70
+ @missing_files.delete(file_path)
71
+
72
+ if found['digest'] == digest
73
+ COUNTS[:good] += 1
74
+ # puts "GOOD: #{file_path}" unless QUIET
75
+ unless TEST_ONLY
76
+ if found['mtime'] == mtime
77
+ @touch_digest_check_time.execute found['id']
78
+ else
79
+ @update_mtime.execute mtime, found['id']
80
+ end
81
+ end
82
+ else
83
+ if found['mtime'] == mtime # Digest is different and mtime is the same
84
+ COUNTS[:likely_damaged] += 1
85
+ STDERR.puts "LIKELY DAMAGED: #{file_path}"
86
+ else
87
+ COUNTS[:updated] += 1
88
+ puts "UPDATED: #{file_path}" unless QUIET
89
+ unless TEST_ONLY
90
+ @update_mtime_and_digest.execute mtime, digest, found['id']
91
+ end
92
+ end
93
+ end
94
+ else
95
+ COUNTS[:new] += 1
96
+ puts "NEW: #{file_path}" unless QUIET
97
+ unless TEST_ONLY
98
+ @new_files[file_path] = digest
99
+ @insert.execute! file_path, mtime, digest
100
+ end
101
+ end
102
+ end
103
+
104
+ def process_missing_files
105
+ @missing_files.delete_if do |filename, digest|
106
+ if @new_files.value?(digest)
107
+ COUNTS[:renamed] += 1
108
+ unless TEST_ONLY
109
+ @delete_by_filename.execute filename
110
+ end
111
+ true
112
+ end
113
+ end
114
+
115
+ if (COUNTS[:missing] = @missing_files.length) > 0
116
+ puts "MISSING FILES:"
117
+ @missing_files.sort.to_h.each do |filename, digest|
118
+ puts filename
119
+ end
120
+ unless TEST_ONLY
121
+ puts "Remove missing files from the database (y/n)?"
122
+ if STDIN.gets.strip == "y"
123
+ @missing_files.each do |filename, digest|
124
+ @delete_by_filename.execute filename
125
+ end
126
+ end
127
+ end
128
+ end
129
+ end
130
+ end
131
+
132
+ class Checker
133
+ def initialize files_path, digest_database_path
134
+ @files_path = files_path
135
+ ensure_dir_exists @files_path
136
+
137
+ if digest_database_path
138
+ @digest_database_path = digest_database_path
139
+ ensure_dir_exists @digest_database_path.dirname
140
+ else
141
+ @digest_database_path = @files_path + '.file-digests.sqlite'
142
+ @skip_file_digests_sqlite = true
143
+ end
144
+
145
+ @digest_database = DigestDatabase.new @digest_database_path
146
+ end
147
+
148
+ def check
149
+ walk_files do |filename|
150
+ begin
151
+ process_file filename
152
+ rescue => exception
153
+ COUNTS[:exceptions] += 1
154
+ STDERR.puts "EXCEPTION: #{filename}: #{exception.message}"
155
+ end
156
+ end
157
+
158
+ @digest_database.process_missing_files
159
+ end
160
+
161
+ def walk_files
162
+ Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
163
+ next unless File.file? filename
164
+ next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite'
165
+ next if @skip_file_digests_sqlite && filename == '.file-digests.sqlite-wal'
166
+ yield filename
167
+ end
168
+ end
169
+
170
+ def process_file filename
171
+ @digest_database.insert_or_update(
172
+ filename.delete_prefix(@files_path.to_s + '/'),
173
+ File.mtime(filename).utc.strftime('%Y-%m-%d %H:%M:%S'),
174
+ get_file_digest(filename)
175
+ )
176
+ end
177
+
178
+ def get_file_digest filename
179
+ File.open(filename, 'rb') do |io|
180
+ digest = Digest::SHA512.new
181
+ buffer = ""
182
+ while io.read(40960, buffer)
183
+ digest.update(buffer)
184
+ end
185
+ return digest.hexdigest
186
+ end
187
+ end
188
+
189
+ end
190
+
191
+ QUIET = (ENV["QUIET"] == "true")
192
+ TEST_ONLY = true
193
+
194
+ COUNTS = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
195
+
196
+ begin
197
+ if ARGV[0]
198
+ files_path = Pathname.new patch_path_string(ARGV[0])
199
+ else
200
+ files_path = Pathname.new patch_path_string(".")
201
+ end
202
+
203
+ digest_database_path = Pathname.new patch_path_string(ARGV[1]) if ARGV[1]
204
+
205
+ measure_time do
206
+ checker = Checker.new files_path, digest_database_path
207
+ checker.check
208
+ end
209
+
210
+ if COUNTS[:likely_damaged] > 0 || COUNTS[:exceptions] > 0
211
+ STDERR.puts "ERRORS WERE OCCURRED"
212
+ end
213
+
214
+ puts COUNTS.inspect
215
+
216
+ rescue => exception
217
+ STDERR.puts "EXCEPTION: #{exception.message}"
218
+ raise exception
219
+ end
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2020-10-08 00:00:00.000000000 Z
@@ -28,15 +28,17 @@ description: Calculate file digests and check for the possible file corruption
28
28
  email: stan@senotrusov.com
29
29
  executables:
30
30
  - file-digests
31
+ - file-digests-test
31
32
  extensions: []
32
33
  extra_rdoc_files: []
33
34
  files:
34
35
  - bin/file-digests
36
+ - bin/file-digests-test
35
37
  homepage: https://github.com/senotrusov/file-digests
36
38
  licenses:
37
39
  - Apache-2.0
38
40
  metadata: {}
39
- post_install_message:
41
+ post_install_message:
40
42
  rdoc_options: []
41
43
  require_paths:
42
44
  - lib
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
54
  version: '0'
53
55
  requirements: []
54
56
  rubygems_version: 3.1.2
55
- signing_key:
57
+ signing_key:
56
58
  specification_version: 4
57
59
  summary: file-digests
58
60
  test_files: []