file-digests 0.0.15 → 0.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 573cd697002f30d083625d586b4d03caf8c0f2cb763bed75e2bc9ac0a8b03612
4
- data.tar.gz: 7cfbef9cdf7a9110c6b84b27591a498960515842675fabf6bf5512a78d7e54bc
3
+ metadata.gz: 053daa0145db845876fb37b63656102744c524034b87cb91e85f0bce4658aec9
4
+ data.tar.gz: eac730f10cbe5812078e3461b74b19a740611e9375dd7c34684b2a896e881fc4
5
5
  SHA512:
6
- metadata.gz: 4916e667dccfb630a31fdaa74271a9c85a5f28a81f28143a4f5b406784e0e78652a29ff8e3fd9ff6507a5d90a4d9baacd2385f7de169f2c9dda6dc0f3b58866b
7
- data.tar.gz: be5ae679122bbcffc703940b113e8d1e1057e46f7ce61ead1f52664f6b147bbd9c201d56b7faeafef29da30297cfde2e40f38c8e9400d615b9bbc6e630557259
6
+ metadata.gz: ce4894b25eb9ecbcbee2ae6c32a9e7bc09fce12286a31896c5d6093ed1ca0f0618e2eaf01facd464c3ebb52f1f9e327198d6a4abffae4f011f9c9d589e6c2d3c
7
+ data.tar.gz: bdd07cc7c958095b11b57174d8d98cd065d9a83289aeac5ce62dc604c9526f46ab512a7b229e1620c3ba0a49139cae341ac51b11af372e8395eb5a9bfbece1c6
@@ -1,8 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- QUIET = (ENV["QUIET"] == "true")
4
- TEST_ONLY = (ENV["TEST_ONLY"] == "true")
5
-
6
3
  require 'file-digests'
7
4
 
8
5
  FileDigests.perform_check
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ENV["AUTO"] = "true"
4
+
5
+ require 'file-digests'
6
+
7
+ FileDigests.perform_check
@@ -1,7 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- QUIET = (ENV["QUIET"] == "true")
4
- TEST_ONLY = true
3
+ ENV["TEST_ONLY"] = "true"
5
4
 
6
5
  require 'file-digests'
7
6
 
@@ -1,4 +1,3 @@
1
-
2
1
  require 'date'
3
2
  require 'set'
4
3
  require 'digest'
@@ -6,252 +5,289 @@ require 'fileutils'
6
5
  require 'pathname'
7
6
  require 'sqlite3'
8
7
 
9
- module FileDigests
8
+ class FileDigests
10
9
 
11
10
  def self.perform_check
12
- checker = Checker.new ARGV[0], ARGV[1]
13
- checker.perform_check
11
+ options = {
12
+ auto: (ENV["AUTO"] == "true"),
13
+ quiet: (ENV["QUIET"] == "true"),
14
+ test_only: (ENV["TEST_ONLY"] == "true")
15
+ }
16
+ file_digests = self.new ARGV[0], ARGV[1], options
17
+ file_digests.perform_check
14
18
  end
15
19
 
16
- class DigestDatabase
17
- def initialize path
18
- @db = SQLite3::Database.new path.to_s
19
- @db.results_as_hash = true
20
-
21
- execute 'PRAGMA journal_mode = "WAL"'
22
- execute 'PRAGMA synchronous = "NORMAL"'
23
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
24
- execute 'PRAGMA cache_size = "5000"'
25
-
26
- unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
27
- execute 'PRAGMA encoding = "UTF-8"'
28
- execute "CREATE TABLE digests (
29
- id INTEGER PRIMARY KEY,
30
- filename TEXT,
31
- mtime TEXT,
32
- digest TEXT,
33
- digest_check_time TEXT)"
34
- execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
35
- end
20
+ def self.show_duplicates
21
+ file_digests = self.new ARGV[0], ARGV[1]
22
+ file_digests.show_duplicates
23
+ end
36
24
 
37
- @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
38
- @new_files = {}
25
+ def initialize files_path, digest_database_path, options = {}
26
+ @options = options
39
27
 
40
- prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
41
- prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
42
- prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
43
- prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
44
- prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
45
- prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
46
- end
28
+ @files_path = cleanup_path(files_path || ".")
29
+ @prefix_to_remove = @files_path.to_s + '/'
47
30
 
48
- def insert_or_update file_path, mtime, digest, counters
49
- result = find_by_filename file_path
31
+ raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
50
32
 
51
- if found = result.next_hash
52
- raise "Multiple records found" if result.next
33
+ @digest_database_path = if digest_database_path
34
+ cleanup_path(digest_database_path)
35
+ else
36
+ @files_path + '.file-digests.sqlite'
37
+ end
53
38
 
54
- @missing_files.delete(file_path)
39
+ if File.directory?(@digest_database_path)
40
+ @digest_database_path = @digest_database_path + '.file-digests.sqlite'
41
+ end
55
42
 
56
- if found['digest'] == digest
57
- counters[:good] += 1
58
- # puts "GOOD: #{file_path}" unless QUIET
59
- unless TEST_ONLY
60
- if found['mtime'] == mtime
61
- touch_digest_check_time found['id']
62
- else
63
- update_mtime mtime, found['id']
64
- end
65
- end
66
- else
67
- if found['mtime'] == mtime # Digest is different and mtime is the same
68
- counters[:likely_damaged] += 1
69
- STDERR.puts "LIKELY DAMAGED: #{file_path}"
70
- else
71
- counters[:updated] += 1
72
- puts "UPDATED: #{file_path}" unless QUIET
73
- unless TEST_ONLY
74
- update_mtime_and_digest mtime, digest, found['id']
75
- end
76
- end
77
- end
78
- else
79
- counters[:new] += 1
80
- puts "NEW: #{file_path}" unless QUIET
81
- unless TEST_ONLY
82
- @new_files[file_path] = digest
83
- insert file_path, mtime, digest
84
- end
85
- end
43
+ if @files_path == @digest_database_path.dirname
44
+ @skip_file_digests_sqlite = true
86
45
  end
87
46
 
88
- def process_missing_files counters
89
- @missing_files.delete_if do |filename, digest|
90
- if @new_files.value?(digest)
91
- counters[:renamed] += 1
92
- unless TEST_ONLY
93
- delete_by_filename filename
94
- end
95
- true
96
- end
97
- end
47
+ ensure_dir_exists @digest_database_path.dirname
98
48
 
99
- if (counters[:missing] = @missing_files.length) > 0
100
- puts "\nMISSING FILES:"
101
- @missing_files.sort.to_h.each do |filename, digest|
102
- puts filename
103
- end
104
- unless TEST_ONLY
105
- puts "Remove missing files from the database (y/n)?"
106
- if STDIN.gets.strip.downcase == "y"
107
- @db.transaction do
108
- @missing_files.each do |filename, digest|
109
- delete_by_filename filename
110
- end
111
- end
112
- end
113
- end
114
- end
49
+ # Please do not use this flag, support for sha512 is here for backward compatibility, and one day it will be removed.
50
+ if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
51
+ @use_sha512 = true
115
52
  end
116
53
 
117
- private
54
+ initialize_database @digest_database_path
55
+ end
118
56
 
119
- def execute *args, &block
120
- @db.execute *args, &block
57
+ def initialize_database path
58
+ @db = SQLite3::Database.new path.to_s
59
+ @db.results_as_hash = true
60
+
61
+ execute 'PRAGMA journal_mode = "WAL"'
62
+ execute 'PRAGMA synchronous = "NORMAL"'
63
+ execute 'PRAGMA locking_mode = "EXCLUSIVE"'
64
+ execute 'PRAGMA cache_size = "5000"'
65
+
66
+ unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
67
+ execute 'PRAGMA encoding = "UTF-8"'
68
+ execute "CREATE TABLE digests (
69
+ id INTEGER PRIMARY KEY,
70
+ filename TEXT,
71
+ mtime TEXT,
72
+ digest TEXT,
73
+ digest_check_time TEXT)"
74
+ execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
121
75
  end
122
76
 
123
- def prepare_method name, query
124
- variable = "@#{name}"
125
- instance_variable_set(variable, @db.prepare(query))
126
- define_singleton_method name do |*args, &block|
127
- instance_variable_get(variable).execute(*args, &block)
128
- end
129
- end
77
+ prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
78
+ prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
79
+ prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
80
+ prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
81
+ prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
82
+ prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
83
+ prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
130
84
  end
131
85
 
132
- class Checker
133
- def initialize files_path, digest_database_path
134
- @files_path = cleanup_path(files_path || ".")
135
- @prefix_to_remove = @files_path.to_s + '/'
86
+ def perform_check
87
+ @counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
88
+ @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
89
+ @new_files = {}
136
90
 
137
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
138
-
139
- @digest_database_path = if digest_database_path
140
- cleanup_path(digest_database_path)
141
- else
142
- @files_path + '.file-digests.sqlite'
91
+ measure_time do
92
+ walk_files do |filename|
93
+ process_file filename
143
94
  end
95
+ end
144
96
 
145
- if File.directory?(@digest_database_path)
146
- @digest_database_path = @digest_database_path + '.file-digests.sqlite'
147
- end
97
+ track_renames
148
98
 
149
- if @files_path == @digest_database_path.dirname
150
- @skip_file_digests_sqlite = true
99
+ if any_missing_files?
100
+ print_missing_files
101
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
102
+ remove_missing_files
151
103
  end
104
+ end
152
105
 
153
- ensure_dir_exists @digest_database_path.dirname
106
+ if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
107
+ STDERR.puts "ERRORS WERE OCCURRED"
108
+ end
154
109
 
155
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
156
- @use_sha512 = true
157
- end
110
+ puts @counters.inspect
111
+ end
158
112
 
159
- @digest_database = DigestDatabase.new @digest_database_path
160
- @counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
161
- end
113
+ def show_duplicates
114
+ current_digest = nil
115
+ result = query_duplicates
162
116
 
163
- def perform_check
164
- measure_time do
165
- walk_files do |filename|
166
- process_file filename
167
- end
117
+ while found = result.next_hash do
118
+ if current_digest != found['digest']
119
+ puts "" if current_digest
120
+ current_digest = found['digest']
121
+ puts "#{found['digest']}:"
168
122
  end
123
+ puts " #{found['filename']}"
124
+ end
125
+ end
169
126
 
170
- @digest_database.process_missing_files @counters
127
+ private
171
128
 
172
- if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
173
- STDERR.puts "ERRORS WERE OCCURRED"
174
- end
129
+ def process_file filename
130
+ return if File.symlink? filename
175
131
 
176
- puts @counters.inspect
132
+ stat = File.stat filename
133
+
134
+ return if stat.blockdev?
135
+ return if stat.chardev?
136
+ return if stat.directory?
137
+ return if stat.pipe?
138
+ unless stat.readable?
139
+ raise "File is not readable"
140
+ end
141
+ return if stat.socket?
142
+
143
+ if @skip_file_digests_sqlite
144
+ basename = File.basename(filename)
145
+ return if basename == '.file-digests.sha512'
146
+ return if basename == '.file-digests.sqlite'
147
+ return if basename == '.file-digests.sqlite-wal'
148
+ return if basename == '.file-digests.sqlite-shm'
177
149
  end
178
150
 
179
- private
151
+ insert_or_update(
152
+ filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
153
+ stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
154
+ get_file_digest(filename)
155
+ )
156
+ rescue => exception
157
+ @counters[:exceptions] += 1
158
+ STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
159
+ end
180
160
 
181
- def process_file filename
182
- return if File.symlink? filename
161
+ def patch_path_string path
162
+ Gem.win_platform? ? path.gsub(/\\/, '/') : path
163
+ end
183
164
 
184
- stat = File.stat filename
165
+ def cleanup_path path
166
+ Pathname.new(patch_path_string(path)).cleanpath
167
+ end
185
168
 
186
- return if stat.blockdev?
187
- return if stat.chardev?
188
- return if stat.directory?
189
- return if stat.pipe?
190
- unless stat.readable?
191
- raise "File is not readable"
192
- end
193
- return if stat.socket?
194
-
195
- if @skip_file_digests_sqlite
196
- basename = File.basename(filename)
197
- return if basename == '.file-digests.sha512'
198
- return if basename == '.file-digests.sqlite'
199
- return if basename == '.file-digests.sqlite-wal'
200
- return if basename == '.file-digests.sqlite-shm'
169
+ def ensure_dir_exists path
170
+ if File.exist?(path)
171
+ unless File.directory?(path)
172
+ raise "#{path} is not a directory"
201
173
  end
174
+ else
175
+ FileUtils.mkdir_p path
176
+ end
177
+ end
202
178
 
203
- @digest_database.insert_or_update(
204
- filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
205
- stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
206
- get_file_digest(filename),
207
- @counters
208
- )
209
- rescue => exception
210
- @counters[:exceptions] += 1
211
- STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
179
+ def walk_files
180
+ Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
181
+ yield filename
212
182
  end
183
+ end
213
184
 
214
- def patch_path_string path
215
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
185
+ def get_file_digest filename
186
+ File.open(filename, 'rb') do |io|
187
+ digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
188
+ buffer = ""
189
+ while io.read(40960, buffer)
190
+ digest.update(buffer)
191
+ end
192
+ return digest.hexdigest
216
193
  end
194
+ end
217
195
 
218
- def cleanup_path path
219
- Pathname.new(patch_path_string(digest_database_path)).cleanpath
196
+ def confirm text
197
+ if STDIN.tty? && STDOUT.tty?
198
+ puts "#{text} (y/n)?"
199
+ STDIN.gets.strip.downcase == "y"
220
200
  end
201
+ end
202
+
203
+ def measure_time
204
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
205
+ yield
206
+ elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
207
+ puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless @options[:quiet]
208
+ end
221
209
 
222
- def ensure_dir_exists path
223
- if File.exist?(path)
224
- unless File.directory?(path)
225
- raise "#{path} is not a directory"
210
+ def insert_or_update file_path, mtime, digest
211
+ result = find_by_filename file_path
212
+
213
+ if found = result.next_hash
214
+ raise "Multiple records found" if result.next
215
+
216
+ @missing_files.delete(file_path)
217
+
218
+ if found['digest'] == digest
219
+ @counters[:good] += 1
220
+ # puts "GOOD: #{file_path}" unless @options[:quiet]
221
+ unless @options[:test_only]
222
+ if found['mtime'] == mtime
223
+ touch_digest_check_time found['id']
224
+ else
225
+ update_mtime mtime, found['id']
226
+ end
226
227
  end
227
228
  else
228
- FileUtils.mkdir_p path
229
+ if found['mtime'] == mtime # Digest is different and mtime is the same
230
+ @counters[:likely_damaged] += 1
231
+ STDERR.puts "LIKELY DAMAGED: #{file_path}"
232
+ else
233
+ @counters[:updated] += 1
234
+ puts "UPDATED: #{file_path}" unless @options[:quiet]
235
+ unless @options[:test_only]
236
+ update_mtime_and_digest mtime, digest, found['id']
237
+ end
238
+ end
229
239
  end
230
- end
231
-
232
- def walk_files
233
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
234
- yield filename
240
+ else
241
+ @counters[:new] += 1
242
+ puts "NEW: #{file_path}" unless @options[:quiet]
243
+ unless @options[:test_only]
244
+ @new_files[file_path] = digest
245
+ insert file_path, mtime, digest
235
246
  end
236
247
  end
248
+ end
237
249
 
238
- def get_file_digest filename
239
- File.open(filename, 'rb') do |io|
240
- digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
241
- buffer = ""
242
- while io.read(40960, buffer)
243
- digest.update(buffer)
250
+ def track_renames
251
+ @missing_files.delete_if do |filename, digest|
252
+ if @new_files.value?(digest)
253
+ @counters[:renamed] += 1
254
+ unless @options[:test_only]
255
+ delete_by_filename filename
244
256
  end
245
- return digest.hexdigest
257
+ true
246
258
  end
247
259
  end
260
+ @counters[:missing] = @missing_files.length
261
+ end
262
+
263
+ def any_missing_files?
264
+ @missing_files.length > 0
265
+ end
266
+
267
+ def print_missing_files
268
+ puts "\nMISSING FILES:"
269
+ @missing_files.sort.to_h.each do |filename, digest|
270
+ puts filename
271
+ end
272
+ end
248
273
 
249
- def measure_time
250
- start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
251
- yield
252
- elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
253
- puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless QUIET
274
+ def remove_missing_files
275
+ @db.transaction do
276
+ @missing_files.each do |filename, digest|
277
+ delete_by_filename filename
278
+ end
254
279
  end
280
+ end
281
+
282
+ def execute *args, &block
283
+ @db.execute *args, &block
284
+ end
255
285
 
286
+ def prepare_method name, query
287
+ variable = "@#{name}"
288
+ instance_variable_set(variable, @db.prepare(query))
289
+ define_singleton_method name do |*args, &block|
290
+ instance_variable_get(variable).execute(*args, &block)
291
+ end
256
292
  end
257
293
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.15
4
+ version: 0.0.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
@@ -28,11 +28,13 @@ description: Calculate file digests and check for the possible file corruption
28
28
  email: stan@senotrusov.com
29
29
  executables:
30
30
  - file-digests
31
+ - file-digests-auto
31
32
  - file-digests-test
32
33
  extensions: []
33
34
  extra_rdoc_files: []
34
35
  files:
35
36
  - bin/file-digests
37
+ - bin/file-digests-auto
36
38
  - bin/file-digests-test
37
39
  - lib/file-digests.rb
38
40
  homepage: https://github.com/senotrusov/file-digests