file-digests 0.0.15 → 0.0.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 573cd697002f30d083625d586b4d03caf8c0f2cb763bed75e2bc9ac0a8b03612
4
- data.tar.gz: 7cfbef9cdf7a9110c6b84b27591a498960515842675fabf6bf5512a78d7e54bc
3
+ metadata.gz: 053daa0145db845876fb37b63656102744c524034b87cb91e85f0bce4658aec9
4
+ data.tar.gz: eac730f10cbe5812078e3461b74b19a740611e9375dd7c34684b2a896e881fc4
5
5
  SHA512:
6
- metadata.gz: 4916e667dccfb630a31fdaa74271a9c85a5f28a81f28143a4f5b406784e0e78652a29ff8e3fd9ff6507a5d90a4d9baacd2385f7de169f2c9dda6dc0f3b58866b
7
- data.tar.gz: be5ae679122bbcffc703940b113e8d1e1057e46f7ce61ead1f52664f6b147bbd9c201d56b7faeafef29da30297cfde2e40f38c8e9400d615b9bbc6e630557259
6
+ metadata.gz: ce4894b25eb9ecbcbee2ae6c32a9e7bc09fce12286a31896c5d6093ed1ca0f0618e2eaf01facd464c3ebb52f1f9e327198d6a4abffae4f011f9c9d589e6c2d3c
7
+ data.tar.gz: bdd07cc7c958095b11b57174d8d98cd065d9a83289aeac5ce62dc604c9526f46ab512a7b229e1620c3ba0a49139cae341ac51b11af372e8395eb5a9bfbece1c6
@@ -1,8 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- QUIET = (ENV["QUIET"] == "true")
4
- TEST_ONLY = (ENV["TEST_ONLY"] == "true")
5
-
6
3
  require 'file-digests'
7
4
 
8
5
  FileDigests.perform_check
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ENV["AUTO"] = "true"
4
+
5
+ require 'file-digests'
6
+
7
+ FileDigests.perform_check
@@ -1,7 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- QUIET = (ENV["QUIET"] == "true")
4
- TEST_ONLY = true
3
+ ENV["TEST_ONLY"] = "true"
5
4
 
6
5
  require 'file-digests'
7
6
 
@@ -1,4 +1,3 @@
1
-
2
1
  require 'date'
3
2
  require 'set'
4
3
  require 'digest'
@@ -6,252 +5,289 @@ require 'fileutils'
6
5
  require 'pathname'
7
6
  require 'sqlite3'
8
7
 
9
- module FileDigests
8
+ class FileDigests
10
9
 
11
10
  def self.perform_check
12
- checker = Checker.new ARGV[0], ARGV[1]
13
- checker.perform_check
11
+ options = {
12
+ auto: (ENV["AUTO"] == "true"),
13
+ quiet: (ENV["QUIET"] == "true"),
14
+ test_only: (ENV["TEST_ONLY"] == "true")
15
+ }
16
+ file_digests = self.new ARGV[0], ARGV[1], options
17
+ file_digests.perform_check
14
18
  end
15
19
 
16
- class DigestDatabase
17
- def initialize path
18
- @db = SQLite3::Database.new path.to_s
19
- @db.results_as_hash = true
20
-
21
- execute 'PRAGMA journal_mode = "WAL"'
22
- execute 'PRAGMA synchronous = "NORMAL"'
23
- execute 'PRAGMA locking_mode = "EXCLUSIVE"'
24
- execute 'PRAGMA cache_size = "5000"'
25
-
26
- unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
27
- execute 'PRAGMA encoding = "UTF-8"'
28
- execute "CREATE TABLE digests (
29
- id INTEGER PRIMARY KEY,
30
- filename TEXT,
31
- mtime TEXT,
32
- digest TEXT,
33
- digest_check_time TEXT)"
34
- execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
35
- end
20
+ def self.show_duplicates
21
+ file_digests = self.new ARGV[0], ARGV[1]
22
+ file_digests.show_duplicates
23
+ end
36
24
 
37
- @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
38
- @new_files = {}
25
+ def initialize files_path, digest_database_path, options = {}
26
+ @options = options
39
27
 
40
- prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
41
- prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
42
- prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
43
- prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
44
- prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
45
- prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
46
- end
28
+ @files_path = cleanup_path(files_path || ".")
29
+ @prefix_to_remove = @files_path.to_s + '/'
47
30
 
48
- def insert_or_update file_path, mtime, digest, counters
49
- result = find_by_filename file_path
31
+ raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
50
32
 
51
- if found = result.next_hash
52
- raise "Multiple records found" if result.next
33
+ @digest_database_path = if digest_database_path
34
+ cleanup_path(digest_database_path)
35
+ else
36
+ @files_path + '.file-digests.sqlite'
37
+ end
53
38
 
54
- @missing_files.delete(file_path)
39
+ if File.directory?(@digest_database_path)
40
+ @digest_database_path = @digest_database_path + '.file-digests.sqlite'
41
+ end
55
42
 
56
- if found['digest'] == digest
57
- counters[:good] += 1
58
- # puts "GOOD: #{file_path}" unless QUIET
59
- unless TEST_ONLY
60
- if found['mtime'] == mtime
61
- touch_digest_check_time found['id']
62
- else
63
- update_mtime mtime, found['id']
64
- end
65
- end
66
- else
67
- if found['mtime'] == mtime # Digest is different and mtime is the same
68
- counters[:likely_damaged] += 1
69
- STDERR.puts "LIKELY DAMAGED: #{file_path}"
70
- else
71
- counters[:updated] += 1
72
- puts "UPDATED: #{file_path}" unless QUIET
73
- unless TEST_ONLY
74
- update_mtime_and_digest mtime, digest, found['id']
75
- end
76
- end
77
- end
78
- else
79
- counters[:new] += 1
80
- puts "NEW: #{file_path}" unless QUIET
81
- unless TEST_ONLY
82
- @new_files[file_path] = digest
83
- insert file_path, mtime, digest
84
- end
85
- end
43
+ if @files_path == @digest_database_path.dirname
44
+ @skip_file_digests_sqlite = true
86
45
  end
87
46
 
88
- def process_missing_files counters
89
- @missing_files.delete_if do |filename, digest|
90
- if @new_files.value?(digest)
91
- counters[:renamed] += 1
92
- unless TEST_ONLY
93
- delete_by_filename filename
94
- end
95
- true
96
- end
97
- end
47
+ ensure_dir_exists @digest_database_path.dirname
98
48
 
99
- if (counters[:missing] = @missing_files.length) > 0
100
- puts "\nMISSING FILES:"
101
- @missing_files.sort.to_h.each do |filename, digest|
102
- puts filename
103
- end
104
- unless TEST_ONLY
105
- puts "Remove missing files from the database (y/n)?"
106
- if STDIN.gets.strip.downcase == "y"
107
- @db.transaction do
108
- @missing_files.each do |filename, digest|
109
- delete_by_filename filename
110
- end
111
- end
112
- end
113
- end
114
- end
49
+ # Please do not use this flag, support for sha512 is here for backward compatibility, and one day it will be removed.
50
+ if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
51
+ @use_sha512 = true
115
52
  end
116
53
 
117
- private
54
+ initialize_database @digest_database_path
55
+ end
118
56
 
119
- def execute *args, &block
120
- @db.execute *args, &block
57
+ def initialize_database path
58
+ @db = SQLite3::Database.new path.to_s
59
+ @db.results_as_hash = true
60
+
61
+ execute 'PRAGMA journal_mode = "WAL"'
62
+ execute 'PRAGMA synchronous = "NORMAL"'
63
+ execute 'PRAGMA locking_mode = "EXCLUSIVE"'
64
+ execute 'PRAGMA cache_size = "5000"'
65
+
66
+ unless execute("SELECT name FROM sqlite_master WHERE type='table' AND name = 'digests'").length == 1
67
+ execute 'PRAGMA encoding = "UTF-8"'
68
+ execute "CREATE TABLE digests (
69
+ id INTEGER PRIMARY KEY,
70
+ filename TEXT,
71
+ mtime TEXT,
72
+ digest TEXT,
73
+ digest_check_time TEXT)"
74
+ execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
121
75
  end
122
76
 
123
- def prepare_method name, query
124
- variable = "@#{name}"
125
- instance_variable_set(variable, @db.prepare(query))
126
- define_singleton_method name do |*args, &block|
127
- instance_variable_get(variable).execute(*args, &block)
128
- end
129
- end
77
+ prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
78
+ prepare_method :find_by_filename, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
79
+ prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
80
+ prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
81
+ prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
82
+ prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
83
+ prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
130
84
  end
131
85
 
132
- class Checker
133
- def initialize files_path, digest_database_path
134
- @files_path = cleanup_path(files_path || ".")
135
- @prefix_to_remove = @files_path.to_s + '/'
86
+ def perform_check
87
+ @counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
88
+ @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]
89
+ @new_files = {}
136
90
 
137
- raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))
138
-
139
- @digest_database_path = if digest_database_path
140
- cleanup_path(digest_database_path)
141
- else
142
- @files_path + '.file-digests.sqlite'
91
+ measure_time do
92
+ walk_files do |filename|
93
+ process_file filename
143
94
  end
95
+ end
144
96
 
145
- if File.directory?(@digest_database_path)
146
- @digest_database_path = @digest_database_path + '.file-digests.sqlite'
147
- end
97
+ track_renames
148
98
 
149
- if @files_path == @digest_database_path.dirname
150
- @skip_file_digests_sqlite = true
99
+ if any_missing_files?
100
+ print_missing_files
101
+ if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
102
+ remove_missing_files
151
103
  end
104
+ end
152
105
 
153
- ensure_dir_exists @digest_database_path.dirname
106
+ if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
107
+ STDERR.puts "ERRORS WERE OCCURRED"
108
+ end
154
109
 
155
- if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
156
- @use_sha512 = true
157
- end
110
+ puts @counters.inspect
111
+ end
158
112
 
159
- @digest_database = DigestDatabase.new @digest_database_path
160
- @counters = {good: 0, updated: 0, new: 0, missing: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
161
- end
113
+ def show_duplicates
114
+ current_digest = nil
115
+ result = query_duplicates
162
116
 
163
- def perform_check
164
- measure_time do
165
- walk_files do |filename|
166
- process_file filename
167
- end
117
+ while found = result.next_hash do
118
+ if current_digest != found['digest']
119
+ puts "" if current_digest
120
+ current_digest = found['digest']
121
+ puts "#{found['digest']}:"
168
122
  end
123
+ puts " #{found['filename']}"
124
+ end
125
+ end
169
126
 
170
- @digest_database.process_missing_files @counters
127
+ private
171
128
 
172
- if @counters[:likely_damaged] > 0 || @counters[:exceptions] > 0
173
- STDERR.puts "ERRORS WERE OCCURRED"
174
- end
129
+ def process_file filename
130
+ return if File.symlink? filename
175
131
 
176
- puts @counters.inspect
132
+ stat = File.stat filename
133
+
134
+ return if stat.blockdev?
135
+ return if stat.chardev?
136
+ return if stat.directory?
137
+ return if stat.pipe?
138
+ unless stat.readable?
139
+ raise "File is not readable"
140
+ end
141
+ return if stat.socket?
142
+
143
+ if @skip_file_digests_sqlite
144
+ basename = File.basename(filename)
145
+ return if basename == '.file-digests.sha512'
146
+ return if basename == '.file-digests.sqlite'
147
+ return if basename == '.file-digests.sqlite-wal'
148
+ return if basename == '.file-digests.sqlite-shm'
177
149
  end
178
150
 
179
- private
151
+ insert_or_update(
152
+ filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
153
+ stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
154
+ get_file_digest(filename)
155
+ )
156
+ rescue => exception
157
+ @counters[:exceptions] += 1
158
+ STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
159
+ end
180
160
 
181
- def process_file filename
182
- return if File.symlink? filename
161
+ def patch_path_string path
162
+ Gem.win_platform? ? path.gsub(/\\/, '/') : path
163
+ end
183
164
 
184
- stat = File.stat filename
165
+ def cleanup_path path
166
+ Pathname.new(patch_path_string(path)).cleanpath
167
+ end
185
168
 
186
- return if stat.blockdev?
187
- return if stat.chardev?
188
- return if stat.directory?
189
- return if stat.pipe?
190
- unless stat.readable?
191
- raise "File is not readable"
192
- end
193
- return if stat.socket?
194
-
195
- if @skip_file_digests_sqlite
196
- basename = File.basename(filename)
197
- return if basename == '.file-digests.sha512'
198
- return if basename == '.file-digests.sqlite'
199
- return if basename == '.file-digests.sqlite-wal'
200
- return if basename == '.file-digests.sqlite-shm'
169
+ def ensure_dir_exists path
170
+ if File.exist?(path)
171
+ unless File.directory?(path)
172
+ raise "#{path} is not a directory"
201
173
  end
174
+ else
175
+ FileUtils.mkdir_p path
176
+ end
177
+ end
202
178
 
203
- @digest_database.insert_or_update(
204
- filename.delete_prefix(@prefix_to_remove).encode('utf-8', universal_newline: true).unicode_normalize(:nfkc),
205
- stat.mtime.utc.strftime('%Y-%m-%d %H:%M:%S'),
206
- get_file_digest(filename),
207
- @counters
208
- )
209
- rescue => exception
210
- @counters[:exceptions] += 1
211
- STDERR.puts "EXCEPTION: #{filename.encode('utf-8', universal_newline: true)}: #{exception.message}"
179
+ def walk_files
180
+ Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
181
+ yield filename
212
182
  end
183
+ end
213
184
 
214
- def patch_path_string path
215
- Gem.win_platform? ? path.gsub(/\\/, '/') : path
185
+ def get_file_digest filename
186
+ File.open(filename, 'rb') do |io|
187
+ digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
188
+ buffer = ""
189
+ while io.read(40960, buffer)
190
+ digest.update(buffer)
191
+ end
192
+ return digest.hexdigest
216
193
  end
194
+ end
217
195
 
218
- def cleanup_path path
219
- Pathname.new(patch_path_string(digest_database_path)).cleanpath
196
+ def confirm text
197
+ if STDIN.tty? && STDOUT.tty?
198
+ puts "#{text} (y/n)?"
199
+ STDIN.gets.strip.downcase == "y"
220
200
  end
201
+ end
202
+
203
+ def measure_time
204
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
205
+ yield
206
+ elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
207
+ puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless @options[:quiet]
208
+ end
221
209
 
222
- def ensure_dir_exists path
223
- if File.exist?(path)
224
- unless File.directory?(path)
225
- raise "#{path} is not a directory"
210
+ def insert_or_update file_path, mtime, digest
211
+ result = find_by_filename file_path
212
+
213
+ if found = result.next_hash
214
+ raise "Multiple records found" if result.next
215
+
216
+ @missing_files.delete(file_path)
217
+
218
+ if found['digest'] == digest
219
+ @counters[:good] += 1
220
+ # puts "GOOD: #{file_path}" unless @options[:quiet]
221
+ unless @options[:test_only]
222
+ if found['mtime'] == mtime
223
+ touch_digest_check_time found['id']
224
+ else
225
+ update_mtime mtime, found['id']
226
+ end
226
227
  end
227
228
  else
228
- FileUtils.mkdir_p path
229
+ if found['mtime'] == mtime # Digest is different and mtime is the same
230
+ @counters[:likely_damaged] += 1
231
+ STDERR.puts "LIKELY DAMAGED: #{file_path}"
232
+ else
233
+ @counters[:updated] += 1
234
+ puts "UPDATED: #{file_path}" unless @options[:quiet]
235
+ unless @options[:test_only]
236
+ update_mtime_and_digest mtime, digest, found['id']
237
+ end
238
+ end
229
239
  end
230
- end
231
-
232
- def walk_files
233
- Dir.glob(@files_path + '**' + '*', File::FNM_DOTMATCH) do |filename|
234
- yield filename
240
+ else
241
+ @counters[:new] += 1
242
+ puts "NEW: #{file_path}" unless @options[:quiet]
243
+ unless @options[:test_only]
244
+ @new_files[file_path] = digest
245
+ insert file_path, mtime, digest
235
246
  end
236
247
  end
248
+ end
237
249
 
238
- def get_file_digest filename
239
- File.open(filename, 'rb') do |io|
240
- digest = (@use_sha512 ? Digest::SHA512 : Digest::SHA256).new
241
- buffer = ""
242
- while io.read(40960, buffer)
243
- digest.update(buffer)
250
+ def track_renames
251
+ @missing_files.delete_if do |filename, digest|
252
+ if @new_files.value?(digest)
253
+ @counters[:renamed] += 1
254
+ unless @options[:test_only]
255
+ delete_by_filename filename
244
256
  end
245
- return digest.hexdigest
257
+ true
246
258
  end
247
259
  end
260
+ @counters[:missing] = @missing_files.length
261
+ end
262
+
263
+ def any_missing_files?
264
+ @missing_files.length > 0
265
+ end
266
+
267
+ def print_missing_files
268
+ puts "\nMISSING FILES:"
269
+ @missing_files.sort.to_h.each do |filename, digest|
270
+ puts filename
271
+ end
272
+ end
248
273
 
249
- def measure_time
250
- start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
251
- yield
252
- elapsed = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start).to_i
253
- puts "Elapsed time: #{elapsed / 3600}h #{(elapsed % 3600) / 60}m #{elapsed % 60}s" unless QUIET
274
+ def remove_missing_files
275
+ @db.transaction do
276
+ @missing_files.each do |filename, digest|
277
+ delete_by_filename filename
278
+ end
254
279
  end
280
+ end
281
+
282
+ def execute *args, &block
283
+ @db.execute *args, &block
284
+ end
255
285
 
286
+ def prepare_method name, query
287
+ variable = "@#{name}"
288
+ instance_variable_set(variable, @db.prepare(query))
289
+ define_singleton_method name do |*args, &block|
290
+ instance_variable_get(variable).execute(*args, &block)
291
+ end
256
292
  end
257
293
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: file-digests
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.15
4
+ version: 0.0.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stanislav Senotrusov
@@ -28,11 +28,13 @@ description: Calculate file digests and check for the possible file corruption
28
28
  email: stan@senotrusov.com
29
29
  executables:
30
30
  - file-digests
31
+ - file-digests-auto
31
32
  - file-digests-test
32
33
  extensions: []
33
34
  extra_rdoc_files: []
34
35
  files:
35
36
  - bin/file-digests
37
+ - bin/file-digests-auto
36
38
  - bin/file-digests-test
37
39
  - lib/file-digests.rb
38
40
  homepage: https://github.com/senotrusov/file-digests