recheck 0.0.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,277 @@
1
+ module Recheck
2
+ module Reporter
3
# Superclass for reporters. Every hook has a default implementation here, so a
# subclass only overrides the hooks it cares about.
class Base
  @subclasses = []

  # Register subclasses for `recheck reporters`.
  class << self
    # The registry is only populated on Base itself; on a subclass this
    # reader returns nil.
    attr_reader :subclasses

    # Records every descendant in Base's registry. Appending through Base
    # (rather than the receiver's own @subclasses, which is nil on
    # subclasses) keeps a reporter that inherits from Cron, Default, etc.
    # from raising NoMethodError at class-definition time.
    def inherited(subclass)
      super
      Base.subclasses << subclass
    end
  end

  # One-line description of the reporter; nil here, subclasses return a String.
  def self.help
  end

  # arg: the reporter's option value; ignored by Base.
  def initialize(arg:)
  end

  # Returns a String identifying the record in reporter output.
  def fetch_record_id(record)
    if Recheck.unloaded_is_a? record, "ActiveRecord::Base"
      record.id.to_s
      # or: record.to_global_id, if you want to override in
      # your_app/recheck/reporter/base_reporter.rb
    elsif Recheck.unloaded_is_a? record, "Sequel::Model"
      record.pk.to_s # may be an array
    else
      record.to_s
    end
  end

  # A recheck run flows like this, with indicated calls to each reporter.
  #
  # around_run -> for each Checker class:
  #   around_checker ->
  #     around_query ->
  #       run each query() method
  #       for each 'check_' method on the checker:
  #         for each record queried:
  #           around_check ->
  #             check(record)

  # Each around_* hook must yield exactly once; it returns whatever the
  # block returned.

  # Yields for the whole run; the block's value is the total counts.
  def around_run(checkers: [])
    yield
  end

  # Yields for one checker; the block's value is that checker's counts.
  def around_checker(checker:, queries: [], checks: [])
    yield
  end

  # Yields for one query method of a checker.
  def around_query(checker:, query:, checks: [])
    yield
  end

  # Yields for one check of one record; the block's value is the result.
  def around_check(checker:, query:, check:, record:)
    yield
  end

  def halt(checker:, query:, error:, check: nil)
    # running the checker was halted, so there's no result available for yield
  end
end # Base
65
+
66
# Reporter that stays silent unless something went wrong.
class Cron < Base
  def self.help
    "Prints failures/exceptions but nothing on pass. For use in cron jobs, which use silence to indicate success."
  end

  # Raises ArgumentError when given an option; this reporter takes none.
  def initialize(arg:)
    raise ArgumentError, "does not take options" unless arg.nil?
    @errors = []
  end

  # Prints the grand total only when the run recorded errors.
  # Returns the yielded total counts.
  def around_run(checkers: [])
    total_counts = yield

    puts "Total: #{total_counts.summary}" if total_counts.any_errors?

    total_counts
  end

  # Collects errors for one checker and prints them once it has finished.
  # Returns the yielded counts.
  def around_checker(checker:, queries:, checks:)
    @errors = []

    counts = yield

    if counts.any_errors?
      puts "#{checker.class}: #{counts.summary}"
      print_errors
    end

    counts
  end

  # Remembers the result when it is an Error; passes are dropped.
  def around_check(checker:, query:, check:, record:)
    result = yield
    @errors << result if result.is_a? Error
  end

  def halt(checker:, query:, error:, check: nil)
    @errors << error
  end

  # Prints one detail line per (checker, query, check, type) group.
  def print_errors
    failure_details = []
    grouped_errors = @errors.group_by { |e| [e.checker, e.query, e.check, e.type] }

    grouped_errors.each do |(checker, query, check, type), group_errors|
      # checker is an instance; print its class, like the header line above.
      case type
      when :fail
        ids = group_errors.map { |e| fetch_record_id(e.record) }.join(", ")
        failure_details << " #{checker.class}##{query} -> #{check} failed for records: #{ids}"
      when :exception
        error = group_errors.first
        failure_details << " #{checker.class}##{query} -> #{check} exception #{error.exception.message} for #{group_errors.size} records"
        # The backtrace belongs to the exception, not to the record being checked.
        failure_details << error.exception.full_message(highlight: false, order: :top) if error.exception.respond_to?(:full_message)
      when :no_query_methods
        failure_details << " #{checker.class}: Did not define .query_methods"
      when :no_queries
        failure_details << " #{checker.class} Defines .query_methods, but it didn't return any"
      when :no_check_methods
        failure_details << " #{checker.class}: Did not define .check_methods"
      when :no_checks
        failure_details << " #{checker.class} Defines .check_methods, but it didn't return any"
      when :blanket
        failure_details << " #{checker.class}: Skipping because the first 20 checks all failed. Either there's a lot of bad data or there's something wrong with the checks."
      end
    end
    puts failure_details
  end
end # Cron
125
+
126
# Reporter used when none is named: prints a progress mark per 1000 checks,
# a summary per checker, the grouped errors, and a grand total.
class Default < Base
  def self.help
    "Used when no --reporter is named. Prints incremental progress to stdout. No options."
  end

  # Raises ArgumentError when given an option; this reporter takes none.
  def initialize(arg:)
    @errors = []
    @current_counts = CountStats.new
    raise ArgumentError, "does not take options" unless arg.nil?
  end

  # Prints the grand total after the run and returns the yielded counts.
  def around_run(checkers: [])
    yield.tap do |totals|
      puts "Total: #{totals.summary}"
      puts "Queries found no records to check (this is OK when a checker queries for invalid data)" if totals.all_zero?
    end
  end

  # Prints the checker name, then (after the checker ran) its summary and
  # errors. Returns the yielded counts.
  def around_checker(checker:, queries:, checks:, check: [])
    @errors = []
    print "#{checker.class}: "

    checker_counts = yield

    # don't double-print last progress indicator
    leftover = @current_counts.total % 1000
    print_progress unless leftover == 0

    print_check_summary(checker_counts)
    print_errors
    checker_counts
  end

  # Tallies the result, emits a progress mark every 1000 results, and keeps
  # the result when it is an Error.
  def around_check(checker:, query:, check:, record:)
    outcome = yield

    @current_counts.increment(outcome.type)
    print_progress if (@current_counts.total % 1000) == 0

    @errors << outcome if outcome.is_a?(Error)
  end

  def halt(checker:, query:, error:, check: nil)
    @errors << error
  end

  def print_check_summary(counts)
    puts " #{counts.summary}"
  end

  # Prints the detail lines for each (checker, query, check, type) group.
  def print_errors
    groups = @errors.group_by { |err| [err.checker, err.query, err.check, err.type] }

    lines = groups.flat_map do |(checker, query, check, kind), members|
      case kind
      when :fail
        ids = members.map { |err| fetch_record_id(err.record) }.join(", ")
        [" #{checker.class}##{query} -> #{check} failed for records: #{ids}"]
      when :exception
        sample = members.first
        detail = [" #{checker.class}##{query} -> #{check} exception #{sample.exception.message} for #{members.size} records"]
        detail << sample.exception.full_message(highlight: false, order: :top) if sample.exception.respond_to?(:full_message)
        detail
      when :no_query_methods
        [" #{checker.class}: Did not define .query_methods"]
      when :no_queries
        [" #{checker.class} Defines .query_methods, but it didn't return any"]
      when :no_check_methods
        [" #{checker.class}: Did not define .check_methods"]
      when :no_checks
        [" #{checker.class} Defines .check_methods, but it didn't return any"]
      when :blanket
        [" #{checker.class}: Skipping because the first 20 checks all failed. Either there's a lot of bad data or there's something wrong with the checker."]
      else
        [" #{checker.class} unknown error"]
      end
    end

    puts lines
  end

  # Prints "." when everything since the last mark passed, "x" otherwise,
  # then starts a fresh tally.
  def print_progress
    mark = @current_counts.all_pass? ? "." : "x"
    print mark
    @current_counts = CountStats.new
  end
end # Default
213
+
214
# Reporter that accumulates results per checker class and check method, then
# emits them as JSON once the run has finished.
class Json < Base
  def self.help
    "Outputs JSON-formatted results to a file or stdout. Arg is filename or blank for stdout."
  end

  # arg: filename to write to, or nil to print to stdout.
  def initialize(arg:)
    @filename = arg
    @results = {}
  end

  # Starts a fresh result table for the checker, with one entry per check.
  # checks may be nil (the runner passes nil when a checker cannot be run),
  # so it is coerced to an Array instead of calling #to_h on nil.
  def around_checker(checker:, queries:, checks:, check: [])
    @results[checker.class.to_s] = Array(checks).to_h { |method| [method, new_entry] }
    yield
  end

  # Tallies the result under its check; query-level results arrive with a
  # nil check and are filed under the query name instead.
  def around_check(checker:, query:, check:, record:)
    result = yield

    entry = entry_for(checker, check || query)
    entry[:counts].increment(result.type)
    case result.type
    when :fail
      entry[:fail] << fetch_record_id(result.record)
    when :exception
      entry[:exception] << {
        id: fetch_record_id(result.record),
        message: result.exception.message,
        backtrace: result.exception.backtrace
      }
    end

    result
  end

  # Writes the collected results after the run. Returns the yielded value.
  # NOTE(review): takes `checkers` positionally whereas Base declares it as
  # a keyword; left unchanged so existing callers keep working.
  def around_run(checkers)
    total_counts = yield
    if @filename
      File.write(@filename, @results.to_json)
    else
      puts @results.to_json
    end
    total_counts
  end

  # Records why a checker was halted. An explicit `check: nil` is filed
  # under "meta", the same key the default uses.
  def halt(checker:, query:, error:, check: "meta")
    entry_for(checker, check)[:halt] = error.type
  end

  private

  # Empty result entry for one check.
  def new_entry
    {counts: CountStats.new, fail: [], exception: []}
  end

  # Finds the entry for checker/check, creating it on demand so results and
  # halts for keys that around_checker did not pre-register don't fail on nil.
  def entry_for(checker, check)
    per_checker = (@results[checker.class.to_s] ||= {})
    per_checker[check || "meta"] ||= new_entry
  end
end # Json
266
+
267
# Reporter that overrides no hooks, so it inherits Base's do-nothing ones.
class Silent < Base
  def self.help = "Prints nothing. Useful for checks that can automatically fix issues."

  # Raises ArgumentError when given an option; this reporter takes none.
  def initialize(arg:)
    return if arg.nil?

    raise ArgumentError, "does not take options"
  end
end # Silent
276
+ end
277
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Recheck
  # Every type an Error may carry.
  ERROR_TYPES = %i[fail exception blanket no_query_methods no_queries no_check_methods no_checks].freeze
  # All result types: :pass plus every error type.
  RESULT_TYPES = [:pass, *ERROR_TYPES].freeze

  # This doesn't track all the fields because Recheck is about finding errors and failures.
  # If you need more data, please tell me about your use case.
  Pass = Data.define do
    def type
      :pass
    end
  end

  # Immutable record of one failed or errored check.
  # Raises ArgumentError when `type` is not one of ERROR_TYPES.
  Error = Data.define(:type, :checker, :query, :check, :record, :exception) do
    def initialize(*args)
      super
      return if ERROR_TYPES.include?(type)

      # Name the rejected value so a bad call site is easy to find.
      raise ArgumentError, "unknown error type #{type.inspect}; expected one of #{ERROR_TYPES.inspect}"
    end
  end
end
@@ -0,0 +1,170 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Recheck
4
# Raised by Yields#raise_unless_all_reporters_yielded when an expected
# reporter never yielded in a hook.
class HookDidNotYield < RuntimeError
end

# Raised by Yields#ran when a reporter yields a second time in one hook call.
class HookYieldedTwice < RuntimeError
end

# Raised by Yields#ran for a hook that was never expected.
class UnexpectedHookYield < RuntimeError
end

# Raised by Yields#ran for a reporter that was not expected for the hook.
class UnexpectedReporterYield < RuntimeError
end
11
+
12
# Tracks, per hook, whether each expected reporter has yielded:
# @executions maps hook => {reporter => true/false}.
class Yields
  def initialize
    @executions = {}
  end

  # Registers that `reporter` is about to run `hook`, resetting its state to
  # "not yet yielded".
  def expect(hook:, reporter:)
    (@executions[hook] ||= {})[reporter] = false
    # puts "expect #{hook}, #{reporter.class.name}, id #{reporter.id}"
  end

  # Marks that `reporter` yielded in `hook`.
  # Raises UnexpectedHookYield, UnexpectedReporterYield or HookYieldedTwice
  # when the yield was not expected or already happened.
  def ran(hook:, reporter:)
    raise UnexpectedHookYield, "Ran an unexpected hook #{hook} (for reporter #{reporter})" unless @executions.key?(hook)

    pending = @executions[hook]
    raise UnexpectedReporterYield, "Ran an expected hook #{hook} for an unexpected reporter #{reporter}" unless pending.key?(reporter)
    raise HookYieldedTwice, "Ran a hook #{hook} twice for reporter #{reporter}" unless pending[reporter] == false

    # puts "ran #{hook}, #{reporter}, #{reporter.id}"
    pending[reporter] = true
  end

  # Raises HookDidNotYield if any reporter expected for `hook` has not
  # yielded. A hook nobody was expected for (e.g. a run with no reporters)
  # has nothing outstanding, so it passes instead of failing on nil.
  def raise_unless_all_reporters_yielded(hook:)
    didnt_yield = @executions.fetch(hook, {}).select { |_reporter, ran| ran == false }
    return if didnt_yield.empty?

    raise HookDidNotYield, "Reporter(s) [#{didnt_yield.keys.join(", ")}] did not yield in their #{hook} hook"
  end
end
37
+
38
# Runs every checker's queries and checks, wrapping each stage in the
# reporters' around_* hooks.
class Runner
  # Exceptions that are re-raised instead of being turned into :exception
  # results. Includes all four hook-contract errors so a misbehaving reporter
  # is never reported as a failing check.
  PASSTHROUGH_EXCEPTIONS = [
    # ours
    HookDidNotYield, HookYieldedTwice, UnexpectedHookYield, UnexpectedReporterYield,
    # Ruby's
    NoMemoryError, SignalException, SystemExit
  ].freeze

  def initialize(checkers: [], reporters: [])
    # maintain order and we want to check/report in user-provided order; Set lacks .reverse
    @checkers = checkers.uniq
    @reporters = reporters.uniq
    @yields = Yields.new
  end

  # compose reporter hooks so they each see the block fire once at 'yield'
  #
  # Wraps blk in one lambda per reporter (first reporter outermost) and calls
  # the result. Returns the frozen value of blk as captured by the outermost
  # reporter's yield, or nil if that reporter never yielded.
  def reduce(hook:, kwargs: {}, reporters: [], &blk)
    reporters.reverse.reduce(blk) do |inner, reporter|
      @yields.expect(hook:, reporter:)
      -> {
        result = nil
        reporter.public_send(hook, **kwargs) {
          @yields.ran(hook:, reporter:)
          result = inner.call.freeze
        }
        result
      }
    end.call
  end

  # only for calling from inside run()
  #
  # Records that a checker could not be run: counts `type` once, adds that to
  # the totals, and tells every reporter via halt inside around_checker.
  def cant_run(reporters:, checker:, queries:, checks:, type:)
    checker_counts = CountStats.new
    checker_counts.increment type
    @total_counts << checker_counts

    error = Error.new(checker:, query: nil, check: nil, record: nil, type:, exception: nil)
    reduce(reporters:, hook: :around_checker, kwargs: {checker:, queries:, checks:}) do
      reporters.each { |reporter| reporter.halt(checker:, query: nil, check: nil, error:) }
      checker_counts
    end
  end

  # n queries * n check methods * n records = O(1) right?
  #
  # Runs all checkers and returns the total counts.
  def run
    @total_counts = CountStats.new
    # All happy families are alike; each unhappy family is unhappy in its own way.
    pass = Pass.new

    # for want of a monad...
    reduce(reporters: @reporters, hook: :around_run, kwargs: {checkers: @checkers}) do
      # for each checker...
      @checkers.each do |checker|
        checker_counts = CountStats.new
        if !checker.class.respond_to?(:query_methods)
          cant_run(reporters: @reporters, checker:, type: :no_query_methods, queries: nil, checks: nil)
          next
        end
        if (queries = checker.class.query_methods).empty?
          cant_run(reporters: @reporters, checker:, type: :no_queries, queries:, checks: nil)
          next
        end

        if !checker.class.respond_to?(:check_methods)
          cant_run(reporters: @reporters, checker:, type: :no_check_methods, queries:, checks: nil)
          next
        end
        if (checks = checker.class.check_methods).empty?
          cant_run(reporters: @reporters, checker:, type: :no_checks, queries:, checks:)
          next
        end

        reduce(reporters: @reporters, hook: :around_checker, kwargs: {checker:, queries:, checks:}) do
          # for each query_...
          queries.each do |query|
            reduce(reporters: @reporters, hook: :around_query, kwargs: {checker:, query:, checks:}) do
              checker_counts.increment :queries
              # for each record...
              # TODO: must handle if the query method yields (find_each) OR returns (current)
              (checker.public_send(query) || []).each do |record|
                # for each check_method...
                checks.each do |check|
                  reduce(reporters: @reporters, hook: :around_check, kwargs: {checker:, query:, check:, record:}) do
                    # a truthy return from the check method is a pass
                    result = checker.public_send(check, record) ? pass : Error.new(checker:, query:, check:, record:, type: :fail, exception: nil)

                    checker_counts.increment(result.type)
                    # NOTE(review): this break appears to exit the enclosing
                    # reduce call, so reporters would get no result for this
                    # record - confirm that is intended.
                    break if checker_counts.reached_blanket_failure?

                    result
                  rescue *PASSTHROUGH_EXCEPTIONS
                    raise
                  rescue => e
                    # NOTE(review): an :exception result is not added to
                    # checker_counts here - confirm whether it should be.
                    Error.new(checker:, query:, check:, record:, type: :exception, exception: e)
                  end
                end
                @yields.raise_unless_all_reporters_yielded(hook: :around_check)

                # if the first 20 error out, halt the check method, it's probably buggy
                if checker_counts.reached_blanket_failure?
                  checker_counts.increment :blanket

                  error = Error.new(checker:, query:, check: nil, record: nil, type: :blanket, exception: nil)
                  @reporters.each { |reporter| reporter.halt(checker:, query:, check: nil, error:) }

                  break
                end
              end
              nil # yield nothing around_query
            end
            @yields.raise_unless_all_reporters_yielded(hook: :around_query)
          rescue *PASSTHROUGH_EXCEPTIONS
            raise
          rescue => e
            # puts "outer rescue: #{e.inspect}"
            # An exception outside an individual check is handed to each
            # reporter directly as a check-less :exception result.
            @reporters.each do |check_reporter|
              result = Error.new(checker:, query:, check: nil, record: nil, type: :exception, exception: e)
              check_reporter.around_check(checker:, query: query, check: nil, record: nil) { result }
            end
          end
          checker_counts
        end
        @yields.raise_unless_all_reporters_yielded(hook: :around_checker)
        @total_counts << checker_counts
      end
      @total_counts
    end
    @yields.raise_unless_all_reporters_yielded(hook: :around_run)
    @total_counts
  end
end
170
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Recheck
  # Version string of this release.
  VERSION = "0.5.0"
end
data/lib/recheck.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Recheck
  class << self
    # Tests obj.is_a?(Foo) by class name, so the gem defining Foo need not be
    # a dependency or be loaded. Returns false when the constant is not
    # defined. Raises ArgumentError unless class_name is a String.
    def unloaded_is_a?(obj, class_name)
      unless class_name.is_a?(String)
        raise ArgumentError, "unloaded_is_a? takes class_name as a String"
      end

      return false unless Object.const_defined?(class_name)

      obj.is_a?(Object.const_get(class_name))
    end
  end
end
11
+
12
+ require_relative "../vendor/optimist"
13
+ require_relative "recheck/checkers"
14
+ require_relative "recheck/cli"
15
+ require_relative "recheck/commands"
16
+ require_relative "recheck/results"
17
+ require_relative "recheck/count_stats"
18
+ require_relative "recheck/reporters"
19
+ require_relative "recheck/runner"
20
+ require_relative "recheck/version"
@@ -0,0 +1,18 @@
1
+ # This file is automatically required before running any checks.
2
+ # Customize it to load your application environment and provide utility methods.
3
+
4
+ # For Rails applications:
5
+ require File.expand_path("../config/environment", __dir__)
6
+
7
+ # For non-Rails applications, you might want to do something like:
8
+ # $LOAD_PATH.unshift File.expand_path('../../lib', __dir__)
9
+ # require 'your_app'
10
+
11
+ # Load helpers and reporters; not checkers because all loaded checkers are run
12
# Helper files sit next to this file; reporters live anywhere under reporter/.
support_patterns = [
  "#{__dir__}/*_helper*.rb",
  "#{__dir__}/reporter/**/*.rb"
]

# Require every match in sorted order.
Dir.glob(support_patterns).sort.each do |file|
  require_relative file
end
16
+
17
+ # Add any other setup here.
18
+ # You could also share code by writing a YourAppChecker class (or classes) for your checkers to inherit from.