dat-analysis 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/dat-analysis.gemspec +17 -0
- data/lib/dat/analysis.rb +446 -0
- data/lib/dat/analysis/library.rb +30 -0
- data/lib/dat/analysis/matcher.rb +43 -0
- data/lib/dat/analysis/registry.rb +50 -0
- data/lib/dat/analysis/result.rb +78 -0
- data/lib/dat/analysis/tally.rb +59 -0
- data/script/bootstrap +9 -0
- data/script/release +38 -0
- data/script/test +9 -0
- data/test/dat_analysis_subclassing_test.rb +119 -0
- data/test/dat_analysis_test.rb +822 -0
- data/test/fixtures/analysis/test-suite-experiment/matcher.rb +7 -0
- data/test/fixtures/experiment-with-classes/matcher_a.rb +5 -0
- data/test/fixtures/experiment-with-classes/matcher_b.rb +11 -0
- data/test/fixtures/experiment-with-classes/wrapper_a.rb +5 -0
- data/test/fixtures/experiment-with-classes/wrapper_b.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_w.rb +5 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_y.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_z.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_w.rb +5 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_y.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_z.rb +11 -0
- data/test/fixtures/initialize-classes/matcher_m.rb +5 -0
- data/test/fixtures/initialize-classes/matcher_n.rb +11 -0
- data/test/fixtures/initialize-classes/wrapper_m.rb +5 -0
- data/test/fixtures/initialize-classes/wrapper_n.rb +11 -0
- data/test/fixtures/invalid-matcher/matcher.rb +1 -0
- metadata +128 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
# Gem specification for dat-analysis: declares package metadata, the files
# shipped in the gem, and the development-only dependencies.
Gem::Specification.new do |gem|
  gem.name          = "dat-analysis"
  gem.version       = "1.2.0"
  gem.authors       = ["John Barnette", "Rick Bradley"]
  gem.email         = ["bradley@github.com"]
  gem.description   = "Analyze results from dat-science"
  gem.summary       = "HYPOTHESIZE THIS."
  gem.homepage      = "https://github.com/github/dat-analysis"

  # Ship every file tracked by git. Arguments are parenthesized so Ruby does
  # not emit "ambiguous first argument" warnings for `$/` and the regexp.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = []
  gem.test_files    = gem.files.grep(/^test/)
  gem.require_paths = ["lib"]

  gem.add_development_dependency "minitest"
  gem.add_development_dependency "mocha"
end
|
data/lib/dat/analysis.rb
ADDED
@@ -0,0 +1,446 @@
|
|
1
|
+
module Dat
|
2
|
+
# Public: Analyze the findings of an Experiment
|
3
|
+
#
|
4
|
+
# Typically implementors will wish to subclass this to provide their own
|
5
|
+
# implementations of the following methods suited to the environment where
|
6
|
+
# `dat-science` is being used: `#read`, `#count`, `#cook`.
|
7
|
+
#
|
8
|
+
# Example:
|
9
|
+
#
|
10
|
+
# class AnalyzeThis < Dat::Analysis
|
11
|
+
# # Read a result out of our redis stash
|
12
|
+
# def read
|
13
|
+
# RedisHandle.rpop "scienceness.#{experiment_name}.results"
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# # Query our redis stash to see how many new results are pending
|
17
|
+
# def count
|
18
|
+
# RedisHandle.llen("scienceness.#{experiment_name}.results")
|
19
|
+
# end
|
20
|
+
#
|
21
|
+
# # Deserialize a JSON-encoded result from redis
|
22
|
+
# def cook(raw_result)
|
23
|
+
# return nil unless raw_result
|
24
|
+
# JSON.parse raw_result
|
25
|
+
# end
|
26
|
+
# end
|
27
|
+
class Analysis
|
28
|
+
|
29
|
+
# Public: Returns the name of the experiment
|
30
|
+
attr_reader :experiment_name
|
31
|
+
|
32
|
+
# Public: Returns the current science mismatch result
|
33
|
+
attr_reader :current
|
34
|
+
|
35
|
+
# Public: an alias for #current
|
36
|
+
alias_method :result, :current
|
37
|
+
|
38
|
+
# Public: Returns a raw ("un-cooked") version of the current science mismatch result
|
39
|
+
attr_reader :raw
|
40
|
+
|
41
|
+
# Public: Gets/Sets the base path for loading matcher and wrapper classes.
|
42
|
+
# Note that the base path will be appended with the experiment name
|
43
|
+
# before searching for wrappers and matchers.
|
44
|
+
attr_accessor :path
|
45
|
+
|
46
|
+
# Public: Create a new Dat::Analysis object. Will load any matcher and
|
47
|
+
# wrapper classes for this experiment if `#path` is non-nil.
|
48
|
+
#
|
49
|
+
# experiment_name - The String naming the experiment to analyze.
|
50
|
+
#
|
51
|
+
# Examples
|
52
|
+
#
|
53
|
+
# analyzer = Dat::Analysis.new('bcrypt-passwords')
|
54
|
+
# => #<Dat::Analysis:...>
|
55
|
+
def initialize(experiment_name)
  @experiment_name = experiment_name
  @wrappers        = []

  # Best-effort class loading: classes are only loaded when a base path is
  # configured, and any StandardError raised while checking the path or
  # loading the files is deliberately discarded.
  begin
    load_classes unless path.nil?
  rescue StandardError
    nil
  end
end
|
61
|
+
|
62
|
+
# Public: process a raw science mismatch result to make it usable in analysis.
|
63
|
+
# This is typically overridden by subclasses to do any sort of unmarshalling
|
64
|
+
# or deserialization required.
|
65
|
+
#
|
66
|
+
# raw_result - a raw science mismatch result, typically, as returned by `#read`
|
67
|
+
#
|
68
|
+
# Returns a "cooked" science mismatch result.
|
69
|
+
def cook(raw_result)
  # Identity transform: the base implementation hands the raw result back
  # untouched. Subclasses override this to deserialize.
  cooked = raw_result
  cooked
end
|
72
|
+
|
73
|
+
# Public: fetch and summarize pending science mismatch results until an
|
74
|
+
# unrecognized result is found. Outputs summaries to STDOUT. May
|
75
|
+
# modify current mismatch result.
|
76
|
+
#
|
77
|
+
# Returns the value of `#summarize_unknown_result` (count of pending results, or nil if none). Leaves current mismatch result set to first unknown result,
|
78
|
+
# if one is found.
|
79
|
+
def analyze
  # Runs inside `track` so a fresh tally is started for this pass.
  # Returns whatever `summarize_unknown_result` returns.
  track do
    hit_unknown = false

    while more?
      fetch
      if unknown?
        hit_unknown = true
        break
      end
      summarize
      count_as_seen identify
    end

    if hit_unknown
      print "\n"
    else
      fetch # clear current result
    end

    summarize_unknown_result
  end
end
|
97
|
+
|
98
|
+
# Public: skip pending mismatch results not satisfying the provided block.
|
99
|
+
# May modify current mismatch result.
|
100
|
+
#
|
101
|
+
# &block - block accepting a prepared mismatch result and returning true
|
102
|
+
# or false.
|
103
|
+
#
|
104
|
+
# Examples:
|
105
|
+
#
|
106
|
+
# skip do |result|
|
107
|
+
# result.user.staff?
|
108
|
+
# end
|
109
|
+
#
|
110
|
+
# skip do |result|
|
111
|
+
# result['group']['id'] > 100 && result['url'] =~ %r{/admin}
|
112
|
+
# end
|
113
|
+
#
|
114
|
+
# skip do |result|
|
115
|
+
# result['timestamp'].to_i > 1.hour.ago
|
116
|
+
# end
|
117
|
+
#
|
118
|
+
# Returns nil if no satisfying results are found. Current result will be nil.
|
119
|
+
# Returns count of remaining results if a satisfying result found. Leaves
|
120
|
+
# current result set to first result for which block returns a truthy value.
|
121
|
+
def skip(&block)
  raise ArgumentError, "a block is required" unless block_given?

  # Pull results one at a time; stop at the first one the block accepts and
  # report how many results are still pending.
  while more?
    fetch
    accepted = yield(current)
    next unless accepted
    return count
  end

  # clear current result since nothing of interest was found.
  @identified = nil
  @current = nil
end
|
132
|
+
|
133
|
+
# Public: Are additional science mismatch results available?
|
134
|
+
#
|
135
|
+
# Returns true if more results can be fetched.
|
136
|
+
# Returns false if no more results can be fetched.
|
137
|
+
def more?
  # Anything other than a zero pending count means results remain.
  pending = count
  !(pending == 0)
end
|
140
|
+
|
141
|
+
# Public: retrieve a new science mismatch result, as returned by `#read`.
|
142
|
+
#
|
143
|
+
# Returns nil if no new science mismatch results are available.
|
144
|
+
# Returns a cooked and wrapped science mismatch result if available.
|
145
|
+
# Raises NoMethodError if `#read` is not defined on this class.
|
146
|
+
def fetch
  # Forget the matcher cached for the previous result before reading anew.
  @identified = nil
  @raw = read
  # Only a truthy raw result is prepared; otherwise current becomes nil.
  @current = (prepare(raw) if raw)
end
|
151
|
+
|
152
|
+
# Public: Return a readable representation of the current science mismatch
|
153
|
+
# result. This will utilize the `#readable` methods declared on a matcher
|
154
|
+
# which identifies the current result.
|
155
|
+
#
|
156
|
+
# Returns a string containing a readable representation of the current
|
157
|
+
# science mismatch result.
|
158
|
+
# Returns nil if there is no current result.
|
159
|
+
def summary
  return nil unless current

  # Prefer the identifying matcher's own rendering when it offers one;
  # otherwise fall back to this analyzer's default `readable`.
  matcher = identify
  if matcher && matcher.respond_to?(:readable)
    matcher.readable
  else
    readable
  end
end
|
165
|
+
|
166
|
+
# Public: Print a readable summary for the current science mismatch result
|
167
|
+
# to STDOUT.
|
168
|
+
#
|
169
|
+
# Returns nil.
|
170
|
+
def summarize
  # Write the readable summary of the current result to standard output.
  text = summary
  puts text
end
|
173
|
+
|
174
|
+
# Public: Is the current science mismatch result unidentifiable?
|
175
|
+
#
|
176
|
+
# Returns nil if current result is nil.
|
177
|
+
# Returns true if no matcher can identify current result.
|
178
|
+
# Returns false if a single matcher can identify the current result.
|
179
|
+
# Raises RuntimeError if multiple matchers can identify the current result.
|
180
|
+
def unknown?
  # Without a current result the question has no answer.
  return nil if current.nil?

  identify ? false : true
end
|
184
|
+
|
185
|
+
# Public: Find a matcher which can identify the current science mismatch result.
|
186
|
+
#
|
187
|
+
# Returns nil if current result is nil.
|
188
|
+
# Returns matcher class if a single matcher can identify current result.
|
189
|
+
# Returns false if no matcher can identify the current result.
|
190
|
+
# Raises RuntimeError if multiple matchers can identify the current result.
|
191
|
+
def identify
  # Reuse the cached matcher (only truthy values are cached, so an
  # unidentified result is looked up again on every call).
  return @identified if @identified

  candidates = registry.identify(current)
  # More than one candidate is reported via `report_multiple_matchers`.
  report_multiple_matchers(candidates) if candidates.size > 1

  @identified = candidates.first
end
|
201
|
+
|
202
|
+
# Internal: Output failure message about duplicate matchers for a science
|
203
|
+
# mismatch result.
|
204
|
+
#
|
205
|
+
# dupes - Array of Dat::Analysis::Matcher instances, initialized with a result
|
206
|
+
#
|
207
|
+
# Raises RuntimeError.
|
208
|
+
def report_multiple_matchers(dupes)
  puts "\n\nMultiple matchers identified result:"
  puts

  # Number each conflicting matcher from 1, using its own `readable` output
  # when available and the analyzer's default otherwise.
  dupes.each_with_index do |matcher, index|
    print " #{index + 1}. "
    described = matcher.respond_to?(:readable) ? matcher.readable : readable
    puts described
  end

  puts
  raise "Result cannot be uniquely identified."
end
|
224
|
+
|
225
|
+
# Internal: cook and wrap a raw science mismatch result.
|
226
|
+
#
|
227
|
+
# raw_result - an unmodified result, typically, as returned by `#read`
|
228
|
+
#
|
229
|
+
# Returns the science mismatch result processed by `#cook` and then by `#wrap`.
|
230
|
+
def prepare(raw_result)
  # Two-stage pipeline: cook the raw result first, then wrap the cooked one.
  cooked = cook(raw_result)
  wrap(cooked)
end
|
233
|
+
|
234
|
+
# Internal: wrap a "cooked" science mismatch result with any known wrapper methods
|
235
|
+
#
|
236
|
+
# cooked_result - a "cooked" mismatch result, as returned by `#cook`
|
237
|
+
#
|
238
|
+
# Returns the cooked science mismatch result, which will now respond to any
|
239
|
+
# instance methods found on our known wrapper classes
|
240
|
+
def wrap(cooked_result)
  # Extends the cooked result in place with the default result methods and,
  # when wrapper classes are registered, with dynamic dispatch to them.
  #
  # cooked_result - a "cooked" mismatch result, as returned by `#cook`
  #
  # Returns the same (now extended) cooked_result object.
  cooked_result.extend Dat::Analysis::Result::DefaultMethods
  return cooked_result if wrappers.empty?

  # The singleton methods below reach the analyzer through this ivar so the
  # wrapper list is looked up at call time, not frozen at wrap time.
  cooked_result.instance_variable_set(:@analyzer, self)

  class << cooked_result
    # Dispatch unknown methods to the first wrapper class publicly defining
    # them. Arguments AND any block are forwarded to the wrapper method.
    # NOTE: returns nil when no wrapper handles the method (kept for
    # backward compatibility) rather than raising NoMethodError.
    define_method(:method_missing) do |meth, *args, &blk|
      handler = @analyzer.wrappers.detect { |wrapper| wrapper.public_method_defined?(meth) }
      handler ? handler.new(self).send(meth, *args, &blk) : nil
    end

    # Keep `respond_to?` truthful for methods served by `method_missing`.
    define_method(:respond_to_missing?) do |meth, include_private = false|
      handler = @analyzer.wrappers.detect { |wrapper| wrapper.public_method_defined?(meth) }
      handler ? true : super(meth, include_private)
    end
  end

  cooked_result
end
|
261
|
+
|
262
|
+
# Internal: Return the *default* readable representation of the current science
|
263
|
+
# mismatch result. This method is typically overridden by subclasses or defined
|
264
|
+
# in matchers which wish to customize the readable representation of a science
|
265
|
+
# mismatch result. This implementation is provided as a default.
|
266
|
+
#
|
267
|
+
# Returns a string containing a readable representation of the current
|
268
|
+
# science mismatch result.
|
269
|
+
def readable
  # Builds the default multi-line text rendering of the current result:
  # a header, durations, control/candidate outcome, then all extra fields.
  #
  # Returns a String with one entry per line, joined with "\n".
  control   = current['control']
  candidate = current['candidate']
  synopsis  = []

  synopsis << "Experiment %-20s first: %10s @ %s" % [
    "[#{current['experiment']}]", current['first'], current['timestamp']
  ]
  synopsis << "Duration: control (%6.2f) | candidate (%6.2f)" % [
    control['duration'], candidate['duration']
  ]

  synopsis << ""

  if control['exception']
    synopsis << "Control raised exception:\n\t#{control['exception'].inspect}"
  else
    synopsis << "Control value: [#{control['value']}]"
  end

  if candidate['exception']
    synopsis << "Candidate raised exception:\n\t#{candidate['exception'].inspect}"
  else
    synopsis << "Candidate value: [#{candidate['value']}]"
  end

  synopsis << ""

  remaining = current.keys - ['control', 'candidate', 'experiment', 'first', 'timestamp']
  remaining.sort.each do |key|
    value = current[key]
    if value.respond_to?(:keys)
      # `key_sort` hands back key *names* as Strings. Map each name back to
      # the real key so non-String keys (e.g. Symbols) still find their value
      # instead of always printing nil.
      actual_keys = {}
      value.keys.each do |k|
        actual_keys[k.to_s] = k unless actual_keys.key?(k.to_s)
      end

      # do ordered sorting of hash keys
      subkeys = key_sort(value.keys)
      synopsis << "\t%15s => {" % [ key ]
      subkeys.each do |subkey|
        lookup = actual_keys.fetch(subkey, subkey)
        synopsis << "\t%15s %15s => %-20s" % [ '', subkey, value[lookup].inspect ]
      end
      synopsis << "\t%15s }" % [ '' ]
    else
      synopsis << "\t%15s => %-20s" % [ key, value ]
    end
  end

  synopsis.join "\n"
end
|
312
|
+
|
313
|
+
def preferred_fields
  # Field names that `key_sort` lists first, in this order.
  ['id', 'name', 'title', 'owner', 'description', 'login', 'username']
end
|
316
|
+
|
317
|
+
def key_sort(keys)
  # Returns the key names as Strings: preferred fields first (in
  # `preferred_fields` order), then the rest in their original order.
  names   = keys.map { |key| key.to_s }
  favored = preferred_fields

  leading  = favored.select { |field| names.include?(field) }
  trailing = names.reject { |name| favored.include?(name) }
  leading + trailing
end
|
321
|
+
|
322
|
+
# Public: Which matcher classes are known?
|
323
|
+
#
|
324
|
+
# Returns: list of Dat::Analysis::Matcher classes known to this analyzer.
|
325
|
+
def matchers
  # Delegates to the registry, which owns the list of matcher classes.
  known = registry
  known.matchers
end
|
328
|
+
|
329
|
+
# Public: Which wrapper classes are known?
|
330
|
+
#
|
331
|
+
# Returns: list of Dat::Analysis::Result classes known to this analyzer.
|
332
|
+
def wrappers
  # Delegates to the registry, which owns the list of wrapper classes.
  known = registry
  known.wrappers
end
|
335
|
+
|
336
|
+
# Public: Add a matcher or wrapper class to this analyzer.
|
337
|
+
#
|
338
|
+
# klass - a subclass of either Dat::Analysis::Matcher or Dat::Analysis::Result
|
339
|
+
# to be registered with this analyzer.
|
340
|
+
#
|
341
|
+
# Returns the list of known matchers and wrappers for this analyzer.
|
342
|
+
def add(klass)
  # The class registers itself: it is handed this analyzer and decides how
  # to add itself. Returns whatever `add_to_analyzer` returns.
  analyzer = self
  klass.add_to_analyzer(analyzer)
end
|
345
|
+
|
346
|
+
# Public: Load matcher and wrapper classes from the library for our experiment.
|
347
|
+
#
|
348
|
+
# Returns: a list of loaded matcher and wrapper classes.
|
349
|
+
def load_classes
  # Load every experiment file inside the library's `select_classes` block,
  # then register each class it hands back with this analyzer.
  discovered = library.select_classes do
    experiment_files.each do |source_file|
      load(source_file)
    end
  end

  discovered.map { |klass| add(klass) }
end
|
356
|
+
|
357
|
+
# Internal: Print to STDOUT a readable summary of the current (unknown) science
|
358
|
+
# mismatch result, as well a summary of the tally of identified science mismatch
|
359
|
+
# results analyzed to this point.
|
360
|
+
#
|
361
|
+
# Returns nil if there are no pending science mismatch results.
|
362
|
+
# Returns the number of pending science mismatch results.
|
363
|
+
def summarize_unknown_result
  tally.summarize

  # Announce either the first unidentifiable result (followed by its
  # summary) or the fact that none was found.
  unidentified = current
  puts(unidentified ? "\nFirst unidentifiable result:\n\n" : "\nNo unidentifiable results found. \\m/\n")
  summarize if unidentified

  # Report the pending count only when results remain.
  return nil unless more?
  count
end
|
374
|
+
|
375
|
+
# Internal: keep a tally of analyzed science mismatch results.
|
376
|
+
#
|
377
|
+
# &block: block which will presumably call `#count_as_seen` to update
|
378
|
+
# tallies of identified science mismatch results.
|
379
|
+
#
|
380
|
+
# Returns: value returned by &block.
|
381
|
+
def track(&block)
  # Start from a brand-new tally, then run the caller's block and hand back
  # its value.
  @tally = Tally.new

  yield
end
|
385
|
+
|
386
|
+
# Internal: Increment count for an object in an ongoing tally.
|
387
|
+
#
|
388
|
+
# obj - an Object for which we are recording occurrence counts
|
389
|
+
#
|
390
|
+
# Returns updated tally count for obj.
|
391
|
+
def count_as_seen(obj)
  # Tally by class name; anonymous classes have no name, so fall back to
  # their inspect string.
  klass = obj.class
  label = klass.name || klass.inspect
  tally.count(label)
end
|
394
|
+
|
395
|
+
# Internal: The current Tally instance. Cached between calls to `#track`.
|
396
|
+
#
|
397
|
+
# Returns the current Tally instance object.
|
398
|
+
def tally
  # Lazily create the tally on first use; later calls reuse the instance
  # until `track` replaces it.
  @tally ||= Tally.new
  @tally
end
|
401
|
+
|
402
|
+
# Internal: handle to the library, used for collecting newly discovered
|
403
|
+
# matcher and wrapper classes.
|
404
|
+
#
|
405
|
+
# Returns: handle to the library class.
|
406
|
+
def library
  # The library is the class itself, not an instance.
  handle = Dat::Analysis::Library
  handle
end
|
409
|
+
|
410
|
+
# Internal: registry of wrapper and matcher classes known to this analyzer.
|
411
|
+
#
|
412
|
+
# Returns a (cached between calls) handle to our registry instance.
|
413
|
+
def registry
  # Lazily create one registry per analyzer and reuse it afterwards.
  @registry ||= Dat::Analysis::Registry.new
  @registry
end
|
416
|
+
|
417
|
+
# Internal: which class files are candidates for loading matchers and wrappers
|
418
|
+
# for this experiment?
|
419
|
+
#
|
420
|
+
# Returns: sorted Array of paths to ruby files which may contain declarations
|
421
|
+
# of matcher and wrapper classes for this experiment.
|
422
|
+
def experiment_files
  # Glob for Ruby files directly under <path>/<experiment_name>/ and return
  # them in sorted order.
  pattern = File.join(path, experiment_name, '*.rb')
  found = Dir[pattern]
  found.sort
end
|
425
|
+
|
426
|
+
# Internal: Add a matcher class to this analyzer's registry.
|
427
|
+
# (Intended to be called only by Dat::Analysis::Matcher and subclasses)
|
428
|
+
def add_matcher(matcher_class)
  # Announce the class on STDOUT, then hand it to the registry. Returns
  # whatever the registry's `add` returns.
  announcement = "Loading matcher class [#{matcher_class}]"
  puts announcement
  registry.add(matcher_class)
end
|
432
|
+
|
433
|
+
# Internal: Add a wrapper class to this analyzer's registry.
|
434
|
+
# (Intended to be called only by Dat::Analysis::Result and its subclasses)
|
435
|
+
def add_wrapper(wrapper_class)
  # Announce the class on STDOUT, then hand it to the registry. Returns
  # whatever the registry's `add` returns.
  announcement = "Loading results wrapper class [#{wrapper_class}]"
  puts announcement
  registry.add(wrapper_class)
end
|
439
|
+
end
|
440
|
+
end
|
441
|
+
|
442
|
+
require 'dat/analysis/library'
|
443
|
+
require 'dat/analysis/matcher'
|
444
|
+
require 'dat/analysis/result'
|
445
|
+
require 'dat/analysis/registry'
|
446
|
+
require 'dat/analysis/tally'
|