dat-analysis 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/dat-analysis.gemspec +17 -0
- data/lib/dat/analysis.rb +446 -0
- data/lib/dat/analysis/library.rb +30 -0
- data/lib/dat/analysis/matcher.rb +43 -0
- data/lib/dat/analysis/registry.rb +50 -0
- data/lib/dat/analysis/result.rb +78 -0
- data/lib/dat/analysis/tally.rb +59 -0
- data/script/bootstrap +9 -0
- data/script/release +38 -0
- data/script/test +9 -0
- data/test/dat_analysis_subclassing_test.rb +119 -0
- data/test/dat_analysis_test.rb +822 -0
- data/test/fixtures/analysis/test-suite-experiment/matcher.rb +7 -0
- data/test/fixtures/experiment-with-classes/matcher_a.rb +5 -0
- data/test/fixtures/experiment-with-classes/matcher_b.rb +11 -0
- data/test/fixtures/experiment-with-classes/wrapper_a.rb +5 -0
- data/test/fixtures/experiment-with-classes/wrapper_b.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_w.rb +5 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_y.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_z.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_w.rb +5 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_y.rb +11 -0
- data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_z.rb +11 -0
- data/test/fixtures/initialize-classes/matcher_m.rb +5 -0
- data/test/fixtures/initialize-classes/matcher_n.rb +11 -0
- data/test/fixtures/initialize-classes/wrapper_m.rb +5 -0
- data/test/fixtures/initialize-classes/wrapper_n.rb +11 -0
- data/test/fixtures/invalid-matcher/matcher.rb +1 -0
- metadata +128 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
# Gem specification for dat-analysis.
Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name        = "dat-analysis"
  spec.version     = "1.2.0"
  spec.authors     = ["John Barnette", "Rick Bradley"]
  spec.email       = ["bradley@github.com"]
  spec.description = "Analyze results from dat-science"
  spec.summary     = "HYPOTHESIZE THIS."
  spec.homepage    = "https://github.com/github/dat-analysis"

  # Packaged files are whatever git tracks, one path per record of the
  # `git ls-files` output; test files are the tracked paths starting with
  # "test".
  spec.files         = `git ls-files`.split($/)
  spec.executables   = []
  spec.test_files    = spec.files.grep(/^test/)
  spec.require_paths = ["lib"]

  # Only needed to run the test suite.
  spec.add_development_dependency("minitest")
  spec.add_development_dependency("mocha")
end
|
data/lib/dat/analysis.rb
ADDED
@@ -0,0 +1,446 @@
|
|
1
|
+
module Dat
  # Public: Analyze the findings of an Experiment
  #
  # Typically implementors will wish to subclass this to provide their own
  # implementations of the following methods suited to the environment where
  # `dat-science` is being used: `#read`, `#count`, `#cook`.
  #
  # Note that `#read` and `#count` are not defined by this class at all; they
  # are called by `#fetch` and `#more?` and must be supplied by a subclass.
  #
  # Example:
  #
  #   class AnalyzeThis < Dat::Analysis
  #     # Read a result out of our redis stash
  #     def read
  #       RedisHandle.rpop "scienceness.#{experiment_name}.results"
  #     end
  #
  #     # Query our redis stash to see how many new results are pending
  #     def count
  #       RedisHandle.llen("scienceness.#{experiment_name}.results")
  #     end
  #
  #     # Deserialize a JSON-encoded result from redis
  #     def cook(raw_result)
  #       return nil unless raw_result
  #       JSON.parse raw_result
  #     end
  #   end
  class Analysis

    # Public: Returns the name of the experiment
    attr_reader :experiment_name

    # Public: Returns the current science mismatch result
    attr_reader :current

    # Public: an alias for #current
    alias_method :result, :current

    # Public: Returns a raw ("un-cooked") version of the current science mismatch result
    attr_reader :raw

    # Public: Gets/Sets the base path for loading matcher and wrapper classes.
    # Note that the base path will be appended with the experiment name
    # before searching for wrappers and matchers.
    attr_accessor :path

    # Public: Create a new Dat::Analysis object.  Will load any matcher and
    # wrapper classes for this experiment if `#path` is non-nil.
    #
    # Loading is best-effort: a StandardError raised while loading does not
    # prevent the analyzer from being created, but it is reported on STDERR
    # so that a broken matcher file does not silently leave the analyzer
    # without matchers.
    #
    # experiment_name - The String naming the experiment to analyze.
    #
    # Examples
    #
    #   analyzer = Dat::Analysis.new('bcrypt-passwords')
    #   => #<Dat::Analysis:...>
    def initialize(experiment_name)
      @experiment_name = experiment_name
      # Not read anywhere in this class (`#wrappers` consults the registry);
      # retained in case subclasses rely on it.
      @wrappers = []

      begin
        load_classes unless path.nil?
      rescue StandardError => e
        warn "dat-analysis: unable to load classes for experiment " \
             "[#{experiment_name}]: #{e.class}: #{e.message}"
      end
    end

    # Public: process a raw science mismatch result to make it usable in analysis.
    # This is typically overridden by subclasses to do any sort of unmarshalling
    # or deserialization required.
    #
    # raw_result - a raw science mismatch result, typically, as returned by `#read`
    #
    # Returns a "cooked" science mismatch result.  This default implementation
    # returns raw_result unchanged.
    def cook(raw_result)
      raw_result
    end

    # Public: fetch and summarize pending science mismatch results until an
    # unrecognized result is found.  Outputs summaries to STDOUT.  May
    # modify current mismatch result.
    #
    # Returns the value of `#summarize_unknown_result`: the number of pending
    # results, or nil when none are pending.  Leaves current mismatch result
    # set to first unknown result, if one is found.
    def analyze
      track do
        while true
          unless more?
            fetch # clear current result
            return summarize_unknown_result
          end

          fetch
          break if unknown?
          summarize
          count_as_seen identify
        end

        print "\n"
        summarize_unknown_result
      end
    end

    # Public: skip pending mismatch results not satisfying the provided block.
    # May modify current mismatch result.
    #
    # &block - block accepting a prepared mismatch result and returning true
    #          or false.
    #
    # Examples:
    #
    #   skip do |result|
    #     result.user.staff?
    #   end
    #
    #   skip do |result|
    #     result['group']['id'] > 100 && result['url'] =~ %r{/admin}
    #   end
    #
    #   skip do |result|
    #     result['timestamp'].to_i > 1.hour.ago
    #   end
    #
    # Returns nil if no satisfying results are found.  Current result will be nil.
    # Returns count of remaining results if a satisfying result found.  Leaves
    # current result set to first result for which block returns a truthy value.
    # Raises ArgumentError if no block is given.
    def skip(&block)
      raise ArgumentError, "a block is required" unless block_given?

      while more?
        fetch
        return count if yield(current)
      end

      # clear current result since nothing of interest was found.
      @current = @identified = nil
    end

    # Public: Are additional science mismatch results available?
    #
    # Returns true if more results can be fetched.
    # Returns false if no more results can be fetched.
    def more?
      count != 0
    end

    # Public: retrieve a new science mismatch result, as returned by `#read`.
    # Also forgets any matcher cached for the previous result.
    #
    # Returns nil if no new science mismatch results are available.
    # Returns a cooked and wrapped science mismatch result if available.
    # Raises NoMethodError if `#read` is not defined on this class.
    def fetch
      @identified = nil
      @raw = read
      @current = raw ? prepare(raw) : nil
    end

    # Public: Return a readable representation of the current science mismatch
    # result.  This will utilize the `#readable` methods declared on a matcher
    # which identifies the current result.
    #
    # Returns a string containing a readable representation of the current
    # science mismatch result.
    # Returns nil if there is no current result.
    def summary
      return nil unless current

      recognizer = identify
      if recognizer && recognizer.respond_to?(:readable)
        recognizer.readable
      else
        # no matcher, or a matcher without its own formatting: use the default
        readable
      end
    end

    # Public: Print a readable summary for the current science mismatch result
    # to STDOUT.
    #
    # Returns nil.
    def summarize
      puts summary
    end

    # Public: Is the current science mismatch result unidentifiable?
    #
    # Returns nil if current result is nil.
    # Returns true if no matcher can identify current result.
    # Returns false if a single matcher can identify the current result.
    # Raises RuntimeError if multiple matchers can identify the current result.
    def unknown?
      return nil if current.nil?
      !identify
    end

    # Public: Find a matcher which can identify the current science mismatch result.
    # A successful identification is cached until the next `#fetch`.
    #
    # Returns nil if current result is nil (the registry is not consulted).
    # Returns the matcher reported by the registry if a single matcher can
    #   identify current result.
    # Returns nil if no matcher can identify the current result.
    # Raises RuntimeError if multiple matchers can identify the current result.
    def identify
      return @identified if @identified
      return nil if current.nil?

      results = registry.identify(current)
      report_multiple_matchers(results) if results.size > 1

      @identified = results.first
    end

    # Internal: Output failure message about duplicate matchers for a science
    # mismatch result.
    #
    # dupes - Array of Dat::Analysis::Matcher instances, initialized with a result
    #
    # Raises RuntimeError.
    def report_multiple_matchers(dupes)
      puts "\n\nMultiple matchers identified result:"
      puts

      dupes.each_with_index do |matcher, i|
        print " #{i+1}. "
        # fall back to the analyzer's default formatting for plain matchers
        puts(matcher.respond_to?(:readable) ? matcher.readable : readable)
      end

      puts
      raise "Result cannot be uniquely identified."
    end

    # Internal: cook and wrap a raw science mismatch result.
    #
    # raw_result - an unmodified result, typically, as returned by `#read`
    #
    # Returns the science mismatch result processed by `#cook` and then by `#wrap`.
    def prepare(raw_result)
      wrap(cook(raw_result))
    end

    # Internal: wrap a "cooked" science mismatch result with any known wrapper methods
    #
    # The result is extended in place.  When at least one wrapper class is
    # registered at the time of the call, a singleton `method_missing` is
    # installed which delegates unknown messages (arguments and block
    # included) to the first wrapper class publicly defining that method.
    # Messages no wrapper defines evaluate to nil rather than raising.
    #
    # cooked_result - a "cooked" mismatch result, as returned by `#cook`
    #
    # Returns the cooked science mismatch result, which will now respond to any
    # instance methods found on our known wrapper classes
    def wrap(cooked_result)
      cooked_result.extend Dat::Analysis::Result::DefaultMethods

      unless wrappers.empty?
        # the singleton method_missing below reaches the analyzer through this ivar
        cooked_result.instance_variable_set(:@analyzer, self)

        cooked_result.define_singleton_method(:method_missing) do |meth, *args, &blk|
          wrapper = @analyzer.wrappers.find { |klass| klass.public_method_defined?(meth) }
          wrapper && wrapper.new(self).send(meth, *args, &blk)
        end
      end

      cooked_result
    end

    # Internal: Return the *default* readable representation of the current science
    # mismatch result.  This method is typically overridden by subclasses or defined
    # in matchers which wish to customize the readable representation of a science
    # mismatch result.  This implementation is provided as a default.
    #
    # The current result is read through String keys ('experiment', 'first',
    # 'timestamp', 'control', 'candidate'); every other top-level key is
    # listed afterwards in sorted order, with hash-like values expanded one
    # level deep.
    #
    # Returns a string containing a readable representation of the current
    # science mismatch result.
    def readable
      synopsis = []

      synopsis << "Experiment %-20s first: %10s @ %s" % [
        "[#{current['experiment']}]", current['first'], current['timestamp']
      ]
      synopsis << "Duration:  control (%6.2f) | candidate (%6.2f)" % [
        current['control']['duration'], current['candidate']['duration']
      ]

      synopsis << ""

      if current['control']['exception']
        synopsis << "Control raised exception:\n\t#{current['control']['exception'].inspect}"
      else
        synopsis << "Control value:   [#{current['control']['value']}]"
      end

      if current['candidate']['exception']
        synopsis << "Candidate raised exception:\n\t#{current['candidate']['exception'].inspect}"
      else
        synopsis << "Candidate value: [#{current['candidate']['value']}]"
      end

      synopsis << ""

      remaining = current.keys - ['control', 'candidate', 'experiment', 'first', 'timestamp']
      remaining.sort.each do |key|
        value = current[key]

        if value.respond_to?(:keys)
          # `key_sort` hands back stringified names, so index the values by
          # the same names; looking them up directly would yield nil for
          # Symbol (or other non-String) keys.
          by_name = {}
          value.keys.each { |subkey| by_name[subkey.to_s] = value[subkey] }

          # do ordered sorting of hash keys
          synopsis << "\t%15s => {" % [ key ]
          key_sort(value.keys).each do |subkey|
            synopsis << "\t%15s       %15s => %-20s" % [ '', subkey, by_name[subkey].inspect ]
          end
          synopsis << "\t%15s    }" % [ '' ]
        else
          synopsis << "\t%15s => %-20s" % [ key, value ]
        end
      end

      synopsis.join "\n"
    end

    # Internal: names of sub-hash keys which `#key_sort` lists first, in
    # this order.
    #
    # Returns an Array of Strings.
    def preferred_fields
      %w(id name title owner description login username)
    end

    # Internal: order a list of keys for display.
    #
    # keys - Array of keys; each is converted with `#to_s`.
    #
    # Returns an Array of Strings: the preferred fields that are present (in
    # `#preferred_fields` order), followed by the remaining keys in their
    # original order.
    def key_sort(keys)
      names = keys.map(&:to_s)
      (preferred_fields & names) + (names - preferred_fields)
    end

    # Public: Which matcher classes are known?
    #
    # Returns: list of Dat::Analysis::Matcher classes known to this analyzer.
    def matchers
      registry.matchers
    end

    # Public: Which wrapper classes are known?
    #
    # Returns: list of Dat::Analysis::Result classes known to this analyzer.
    def wrappers
      registry.wrappers
    end

    # Public: Add a matcher or wrapper class to this analyzer.
    #
    # klass - a subclass of either Dat::Analysis::Matcher or Dat::Analysis::Result
    #         to be registered with this analyzer.
    #
    # Returns the list of known matchers and wrappers for this analyzer.
    def add(klass)
      klass.add_to_analyzer(self)
    end

    # Public: Load matcher and wrapper classes from the library for our experiment.
    #
    # Returns: a list of loaded matcher and wrapper classes.
    def load_classes
      new_classes = library.select_classes do
        experiment_files.each { |file| load file }
      end

      new_classes.map { |klass| add klass }
    end

    # Internal: Print to STDOUT a readable summary of the current (unknown) science
    # mismatch result, as well a summary of the tally of identified science mismatch
    # results analyzed to this point.
    #
    # Returns nil if there are no pending science mismatch results.
    # Returns the number of pending science mismatch results.
    def summarize_unknown_result
      tally.summarize
      if current
        puts "\nFirst unidentifiable result:\n\n"
        summarize
      else
        puts "\nNo unidentifiable results found. \\m/\n"
      end

      more? ? count : nil
    end

    # Internal: keep a tally of analyzed science mismatch results.
    # Starts a fresh Tally before yielding.
    #
    # &block: block which will presumably call `#count_as_seen` to update
    #         tallies of identified science mismatch results.
    #
    # Returns: value returned by &block.
    def track(&block)
      @tally = Tally.new
      yield
    end

    # Internal: Increment count for an object in an ongoing tally.
    # Objects are tallied under their class name (or the class's `#inspect`
    # output when the class is anonymous).
    #
    # obj  - an Object for which we are recording occurrence counts
    #
    # Returns updated tally count for obj.
    def count_as_seen(obj)
      tally.count(obj.class.name || obj.class.inspect)
    end

    # Internal: The current Tally instance.  Cached between calls to `#track`.
    #
    # Returns the current Tally instance object.
    def tally
      @tally ||= Tally.new
    end

    # Internal: handle to the library, used for collecting newly discovered
    # matcher and wrapper classes.
    #
    # Returns: handle to the library class.
    def library
      Dat::Analysis::Library
    end

    # Internal: registry of wrapper and matcher classes known to this analyzer.
    #
    # Returns a (cached between calls) handle to our registry instance.
    def registry
      @registry ||= Dat::Analysis::Registry.new
    end

    # Internal: which class files are candidates for loading matchers and wrappers
    # for this experiment?
    #
    # Returns: sorted Array of paths to ruby files which may contain declarations
    # of matcher and wrapper classes for this experiment.
    def experiment_files
      Dir[File.join(path, experiment_name, '*.rb')].sort
    end

    # Internal: Add a matcher class to this analyzer's registry.
    # (Intended to be called only by Dat::Analysis::Matcher and subclasses)
    def add_matcher(matcher_class)
      puts "Loading matcher class [#{matcher_class}]"
      registry.add matcher_class
    end

    # Internal: Add a wrapper class to this analyzer's registry.
    # (Intended to be called only by Dat::Analysis::Result and its subclasses)
    def add_wrapper(wrapper_class)
      puts "Loading results wrapper class [#{wrapper_class}]"
      registry.add wrapper_class
    end
  end
end
|
441
|
+
|
442
|
+
require 'dat/analysis/library'
|
443
|
+
require 'dat/analysis/matcher'
|
444
|
+
require 'dat/analysis/result'
|
445
|
+
require 'dat/analysis/registry'
|
446
|
+
require 'dat/analysis/tally'
|