dat-analysis 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +423 -0
  5. data/dat-analysis.gemspec +17 -0
  6. data/lib/dat/analysis.rb +446 -0
  7. data/lib/dat/analysis/library.rb +30 -0
  8. data/lib/dat/analysis/matcher.rb +43 -0
  9. data/lib/dat/analysis/registry.rb +50 -0
  10. data/lib/dat/analysis/result.rb +78 -0
  11. data/lib/dat/analysis/tally.rb +59 -0
  12. data/script/bootstrap +9 -0
  13. data/script/release +38 -0
  14. data/script/test +9 -0
  15. data/test/dat_analysis_subclassing_test.rb +119 -0
  16. data/test/dat_analysis_test.rb +822 -0
  17. data/test/fixtures/analysis/test-suite-experiment/matcher.rb +7 -0
  18. data/test/fixtures/experiment-with-classes/matcher_a.rb +5 -0
  19. data/test/fixtures/experiment-with-classes/matcher_b.rb +11 -0
  20. data/test/fixtures/experiment-with-classes/wrapper_a.rb +5 -0
  21. data/test/fixtures/experiment-with-classes/wrapper_b.rb +11 -0
  22. data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_w.rb +5 -0
  23. data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_y.rb +11 -0
  24. data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_z.rb +11 -0
  25. data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_w.rb +5 -0
  26. data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_y.rb +11 -0
  27. data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_z.rb +11 -0
  28. data/test/fixtures/initialize-classes/matcher_m.rb +5 -0
  29. data/test/fixtures/initialize-classes/matcher_n.rb +11 -0
  30. data/test/fixtures/initialize-classes/wrapper_m.rb +5 -0
  31. data/test/fixtures/initialize-classes/wrapper_n.rb +11 -0
  32. data/test/fixtures/invalid-matcher/matcher.rb +1 -0
  33. metadata +128 -0
@@ -0,0 +1,17 @@
1
# Gem packaging metadata for dat-analysis.
#
# The file list comes from whatever git currently tracks, so this must be
# evaluated from inside a git checkout of the project.
Gem::Specification.new do |spec|
  spec.name        = "dat-analysis"
  spec.version     = "1.2.0"
  spec.authors     = ["John Barnette", "Rick Bradley"]
  spec.email       = ["bradley@github.com"]
  spec.description = "Analyze results from dat-science"
  spec.summary     = "HYPOTHESIZE THIS."
  spec.homepage    = "https://github.com/github/dat-analysis"

  # One entry per tracked file, split on the input record separator.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = []
  # Anything whose path starts with "test" ships as a test file.
  spec.test_files    = spec.files.grep(/^test/)
  spec.require_paths = ["lib"]

  spec.add_development_dependency("minitest")
  spec.add_development_dependency("mocha")
end
@@ -0,0 +1,446 @@
1
module Dat
  # Public: Analyze the findings of an Experiment
  #
  # Typically implementors will wish to subclass this to provide their own
  # implementations of the following methods suited to the environment where
  # `dat-science` is being used: `#read`, `#count`, `#cook`.
  #
  # Example:
  #
  #   class AnalyzeThis < Dat::Analysis
  #     # Read a result out of our redis stash
  #     def read
  #       RedisHandle.rpop "scienceness.#{experiment_name}.results"
  #     end
  #
  #     # Query our redis stash to see how many new results are pending
  #     def count
  #       RedisHandle.llen("scienceness.#{experiment_name}.results")
  #     end
  #
  #     # Deserialize a JSON-encoded result from redis
  #     def cook(raw_result)
  #       return nil unless raw_result
  #       JSON.parse raw_result
  #     end
  #   end
  class Analysis

    # Public: Returns the name of the experiment
    attr_reader :experiment_name

    # Public: Returns the current science mismatch result
    attr_reader :current

    # Public: an alias for #current
    alias_method :result, :current

    # Public: Returns a raw ("un-cooked") version of the current science mismatch result
    attr_reader :raw

    # Public: Gets/Sets the base path for loading matcher and wrapper classes.
    # Note that the base path will be appended with the experiment name
    # before searching for wrappers and matchers.
    attr_accessor :path

    # Public: Create a new Dat::Analysis object.  Will load any matcher and
    # wrapper classes for this experiment if `#path` is non-nil.
    #
    # experiment_name - The String naming the experiment to analyze.
    #
    # Examples
    #
    #   analyzer = Dat::Analysis.new('bcrypt-passwords')
    #   => #<Dat::Analysis:...>
    def initialize(experiment_name)
      @experiment_name = experiment_name
      # Not read anywhere in this class (`#wrappers` is served by the
      # registry); kept so the object's instance state is unchanged.
      @wrappers = []

      # Class loading is deliberately best-effort: a StandardError raised
      # while consulting `#path` or loading classes must not prevent the
      # analyzer from being constructed.
      begin
        load_classes unless path.nil?
      rescue StandardError
        nil
      end
    end

    # Public: process a raw science mismatch result to make it usable in analysis.
    # This is typically overridden by subclasses to do any sort of unmarshalling
    # or deserialization required.
    #
    # raw_result - a raw science mismatch result, typically, as returned by `#read`
    #
    # Returns a "cooked" science mismatch result.  This default implementation
    # returns raw_result unchanged.
    def cook(raw_result)
      raw_result
    end

    # Public: fetch and summarize pending science mismatch results until an
    # unrecognized result is found.  Outputs summaries to STDOUT.  May
    # modify current mismatch result.
    #
    # Returns the value of `#summarize_unknown_result`: the number of pending
    # results, or nil when none remain.  Leaves current mismatch result set to
    # first unknown result, if one is found.
    def analyze
      track do
        while true
          unless more?
            fetch # clear current result
            return summarize_unknown_result
          end

          fetch
          break if unknown?
          summarize
          count_as_seen identify
        end

        print "\n"
        summarize_unknown_result
      end
    end

    # Public: skip pending mismatch results not satisfying the provided block.
    # May modify current mismatch result.
    #
    # &block - block accepting a prepared mismatch result and returning true
    #          or false.
    #
    # Examples:
    #
    #   skip do |result|
    #     result.user.staff?
    #   end
    #
    #   skip do |result|
    #     result['group']['id'] > 100 && result['url'] =~ %r{/admin}
    #   end
    #
    #   skip do |result|
    #     result['timestamp'].to_i > 1.hour.ago
    #   end
    #
    # Returns nil if no satisfying results are found.  Current result will be nil.
    # Returns count of remaining results if a satisfying result found.  Leaves
    # current result set to first result for which block returns a truthy value.
    # Raises ArgumentError if no block is given.
    def skip(&block)
      raise ArgumentError, "a block is required" unless block_given?

      while more?
        fetch
        return count if yield(current)
      end

      # Nothing of interest was found: clear every trace of the last fetched
      # result, including the raw form, so `#raw` never outlives `#current`.
      @raw = @current = @identified = nil
    end

    # Public: Are additional science mismatch results available?
    #
    # Returns true if more results can be fetched.
    # Returns false if no more results can be fetched.
    def more?
      count != 0
    end

    # Public: retrieve a new science mismatch result, as returned by `#read`.
    #
    # Returns nil if no new science mismatch results are available.
    # Returns a cooked and wrapped science mismatch result if available.
    # Raises NoMethodError if `#read` is not defined on this class.
    def fetch
      @identified = nil
      @raw = read
      @current = raw ? prepare(raw) : nil
    end

    # Public: Return a readable representation of the current science mismatch
    # result.  This will utilize the `#readable` methods declared on a matcher
    # which identifies the current result.
    #
    # Returns a string containing a readable representation of the current
    # science mismatch result.
    # Returns nil if there is no current result.
    def summary
      return nil unless current

      recognizer = identify
      # Fall back to the analyzer's own rendering when no matcher identified
      # the result or the matcher has no custom rendering.
      if recognizer && recognizer.respond_to?(:readable)
        recognizer.readable
      else
        readable
      end
    end

    # Public: Print a readable summary for the current science mismatch result
    # to STDOUT.
    #
    # Returns nil.
    def summarize
      puts summary
    end

    # Public: Is the current science mismatch result unidentifiable?
    #
    # Returns nil if current result is nil.
    # Returns true if no matcher can identify current result.
    # Returns false if a single matcher can identify the current result.
    # Raises RuntimeError if multiple matchers can identify the current result.
    def unknown?
      return nil if current.nil?
      !identify
    end

    # Public: Find a matcher which can identify the current science mismatch result.
    #
    # Returns nil if current result is nil.
    # Returns the first entry reported by the registry if a single matcher can
    #   identify current result (cached until the current result changes).
    # Returns nil if no matcher can identify the current result.
    # Raises RuntimeError if multiple matchers can identify the current result.
    def identify
      return @identified if @identified
      # Without a current result there is nothing to hand to the matchers.
      return nil if current.nil?

      results = registry.identify(current)
      report_multiple_matchers(results) if results.size > 1

      @identified = results.first
    end

    # Internal: Output failure message about duplicate matchers for a science
    # mismatch result.
    #
    # dupes - Array of Dat::Analysis::Matcher instances, initialized with a result
    #
    # Raises RuntimeError.
    def report_multiple_matchers(dupes)
      puts "\n\nMultiple matchers identified result:"
      puts

      dupes.each_with_index do |matcher, i|
        print " #{i+1}. "
        if matcher.respond_to?(:readable)
          puts matcher.readable
        else
          puts readable
        end
      end

      puts
      raise "Result cannot be uniquely identified."
    end

    # Internal: cook and wrap a raw science mismatch result.
    #
    # raw_result - an unmodified result, typically, as returned by `#read`
    #
    # Returns the science mismatch result processed by `#cook` and then by `#wrap`.
    def prepare(raw_result)
      wrap(cook(raw_result))
    end

    # Internal: wrap a "cooked" science mismatch result with any known wrapper methods
    #
    # cooked_result - a "cooked" mismatch result, as returned by `#cook`
    #
    # Returns the cooked science mismatch result, which will now respond to any
    # public instance methods found on our known wrapper classes.  Methods that
    # no wrapper provides return nil.
    # Returns nil untouched if cooked_result is nil.
    def wrap(cooked_result)
      # Extending nil would modify NilClass itself, so leave it alone.
      return cooked_result if cooked_result.nil?

      cooked_result.extend Dat::Analysis::Result::DefaultMethods

      unless wrappers.empty?
        cooked_result.send(:instance_variable_set, '@analyzer', self)

        class << cooked_result
          # Delegate unknown calls to the first wrapper class that publicly
          # defines the method, forwarding arguments and any block.
          define_method(:method_missing) do |meth, *args, &blk|
            wrapper = @analyzer.wrappers.detect { |w| w.public_method_defined?(meth) }
            wrapper ? wrapper.new(self).send(meth, *args, &blk) : nil
          end

          # Keep `respond_to?` truthful about the wrapper-provided methods.
          define_method(:respond_to_missing?) do |meth, include_private = false|
            @analyzer.wrappers.any? { |w| w.public_method_defined?(meth) } ||
              super(meth, include_private)
          end
        end
      end

      cooked_result
    end

    # Internal: Return the *default* readable representation of the current science
    # mismatch result.  This method is typically overridden by subclasses or defined
    # in matchers which wish to customize the readable representation of a science
    # mismatch result.  This implementation is provided as a default.
    #
    # Returns a string containing a readable representation of the current
    # science mismatch result.
    def readable
      synopsis = []

      synopsis << "Experiment %-20s first: %10s @ %s" % [
        "[#{current['experiment']}]", current['first'], current['timestamp']
      ]
      synopsis << "Duration:  control (%6.2f) | candidate (%6.2f)" % [
        current['control']['duration'], current['candidate']['duration']
      ]

      synopsis << ""

      if current['control']['exception']
        synopsis << "Control raised exception:\n\t#{current['control']['exception'].inspect}"
      else
        synopsis << "Control value:   [#{current['control']['value']}]"
      end

      if current['candidate']['exception']
        synopsis << "Candidate raised exception:\n\t#{current['candidate']['exception'].inspect}"
      else
        synopsis << "Candidate value: [#{current['candidate']['value']}]"
      end

      synopsis << ""

      remaining = current.keys - ['control', 'candidate', 'experiment', 'first', 'timestamp']
      remaining.sort.each do |key|
        value = current[key]

        if value.respond_to?(:keys)
          # `#key_sort` hands back stringified names, so remember which stored
          # key each name came from; otherwise symbol-keyed hashes would
          # always render nil.  A genuine String key wins over a same-named
          # non-String key.
          stored_keys = {}
          value.keys.each do |stored|
            name = stored.to_s
            stored_keys[name] = stored if stored == name || !stored_keys.key?(name)
          end

          # do ordered sorting of hash keys
          synopsis << "\t%15s => {" % [ key ]
          key_sort(value.keys).each do |subkey|
            synopsis << "\t%15s       %15s => %-20s" % [ '', subkey, value[stored_keys[subkey]].inspect ]
          end
          synopsis << "\t%15s    }" % [ '' ]
        else
          synopsis << "\t%15s => %-20s" % [ key, value ]
        end
      end

      synopsis.join "\n"
    end

    # Internal: field names which `#key_sort` lists first, in this order.
    #
    # Returns an Array of Strings.
    def preferred_fields
      %w(id name title owner description login username)
    end

    # Internal: order hash keys for display, preferred fields first.
    #
    # keys - an Array of keys; each is converted with `#to_s`.
    #
    # Returns an Array of Strings: the preferred fields that are present (in
    # `#preferred_fields` order) followed by the remaining names in their
    # original order.
    def key_sort(keys)
      str_keys = keys.map { |k| k.to_s }
      (preferred_fields & str_keys) + (str_keys - preferred_fields)
    end

    # Public: Which matcher classes are known?
    #
    # Returns: list of Dat::Analysis::Matcher classes known to this analyzer.
    def matchers
      registry.matchers
    end

    # Public: Which wrapper classes are known?
    #
    # Returns: list of Dat::Analysis::Result classes known to this analyzer.
    def wrappers
      registry.wrappers
    end

    # Public: Add a matcher or wrapper class to this analyzer.
    #
    # klass - a subclass of either Dat::Analysis::Matcher or Dat::Analysis::Result
    #         to be registered with this analyzer.
    #
    # Returns the value of `klass.add_to_analyzer(self)`.
    def add(klass)
      klass.add_to_analyzer(self)
    end

    # Public: Load matcher and wrapper classes from the library for our experiment.
    #
    # Returns: an Array with the result of `#add` for each class the library
    # reported while the experiment files were loaded.
    def load_classes
      new_classes = library.select_classes do
        experiment_files.each { |file| load file }
      end

      new_classes.map { |klass| add klass }
    end

    # Internal: Print to STDOUT a readable summary of the current (unknown) science
    # mismatch result, as well as a summary of the tally of identified science
    # mismatch results analyzed to this point.
    #
    # Returns nil if there are no pending science mismatch results.
    # Returns the number of pending science mismatch results.
    def summarize_unknown_result
      tally.summarize
      if current
        puts "\nFirst unidentifiable result:\n\n"
        summarize
      else
        puts "\nNo unidentifiable results found. \\m/\n"
      end

      more? ? count : nil
    end

    # Internal: keep a tally of analyzed science mismatch results.  Starts a
    # fresh Tally before running the block.
    #
    # &block: block which will presumably call `#count_as_seen` to update
    #         tallies of identified science mismatch results.
    #
    # Returns: value returned by &block.
    def track(&block)
      @tally = Tally.new
      yield
    end

    # Internal: Increment count for an object in an ongoing tally.  Objects are
    # tallied under their class name (or the class's `inspect` output for
    # anonymous classes).
    #
    # obj - an Object for which we are recording occurrence counts
    #
    # Returns the value of `tally.count` for obj's class.
    def count_as_seen(obj)
      tally.count(obj.class.name || obj.class.inspect)
    end

    # Internal: The current Tally instance.  Cached between calls to `#track`.
    #
    # Returns the current Tally instance object.
    def tally
      @tally ||= Tally.new
    end

    # Internal: handle to the library, used for collecting newly discovered
    # matcher and wrapper classes.
    #
    # Returns: handle to the library class.
    def library
      Dat::Analysis::Library
    end

    # Internal: registry of wrapper and matcher classes known to this analyzer.
    #
    # Returns a (cached between calls) handle to our registry instance.
    def registry
      @registry ||= Dat::Analysis::Registry.new
    end

    # Internal: which class files are candidates for loading matchers and wrappers
    # for this experiment?
    #
    # Returns: sorted Array of paths to ruby files which may contain declarations
    # of matcher and wrapper classes for this experiment.
    def experiment_files
      Dir[File.join(path, experiment_name, '*.rb')].sort
    end

    # Internal: Add a matcher class to this analyzer's registry.
    # (Intended to be called only by Dat::Analysis::Matcher and subclasses)
    def add_matcher(matcher_class)
      puts "Loading matcher class [#{matcher_class}]"
      registry.add matcher_class
    end

    # Internal: Add a wrapper class to this analyzer's registry.
    # (Intended to be called only by Dat::Analysis::Result and its subclasses)
    def add_wrapper(wrapper_class)
      puts "Loading results wrapper class [#{wrapper_class}]"
      registry.add wrapper_class
    end
  end
end
441
+
442
+ require 'dat/analysis/library'
443
+ require 'dat/analysis/matcher'
444
+ require 'dat/analysis/result'
445
+ require 'dat/analysis/registry'
446
+ require 'dat/analysis/tally'