dat-analysis 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +423 -0
  5. data/dat-analysis.gemspec +17 -0
  6. data/lib/dat/analysis.rb +446 -0
  7. data/lib/dat/analysis/library.rb +30 -0
  8. data/lib/dat/analysis/matcher.rb +43 -0
  9. data/lib/dat/analysis/registry.rb +50 -0
  10. data/lib/dat/analysis/result.rb +78 -0
  11. data/lib/dat/analysis/tally.rb +59 -0
  12. data/script/bootstrap +9 -0
  13. data/script/release +38 -0
  14. data/script/test +9 -0
  15. data/test/dat_analysis_subclassing_test.rb +119 -0
  16. data/test/dat_analysis_test.rb +822 -0
  17. data/test/fixtures/analysis/test-suite-experiment/matcher.rb +7 -0
  18. data/test/fixtures/experiment-with-classes/matcher_a.rb +5 -0
  19. data/test/fixtures/experiment-with-classes/matcher_b.rb +11 -0
  20. data/test/fixtures/experiment-with-classes/wrapper_a.rb +5 -0
  21. data/test/fixtures/experiment-with-classes/wrapper_b.rb +11 -0
  22. data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_w.rb +5 -0
  23. data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_y.rb +11 -0
  24. data/test/fixtures/experiment-with-good-and-extraneous-classes/matcher_z.rb +11 -0
  25. data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_w.rb +5 -0
  26. data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_y.rb +11 -0
  27. data/test/fixtures/experiment-with-good-and-extraneous-classes/wrapper_z.rb +11 -0
  28. data/test/fixtures/initialize-classes/matcher_m.rb +5 -0
  29. data/test/fixtures/initialize-classes/matcher_n.rb +11 -0
  30. data/test/fixtures/initialize-classes/wrapper_m.rb +5 -0
  31. data/test/fixtures/initialize-classes/wrapper_n.rb +11 -0
  32. data/test/fixtures/invalid-matcher/matcher.rb +1 -0
  33. metadata +128 -0
@@ -0,0 +1,17 @@
1
# Gem specification for dat-analysis.
Gem::Specification.new do |spec|
  spec.name        = "dat-analysis"
  spec.version     = "1.2.0"
  spec.summary     = "HYPOTHESIZE THIS."
  spec.description = "Analyze results from dat-science"
  spec.homepage    = "https://github.com/github/dat-analysis"
  spec.authors     = ["John Barnette", "Rick Bradley"]
  spec.email       = ["bradley@github.com"]

  # Package whatever git tracks; tracked paths beginning with "test" are
  # additionally listed as the gem's test files.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = []
  spec.test_files    = spec.files.grep(/^test/)
  spec.require_paths = ["lib"]

  spec.add_development_dependency "minitest"
  spec.add_development_dependency "mocha"
end
@@ -0,0 +1,446 @@
1
+ module Dat
2
+ # Public: Analyze the findings of an Experiment
3
+ #
4
+ # Typically implementors will wish to subclass this to provide their own
5
+ # implementations of the following methods suited to the environment where
6
+ # `dat-science` is being used: `#read`, `#count`, `#cook`.
7
+ #
8
+ # Example:
9
+ #
10
+ # class AnalyzeThis < Dat::Analysis
11
+ # # Read a result out of our redis stash
12
+ # def read
13
+ # RedisHandle.rpop "scienceness.#{experiment_name}.results"
14
+ # end
15
+ #
16
+ # # Query our redis stash to see how many new results are pending
17
+ # def count
18
+ # RedisHandle.llen("scienceness.#{experiment_name}.results")
19
+ # end
20
+ #
21
+ # # Deserialize a JSON-encoded result from redis
22
+ # def cook(raw_result)
23
+ # return nil unless raw_result
24
+ # JSON.parse raw_result
25
+ # end
26
+ # end
27
+ class Analysis
28
+
29
+ # Public: Returns the name of the experiment
30
+ attr_reader :experiment_name
31
+
32
+ # Public: Returns the current science mismatch result
33
+ attr_reader :current
34
+
35
+ # Public: an alias for #current
36
+ alias_method :result, :current
37
+
38
+ # Public: Returns a raw ("un-cooked") version of the current science mismatch result
39
+ attr_reader :raw
40
+
41
+ # Public: Gets/Sets the base path for loading matcher and wrapper classes.
42
+ # Note that the base path will be appended with the experiment name
43
+ # before searching for wrappers and matchers.
44
+ attr_accessor :path
45
+
46
# Public: Create a new Dat::Analysis object. Matcher and wrapper classes for
# the experiment are loaded when `#path` is non-nil.
#
# experiment_name - The String naming the experiment to analyze.
#
# Examples
#
#   analyzer = Dat::Analysis.new('bcrypt-passwords')
#   # => #<Dat::Analysis:...>
def initialize(experiment_name)
  @experiment_name = experiment_name
  @wrappers = []

  # Class loading is best-effort: any StandardError raised while checking
  # the path or loading classes is discarded so construction still succeeds.
  begin
    load_classes unless path.nil?
  rescue StandardError
    nil
  end
end
61
+
62
# Public: Turn a raw science mismatch result into something usable for
# analysis. Subclasses typically override this to unmarshal or deserialize
# the raw value; this default implementation hands the value back untouched.
#
# raw_result - a raw science mismatch result, typically as returned by `#read`
#
# Returns the "cooked" science mismatch result (here: raw_result itself).
def cook(raw_result)
  cooked = raw_result
  cooked
end
72
+
73
# Public: Fetch and summarize pending science mismatch results until an
# unrecognized result is found. Summaries are written to STDOUT. May modify
# the current mismatch result.
#
# When the pending results run out, one more `#fetch` is issued (to clear
# the current result) before the final summary is printed.
#
# Returns the value of `#summarize_unknown_result`. Leaves the current
# mismatch result set to the first unknown result, if one is found.
def analyze
  track do
    hit_unknown = false

    while more?
      fetch
      if unknown?
        hit_unknown = true
        break
      end

      summarize
      count_as_seen identify
    end

    unless hit_unknown
      fetch # clear current result
      return summarize_unknown_result
    end

    print "\n"
    summarize_unknown_result
  end
end
97
+
98
# Public: Skip pending mismatch results which do not satisfy the provided
# block. May modify the current mismatch result.
#
# &block - block accepting a prepared mismatch result and returning a truthy
#          value for the result to stop on.
#
# Examples:
#
#   skip do |result|
#     result.user.staff?
#   end
#
#   skip do |result|
#     result['group']['id'] > 100 && result['url'] =~ %r{/admin}
#   end
#
#   skip do |result|
#     result['timestamp'].to_i > 1.hour.ago
#   end
#
# Returns nil if no satisfying result is found; the current result is then nil.
# Returns the count of remaining results if a satisfying result is found; the
# current result is then the first result for which the block was truthy.
# Raises ArgumentError if no block is given.
def skip(&block)
  raise ArgumentError, "a block is required" if block.nil?

  while more?
    fetch
    next unless yield(current)
    return count
  end

  # Nothing of interest was found: drop the current result and its cached
  # identification.
  @identified = nil
  @current = nil
end
132
+
133
# Public: Are additional science mismatch results available?
#
# Relies on `#count`, which this class expects subclasses to supply.
#
# Returns true if `#count` reports anything other than 0.
# Returns false if `#count` reports 0.
def more?
  pending = count
  !(pending == 0)
end
140
+
141
# Public: Retrieve a new science mismatch result, as returned by `#read`.
# Any cached identification of the previous result is discarded, and both
# the raw and current results are replaced.
#
# Returns nil if `#read` produced nothing.
# Returns the cooked and wrapped science mismatch result otherwise.
# Raises NoMethodError if `#read` is not defined on this class.
def fetch
  @identified = nil
  @raw = read
  @current = if raw
    prepare(raw)
  end
end
151
+
152
# Public: Return a readable representation of the current science mismatch
# result. If the matcher identifying the current result responds to
# `#readable`, its output is used; otherwise this analyzer's own `#readable`
# supplies the text.
#
# Returns a String containing a readable representation of the current
# science mismatch result.
# Returns nil if there is no current result.
def summary
  return nil unless current

  matcher = identify
  if matcher && matcher.respond_to?(:readable)
    matcher.readable
  else
    readable
  end
end
165
+
166
# Public: Write the readable summary of the current science mismatch result
# (see `#summary`) to STDOUT.
#
# Returns nil.
def summarize
  text = summary
  puts text
end
173
+
174
# Public: Is the current science mismatch result unidentifiable?
#
# Returns nil if the current result is nil.
# Returns true if `#identify` finds no matcher for the current result.
# Returns false if `#identify` finds a matcher for the current result.
# Raises RuntimeError if multiple matchers can identify the current result.
def unknown?
  current.nil? ? nil : !identify
end
184
+
185
# Public: Find a matcher which can identify the current science mismatch
# result. A successful identification is cached until the next `#fetch` or
# `#skip` resets it.
#
# Returns nil if the current result is nil (the registry is not consulted).
# Returns the single matcher reported by the registry for the current result.
# Returns nil if the registry reports no matcher for the current result.
# Raises RuntimeError (via `#report_multiple_matchers`) if the registry
# reports more than one matcher for the current result.
def identify
  return @identified if @identified

  # Honour the documented contract: with no current result there is nothing
  # to identify, so matchers are never handed a nil result.
  return nil if current.nil?

  candidates = registry.identify(current)
  report_multiple_matchers(candidates) if candidates.size > 1

  @identified = candidates.first
end
201
+
202
# Internal: Print a failure message to STDOUT listing every matcher that
# claimed the current science mismatch result, then fail.
#
# Each matcher is listed with a 1-based index. A matcher responding to
# `#readable` is shown using its own output; otherwise this analyzer's
# `#readable` is shown.
#
# dupes - Array of Dat::Analysis::Matcher instances, initialized with a result
#
# Raises RuntimeError.
def report_multiple_matchers(dupes)
  puts "\n\nMultiple matchers identified result:"
  puts

  dupes.each.with_index(1) do |candidate, position|
    print " #{position}. "
    description = candidate.respond_to?(:readable) ? candidate.readable : readable
    puts description
  end

  puts
  raise "Result cannot be uniquely identified."
end
224
+
225
# Internal: Cook and then wrap a raw science mismatch result.
#
# raw_result - an unmodified result, typically as returned by `#read`
#
# Returns the science mismatch result after it has passed through `#cook`
# and then `#wrap`.
def prepare(raw_result)
  cooked = cook(raw_result)
  wrap(cooked)
end
233
+
234
# Internal: Decorate a "cooked" science mismatch result with the default
# result methods and with access to any known wrapper methods.
#
# When wrappers are registered, the result remembers this analyzer (in
# @analyzer) and gains a singleton `method_missing`: an unknown message is
# answered by the first wrapper class with a public instance method of that
# name, instantiated around the result. Messages no wrapper handles yield nil.
#
# cooked_result - a "cooked" mismatch result, as returned by `#cook`
#
# Returns the same cooked_result object, extended as described above.
def wrap(cooked_result)
  cooked_result.extend Dat::Analysis::Result::DefaultMethods
  return cooked_result if wrappers.empty?

  cooked_result.instance_variable_set(:@analyzer, self)

  cooked_result.singleton_class.send(:define_method, :method_missing) do |meth, *args|
    wanted = meth.to_s
    handler = @analyzer.wrappers.find do |wrapper|
      wrapper.public_instance_methods.any? { |name| name.to_s == wanted }
    end
    handler ? handler.new(self).send(meth, *args) : nil
  end

  cooked_result
end
261
+
262
# Internal: Return the *default* readable representation of the current
# science mismatch result. Subclasses may override this, and matchers may
# define their own `#readable`, to customize the representation.
#
# The text lists the experiment name, which behavior ran first, the
# timestamp, control/candidate durations, and either the value or the
# exception of each behavior. Every remaining top-level key follows in sorted
# order; Hash-like values are expanded one entry per line, ordered by
# `#key_sort`.
#
# Returns a String containing a readable representation of the current
# science mismatch result.
def readable
  synopsis = []

  synopsis << "Experiment %-20s first: %10s @ %s" % [
    "[#{current['experiment']}]", current['first'], current['timestamp']
  ]
  synopsis << "Duration: control (%6.2f) | candidate (%6.2f)" % [
    current['control']['duration'], current['candidate']['duration']
  ]

  synopsis << ""

  if current['control']['exception']
    synopsis << "Control raised exception:\n\t#{current['control']['exception'].inspect}"
  else
    synopsis << "Control value: [#{current['control']['value']}]"
  end

  if current['candidate']['exception']
    synopsis << "Candidate raised exception:\n\t#{current['candidate']['exception'].inspect}"
  else
    synopsis << "Candidate value: [#{current['candidate']['value']}]"
  end

  synopsis << ""

  remaining = current.keys - ['control', 'candidate', 'experiment', 'first', 'timestamp']
  remaining.sort.each do |key|
    value = current[key]

    if value.respond_to?(:keys)
      # `#key_sort` hands back key *names* (Strings). Map each name to the key
      # object actually stored so that Symbol (or other non-String) keys still
      # resolve to their values. String keys are inserted last so they win
      # when two keys share a name.
      string_keys, other_keys = value.keys.partition { |k| k.is_a?(String) }
      stored_key_for = {}
      (other_keys + string_keys).each { |k| stored_key_for[k.to_s] = k }

      synopsis << "\t%15s => {" % [ key ]
      key_sort(value.keys).each do |subkey|
        stored_key = stored_key_for.fetch(subkey, subkey)
        synopsis << "\t%15s %15s => %-20s" % [ '', subkey, value[stored_key].inspect ]
      end
      synopsis << "\t%15s }" % [ '' ]
    else
      synopsis << "\t%15s => %-20s" % [ key, value ]
    end
  end

  synopsis.join "\n"
end
312
+
313
# Internal: Key names which `#key_sort` places first, in this order, when
# ordering the keys of a nested hash.
#
# Returns an Array of Strings.
def preferred_fields
  ['id', 'name', 'title', 'owner', 'description', 'login', 'username']
end
316
+
317
# Internal: Order key names for display. Names listed in `#preferred_fields`
# come first (in preferred order, each at most once); all other names follow
# in their original order.
#
# keys - Array of keys; each is converted with `#to_s`.
#
# Returns an Array of Strings.
def key_sort(keys)
  names = keys.map(&:to_s)
  favored = preferred_fields

  leading  = favored.uniq.select { |field| names.include?(field) }
  trailing = names.reject { |name| favored.include?(name) }

  leading + trailing
end
321
+
322
# Public: Which matcher classes are known?
#
# Returns the matchers reported by this analyzer's registry.
def matchers
  known = registry
  known.matchers
end
328
+
329
# Public: Which wrapper classes are known?
#
# Returns the wrappers reported by this analyzer's registry.
def wrappers
  known = registry
  known.wrappers
end
335
+
336
# Public: Add a matcher or wrapper class to this analyzer. Registration is
# delegated to the class itself through its `add_to_analyzer` hook.
#
# klass - a subclass of either Dat::Analysis::Matcher or Dat::Analysis::Result
#         to be registered with this analyzer.
#
# Returns whatever `klass.add_to_analyzer` returns.
def add(klass)
  analyzer = self
  klass.add_to_analyzer(analyzer)
end
345
+
346
# Public: Load matcher and wrapper classes from the library for our
# experiment. Every file listed by `#experiment_files` is loaded inside the
# library's `select_classes` block, and each class that call returns is
# passed to `#add`.
#
# Returns an Array holding the result of `#add` for each selected class.
def load_classes
  discovered = library.select_classes do
    experiment_files.each { |source_file| load(source_file) }
  end

  discovered.map { |klass| add(klass) }
end
356
+
357
# Internal: Print to STDOUT the tally of identified science mismatch results
# analyzed so far, followed by either a readable summary of the current
# (unknown) result or a note that no unidentifiable results were found.
#
# Returns nil if there are no pending science mismatch results.
# Returns the number of pending science mismatch results otherwise.
def summarize_unknown_result
  tally.summarize

  have_unknown = current ? true : false
  puts "\nNo unidentifiable results found. \\m/\n" unless have_unknown
  if have_unknown
    puts "\nFirst unidentifiable result:\n\n"
    summarize
  end

  return nil unless more?
  count
end
374
+
375
# Internal: Run a block against a fresh tally of analyzed science mismatch
# results. Any previously accumulated tally is replaced before the block runs.
#
# &block - block which will presumably call `#count_as_seen` to update
#          tallies of identified science mismatch results.
#
# Returns the value returned by the block.
def track(&block)
  fresh_tally = Tally.new
  @tally = fresh_tally
  yield
end
385
+
386
# Internal: Increment the count for an object in the ongoing tally. Objects
# are tallied under their class name; instances of anonymous classes (whose
# name is nil) are tallied under the class's `#inspect` string instead.
#
# obj - an Object for which we are recording occurrence counts
#
# Returns the value of `tally.count` for the object's label.
def count_as_seen(obj)
  klass = obj.class
  label = klass.name || klass.inspect
  tally.count(label)
end
394
+
395
# Internal: The current Tally instance. Created on first use and then reused
# until `#track` replaces it.
#
# Returns the current Tally instance object.
def tally
  return @tally if @tally

  @tally = Tally.new
end
401
+
402
# Internal: Handle to the library used for collecting newly discovered
# matcher and wrapper classes (see `#load_classes`).
#
# Returns the Dat::Analysis::Library class.
def library
  library_class = Dat::Analysis::Library
  library_class
end
409
+
410
# Internal: Registry of wrapper and matcher classes known to this analyzer.
# Built on first use and reused afterwards.
#
# Returns the (cached between calls) Dat::Analysis::Registry instance.
def registry
  return @registry if @registry

  @registry = Dat::Analysis::Registry.new
end
416
+
417
# Internal: Which class files are candidates for loading matchers and
# wrappers for this experiment? These are the `*.rb` files found directly
# inside the directory named after the experiment beneath `#path`.
#
# Returns a sorted Array of paths to ruby files which may contain
# declarations of matcher and wrapper classes for this experiment.
def experiment_files
  pattern = File.join(path, experiment_name, '*.rb')
  Dir.glob(pattern).sort
end
425
+
426
# Internal: Add a matcher class to this analyzer's registry, announcing the
# class on STDOUT first.
# (Intended to be called only by Dat::Analysis::Matcher and subclasses)
#
# matcher_class - the matcher class to register.
#
# Returns the value of `registry.add`.
def add_matcher(matcher_class)
  announcement = "Loading matcher class [#{matcher_class}]"
  puts announcement
  registry.add(matcher_class)
end
432
+
433
# Internal: Add a wrapper class to this analyzer's registry, announcing the
# class on STDOUT first.
# (Intended to be called only by Dat::Analysis::Result and its subclasses)
#
# wrapper_class - the results wrapper class to register.
#
# Returns the value of `registry.add`.
def add_wrapper(wrapper_class)
  announcement = "Loading results wrapper class [#{wrapper_class}]"
  puts announcement
  registry.add(wrapper_class)
end
439
+ end
440
+ end
441
+
442
+ require 'dat/analysis/library'
443
+ require 'dat/analysis/matcher'
444
+ require 'dat/analysis/result'
445
+ require 'dat/analysis/registry'
446
+ require 'dat/analysis/tally'