lab_tech 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +323 -0
  4. data/Rakefile +30 -0
  5. data/app/models/lab_tech/application_record.rb +5 -0
  6. data/app/models/lab_tech/default_cleaner.rb +87 -0
  7. data/app/models/lab_tech/experiment.rb +190 -0
  8. data/app/models/lab_tech/observation.rb +40 -0
  9. data/app/models/lab_tech/percentile.rb +41 -0
  10. data/app/models/lab_tech/result.rb +130 -0
  11. data/app/models/lab_tech/speedup.rb +65 -0
  12. data/app/models/lab_tech/summary.rb +183 -0
  13. data/config/routes.rb +2 -0
  14. data/db/migrate/20190815192130_create_experiment_tables.rb +50 -0
  15. data/lib/lab_tech.rb +176 -0
  16. data/lib/lab_tech/engine.rb +6 -0
  17. data/lib/lab_tech/version.rb +3 -0
  18. data/lib/tasks/lab_tech_tasks.rake +4 -0
  19. data/spec/dummy/Rakefile +6 -0
  20. data/spec/dummy/app/assets/config/manifest.js +1 -0
  21. data/spec/dummy/app/assets/javascripts/application.js +14 -0
  22. data/spec/dummy/app/assets/stylesheets/application.css +15 -0
  23. data/spec/dummy/app/controllers/application_controller.rb +2 -0
  24. data/spec/dummy/app/jobs/application_job.rb +2 -0
  25. data/spec/dummy/app/models/application_record.rb +3 -0
  26. data/spec/dummy/bin/bundle +3 -0
  27. data/spec/dummy/bin/rails +4 -0
  28. data/spec/dummy/bin/rake +4 -0
  29. data/spec/dummy/bin/setup +33 -0
  30. data/spec/dummy/bin/update +28 -0
  31. data/spec/dummy/config.ru +5 -0
  32. data/spec/dummy/config/application.rb +35 -0
  33. data/spec/dummy/config/boot.rb +5 -0
  34. data/spec/dummy/config/database.yml +25 -0
  35. data/spec/dummy/config/environment.rb +5 -0
  36. data/spec/dummy/config/environments/development.rb +46 -0
  37. data/spec/dummy/config/environments/production.rb +71 -0
  38. data/spec/dummy/config/environments/test.rb +36 -0
  39. data/spec/dummy/config/initializers/application_controller_renderer.rb +8 -0
  40. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  41. data/spec/dummy/config/initializers/cors.rb +16 -0
  42. data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  43. data/spec/dummy/config/initializers/inflections.rb +16 -0
  44. data/spec/dummy/config/initializers/mime_types.rb +4 -0
  45. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  46. data/spec/dummy/config/locales/en.yml +33 -0
  47. data/spec/dummy/config/puma.rb +34 -0
  48. data/spec/dummy/config/routes.rb +3 -0
  49. data/spec/dummy/config/spring.rb +6 -0
  50. data/spec/dummy/db/schema.rb +52 -0
  51. data/spec/dummy/db/test.sqlite3 +0 -0
  52. data/spec/dummy/log/development.log +0 -0
  53. data/spec/dummy/log/test.log +1519 -0
  54. data/spec/examples.txt +79 -0
  55. data/spec/models/lab_tech/default_cleaner_spec.rb +32 -0
  56. data/spec/models/lab_tech/experiment_spec.rb +110 -0
  57. data/spec/models/lab_tech/percentile_spec.rb +85 -0
  58. data/spec/models/lab_tech/result_spec.rb +198 -0
  59. data/spec/models/lab_tech/speedup_spec.rb +133 -0
  60. data/spec/models/lab_tech/summary_spec.rb +325 -0
  61. data/spec/models/lab_tech_spec.rb +23 -0
  62. data/spec/rails_helper.rb +62 -0
  63. data/spec/spec_helper.rb +98 -0
  64. data/spec/support/misc_helpers.rb +7 -0
  65. metadata +238 -0
@@ -0,0 +1,190 @@
1
module LabTech
  # An ActiveRecord-backed experiment that mixes in Scientist::Experiment.
  #
  # Each row tracks how often the experiment should run (+percent_enabled+)
  # and running tallies of outcomes (+equivalent_count+, +timed_out_count+,
  # +other_error_count+). Individual runs are stored as LabTech::Result rows,
  # each of which has LabTech::Observation rows.
  class Experiment < ActiveRecord::Base
    self.table_name = "lab_tech_experiments"
    include ::Scientist::Experiment

    # Upper bound on attempts made by .named when a uniqueness violation is
    # raised, so a persistent failure surfaces instead of looping forever.
    NAMED_MAX_ATTEMPTS = 3

    has_many :results, class_name: "LabTech::Result", dependent: :destroy
    has_many :observations, class_name: "LabTech::Observation", through: :results

    # The display helpers below are only defined when TablePrint is loaded.
    if defined?( TablePrint ) # a Very Handy Gem Indeed: http://tableprintgem.com/
      tp.set self, *[
        :id,
        { :name => { width: 100 } },
        { :pct_enabled => { display_name: "% Enabled" } },
        { :pct_correct => { display_name: "% Correct" } },
        { :equivalent_count => { display_name: "Equivalent" } },
        { :timed_out_count => { display_name: "Timed Out" } },
        { :other_error_count => { display_name: "Other Errors" } },
        :total_count
      ]

      # @return [String] +percent_enabled+ formatted as a percentage
      def pct_enabled
        format_pct( percent_enabled )
      end

      # @return [String] share of equivalent results as a percentage, or
      #   "N/A" when no results have been counted
      def pct_correct
        return "N/A" if total_count.zero?
        format_pct( equivalent_count, total_count )
      end

      # @return [Integer] sum of the three outcome counters
      def total_count
        equivalent_count + timed_out_count + other_error_count
      end

      # Formats +x+ as a percentage; when +y+ is given, formats x/y instead.
      private def format_pct(x, y=nil)
        x = 100.0 * x / y if y
        "%3.1f%%" % x
      end
    end



    ##### CLASS METHODS #####

    # Looks up an experiment by name (String: found or created) or by id
    # (Integer: found, raising if missing). Any other argument type yields
    # and returns nil.
    #
    # ActiveRecord::RecordNotUnique is retried -- presumably to cover two
    # processes racing to create the same name -- but only up to
    # NAMED_MAX_ATTEMPTS times, after which the error is re-raised.
    #
    # @param experiment_name_or_id [String, Integer]
    # @yield [exp] the experiment, if a block is given
    # @return [LabTech::Experiment, nil]
    def self.named(experiment_name_or_id)
      attempts = 0
      begin
        attempts += 1
        case experiment_name_or_id
        when String  ; exp = find_or_create_by(name: experiment_name_or_id)
        when Integer ; exp = find(experiment_name_or_id)
        end
        yield exp if block_given?
        exp
      rescue ActiveRecord::RecordNotUnique
        retry if attempts < NAMED_MAX_ATTEMPTS
        raise
      end
    end



    ##### INSTANCE METHODS #####

    # Exposes the @_scientist_comparator instance variable
    # (presumably set by Scientist's +compare+ -- confirm against Scientist).
    def comparator
      @_scientist_comparator
    end

    # Prints a comparison of control vs. candidate observations for every
    # mismatched result of this experiment.
    #
    # @param limit [Integer, nil] maximum number of results to print
    # @param io [IO] destination for the report
    # @param block optional formatter forwarded to Result#compare_observations
    # @return [nil]
    #
    # TODO: DRY up the io.puts structure between this and summarize_errors
    def compare_mismatches(limit: nil, io: $stdout, &block)
      mismatches = results.mismatched.includes(:observations)
      return if mismatches.empty?
      mismatches = mismatches.limit(limit) if limit

      io.puts
      io.puts "=" * 100
      io.puts "Comparing results for #{name}:"
      io.puts

      mismatches.each do |result|
        io.puts
        io.puts "-" * 100
        io.puts "Result ##{result.id}"
        result.compare_observations( io: io, &block )
        io.puts "-" * 100
      end

      io.puts
      io.puts "=" * 100
      io.puts
      nil
    end

    # Sets +percent_enabled+ to 0 so that #enabled? is never true.
    def disable
      update_attribute :percent_enabled, 0
    end

    # Randomly decides whether to run: true when a random integer in 0..99
    # is below +percent_enabled+.
    def enabled?
      n = rand(100)
      fail "WTF, Ruby?" unless (0..99).cover?(n) # Paranoia? Indirect documentation? YOU DECIDE.
      n < percent_enabled
    end

    # Sets +percent_enabled+ (defaults to 100).
    def enable(percent_enabled: 100)
      update_attribute :percent_enabled, percent_enabled
    end

    # Oh, this is a fun one: apparently Scientist::Experiment#name is
    # overriding the ActiveRecord attribute. Override it back.
    def name ; read_attribute :name ; end
    def name=(value) ; write_attribute :name, value ; end

    # Records a Scientist result as a LabTech::Result. Skipped in the Rails
    # test environment unless LabTech.publish_results_in_test_mode? is true.
    def publish(scientist_result)
      return if Rails.env.test? && !LabTech.publish_results_in_test_mode?
      LabTech::Result.record_a_science( self, scientist_result )
    end

    # I don't encourage the willy-nilly destruction of experimental results...
    # ...but sometimes you just need to start over.
    #
    # Deletes this experiment's observations and results, zeroes the outcome
    # counters, and prints how many rows were removed.
    def purge_data
      # Counts rows before and after the delete (inside one transaction) and
      # returns the difference.
      delete_and_count = ->(scope) {
        n0, n1 = 0, 0
        transaction do
          n0 = scope.count
          scope.delete_all
          n1 = scope.count
        end
        n0 - n1
      }

      n = delete_and_count.call( LabTech::Observation.where(result_id: self.result_ids) )
      m = delete_and_count.call( self.results )

      # `update` replaces `update_attributes`, which newer Rails releases removed.
      update(
        equivalent_count: 0,
        timed_out_count: 0,
        other_error_count: 0,
      )

      puts "Deleted #{m} result(s) and #{n} observations"
    end

    # Bumps the per-name run counter and installs the default cleaner (if no
    # cleaner is set) before delegating to the superclass implementation.
    def run(*)
      increment_run_count
      provide_default_cleaner
      super
    end

    # Prints the exception class and message of each candidate observation
    # for every result in the +other_error+ scope.
    #
    # @param limit [Integer, nil] maximum number of results to print
    # @param io [IO] destination for the report
    # @return [nil]
    #
    # TODO: DRY up the io.puts structure between this and compare_mismatches
    def summarize_errors(limit: nil, io: $stdout)
      errors = results.other_error
      return if errors.empty?
      errors = errors.limit(limit) if limit

      io.puts
      io.puts "=" * 100
      io.puts "Comparing results for #{name}:"
      io.puts

      errors.each do |result|
        io.puts
        io.puts "-" * 100
        io.puts "Result ##{result.id}"
        result.candidates.each do |observation|
          # Written to +io+ like the rest of the report; interpolation keeps
          # this from raising when a candidate has no recorded exception.
          io.puts " * #{observation.exception_class}: #{observation.exception_message}"
        end
        io.puts "-" * 100
      end

      io.puts
      io.puts "=" * 100
      io.puts
      nil
    end

    # Prints #summary to standard output, surrounded by blank lines.
    def summarize_results
      puts "", summary, ""
    end

    # Reloads this record and wraps it in a LabTech::Summary.
    def summary
      reload
      LabTech::Summary.new(self)
    end

    private

    # Increments this experiment's entry in LabTech.run_count.
    def increment_run_count
      LabTech.run_count[self.name] += 1
    end

    # Installs LabTech::DefaultCleaner as the cleaner unless one is present.
    def provide_default_cleaner
      return if cleaner.present?
      clean { |value| LabTech::DefaultCleaner.call(value) }
    end
  end
end
@@ -0,0 +1,40 @@
1
module LabTech
  # One observation (control or candidate) belonging to a LabTech::Result.
  # Stores the observation's name, duration, serialized value and, when the
  # observed code raised, the exception's class, message and backtrace.
  class Observation < ApplicationRecord
    self.table_name = "lab_tech_observations"

    belongs_to :result, class_name: "LabTech::Result", foreign_key: :result_id, optional: true

    serialize :value

    # @return [Boolean] true when any of the exception fields is present
    def raised_error?
      [ exception_class, exception_message, exception_backtrace ].any? { |detail| detail.present? }
    end

    # Copies name, duration, cleaned value and exception details from a
    # Scientist::Observation onto this record (without saving it).
    #
    # @param scientist_observation [Scientist::Observation]
    # @raise [ArgumentError] when given anything else
    def record_a_science(scientist_observation)
      is_observation = scientist_observation.kind_of?( Scientist::Observation )
      raise ArgumentError, "expected a Scientist::Observation but got #{scientist_observation.class}" unless is_observation

      self.name     = scientist_observation.name
      self.duration = scientist_observation.duration
      self.value    = scientist_observation.cleaned_value

      record_errors scientist_observation.exception
    end

    # @return [Boolean] true when the recorded exception class is "Timeout::Error"
    def timed_out?
      exception_class == "Timeout::Error"
    end

    private

    # Stores the exception's class, message and backtrace; no-op for nil.
    def record_errors(exception)
      unless exception.nil?
        self.exception_class     = exception.class
        self.exception_message   = exception.message
        self.exception_backtrace = exception.backtrace
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module LabTech
  # Nearest-rank percentile lookup over an already-sorted list.
  module Percentile
    extend self

    MIN_PERCENTILE = 0
    MAX_PERCENTILE = 100

    # Returns the element of +list+ sitting at the +pct+th percentile.
    #
    # @param pct [Integer] percentile, MIN_PERCENTILE..MAX_PERCENTILE inclusive
    # @param list [Array] values sorted in ascending order
    # @return [Object, nil] the selected element; nil when +list+ is empty
    # @raise [RuntimeError] when +list+ is not sorted
    # @raise [ArgumentError] when +pct+ is not an Integer in range
    def call(pct, list)
      # Make sure this list is actually sorted
      fail "Sorry, this isn't sorted: #{list.inspect}" unless sorted?(list)

      msg = "Please pass an integer between #{MIN_PERCENTILE} and #{MAX_PERCENTILE}, not #{pct.inspect}"
      raise ArgumentError, msg unless pct.kind_of?(Integer)
      raise ArgumentError, msg unless (MIN_PERCENTILE..MAX_PERCENTILE).cover?(pct)

      return list.first if pct == MIN_PERCENTILE # Avoid the need for a bounds check later
      return list.last  if pct == MAX_PERCENTILE # By definition, I guess

      # Nearest-rank: rank = ceil(pct / 100 * N). Computed with integer
      # ceiling division because the floating-point form can overshoot
      # (0.01 * 7 * 100 evaluates to 7.000000000000001, whose ceil is 8).
      rank = ( pct * list.length + MAX_PERCENTILE - 1 ) / MAX_PERCENTILE
      list[ rank - 1 ]
    end

    private

    # True when every adjacent pair in +list+ is in non-descending order.
    def sorted?(list)
      list.each_cons(2).all? { |a, b| a <= b }
    end
  end
end
@@ -0,0 +1,130 @@
1
module LabTech
  # The stored outcome of one experiment run: a control observation, one or
  # more candidate observations, the run's context, and summary statistics
  # (equivalence, whether a candidate raised, durations and speedup).
  class Result < ApplicationRecord
    self.table_name = "lab_tech_results"

    belongs_to :experiment, class_name: "LabTech::Experiment"
    has_many :observations, class_name: "LabTech::Observation", dependent: :destroy
    has_one :control, ->() { where("name = 'control'") }, class_name: "LabTech::Observation"
    has_many :candidates, ->() { where("name != 'control'") }, class_name: "LabTech::Observation"
    serialize :context

    # NOTE: I don't think this accounts for the possibility that both the
    # control and candidate might raise, and if so, whether raising the same
    # exception should be considered equivalent. (Unless I already thought of this?)
    scope :correct, -> { where( equivalent: true, raised_error: false ) }
    scope :mismatched, -> { where( equivalent: false, raised_error: false ) }
    scope :errored, -> { where( equivalent: false, raised_error: true ) }

    # Builds the SQL fragment matching (or excluding) timeout exceptions on
    # the observations table. The value is a single-quoted SQL string
    # literal: double quotes denote an identifier in standard SQL, so the
    # double-quoted form only works on databases that tolerate it.
    is_timeout = ->(is_or_is_not) {
      col = LabTech::Observation.table_name + ".exception_class"
      operator = is_or_is_not ? "=" : "!="
      value = "'Timeout::Error'"
      [ col, operator, value ].join(" ")
    }
    scope :timed_out, -> { errored.joins(:candidates).where( is_timeout.(true) ) }
    scope :other_error, -> { errored.joins(:candidates).where( is_timeout.(false) ) }

    after_create :increment_experiment_counters


    ##### CLASS METHODS #####

    # Creates (and saves) a Result for +experiment+ populated from a
    # Scientist::Result.
    #
    # @param experiment [LabTech::Experiment]
    # @param scientist_result [Scientist::Result]
    # @return [LabTech::Result]
    def self.record_a_science( experiment, scientist_result )
      self.create!(experiment: experiment) do |result|
        result.record_a_science scientist_result
      end
    end



    ##### INSTANCE METHODS #####

    # Having multiple candidates is annoying; I've mistyped this one a lot
    def candidate
      candidates.first
    end

    # Default formatter for #compare_observations: one line per observation
    # showing its name and inspected value.
    DEFAULT_COMPARISON = ->(control, candidate) {
      [ control, candidate ].map { |obs|
        " %20s # => %s" % [ obs.name, obs.value.inspect ]
      }
    }

    # Prints a control-vs-candidate comparison for each candidate.
    #
    # @param io [IO] destination for the output
    # @param block optional callable taking (control, candidate) and returning
    #   what to print; defaults to DEFAULT_COMPARISON
    # @return [nil]
    def compare_observations(io: $stdout, &block)
      block ||= DEFAULT_COMPARISON
      candidates.each do |candidate|
        io.puts block.( control, candidate )
      end
      return nil
    end

    # Copies context, observations and summary statistics from a
    # Scientist::Result onto this record (without saving it).
    #
    # @param scientist_result [Scientist::Result]
    # @raise [ArgumentError] when given anything else
    def record_a_science(scientist_result)
      unless scientist_result.kind_of?( Scientist::Result )
        raise ArgumentError, "expected a Scientist::Result but got #{scientist_result.class}"
      end

      self.context = scientist_result.context

      record_observation scientist_result.control
      scientist_result.candidates.each do |candidate|
        record_observation candidate
      end

      record_simple_stats scientist_result
    end

    # @return [LabTech::Speedup, nil] speedup built from the stored durations,
    #   time delta and factor; nil unless there is exactly one candidate
    def speedup
      return nil unless candidates.count == 1

      LabTech::Speedup.new(
        baseline: control.duration,
        comparison: candidate.duration,
        time: time_delta,
        factor: speedup_factor,
      )
    end

    # @return [Boolean] true when any candidate observation timed out
    def timed_out?
      candidates.any?(&:timed_out?)
    end

    private

    # Bumps exactly one of the experiment's outcome counters for this result:
    # equivalent, timed out, or (everything else) other error.
    def increment_experiment_counters
      increment = ->(count) {
        Experiment.increment_counter count, self.experiment_id
      }
      case
      when equivalent ; increment.( :equivalent_count )
      when timed_out? ; increment.( :timed_out_count )
      else            ; increment.( :other_error_count )
      end
    end

    # Builds (but does not save) an Observation from a Scientist observation.
    def record_observation(scientist_observation)
      self.observations.build do |observation|
        observation.record_a_science scientist_observation
      end
    end

    # Computes equivalence, the raised_error flag and, for single-candidate
    # runs, the durations, time delta and speedup factor.
    def record_simple_stats(scientist_result)
      cont, cands = scientist_result.control, scientist_result.candidates

      self.equivalent = cands.all? { |cand| cand.equivalent_to?(cont, &experiment.comparator) }

      # Flagged only when a candidate raised and the control did not.
      raised = ->(scientist_observation) { scientist_observation.exception.present? }
      self.raised_error = !raised.(cont) && cands.any?(&raised)

      # Time delta makes no sense if you're running more than one candidate at a time
      if cands.length == 1
        self.control_duration   = cont.duration
        self.candidate_duration = cands.first.duration

        x = LabTech::Speedup.new(
          baseline: control_duration,
          comparison: candidate_duration,
        )
        self.time_delta = x.time
        self.speedup_factor = x.factor
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
module LabTech
  # Value object describing how a comparison duration relates to a baseline
  # duration: the time saved (+time+) and a signed speedup +factor+.
  #
  # The factor is positive (baseline / comparison) when the comparison is
  # faster, negative (-comparison / baseline) when it is slower, and 0 when
  # the two durations are equal.
  class Speedup
    include Comparable

    attr_reader :baseline, :comparison, :time, :factor

    # @return [Float, nil] baseline minus comparison; nil if either is nil
    def self.compute_time_delta(baseline, comparison)
      return nil if baseline.nil?
      return nil if comparison.nil?

      baseline.to_f - comparison.to_f
    end

    # @return [Float, Integer, nil] the signed speedup factor; nil when either
    #   duration is nil or zero
    def self.compute_factor(baseline, comparison)
      # Work in floats throughout so Integer inputs are not truncated by
      # integer division.
      base = baseline.to_f
      comp = comparison.to_f

      # Timing data should never be zero
      return nil if [ base, comp ].any?(&:zero?)

      time = compute_time_delta(base, comp)
      return nil if time.nil?

      case
      when time > 0   ; +1 * base / comp
      when time.zero? ; 0
      when time < 0   ; -1 * comp / base
      end
    end

    # @param baseline [Numeric, nil] duration of the control
    # @param comparison [Numeric, nil] duration of the candidate
    # @param time [Numeric, nil] precomputed time delta; derived when omitted
    # @param factor [Numeric, nil] precomputed factor; derived when omitted
    def initialize(baseline: nil, comparison: nil, time: nil, factor: nil)
      @baseline   = baseline&.to_f
      @comparison = comparison&.to_f
      @time       = time&.to_f   || compute_time_delta
      @factor     = factor&.to_f || compute_factor
    end

    # Orders speedups by factor.
    #
    # @return [Integer, nil] -1, 0 or 1; nil when +other+ is not a Speedup or
    #   either factor is missing (i.e. the two are not comparable)
    def <=>(other)
      return nil unless other.kind_of?(self.class)
      return nil if self.factor.nil?
      return nil if other.factor.nil?

      self.factor <=> other.factor
    end

    # @return [Boolean] false when time or factor is missing, or when either
    #   disagrees with the value derived from baseline and comparison
    def valid?
      return false if time.nil?
      return false if factor.nil?

      expected_time_delta = compute_time_delta
      expected_factor     = compute_factor

      return false if expected_time_delta && ( time != expected_time_delta )
      return false if expected_factor && ( factor != expected_factor )

      true
    end

    private

    def compute_time_delta
      self.class.compute_time_delta(baseline, comparison)
    end

    def compute_factor
      self.class.compute_factor(baseline, comparison)
    end
  end
end