lab_tech 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +323 -0
  4. data/Rakefile +30 -0
  5. data/app/models/lab_tech/application_record.rb +5 -0
  6. data/app/models/lab_tech/default_cleaner.rb +87 -0
  7. data/app/models/lab_tech/experiment.rb +190 -0
  8. data/app/models/lab_tech/observation.rb +40 -0
  9. data/app/models/lab_tech/percentile.rb +41 -0
  10. data/app/models/lab_tech/result.rb +130 -0
  11. data/app/models/lab_tech/speedup.rb +65 -0
  12. data/app/models/lab_tech/summary.rb +183 -0
  13. data/config/routes.rb +2 -0
  14. data/db/migrate/20190815192130_create_experiment_tables.rb +50 -0
  15. data/lib/lab_tech.rb +176 -0
  16. data/lib/lab_tech/engine.rb +6 -0
  17. data/lib/lab_tech/version.rb +3 -0
  18. data/lib/tasks/lab_tech_tasks.rake +4 -0
  19. data/spec/dummy/Rakefile +6 -0
  20. data/spec/dummy/app/assets/config/manifest.js +1 -0
  21. data/spec/dummy/app/assets/javascripts/application.js +14 -0
  22. data/spec/dummy/app/assets/stylesheets/application.css +15 -0
  23. data/spec/dummy/app/controllers/application_controller.rb +2 -0
  24. data/spec/dummy/app/jobs/application_job.rb +2 -0
  25. data/spec/dummy/app/models/application_record.rb +3 -0
  26. data/spec/dummy/bin/bundle +3 -0
  27. data/spec/dummy/bin/rails +4 -0
  28. data/spec/dummy/bin/rake +4 -0
  29. data/spec/dummy/bin/setup +33 -0
  30. data/spec/dummy/bin/update +28 -0
  31. data/spec/dummy/config.ru +5 -0
  32. data/spec/dummy/config/application.rb +35 -0
  33. data/spec/dummy/config/boot.rb +5 -0
  34. data/spec/dummy/config/database.yml +25 -0
  35. data/spec/dummy/config/environment.rb +5 -0
  36. data/spec/dummy/config/environments/development.rb +46 -0
  37. data/spec/dummy/config/environments/production.rb +71 -0
  38. data/spec/dummy/config/environments/test.rb +36 -0
  39. data/spec/dummy/config/initializers/application_controller_renderer.rb +8 -0
  40. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  41. data/spec/dummy/config/initializers/cors.rb +16 -0
  42. data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  43. data/spec/dummy/config/initializers/inflections.rb +16 -0
  44. data/spec/dummy/config/initializers/mime_types.rb +4 -0
  45. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  46. data/spec/dummy/config/locales/en.yml +33 -0
  47. data/spec/dummy/config/puma.rb +34 -0
  48. data/spec/dummy/config/routes.rb +3 -0
  49. data/spec/dummy/config/spring.rb +6 -0
  50. data/spec/dummy/db/schema.rb +52 -0
  51. data/spec/dummy/db/test.sqlite3 +0 -0
  52. data/spec/dummy/log/development.log +0 -0
  53. data/spec/dummy/log/test.log +1519 -0
  54. data/spec/examples.txt +79 -0
  55. data/spec/models/lab_tech/default_cleaner_spec.rb +32 -0
  56. data/spec/models/lab_tech/experiment_spec.rb +110 -0
  57. data/spec/models/lab_tech/percentile_spec.rb +85 -0
  58. data/spec/models/lab_tech/result_spec.rb +198 -0
  59. data/spec/models/lab_tech/speedup_spec.rb +133 -0
  60. data/spec/models/lab_tech/summary_spec.rb +325 -0
  61. data/spec/models/lab_tech_spec.rb +23 -0
  62. data/spec/rails_helper.rb +62 -0
  63. data/spec/spec_helper.rb +98 -0
  64. data/spec/support/misc_helpers.rb +7 -0
  65. metadata +238 -0
@@ -0,0 +1,190 @@
1
module LabTech
  # An ActiveRecord-backed Scientist experiment, stored in the
  # "lab_tech_experiments" table.  Each run may be published as a
  # LabTech::Result (with its LabTech::Observation rows), and this class
  # offers console helpers for inspecting or purging that recorded data.
  class Experiment < ActiveRecord::Base
    self.table_name = "lab_tech_experiments"
    include ::Scientist::Experiment

    # Upper bound on attempts made by .named when it hits
    # ActiveRecord::RecordNotUnique, so a persistent failure surfaces as an
    # exception instead of looping forever.
    NAMED_MAX_ATTEMPTS = 3

    has_many :results, class_name: "LabTech::Result", dependent: :destroy
    has_many :observations, class_name: "LabTech::Observation", through: :results

    # NOTE: everything up to the matching `end` (including the pct_* and
    # total_count helpers) is only defined when TablePrint is loaded.
    if defined?( TablePrint ) # a Very Handy Gem Indeed: http://tableprintgem.com/
      tp.set self, *[
        :id,
        { :name => { width: 100 } },
        { :pct_enabled => { display_name: "% Enabled" } },
        { :pct_correct => { display_name: "% Correct" } },
        { :equivalent_count => { display_name: "Equivalent" } },
        { :timed_out_count => { display_name: "Timed Out" } },
        { :other_error_count => { display_name: "Other Errors" } },
        :total_count
      ]

      # @return [String] percent_enabled formatted as e.g. "50.0%"
      def pct_enabled
        format_pct( percent_enabled )
      end

      # @return [String] share of equivalent results as a percentage, or
      #   "N/A" when nothing has been counted yet
      def pct_correct
        return "N/A" if total_count.zero?
        format_pct( equivalent_count, total_count )
      end

      # @return [Integer] sum of the three per-outcome counter columns
      def total_count
        equivalent_count + timed_out_count + other_error_count
      end

      # Formats +x+ as a percentage; when +y+ is given, formats x/y instead.
      private def format_pct(x, y=nil)
        x = 100.0 * x / y if y
        "%3.1f%%" % x
      end
    end



    ##### CLASS METHODS #####

    # Looks up an experiment by name (String; created if missing) or by id
    # (Integer).  Any other argument type yields/returns nil.
    #
    # A RecordNotUnique raised along the way is retried, but only up to
    # NAMED_MAX_ATTEMPTS attempts in total; after that it is re-raised.
    #
    # @yield [exp] the experiment, when a block is given
    # @return [LabTech::Experiment, nil]
    # @raise [ActiveRecord::RecordNotUnique] once the attempt limit is reached
    def self.named(experiment_name_or_id)
      attempts = 0
      begin
        attempts += 1
        case experiment_name_or_id
        when String  ; exp = find_or_create_by(name: experiment_name_or_id)
        when Integer ; exp = find(experiment_name_or_id)
        end
        yield exp if block_given?
        exp
      rescue ActiveRecord::RecordNotUnique
        raise if attempts >= NAMED_MAX_ATTEMPTS
        retry
      end
    end



    ##### INSTANCE METHODS #####

    # @return [Proc, nil] the comparator held in Scientist's instance variable
    def comparator
      @_scientist_comparator
    end

    # Prints every mismatched result (optionally limited) to +io+, delegating
    # the per-result output to Result#compare_observations.
    #
    # @param limit [Integer, nil] maximum number of results to print
    # @param io [#puts] destination for the report
    # @return [nil]
    def compare_mismatches(limit: nil, io: $stdout, &block)
      mismatches = results.mismatched.includes(:observations)
      return if mismatches.empty?
      mismatches = mismatches.limit(limit) if limit

      print_result_report( mismatches, io: io ) do |result|
        result.compare_observations( io: io, &block )
      end
    end

    # Sets percent_enabled to 0.
    def disable
      update_attribute :percent_enabled, 0
    end

    # Randomly answers true with a probability of percent_enabled out of 100.
    def enabled?
      n = rand(100)
      fail "WTF, Ruby?" unless (0..99).cover?(n) # Paranoia? Indirect documentation? YOU DECIDE.
      n < percent_enabled
    end

    # Sets percent_enabled (defaults to 100).
    def enable(percent_enabled: 100)
      update_attribute :percent_enabled, percent_enabled
    end

    # Oh, this is a fun one: apparently Scientist::Experiment#name is
    # overriding the ActiveRecord attribute.  Override it back.
    def name ; read_attribute :name ; end
    def name=(value) ; write_attribute :name, value ; end

    # Records a Scientist result as a LabTech::Result.  Skipped in the Rails
    # test environment unless LabTech.publish_results_in_test_mode? says otherwise.
    def publish(scientist_result)
      return if Rails.env.test? && !LabTech.publish_results_in_test_mode?
      LabTech::Result.record_a_science( self, scientist_result )
    end

    # I don't encourage the willy-nilly destruction of experimental results...
    # ...but sometimes you just need to start over.
    #
    # Deletes this experiment's observations and results (via delete_all, so
    # no callbacks), zeroes the counter columns, and prints a tally to stdout.
    def purge_data
      # Returns how many rows in +scope+ were actually removed.
      delete_and_count = ->(scope) {
        n0, n1 = 0, 0
        transaction do
          n0 = scope.count
          scope.delete_all
          n1 = scope.count
        end
        n0 - n1
      }

      n = delete_and_count.call( LabTech::Observation.where(result_id: self.result_ids) )
      m = delete_and_count.call( self.results )

      # `update` rather than `update_attributes`: the latter is deprecated in
      # Rails 6.0 and removed in 6.1.
      update(
        equivalent_count:  0,
        timed_out_count:   0,
        other_error_count: 0,
      )

      puts "Deleted #{m} result(s) and #{n} observations"
    end

    # Bumps the in-memory run counter and installs the default cleaner (if
    # none was configured) before handing off to Scientist's #run.
    def run(*)
      increment_run_count
      provide_default_cleaner
      super
    end

    # Prints the exception class and message of each candidate observation
    # for every result in the `other_error` scope (optionally limited).
    #
    # @param limit [Integer, nil] maximum number of results to print
    # @param io [#puts] destination for the report
    # @return [nil]
    def summarize_errors(limit: nil, io: $stdout)
      errors = results.other_error
      return if errors.empty?
      errors = errors.limit(limit) if limit

      print_result_report( errors, io: io ) do |result|
        result.candidates.each do |observation|
          # Written to +io+ (not stdout) so the whole report lands in one place;
          # interpolation keeps a nil class/message from raising.
          io.puts " * #{observation.exception_class}: #{observation.exception_message}"
        end
      end
    end

    # Prints the summary to stdout, padded by blank lines.
    def summarize_results
      puts "", summary, ""
    end

    # @return [LabTech::Summary] a summary built from freshly reloaded data
    def summary
      reload
      LabTech::Summary.new(self)
    end

    private

    def increment_run_count
      LabTech.run_count[self.name] += 1
    end

    # Shared report scaffolding for compare_mismatches and summarize_errors:
    # prints the banner, then for each record a ruled section whose body is
    # produced by the given block, then the closing banner.
    #
    # @yield [result] once per record, between the "Result #id" line and the rule
    # @return [nil]
    def print_result_report(records, io:)
      io.puts
      io.puts "=" * 100
      io.puts "Comparing results for #{name}:"
      io.puts

      records.each do |result|
        io.puts
        io.puts "-" * 100
        io.puts "Result ##{result.id}"
        yield result
        io.puts "-" * 100
      end

      io.puts
      io.puts "=" * 100
      io.puts
      nil
    end

    # Falls back to LabTech::DefaultCleaner when no cleaner block was supplied.
    def provide_default_cleaner
      return if cleaner.present?
      clean { |value| LabTech::DefaultCleaner.call(value) }
    end
  end
end
@@ -0,0 +1,40 @@
1
module LabTech
  # One recorded observation (control or candidate) belonging to a
  # LabTech::Result, stored in the "lab_tech_observations" table.
  class Observation < ApplicationRecord
    self.table_name = "lab_tech_observations"

    belongs_to :result, class_name: "LabTech::Result", foreign_key: :result_id, optional: true

    serialize :value

    # @return [Boolean] true when any of the exception_* attributes is present
    def raised_error?
      [ exception_class, exception_message, exception_backtrace ].any? { |attr| attr.present? }
    end

    # Copies name, duration, cleaned value and (if any) exception details
    # from a Scientist observation onto this record.  Does not save.
    #
    # @param scientist_observation [Scientist::Observation]
    # @raise [ArgumentError] when given anything else
    def record_a_science(scientist_observation)
      if !scientist_observation.kind_of?( Scientist::Observation )
        raise ArgumentError, "expected a Scientist::Observation but got #{scientist_observation.class}"
      end

      source = scientist_observation
      self.name     = source.name
      self.duration = source.duration
      self.value    = source.cleaned_value

      record_errors source.exception
    end

    # @return [Boolean] true when the recorded exception class is "Timeout::Error"
    def timed_out?
      "Timeout::Error" == exception_class
    end

    private

    # Stores the class, message and backtrace of +exception+; no-op for nil.
    def record_errors(exception)
      unless exception.nil?
        self.exception_class     = exception.class
        self.exception_message   = exception.message
        self.exception_backtrace = exception.backtrace
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module LabTech
  # Nearest-rank percentile lookup over an already-sorted list.
  #
  #   LabTech::Percentile.call(50, [1, 2, 3, 4])  # => 2
  module Percentile
    extend self

    MIN_PERCENTILE = 0
    MAX_PERCENTILE = 100

    # Returns the element of +list+ at the given percentile.
    #
    # @param pct [Integer] percentile, MIN_PERCENTILE..MAX_PERCENTILE inclusive
    # @param list [Array] values in ascending order
    # @return [Object, nil] the selected element (nil for an empty list)
    # @raise [RuntimeError] when +list+ is not sorted
    # @raise [ArgumentError] when +pct+ is not an Integer in range
    def call(pct, list)
      # Make sure this list is actually sorted
      fail "Sorry, this isn't sorted: #{list.inspect}" unless sorted?(list)

      msg = "Please pass an integer between #{MIN_PERCENTILE} and #{MAX_PERCENTILE}, not #{pct.inspect}"
      raise ArgumentError, msg unless pct.kind_of?(Integer)
      raise ArgumentError, msg unless (MIN_PERCENTILE..MAX_PERCENTILE).cover?(pct)

      return list.first if pct == MIN_PERCENTILE # Avoid the need for a bounds check later
      return list.last  if pct == MAX_PERCENTILE # By definition, I guess

      # Nearest-rank: rank = ceil(pct/100 * N), computed with exact integer
      # ceiling division.  The float form (0.01 * pct * N).ceil overshoots for
      # some inputs, e.g. 0.01 * 7 * 100 == 7.000000000000001, whose ceil is 8.
      rank = ( pct * list.length + MAX_PERCENTILE - 1 ) / MAX_PERCENTILE
      list[ rank - 1 ]
    end

    private

    # True when every adjacent pair is in non-decreasing order
    # (trivially true for lists of fewer than two elements).
    def sorted?(list)
      list.each_cons(2).all? { |a, b| a <= b }
    end

  end
end
@@ -0,0 +1,130 @@
1
module LabTech
  # The recorded outcome of one experiment run: a control observation, one or
  # more candidate observations, and a few denormalized stats.  Stored in the
  # "lab_tech_results" table.
  class Result < ApplicationRecord
    self.table_name = "lab_tech_results"

    belongs_to :experiment, class_name: "LabTech::Experiment"
    has_many :observations, class_name: "LabTech::Observation", dependent: :destroy
    has_one  :control,    ->() { where("name = 'control'") },  class_name: "LabTech::Observation"
    has_many :candidates, ->() { where("name != 'control'") }, class_name: "LabTech::Observation"
    serialize :context

    # NOTE: I don't think this accounts for the possibility that both the
    # control and candidate might raise, and if so, whether raising the same
    # exception should be considered equivalent.  (Unless I already thought of this?)
    scope :correct,    -> { where( equivalent: true,  raised_error: false ) }
    scope :mismatched, -> { where( equivalent: false, raised_error: false ) }
    scope :errored,    -> { where( equivalent: false, raised_error: true  ) }

    # Hash conditions let the adapter quote the value.  The previous
    # hand-built fragment wrapped it in double quotes, which standard SQL
    # (and PostgreSQL) reads as an identifier rather than a string literal.
    scope :timed_out, -> {
      errored.joins(:candidates)
        .where( LabTech::Observation.table_name => { exception_class: "Timeout::Error" } )
    }
    scope :other_error, -> {
      errored.joins(:candidates)
        .where.not( LabTech::Observation.table_name => { exception_class: "Timeout::Error" } )
    }

    after_create :increment_experiment_counters


    ##### CLASS METHODS #####

    # Creates (and saves) a Result for +experiment+ populated from a
    # Scientist result.
    #
    # @param experiment [LabTech::Experiment]
    # @param scientist_result [Scientist::Result]
    # @return [LabTech::Result]
    def self.record_a_science( experiment, scientist_result )
      self.create!(experiment: experiment) do |result|
        result.record_a_science scientist_result
      end
    end



    ##### INSTANCE METHODS #####

    # Having multiple candidates is annoying; I've mistyped this one a lot
    def candidate
      candidates.first
    end

    # Default formatter for #compare_observations: one line per observation
    # showing its name and inspected value.
    DEFAULT_COMPARISON = ->(control, candidate) {
      [ control, candidate ].map { |obs|
        " %20s # => %s" % [ obs.name, obs.value.inspect ]
      }
    }

    # Prints a control-vs-candidate comparison for each candidate.
    #
    # @param io [#puts] destination for the output
    # @yield [control, candidate] optional formatter; its return value is
    #   passed to io.puts (defaults to DEFAULT_COMPARISON)
    # @return [nil]
    def compare_observations(io: $stdout, &block)
      block ||= DEFAULT_COMPARISON
      candidates.each do |candidate|
        io.puts block.( control, candidate )
      end
      nil
    end

    # Populates this (unsaved) record from a Scientist result: context,
    # one built observation per control/candidate, and the simple stats.
    #
    # @param scientist_result [Scientist::Result]
    # @raise [ArgumentError] when given anything else
    def record_a_science(scientist_result)
      unless scientist_result.kind_of?( Scientist::Result )
        raise ArgumentError, "expected a Scientist::Result but got #{scientist_result.class}"
      end

      self.context = scientist_result.context

      record_observation scientist_result.control
      scientist_result.candidates.each do |candidate|
        record_observation candidate
      end

      record_simple_stats scientist_result
    end

    # @return [LabTech::Speedup, nil] nil unless there is exactly one candidate
    def speedup
      return nil unless candidates.count == 1

      LabTech::Speedup.new(
        baseline:   control.duration,
        comparison: candidate.duration,
        time:       time_delta,
        factor:     speedup_factor,
      )
    end

    # @return [Boolean] true when any candidate observation timed out
    def timed_out?
      candidates.any?(&:timed_out?)
    end

    private

    # after_create hook: bumps exactly one of the experiment's counter
    # columns.  Anything neither equivalent nor timed out lands in
    # other_error_count.
    def increment_experiment_counters
      increment = ->(count) {
        Experiment.increment_counter count, self.experiment_id
      }
      case
      when equivalent ; increment.( :equivalent_count )
      when timed_out? ; increment.( :timed_out_count )
      else            ; increment.( :other_error_count )
      end
    end

    # Builds (without saving) an Observation from a Scientist observation.
    def record_observation(scientist_observation)
      self.observations.build do |observation|
        observation.record_a_science scientist_observation
      end
    end

    # Sets equivalent, raised_error and, for single-candidate runs, the
    # duration/time_delta/speedup_factor columns.
    def record_simple_stats(scientist_result)
      cont, cands = scientist_result.control, scientist_result.candidates

      self.equivalent = cands.all? { |cand| cand.equivalent_to?(cont, &experiment.comparator) }

      # Only flagged when a candidate raised and the control did not.
      raised = ->(scientist_observation) { scientist_observation.exception.present? }
      self.raised_error = !raised.(cont) && cands.any?(&raised)

      # Time delta makes no sense if you're running more than one candidate at a time
      if cands.length == 1
        self.control_duration   = cont.duration
        self.candidate_duration = cands.first.duration

        x = LabTech::Speedup.new(
          baseline:   control_duration,
          comparison: candidate_duration,
        )
        self.time_delta     = x.time
        self.speedup_factor = x.factor
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
module LabTech
  # Value object describing how a comparison duration relates to a baseline:
  # the absolute time saved (+time+) and a signed speedup +factor+.
  #
  # The factor is positive (baseline / comparison) when the comparison is
  # faster, negative (-comparison / baseline) when it is slower, and 0 when
  # the two durations are equal.
  class Speedup
    include Comparable

    attr_reader :baseline, :comparison, :time, :factor

    # @return [Float, nil] baseline minus comparison; nil if either is nil
    def self.compute_time_delta(baseline, comparison)
      return nil if baseline.nil?
      return nil if comparison.nil?

      baseline.to_f - comparison.to_f
    end

    # @return [Float, Integer, nil] the signed speedup factor (0 for equal
    #   durations); nil if either duration is nil or zero
    def self.compute_factor(baseline, comparison)
      # Coerce up front so Integer arguments don't trigger integer division
      # (previously compute_factor(3, 2) returned 1 instead of 1.5).
      base = baseline.to_f
      comp = comparison.to_f

      # Timing data should never be zero (nil.to_f is 0.0, so nil lands here too)
      return nil if base.zero? || comp.zero?

      delta = base - comp
      if    delta > 0 then base / comp
      elsif delta < 0 then -( comp / base )
      else                 0
      end
    end

    # Explicit +time+ / +factor+ values win; otherwise they are derived from
    # +baseline+ and +comparison+.  All supplied values are coerced with to_f.
    def initialize(baseline: nil, comparison: nil, time: nil, factor: nil)
      @baseline   = baseline&.to_f
      @comparison = comparison&.to_f
      @time       = time&.to_f   || compute_time_delta
      @factor     = factor&.to_f || compute_factor
    end

    # Orders speedups by factor.
    #
    # @return [Integer, nil] nil when +other+ is not a Speedup or when either
    #   side has no factor (i.e. the two are not comparable)
    def <=>(other)
      return nil unless other.kind_of?(self.class)
      return nil if self.factor.nil?
      # Must be nil, not +other+: <=> may only return -1, 0, 1 or nil.
      return nil if other.factor.nil?

      self.factor <=> other.factor
    end

    # @return [Boolean] false when time or factor is missing, or when either
    #   disagrees with the value derivable from baseline and comparison
    def valid?
      return false if time.nil?
      return false if factor.nil?

      expected_time_delta = compute_time_delta
      expected_factor     = compute_factor

      return false if expected_time_delta && ( time   != expected_time_delta )
      return false if expected_factor     && ( factor != expected_factor )

      true
    end

    private

    def compute_time_delta
      self.class.compute_time_delta(baseline, comparison)
    end

    def compute_factor
      self.class.compute_factor(baseline, comparison)
    end
  end
end