RubyGems - async_experiments - Versions diffs - 0.0.1 → 0.1.0 - Mend

async_experiments 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +4 -4
data/README.md +55 -11
data/lib/async_experiments.rb +25 -14
data/lib/async_experiments/candidate_worker.rb +3 -3
data/lib/async_experiments/experiment_control.rb +7 -3
data/lib/async_experiments/experiment_error_worker.rb +7 -2
data/lib/async_experiments/experiment_result.rb +19 -16
data/lib/async_experiments/experiment_result_candidate_worker.rb +24 -0
data/lib/async_experiments/experiment_result_control_worker.rb +29 -0
data/lib/async_experiments/version.rb +1 -1
data/spec/async_experiments_spec.rb +81 -20
data/spec/candidate_worker_spec.rb +21 -24
data/spec/examples.txt +48 -37
data/spec/experiment_control_spec.rb +8 -6
data/spec/experiment_error_worker_spec.rb +27 -6
data/spec/experiment_result_candidate_worker_spec.rb +50 -0
data/spec/experiment_result_control_worker_spec.rb +96 -0
data/spec/experiment_result_spec.rb +103 -85
metadata +8 -5
data/lib/async_experiments/experiment_result_worker.rb +0 -36
data/spec/experiment_result_worker_spec.rb +0 -106

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 73d448d952cbbee2567c807bafd327fead90ac38
-  data.tar.gz: 47f6da2a76c6f75c42ea8904c72421164bc77c3c
+  metadata.gz: feb9b5191b98c9363df96dbfaa10d2fb1fa73370
+  data.tar.gz: e2baeb8347ddf14935f9560ea2eef8ab9e015238
 SHA512:
-  metadata.gz: c3d83b96b7143a1594c36d3d29b3ba77e6dd65409a35201dae68059cfdc8e2ffe4e03e28b6805d58251a82340984daee93eaf1ec40c03c820caaaf0e85b28ca2
-  data.tar.gz: a8932415d200081e7a6176175cbd29372a5e91463cdc040e5d72a5424939af9e10176ef149efa891868af3e4c84cc009ffe06fd5bf99f5ebdf55ee24b47c0100
+  metadata.gz: 1261f8e02a56a3147f148d478d5c0476d7a1cd0da47560f6f6524c6ca4c2ba583730e2a106da484655b963619feb7385ff5e854de4f34ef54796c5e9ccd7d845
+  data.tar.gz: 6eb95fa38740c975d5f05cd32cc84e81c1d41a4a9b61549528c7ec7486fb0a94c59a6cda93767b2a83b29c0b4abd1f25bf604a6b72b6e5800e105ac9236cc286

data/README.md CHANGED

@@ -1,10 +1,15 @@
 # Asynchronous Experiments
-Similar to GitHub Scientist, but uses Sidekiq (and its Redis connection pool)
-to run control and candidate branches of experiments in parallel, storing the
-output for later review.
+This tool is used to understand the implications of replacing a section of code
+(known as control) with a different piece of code (the candidate) in terms of
+interchangeable outputs and effects on performance. It reports differences in
+output to redis and the durations of the control and candidate to statsd.
-Provides helpers to assist with rendering the output from the comparison.
+It is similar to [GitHub Scientist](https://github.com/github/scientist), but
+uses Sidekiq (and its Redis connection pool) to run control and candidate branches
+of experiments in parallel, storing the output for later review.
+It provides helpers to assist with rendering the output from the comparison.
 ## IMPORTANT NOTE ABOUT SIDEKIQ
@@ -18,7 +23,23 @@ The gem also assumes access to statsd for reporting purposes.
 ## Technical documentation
-Example usage:
+Example usage: [experiments-framework](https://github.com/alphagov/publishing-api/tree/experiments-framework)
+branch of the [Publishing API](https://github.com/alphagov/publishing-api)
+### Evaluate a piece of code for replacing
+- Identify the code you want to consider replacing, your control
+- Include `AsyncExperiments::ExperimentControl` into the class that contains
+  your control code
+- `experiment_control` is passed the user defined name of the experiment,
+  details of a candidate worker it can initialise and a block of the control
+  code
+- `experiment_control` will return the results of the control code and the
+  code can proceed as before
+- By default the results of the experiment will be stored in redis for 24 hours;
+  this can be altered by including `results_expiry: {number of seconds}` in
+  the hash of `experiment_control` arguments.
 ```
 require "async_experiments/experiment_control"
@@ -52,6 +73,14 @@ class ContentItemsController < ApplicationController
 end
 ```
+### Run your replacement code asynchronously from a Sidekiq worker
+- A candidate worker is defined, which will be enqueued automatically based on
+  the arguments passed to `experiment_control`.
+- The worker receives the arguments defined in the args attribute of the
+  candidate, with an extra argument that is the name of the experiment.
+- The name of the experiment and a block of the candidate code are passed to
+  `experiment_candidate`, which will monitor the duration and the response.
 ```
 require "async_experiments/candidate_worker"
@@ -66,6 +95,11 @@ class LinkablesCandidate < AsyncExperiments::CandidateWorker
 end
 ```
+### Access the instances where the response of candidate and control didn't match
+- The static method `get_experiment_data` can be called on `AsyncExperiments`
+  to load an array of the cases where the responses didn't match
 ```
 class DebugController < ApplicationController
   skip_before_action :require_signin_permission!
@@ -88,22 +122,22 @@ end
   <% @mismatched_responses.each_with_index do |mismatch, i| %>
     <li>
       <ul>
-        <li><a href="#missing-#{i}">Missing</a></li>
-        <li><a href="#extra-#{i}">Extra</a></li>
-        <li><a href="#changed-#{i}">Changed</a></li>
+        <li><a href="#missing-<%= i %>">Missing</a></li>
+        <li><a href="#extra-<%= i %>">Extra</a></li>
+        <li><a href="#changed-<%= i %>">Changed</a></li>
       </ul>
-      <h3 id="missing-#{i}">Missing from candidate</h3>
+      <h3 id="missing-<%= i %>">Missing from candidate</h3>
       <% mismatch[:missing].each do |entry| %>
         <pre><%= PP.pp(entry, "") %></pre>
       <% end %>
-      <h3 id="extra-#{i}">Extra in candidate</h3>
+      <h3 id="extra-<%= i %>">Extra in candidate</h3>
       <% mismatch[:extra].each do |entry| %>
         <pre><%= PP.pp(entry, "") %></pre>
       <% end %>
-      <h3 id="changed-#{i}">Changed in candidate</h3>
+      <h3 id="changed-<%= i %>">Changed in candidate</h3>
       <% mismatch[:changed].each do |entry| %>
         <pre><%= PP.pp(entry, "") %></pre>
       <% end %>
@@ -112,12 +146,22 @@ end
 </ul>
 ```
+### Make statsd available
+- For a Rails app this would be done in `config/initializers`
 ```
 statsd_client = Statsd.new("localhost")
 statsd_client.namespace = "govuk.app.publishing-api"
 AsyncExperiments.statsd = statsd_client
 ```
+### Example implementation
+The [experiments-framework](https://github.com/alphagov/publishing-api/tree/experiments-framework)
+branch of GOV.UK [Publishing API](https://github.com/alphagov/publishing-api)
+contains an implementation of this gem.
 ## Licence
 [MIT License](LICENCE)

data/lib/async_experiments.rb CHANGED

@@ -1,6 +1,8 @@
 require "json"
 require "securerandom"
-require "async_experiments/experiment_result_worker"
+require "async_experiments/experiment_result_candidate_worker"
+require "async_experiments/experiment_result_control_worker"
 module AsyncExperiments
   def self.statsd
@@ -12,20 +14,15 @@ module AsyncExperiments
   end
   def self.get_experiment_data(experiment_name)
-    mismatched_responses = Sidekiq.redis { |redis|
-      redis.lrange("experiments:#{experiment_name}:mismatches", 0, -1)
-    }
+    key_pattern = "experiments:#{experiment_name}:mismatches:*"
+    mismatched_responses = redis_scan_and_retrieve(key_pattern).map do |json|
+      JSON.parse(json)
+    end
-    mismatched_responses.map { |json|
-      parsed = JSON.parse(json)
+    mismatched_responses.map do |parsed|
+      missing, other = parsed.partition { |(operator)| operator == "-" }
-      missing, other = parsed.partition {|(operator, _, _)|
-        operator == "-"
-      }
-      extra, changed = other.partition {|(operator, _, _)|
-        operator == "+"
-      }
+      extra, changed = other.partition { |(operator)| operator == "+" }
       missing_entries, extra_entries = self.fix_ordering_issues(
         missing.map(&:last),
@@ -37,7 +34,21 @@ module AsyncExperiments
         extra: extra_entries,
         changed: changed.map(&:last),
       }
-    }
+    end
+  end
+  def self.get_experiment_exceptions(experiment_name)
+    redis_scan_and_retrieve("experiments:#{experiment_name}:exceptions:*")
+  end
+  def self.redis_scan_and_retrieve(key_pattern)
+    Sidekiq.redis do |redis|
+      enumerator = redis.scan_each(
+        match: key_pattern
+      )
+      retrieve = -> (key) { redis.get(key) }
+      enumerator.map(&retrieve).compact
+    end
   end
   def self.fix_ordering_issues(missing_entries, extra_entries)

data/lib/async_experiments/candidate_worker.rb CHANGED

@@ -1,4 +1,4 @@
-require "async_experiments/experiment_result_worker"
+require "async_experiments/experiment_result_candidate_worker"
 require "async_experiments/experiment_error_worker"
 module AsyncExperiments
@@ -13,7 +13,7 @@ module AsyncExperiments
       start_time = Time.now
       run_output = yield
       duration = (Time.now - start_time).to_f
-      ExperimentResultWorker.perform_async(experiment[:name], experiment[:id], run_output, duration, :candidate)
+      ExperimentResultCandidateWorker.perform_async(experiment[:name], experiment[:id], run_output, duration, experiment[:candidate_expiry])
       run_output
     rescue StandardError => exception
@@ -22,7 +22,7 @@ module AsyncExperiments
       else
         backtrace = exception.backtrace
         backtrace.unshift(exception.inspect)
-        ExperimentErrorWorker.perform_async(experiment[:name], backtrace.join("\n"))
+        ExperimentErrorWorker.perform_async(experiment[:name], backtrace.join("\n"), experiment[:results_expiry])
       end
     end
   end

data/lib/async_experiments/experiment_control.rb CHANGED

@@ -1,8 +1,10 @@
-require "async_experiments/experiment_result_worker"
+require "async_experiments/experiment_result_control_worker"
 module AsyncExperiments
   module ExperimentControl
-    def experiment_control(name, candidate:)
+    def experiment_control(
+      name, candidate:, candidate_expiry: 60, results_expiry: 24 * 60 * 60
+    )
       start_time = Time.now
       run_output = yield
       duration = (Time.now - start_time).to_f
@@ -13,12 +15,14 @@ module AsyncExperiments
         run_output = run_output.to_a
       end
-      ExperimentResultWorker.perform_async(name, id, run_output, duration, :control)
+      ExperimentResultControlWorker.perform_in(1, name, id, run_output, duration, results_expiry)
       candidate_worker = candidate.fetch(:worker)
       candidate_worker.perform_async(*candidate.fetch(:args),
         name: name,
         id: id,
+        candidate_expiry: candidate_expiry,
+        results_expiry: results_expiry,
       )
       run_output

data/lib/async_experiments/experiment_error_worker.rb CHANGED

@@ -1,13 +1,18 @@
+require "digest/sha2"
 module AsyncExperiments
   class ExperimentErrorWorker
     include Sidekiq::Worker
     sidekiq_options queue: :experiments
-    def perform(experiment_name, exception_string)
+    def perform(experiment_name, exception_string, expiry)
       Sidekiq.redis do |redis|
         AsyncExperiments.statsd.increment("experiments.#{experiment_name}.exceptions")
-        redis.rpush("experiments:#{experiment_name}:exceptions", exception_string)
+        hash = Digest::SHA2.base64digest(exception_string)
+        redis_key = "experiments:#{experiment_name}:exceptions:#{hash}"
+        redis.set(redis_key, exception_string) unless redis.exists(redis_key)
+        redis.expire(redis_key, expiry)
       end
     end
   end

data/lib/async_experiments/experiment_result.rb CHANGED

@@ -1,5 +1,6 @@
 require "json"
 require "hashdiff"
+require "digest/sha2"
 require "async_experiments/util"
 module AsyncExperiments
@@ -13,7 +14,7 @@ module AsyncExperiments
       @run_output = run_output
       @duration = duration
-      if Util.blank?(run_output) || Util.blank?(duration)
+      if Util.blank?(duration)
         redis_data = data_from_redis
         if redis_data
@@ -25,29 +26,23 @@ module AsyncExperiments
     attr_reader :key, :run_output, :duration
-    def store_run_output
-      redis.set("experiments:#{key}:#{type}", {
+    def store_run_output(expiry)
+      redis_key = "experiments:#{key}:#{type}"
+      redis.set(redis_key, {
         run_output: run_output,
         duration: duration,
       }.to_json)
+      redis.expire(redis_key, expiry)
     end
-    def process_run_output(candidate)
+    def process_run_output(candidate, expiry)
       variation = HashDiff.diff(sort(self.run_output), sort(candidate.run_output))
-      report_data(variation, candidate)
+      report_data(variation, candidate, expiry)
       redis.del("experiments:#{key}:candidate")
     end
-    def control?
-      type == :control
-    end
-    def candidate?
-      type == :candidate
-    end
     def available?
-      Util.present?(run_output) && Util.present?(duration)
+      Util.present?(duration)
     end
   protected
@@ -64,16 +59,24 @@ module AsyncExperiments
       end
     end
-    def report_data(variation, candidate)
+    def report_data(variation, candidate, expiry)
       statsd.timing("experiments.#{name}.control", self.duration)
       statsd.timing("experiments.#{name}.candidate", candidate.duration)
       if variation != []
         statsd.increment("experiments.#{name}.mismatches")
-        redis.rpush("experiments:#{name}:mismatches", JSON.dump(variation))
+        store_mismatch(variation, expiry)
       end
     end
+    def store_mismatch(mismatch, expiry)
+      json = JSON.dump(mismatch)
+      hash = Digest::SHA2.base64digest(json)
+      redis_key = "experiments:#{name}:mismatches:#{hash}"
+      redis.set(redis_key, json) unless redis.exists(redis_key)
+      redis.expire(redis_key, expiry)
+    end
     def sort(object)
       case object
       when Array

data/lib/async_experiments/experiment_result_candidate_worker.rb ADDED

@@ -0,0 +1,24 @@
+require "async_experiments/experiment_result"
+module AsyncExperiments
+  class ExperimentResultCandidateWorker
+    include Sidekiq::Worker
+    sidekiq_options queue: :experiments
+    LOCK_TIMEOUT = 60
+    def perform(name, id, run_output, duration, expiry)
+      Sidekiq.redis do |redis|
+        result = ExperimentResult.new(name, id, :candidate, redis, statsd, run_output, duration)
+        result.store_run_output(expiry)
+      end
+    end
+  private
+    def statsd
+      AsyncExperiments.statsd
+    end
+  end
+end

data/lib/async_experiments/experiment_result_control_worker.rb ADDED

@@ -0,0 +1,29 @@
+require "async_experiments/experiment_result"
+module AsyncExperiments
+  class ExperimentResultControlWorker
+    include Sidekiq::Worker
+    sidekiq_options queue: :experiments
+    LOCK_TIMEOUT = 60
+    def perform(name, id, run_output, duration, expiry, allowed_attempts = 5, attempt = 1)
+      Sidekiq.redis do |redis|
+        result = ExperimentResult.new(name, id, :control, redis, statsd, run_output, duration)
+        candidate = ExperimentResult.new(name, id, :candidate, redis, statsd)
+        if candidate.available?
+          result.process_run_output(candidate, expiry)
+        elsif allowed_attempts > attempt
+          self.class.perform_in(5, name, id, run_output, duration, expiry, allowed_attempts, attempt + 1)
+        end
+      end
+    end
+  private
+    def statsd
+      AsyncExperiments.statsd
+    end
+  end
+end

data/lib/async_experiments/version.rb CHANGED

@@ -1,3 +1,3 @@
 module AsyncExperiments
-  VERSION = "0.0.1"
+  VERSION = "0.1.0"
 end

data/spec/async_experiments_spec.rb CHANGED

@@ -3,29 +3,36 @@ require "json"
 require "async_experiments"
 RSpec.describe AsyncExperiments do
+  let(:redis) do
+    double(
+      :redis,
+      scan_each: nil,
+      get: nil,
+    )
+  end
+  before do
+    allow(Sidekiq).to receive(:redis).and_yield(redis)
+  end
   describe ".get_experiment_data(experiment_name)" do
     let(:name) { "some_experiment" }
-    let(:experiment_results) {
-      [
-        JSON.dump([
-          ["-", "[1]", {same_key: 1, different_key: 2}],
-          ["+", "[2]", {same_key: 1, different_key: 3}],
-          ["+", "[3]", "Extra element"],
-          ["-", "[4]", "Missing element"],
-          ["~", "[5]", "Changed element"],
-          ["-", "[3]", {moved_complex_object: 1}],
-          ["+", "[6]", {moved_complex_object: 1}],
-        ])
-      ]
-    }
-    let(:redis) { double(:redis) }
+    let(:experiment_result) do
+      JSON.dump([
+        ["-", "[1]", { same_key: 1, different_key: 2 }],
+        ["+", "[2]", { same_key: 1, different_key: 3 }],
+        ["+", "[3]", "Extra element"],
+        ["-", "[4]", "Missing element"],
+        ["~", "[5]", "Changed element"],
+        ["-", "[3]", { moved_complex_object: 1 }],
+        ["+", "[6]", { moved_complex_object: 1 }],
+      ])
+    end
     before do
-      allow(Sidekiq).to receive(:redis).and_yield(redis)
-      allow(redis).to receive(:lrange).with("experiments:#{name}:mismatches", 0, -1)
-        .and_return(experiment_results)
+      allow(redis).to receive(:scan_each).and_return([1])
+      allow(redis).to receive(:get).and_return(experiment_result)
     end
     it "partitions and resorts experiment results for useful output" do
@@ -33,11 +40,11 @@ RSpec.describe AsyncExperiments do
       expect(results).to eq([
         missing: [
-          {"same_key" => 1, "different_key" => 2},
+          { "same_key" => 1, "different_key" => 2 },
           "Missing element",
         ],
         extra: [
-          {"same_key" => 1, "different_key" => 3},
+          { "same_key" => 1, "different_key" => 3 },
           "Extra element",
         ],
         changed: [
@@ -46,4 +53,58 @@ RSpec.describe AsyncExperiments do
       ])
     end
   end
+  describe ".get_experiment_exceptions(experiment_name)" do
+    let(:name) { "some_experiment" }
+    let(:errors) do
+      ["error 1", "error 2"]
+    end
+    before do
+      allow(redis).to receive(:scan_each).and_return([1, 2])
+      allow(redis).to receive(:get).with(1).and_return(errors[0])
+      allow(redis).to receive(:get).with(2).and_return(errors[1])
+    end
+    it "returns a list of exceptions" do
+      results = described_class.get_experiment_exceptions(name)
+      expect(results).to eq(errors)
+    end
+  end
+  describe ".redis_scan_and_retrieve" do
+    subject { described_class.redis_scan_and_retrieve(key_pattern) }
+    let(:key_pattern) { "pattern" }
+    context "no items" do
+      before { allow(redis).to receive(:scan_each).and_return([]) }
+      it { is_expected.to eq([]) }
+    end
+    context "all items exist" do
+      let(:items) { ["item 1", "item 2"] }
+      before do
+        allow(redis).to receive(:scan_each).and_return([1, 2])
+        allow(redis).to receive(:get).with(1).and_return(items[0])
+        allow(redis).to receive(:get).with(2).and_return(items[1])
+      end
+      it { is_expected.to eq(items) }
+    end
+    context "some items are not available" do
+      let(:item) { "item" }
+      before do
+        allow(redis).to receive(:scan_each).and_return([1, 2])
+        allow(redis).to receive(:get).with(1).and_return(item)
+        allow(redis).to receive(:get).with(2).and_return(nil)
+      end
+      it { is_expected.to eq([item]) }
+    end
+  end
 end