RubyGems - completion-kit - Versions diffs - 0.5.44 → 0.6.0 - Mend

completion-kit 0.5.44 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/app/assets/stylesheets/completion_kit/application.css +13 -0
data/app/controllers/completion_kit/metrics_controller.rb +14 -4
data/app/controllers/completion_kit/runs_controller.rb +63 -1
data/app/models/completion_kit/calibration.rb +0 -4
data/app/models/completion_kit/metric_version.rb +0 -1
data/app/models/completion_kit/run.rb +32 -0
data/app/services/completion_kit/mcp_tools/judges.rb +2 -4
data/app/services/completion_kit/metric_variant_generator.rb +20 -6
data/app/views/completion_kit/calibrations/_trust_panel.html.erb +6 -1
data/app/views/completion_kit/metrics/show.html.erb +1 -1
data/app/views/completion_kit/runs/_actions.html.erb +1 -0
data/app/views/completion_kit/runs/compare.html.erb +85 -0
data/app/views/completion_kit/runs/compare_picker.html.erb +39 -0
data/app/views/completion_kit/runs/show.html.erb +8 -2
data/config/routes.rb +2 -0
data/lib/completion_kit/version.rb +1 -1
metadata +3 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d81df0996441d12c0fb540b9f29bb514813adcdbea3ceefb515d318f28947731
-  data.tar.gz: 606764f41e74cec3284f1155d7ef86e77a61af708af2320d5b02640827741f7a
+  metadata.gz: d8454bbb11d5064ca0c6d4355c780425a28198280dffe7dd424d266fbeef6a09
+  data.tar.gz: 24c1da76e1e9118d5e2a732e8e45b684f588f553ad4d8bac89e239bc22c953c3
 SHA512:
-  metadata.gz: 9e468cd12eb143f4b5eb64333339199420db4c9d0c78ec548965972eee5e326d574a80c6c3092d63f4d99d88901ce3470ac688468d2813f5370e589568fba669
-  data.tar.gz: 7377f00a31d539297f9e79059083aa7bfef782d18d1ecfcb9f7da1ff648ce1eaf6f8a94bc55d56fcca22a47e09c7fcb1bc89981563aa351e4293c47f8d886570
+  metadata.gz: 6fbc5b8047a20240897e19c389bb3f6104d3e2a219794d190183b5433e14d524bb692eb0a27b36ab6471e596c2b9b8af2d70a4f56ae81aa327726fe92f092eb9
+  data.tar.gz: a3399003a48836fd457a8c8b488305fad6d006596c6f940a82b232e2a731dfbc3df5ded4ba8bc16b94690a88d56baaaff6edc6801f47f2bbf422ca8fb74270df

data/app/assets/stylesheets/completion_kit/application.css CHANGED Viewed

@@ -2834,6 +2834,19 @@ select.ck-input {
 }
 .ck-stale-versions-banner__body { min-width: 0; flex: 1 1 320px; }
 .ck-stale-versions-banner .ck-kicker { color: var(--ck-warning); }
+.ck-delta {
+  font-family: var(--ck-mono);
+  font-size: 0.78rem;
+  letter-spacing: 0.04em;
+  padding: 2px 6px;
+  border-radius: 4px;
+}
+.ck-delta--positive { color: var(--ck-success); background: var(--ck-success-soft); }
+.ck-delta--negative { color: var(--ck-danger); background: var(--ck-danger-soft); }
+.ck-delta--zero { color: var(--ck-dim); }
+.ck-run-compare-table td { vertical-align: middle; }
 .ck-review-card__stale-note {
   margin: 0.4rem 0 0;
   font-family: var(--ck-mono);

data/app/controllers/completion_kit/metrics_controller.rb CHANGED Viewed

@@ -42,8 +42,7 @@ module CompletionKit
                                   .limit(50)
       @edit_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
       @suggestion_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
-      @improve_disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree",
-                                                      metric_version_id: @published_metric_version.id).count
+      @improve_disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree").count
       @versions = MetricVersion.where(metric_id: @metric.id).order(version_number: :desc).to_a
     end
@@ -157,9 +156,20 @@ module CompletionKit
         return
       end
+      was_published_already = version.published?
+      reverting = was_published_already && !version.current?
+      previously_current = MetricVersion.current.find_by(metric_id: @metric.id)
       version.publish!
-      redirect_to metric_path(@metric),
-                  notice: "#{@metric.name} #{version.version_label} is now the published version."
+      if reverting
+        prior_label = previously_current.version_label
+        redirect_to metric_path(@metric),
+                    notice: "Reverted to #{@metric.name} #{version.version_label}. Pinned cases still flow to the judge, and calibration verdicts collected against #{prior_label} stay tied to it."
+      else
+        redirect_to metric_path(@metric),
+                    notice: "#{@metric.name} #{version.version_label} is now the published version."
+      end
     end
     def add_few_shot

data/app/controllers/completion_kit/runs_controller.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module CompletionKit
   class RunsController < ApplicationController
     include CompletionKit::TagFiltering
-    before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :retry_failures, :rerun, :refresh_status]
+    before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :retry_failures, :rerun, :regrade, :refresh_status, :compare]
     before_action :load_form_collections, only: [:new, :edit, :create, :update]
     def index
@@ -78,6 +78,29 @@ module CompletionKit
       end
     end
+    def compare
+      other_id = params[:with]
+      if other_id.blank?
+        @other_runs = Run.where(dataset_id: @run.dataset_id, prompt_id: @run.prompt_id)
+                          .where.not(id: @run.id)
+                          .order(created_at: :desc)
+                          .limit(50)
+        return render(:compare_picker)
+      end
+      @other_run = Run.find(other_id)
+      @comparison = build_run_comparison(@run, @other_run)
+      render(:compare)
+    end
+    def regrade
+      if @run.regrade!
+        redirect_to run_path(@run), notice: "Re-grading existing responses with the current judge."
+      else
+        redirect_to run_path(@run), alert: "Nothing to re-grade. The run has no succeeded responses or no metrics attached."
+      end
+    end
     def rerun
       new_run = Run.create!(
         prompt_id: @run.prompt_id,
@@ -163,6 +186,45 @@ module CompletionKit
       @run = Run.find(params[:id])
     end
+    def build_run_comparison(left, right)
+      left_responses = left.responses.includes(:reviews).order(:row_index, :id)
+      right_responses = right.responses.includes(:reviews).order(:row_index, :id)
+      right_by_input = right_responses.each_with_object({}) { |r, h| h[r.input_data.to_s] ||= r }
+      all_reviews = left_responses.flat_map(&:reviews) + right_responses.flat_map(&:reviews)
+      metric_ids = all_reviews.map(&:metric_id).compact.uniq
+      metric_versions = MetricVersion.where(id: all_reviews.map(&:metric_version_id).compact.uniq).index_by(&:id)
+      rows = left_responses.map do |lr|
+        rr = right_by_input[lr.input_data.to_s]
+        {
+          left_response: lr,
+          right_response: rr,
+          per_metric: metric_ids.map do |mid|
+            l_review = lr.reviews.find { |r| r.metric_id == mid }
+            r_review = rr && rr.reviews.find { |r| r.metric_id == mid }
+            next nil if l_review.nil? && r_review.nil?
+            anchor = l_review || r_review
+            {
+              metric_id: mid,
+              metric_name: anchor.metric_name,
+              left_score: l_review ? l_review.ai_score : nil,
+              right_score: r_review ? r_review.ai_score : nil,
+              left_version_label: version_label_for(l_review, metric_versions),
+              right_version_label: version_label_for(r_review, metric_versions),
+              delta: (l_review&.ai_score && r_review&.ai_score) ? (r_review.ai_score.to_f - l_review.ai_score.to_f).round(2) : nil
+            }
+          end.compact
+        }
+      end
+      { rows: rows, metric_ids: metric_ids }
+    end
+    def version_label_for(review, metric_versions)
+      return nil if review.nil? || review.metric_version_id.nil?
+      metric_versions[review.metric_version_id]&.version_label
+    end
     def load_form_collections
       @prompts = Prompt.order(:name)
       @datasets = Dataset.order(:name)

data/app/models/completion_kit/calibration.rb CHANGED Viewed

@@ -7,10 +7,6 @@ module CompletionKit
     belongs_to :metric
     belongs_to :metric_version
-    alias_attribute :judge_version_id, :metric_version_id
-    alias_method :judge_version, :metric_version
-    alias_method :judge_version=, :metric_version=
     validates :verdict, presence: true, inclusion: { in: VERDICTS }
     validates :response_id,
               uniqueness: { scope: [:metric_id, :created_by] }

data/app/models/completion_kit/metric_version.rb CHANGED Viewed

@@ -77,5 +77,4 @@ module CompletionKit
     end
   end
-  JudgeVersion = MetricVersion
 end

data/app/models/completion_kit/run.rb CHANGED Viewed

@@ -179,6 +179,38 @@ module CompletionKit
       start!
     end
+    def regrade!
+      grading_metrics = metrics
+      return false if grading_metrics.empty? || !judge_configured?
+      eligible_responses = responses.where(status: "succeeded").where.not(response_text: nil)
+      response_ids = eligible_responses.pluck(:id)
+      return false if response_ids.empty?
+      transaction do
+        Review.where(response_id: response_ids).update_all(
+          status: "pending",
+          attempts: 0,
+          metric_version_id: nil,
+          ai_score: nil,
+          ai_feedback: nil,
+          error_provider: nil,
+          error_class: nil,
+          error_status: nil,
+          error_message: nil
+        )
+        update!(status: "running", failure_summary: nil, error_message: nil)
+        response_ids.each do |rid|
+          grading_metrics.each { |m| JudgeReviewJob.perform_later(rid, m.id) }
+        end
+        RunCompletionCheckJob.perform_later(id)
+      end
+      broadcast_ui
+      true
+    end
     def progress_snapshot
       generated_done = responses.where(status: "succeeded").count
       generated_failed = responses.where(status: "failed").count

data/app/services/completion_kit/mcp_tools/judges.rb CHANGED Viewed

@@ -75,10 +75,8 @@ module CompletionKit
       def self.compare(args)
         metric = CompletionKit::Metric.find(args["metric_id"])
-        a_id = args["metric_version_a_id"] || args["judge_version_a_id"]
-        b_id = args["metric_version_b_id"] || args["judge_version_b_id"]
-        a = CompletionKit::MetricVersion.where(metric_id: metric.id).find(a_id)
-        b = CompletionKit::MetricVersion.where(metric_id: metric.id).find(b_id)
+        a = CompletionKit::MetricVersion.where(metric_id: metric.id).find(args["metric_version_a_id"])
+        b = CompletionKit::MetricVersion.where(metric_id: metric.id).find(args["metric_version_b_id"])
         stats_a = CompletionKit::MetricCalibrationStats.for(metric, metric_version: a)
         stats_b = CompletionKit::MetricCalibrationStats.for(metric, metric_version: b)
         text_result({

data/app/services/completion_kit/metric_variant_generator.rb CHANGED Viewed

@@ -43,6 +43,7 @@ module CompletionKit
     def build_meta_prompt
       disagreements = MetricCalibrationExamples.disagreements_for(@metric)
       borderlines = MetricCalibrationExamples.borderlines_for(@metric)
+      pinned_examples = Array(@metric.few_shot_examples)
       sections = []
       sections << "You are an expert evaluator. The judge below is misaligned with humans. Propose #{@count == 1 ? "a single" : "#{@count}"} concrete rewrite that closes the gap."
       sections << ""
@@ -77,6 +78,18 @@ module CompletionKit
           sections << ""
         end
       end
+      if pinned_examples.any?
+        sections << "## Pinned cases the judge already references"
+        sections << "These are cases the operator pinned for the judge to remember. The improved rubric must remain consistent with these — that is, the new instruction + rubric should produce roughly the human_score on these inputs, not the judge_score."
+        pinned_examples.each_with_index do |ex, i|
+          sections << "### Pinned #{i + 1}"
+          sections << "Input: #{ex["input"].to_s.truncate(200)}"
+          sections << "Output: #{ex["response"].to_s.truncate(200)}"
+          sections << "Judge previously said #{ex["judge_score"]}/5: #{ex["judge_feedback"].to_s.truncate(160)}"
+          sections << "Human said #{ex["human_score"]}/5: #{ex["human_note"].to_s.truncate(160)}"
+          sections << ""
+        end
+      end
       sections << "## Task"
       sections << "Make one substantive change. Don't just reword. If the disagreements look like instruction problems, rewrite the instruction. If they look like rubric problems (overlapping bands, undefined edge cases), rewrite the rubric. Rewrite both if both are wrong."
       sections << ""
@@ -133,13 +146,14 @@ module CompletionKit
     end
     def calibrations_for(metric, verdict:, limit:)
-      scope = Calibration.where(metric_id: metric.id, verdict: verdict)
+      base = Calibration.where(metric_id: metric.id, verdict: verdict)
       current_version = MetricVersion.current.find_by(metric_id: metric.id)
-      scope = scope.where(metric_version_id: current_version.id) if current_version
-      scope.includes(response: :reviews)
-           .order(created_at: :desc)
-           .limit(limit)
-           .map do |cal|
+      scoped = current_version ? base.where(metric_version_id: current_version.id) : base
+      effective = scoped.exists? ? scoped : base
+      effective.includes(response: :reviews)
+               .order(created_at: :desc)
+               .limit(limit)
+               .map do |cal|
         review = cal.response.reviews.find { |r| r.metric_id == metric.id }
         {
           input: cal.response.input_data,

data/app/views/completion_kit/calibrations/_trust_panel.html.erb CHANGED Viewed

@@ -15,12 +15,17 @@
        .where.not(id: verdicted_ids)
        .order(created_at: :desc).first
    end %>
+<% prior_version_verdicts = if stats.sample_size.zero? && metric && current_metric_version
+     CompletionKit::Calibration.where(metric_id: metric.id).where.not(metric_version_id: current_metric_version.id).count
+   else
+     0
+   end %>
 <p class="ck-trust-line ck-trust-line--<%= stats.gate %>">
   <span class="ck-trust-line__label"><%= heroicon_tag "adjustments-horizontal", variant: :outline, "aria-hidden": "true" %>Calibration</span>
   <% if stats.sample_size.zero? %>
     <span class="ck-trust-line__state">Not measured yet.</span>
-    <span class="ck-trust-line__hint">Needs <%= CompletionKit::MetricCalibrationStats::PROVISIONAL_MIN %> verdicts on the judge's scores.<% if target_response %>
+    <span class="ck-trust-line__hint">Needs <%= CompletionKit::MetricCalibrationStats::PROVISIONAL_MIN %> verdicts on the judge's scores.<% if prior_version_verdicts > 0 %> (<%= pluralize(prior_version_verdicts, "verdict") %> on prior versions, tied to that version's history.)<% end %><% if target_response %>
       <%= link_to "Give a verdict →", run_response_path(target_response.run, target_response, anchor: anchor), class: "ck-trust-line__link" %>
     <% else %> Run this metric on a dataset, then give a verdict.<% end %></span>
   <% elsif stats.counter_only? %>

data/app/views/completion_kit/metrics/show.html.erb CHANGED Viewed

@@ -165,7 +165,7 @@
       <p class="ck-kicker">Cases to learn from</p>
       <span class="ck-chip"><%= pluralize(@disagreements.size, "case") %></span>
     </div>
-    <% mixed_versions = @disagreements.map(&:metric_version_id).uniq.size > 1 %>
+    <% mixed_versions = @disagreements.any? { |c| c.metric_version_id != @published_metric_version.id } %>
     <p class="ck-meta-copy">Cases where a reviewer's score didn't match the judge's. Pin useful ones with <strong>Remember this</strong> so the judge sees them next time it grades<%= " (pins flow into the current judge regardless of which version produced the verdict)" if mixed_versions %>.</p>
     <% existing_ids = Array(@metric.few_shot_examples).map { |fs| fs["calibration_id"] } %>
     <ul class="ck-disagreement-list">

data/app/views/completion_kit/runs/_actions.html.erb CHANGED Viewed

@@ -11,6 +11,7 @@
       <%= button_to "Retry", generate_run_path(run), method: :post, class: ck_button_classes(:light, variant: :outline), form_class: "inline-block" %>
       <%= button_to "Re-run as new", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
     <% elsif run.status == "completed" %>
+      <%= link_to "Compare", compare_run_path(run), class: ck_button_classes(:light, variant: :outline) %>
       <%= button_to "Re-run", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
     <% end %>
   <% end %>

data/app/views/completion_kit/runs/compare.html.erb ADDED Viewed

@@ -0,0 +1,85 @@
+<ol class="ck-breadcrumb">
+  <li><%= link_to "Runs", runs_path %></li>
+  <li><%= link_to @run.name, run_path(@run) %></li>
+  <li>vs <%= @other_run.name %></li>
+</ol>
+<section class="ck-page-header">
+  <div>
+    <h1 class="ck-title">Comparing runs</h1>
+    <p class="ck-meta-copy"><strong>A</strong>: <%= link_to @run.name, run_path(@run), class: "ck-link" %> &middot; <strong>B</strong>: <%= link_to @other_run.name, run_path(@other_run), class: "ck-link" %></p>
+  </div>
+  <div class="ck-actions">
+    <%= link_to "Pick another", compare_run_path(@run), class: ck_button_classes(:light, variant: :outline) %>
+  </div>
+</section>
+<% if @comparison[:rows].empty? %>
+  <div class="ck-empty">
+    <p>No responses to compare yet.</p>
+  </div>
+<% else %>
+  <table class="ck-results-table ck-run-compare-table">
+    <thead>
+      <tr>
+        <th scope="col">Case</th>
+        <th scope="col">Metric</th>
+        <th scope="col">A score</th>
+        <th scope="col">B score</th>
+        <th scope="col">Δ</th>
+        <th scope="col">A version</th>
+        <th scope="col">B version</th>
+      </tr>
+    </thead>
+    <tbody>
+      <% @comparison[:rows].each do |row| %>
+        <% case_label = ((row[:left_response].row_index || 0) + 1).to_s %>
+        <% row[:per_metric].each_with_index do |pm, idx| %>
+          <tr>
+            <% if idx == 0 %>
+              <td rowspan="<%= row[:per_metric].size %>">
+                <%= link_to case_label, run_response_path(@run, row[:left_response]), class: "ck-link" %>
+                <% if row[:right_response] %>
+                  <span class="ck-meta-copy">/ <%= link_to "B", run_response_path(@other_run, row[:right_response]), class: "ck-link" %></span>
+                <% end %>
+              </td>
+            <% end %>
+            <td><%= pm[:metric_name] %></td>
+            <td>
+              <% if pm[:left_score] %>
+                <span class="<%= ck_badge_classes(ck_score_kind(pm[:left_score].to_f)) %>"><%= pm[:left_score] %></span>
+              <% else %>
+                <span class="ck-meta-copy">—</span>
+              <% end %>
+            </td>
+            <td>
+              <% if pm[:right_score] %>
+                <span class="<%= ck_badge_classes(ck_score_kind(pm[:right_score].to_f)) %>"><%= pm[:right_score] %></span>
+              <% else %>
+                <span class="ck-meta-copy">—</span>
+              <% end %>
+            </td>
+            <td>
+              <% if pm[:delta] %>
+                <% delta_class = pm[:delta] > 0 ? "ck-delta--positive" : pm[:delta] < 0 ? "ck-delta--negative" : "ck-delta--zero" %>
+                <span class="ck-delta <%= delta_class %>"><%= pm[:delta].positive? ? "+#{pm[:delta]}" : pm[:delta].to_s %></span>
+              <% else %>
+                <span class="ck-meta-copy">—</span>
+              <% end %>
+            </td>
+            <td>
+              <% if pm[:left_version_label] %>
+                <span class="ck-source-chip ck-source-chip--current"><%= pm[:left_version_label] %></span>
+              <% end %>
+            </td>
+            <td>
+              <% if pm[:right_version_label] %>
+                <span class="ck-source-chip ck-source-chip--current"><%= pm[:right_version_label] %></span>
+              <% end %>
+            </td>
+          </tr>
+        <% end %>
+      <% end %>
+    </tbody>
+  </table>
+<% end %>

data/app/views/completion_kit/runs/compare_picker.html.erb ADDED Viewed

@@ -0,0 +1,39 @@
+<ol class="ck-breadcrumb">
+  <li><%= link_to "Runs", runs_path %></li>
+  <li><%= link_to @run.name, run_path(@run) %></li>
+  <li>Compare</li>
+</ol>
+<section class="ck-page-header">
+  <div>
+    <h1 class="ck-title">Compare with another run</h1>
+    <p class="ck-lead">Pick a run on the same dataset and prompt to see per-case score deltas side by side.</p>
+  </div>
+</section>
+<% if @other_runs.any? %>
+  <table class="ck-results-table">
+    <thead>
+      <tr>
+        <th scope="col">Run</th>
+        <th scope="col">Judge</th>
+        <th scope="col">Created</th>
+        <th scope="col"></th>
+      </tr>
+    </thead>
+    <tbody>
+      <% @other_runs.each do |other| %>
+        <tr>
+          <td><%= link_to other.name, run_path(other), class: "ck-link" %></td>
+          <td class="ck-meta-copy"><%= other.judge_model %></td>
+          <td class="ck-meta-copy"><time datetime="<%= other.created_at.utc.iso8601 %>"><%= time_ago_in_words(other.created_at) %> ago</time></td>
+          <td class="ck-results-table__arrow"><%= link_to "Compare →", compare_run_path(@run, with: other.id), class: "ck-link" %></td>
+        </tr>
+      <% end %>
+    </tbody>
+  </table>
+<% else %>
+  <div class="ck-empty">
+    <p>No other runs on this dataset + prompt combination yet. <%= link_to "Re-run from this one", rerun_run_path(@run), method: :post, class: "ck-link" %> to create one.</p>
+  </div>
+<% end %>

data/app/views/completion_kit/runs/show.html.erb CHANGED Viewed

@@ -32,10 +32,16 @@
         </p>
       </div>
       <% if @run.status == "completed" %>
-        <%= button_to "Re-run with current judge",
+        <%= button_to "Re-run from scratch",
               rerun_run_path(@run), method: :post,
+              class: ck_button_classes(:light, variant: :outline), form_class: "inline-block",
+              title: "Create a new run that regenerates responses and grades them with the current judge.",
+              data: { turbo_confirm: "Create a new run with fresh responses and the current judge? The original run stays as a record." } %>
+        <%= button_to "Re-grade with current judge",
+              regrade_run_path(@run), method: :post,
               class: ck_button_classes(:dark), form_class: "inline-block",
-              data: { turbo_confirm: "Create a new run with the current metric versions? The original run stays as a record." } %>
+              title: "Re-judge this run's existing responses against the current judge. Faster and cheaper than re-running.",
+              data: { turbo_confirm: "Re-judge this run's existing responses against the current judge?" } %>
       <% end %>
     </div>
   <% end %>

data/config/routes.rb CHANGED Viewed

@@ -37,7 +37,9 @@ CompletionKit::Engine.routes.draw do
       post :suggest
       post :retry_failures
       post :rerun
+      post :regrade
       get :refresh_status
+      get :compare
     end
     resources :responses, only: [:show] do
       resources :calibrations, only: [:create]

data/lib/completion_kit/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module CompletionKit
-  VERSION = "0.5.44"
+  VERSION = "0.6.0"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: completion-kit
 version: !ruby/object:Gem::Version
-  version: 0.5.44
+  version: 0.6.0
 platform: ruby
 authors:
 - Damien Bastin
@@ -377,6 +377,8 @@ files:
 - app/views/completion_kit/runs/_status_header.html.erb
 - app/views/completion_kit/runs/_status_panel.html.erb
 - app/views/completion_kit/runs/_table.html.erb
+- app/views/completion_kit/runs/compare.html.erb
+- app/views/completion_kit/runs/compare_picker.html.erb
 - app/views/completion_kit/runs/edit.html.erb
 - app/views/completion_kit/runs/index.html.erb
 - app/views/completion_kit/runs/new.html.erb