completion-kit 0.5.34 → 0.5.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d6a67c2ef67eff4762711f38e46cb5df85b9136a6de59a82d962a54ef2c1466c
4
- data.tar.gz: 01f86625fec3d18ee712e61d0d601687571125e8333e5f468d3c151f9714651b
3
+ metadata.gz: 7ec9f216056f47d007b8a512a009dea02aeaad328a9c4832ac0c1e122b816b15
4
+ data.tar.gz: 413f1b8e8ca28ed2c14e55210299a28bef42acb1a437a3ee9c3bd88b62d03bf9
5
5
  SHA512:
6
- metadata.gz: 33335e4a89da8b1eb264663c50570841a7afeda8b48bcad1178b2185ceee5ec2148e1e57ea4248835e550ca936a5bccd4ece6819070cde7f44c3ef312aa40970
7
- data.tar.gz: 39b1e8845d2dea0e2ac6bed5679336d639ea501796b0e15242eeef42db3e9313eaeccaa59268d11a2fcb52ce7c110634374f16fa4d7721494c6474caa524b97e
6
+ metadata.gz: c837fd6ddc33c5799145bac2a0b1dba4ca1df807365c621330b619c47607e666a30325b47ce6d34e2ecb93e13d4e14a9538d597e0d56e47b9f7a1a836432be0d
7
+ data.tar.gz: 06b9ac7200883cd19d11ff127898d52d940b3f7a110cd5e7744bfcb2abab797fa05e923ba76121f6c00c6aeb8ac5d7b5d5e8b79b07d773ad0c0d5650ec41b90d
@@ -5119,3 +5119,57 @@ a.tag-mark {
5119
5119
  outline: 2px solid var(--ck-accent);
5120
5120
  outline-offset: 2px;
5121
5121
  }
5122
+
5123
+ .ck-calibration {
5124
+ margin-top: 12px;
5125
+ padding-top: 12px;
5126
+ border-top: 1px dashed rgba(255, 255, 255, 0.08);
5127
+ }
5128
+ .ck-calibration__prompt {
5129
+ font-size: 0.8rem;
5130
+ color: var(--ck-dim);
5131
+ margin: 0 0 8px;
5132
+ display: flex;
5133
+ align-items: center;
5134
+ gap: 8px;
5135
+ }
5136
+ .ck-calibration__count {
5137
+ font-size: 0.75rem;
5138
+ color: var(--ck-accent);
5139
+ }
5140
+ .ck-calibration__buttons {
5141
+ display: flex;
5142
+ gap: 8px;
5143
+ flex-wrap: wrap;
5144
+ }
5145
+ .ck-calibration__pill {
5146
+ display: inline-flex;
5147
+ align-items: center;
5148
+ gap: 6px;
5149
+ padding: 6px 12px;
5150
+ border-radius: 999px;
5151
+ font-size: 0.85rem;
5152
+ background: transparent;
5153
+ border: 1px solid rgba(255, 255, 255, 0.18);
5154
+ color: inherit;
5155
+ cursor: pointer;
5156
+ }
5157
+ .ck-calibration__pill:hover,
5158
+ .ck-calibration__pill:focus-visible {
5159
+ border-color: var(--ck-accent);
5160
+ }
5161
+ .ck-calibration__pill.is-active {
5162
+ background: var(--ck-accent);
5163
+ color: #0b1320;
5164
+ border-color: var(--ck-accent);
5165
+ }
5166
+ .ck-calibration__detail {
5167
+ margin-top: 10px;
5168
+ display: flex;
5169
+ flex-direction: column;
5170
+ gap: 8px;
5171
+ }
5172
+ .ck-calibration__value {
5173
+ color: var(--ck-accent);
5174
+ font-weight: 600;
5175
+ }
@@ -0,0 +1,57 @@
module CompletionKit
  module Api
    module V1
      # JSON endpoints for human calibrations of a judge score, addressed by the
      # run_id / response_id / metric_id request params.
      #
      # Every action first checks CompletionKit.config.judge_calibration_enabled
      # and then resolves the run, response and metric named in the params.
      class CalibrationsController < BaseController
        before_action :ensure_calibration_enabled
        before_action :set_scope

        # Lists the calibrations recorded for this run/response/metric.
        # Rows are ordered by created_at so the payload order is stable between
        # requests (the MCP calibrations_list tool uses the same ordering).
        def index
          render json: scope_calibrations.order(:created_at)
        end

        # Upserts the calibration owned by `created_by` (defaults to "api").
        #
        # Responds with :created when a new row was inserted, :ok when an
        # existing row was updated, and :unprocessable_entity plus the
        # validation errors when the record could not be saved.
        def create
          calibration = scope_calibrations.find_or_initialize_by(created_by: created_by_param)
          calibration.assign_attributes(
            run: @run,
            response: @response,
            metric: @metric,
            judge_version: JudgeVersion.ensure_current_for(@metric),
            **calibration_params
          )

          if calibration.save
            render json: calibration, status: calibration.previously_new_record? ? :created : :ok
          else
            render json: { errors: calibration.errors }, status: :unprocessable_entity
          end
        end

        private

        # Answers 404 with a JSON error body when the feature flag is off.
        def ensure_calibration_enabled
          return if CompletionKit.config.judge_calibration_enabled

          render(json: { error: "Calibration disabled" }, status: :not_found)
        end

        # Loads @run, @response (looked up through the run's responses) and
        # @metric. Any missing record is handed to `not_found`, which is not
        # defined in this class (presumably provided by BaseController).
        def set_scope
          @run = Run.find(params[:run_id])
          @response = @run.responses.find(params[:response_id])
          @metric = Metric.find(params[:metric_id])
        rescue ActiveRecord::RecordNotFound
          not_found
        end

        # Relation of calibrations for the current run/response/metric.
        def scope_calibrations
          Calibration.where(run_id: @run.id, response_id: @response.id, metric_id: @metric.id)
        end

        # Caller-writable attributes, as a symbol-keyed hash suitable for
        # splatting into assign_attributes.
        def calibration_params
          params.permit(:verdict, :corrected_score, :note).to_h.symbolize_keys
        end

        # Identity the calibration is filed under; falls back to "api".
        def created_by_param
          params[:created_by].presence || "api"
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
module CompletionKit
  # Web (Turbo) endpoint behind the calibration pills and detail forms rendered
  # by the completion_kit/calibrations/buttons partial.
  #
  # NOTE(review): calibrations are filed under `calibration_creator`, which
  # prefers the X-Remote-User header, while the response show view looks up the
  # existing calibration by config.username / "operator" only. When the header
  # is set the two identities differ — confirm which one is intended.
  class CalibrationsController < ApplicationController
    before_action :ensure_calibration_enabled
    before_action :set_scope

    # Upserts the current user's calibration for the run/response/metric in the
    # params and swaps the calibration widget via a Turbo Stream.
    #
    # The "Disagree" pill posts only a verdict, but the model requires a
    # corrected_score for that verdict. Previously that request always failed
    # validation and redirected with a flash, so the score slider (rendered
    # only for a "disagree" calibration) could never be reached from the UI.
    # In that one case the widget is now re-rendered with the unsaved record so
    # the user can pick a score; all other failures keep the flash + redirect.
    def create
      calibration = Calibration.find_or_initialize_by(
        run_id: @run.id, response_id: @response.id, metric_id: @metric.id, created_by: calibration_creator
      )
      calibration.assign_attributes(
        judge_version: JudgeVersion.ensure_current_for(@metric),
        verdict: params[:verdict],
        corrected_score: params[:corrected_score].presence,
        note: params[:note].presence
      )

      if calibration.save
        render turbo_stream: buttons_stream(calibration)
      elsif awaiting_corrected_score?(calibration)
        # Nothing was persisted; 422 tells the client so while Turbo still
        # applies the stream and reveals the score form.
        render turbo_stream: buttons_stream(calibration), status: :unprocessable_entity
      else
        flash[:alert] = calibration.errors.full_messages.to_sentence
        redirect_to run_response_path(@run, @response)
      end
    end

    private

    # Responds 404 with an empty body when the feature flag is off.
    def ensure_calibration_enabled
      head :not_found unless CompletionKit.config.judge_calibration_enabled
    end

    # Loads @run, @response (through the run's responses) and @metric.
    def set_scope
      @run = Run.find(params[:run_id])
      @response = @run.responses.find(params[:response_id])
      @metric = Metric.find(params[:metric_id])
    end

    # The judge's review of this response for the metric, or nil.
    def review_for_metric
      @response.reviews.find_by(metric_id: @metric.id)
    end

    # Identity the calibration is stored under: X-Remote-User header, then the
    # configured username, then "operator".
    def calibration_creator
      request.env["HTTP_X_REMOTE_USER"].presence || CompletionKit.config.username.presence || "operator"
    end

    # Turbo Stream action replacing the widget for this response/metric with a
    # fresh render of the buttons partial for `calibration`.
    def buttons_stream(calibration)
      turbo_stream.replace(
        "calibration_#{@response.id}_#{@metric.id}",
        partial: "completion_kit/calibrations/buttons",
        locals: { review: review_for_metric, calibration: calibration, run: @run, response_row: @response, metric: @metric }
      )
    end

    # True when the only reason the save failed is that "disagree" was chosen
    # without a score yet, i.e. the pill was clicked rather than the form saved.
    def awaiting_corrected_score?(calibration)
      calibration.verdict == "disagree" &&
        params[:corrected_score].blank? &&
        calibration.errors.attribute_names == [:corrected_score]
    end
  end
end
@@ -0,0 +1,47 @@
module CompletionKit
  # A human verdict on one judge score, tied to a run, response, metric and the
  # judge version in effect when the verdict was recorded.
  class Calibration < ApplicationRecord
    VERDICTS = %w[agree disagree borderline].freeze

    # Attributes exposed by #as_json, in output order.
    JSON_ATTRIBUTES = %i[
      id run_id response_id metric_id judge_version_id
      verdict corrected_score note created_by created_at
    ].freeze

    belongs_to :run
    belongs_to :response
    belongs_to :metric
    belongs_to :judge_version

    validates :verdict, presence: true, inclusion: { in: VERDICTS }
    # One calibration per (response, metric, created_by).
    validates :response_id, uniqueness: { scope: %i[metric_id created_by] }
    validate :corrected_score_required_when_disagreeing
    validate :corrected_score_within_rubric

    scope :for_run, ->(run_id) { where(run_id: run_id) }
    scope :for_metric, ->(metric_id) { where(metric_id: metric_id) }

    # Fixed JSON shape for API/MCP payloads; `options` is accepted for
    # signature compatibility and not consulted.
    def as_json(options = {})
      JSON_ATTRIBUTES.to_h { |attribute| [attribute, public_send(attribute)] }
    end

    private

    # A "disagree" verdict must come with the score the reviewer would give.
    def corrected_score_required_when_disagreeing
      return if verdict != "disagree" || corrected_score.present?

      errors.add(:corrected_score, "must be set when disagreeing with the judge")
    end

    # When a corrected score is supplied it must lie in 1..5 inclusive.
    def corrected_score_within_rubric
      return if corrected_score.blank?
      return if corrected_score.to_f.between?(1, 5)

      errors.add(:corrected_score, "must be between 1 and 5")
    end
  end
end
@@ -0,0 +1,32 @@
module CompletionKit
  # Snapshot of a metric's judge instruction and rubric bands that calibrations
  # are recorded against.
  class JudgeVersion < ApplicationRecord
    # Attributes exposed by #as_json, in output order.
    JSON_ATTRIBUTES = %i[id metric_id instruction rubric_bands current created_at].freeze

    belongs_to :metric
    has_many :calibrations, dependent: :destroy

    serialize :rubric_bands, coder: JSON

    validates :metric_id, presence: true

    scope :current, -> { where(current: true) }

    # Returns the version flagged current for `metric`, creating one from the
    # metric's instruction and rubric_bands when none exists.
    #
    # NOTE(review): the lookup and the create! are separate statements and no
    # uniqueness constraint on (metric_id, current) is visible here, so two
    # concurrent first calls could each create a current version — confirm
    # whether that matters for callers.
    def self.ensure_current_for(metric)
      existing = current.find_by(metric_id: metric.id)
      return existing if existing

      create!(
        metric: metric,
        instruction: metric.instruction,
        rubric_bands: metric.rubric_bands,
        current: true
      )
    end

    # Fixed JSON shape; `options` is accepted for signature compatibility and
    # not consulted.
    def as_json(options = {})
      JSON_ATTRIBUTES.to_h { |attribute| [attribute, public_send(attribute)] }
    end
  end
end
@@ -33,7 +33,8 @@ module CompletionKit
33
33
  McpTools::Metrics.definitions +
34
34
  McpTools::MetricGroups.definitions +
35
35
  McpTools::ProviderCredentials.definitions +
36
- McpTools::Tags.definitions
36
+ McpTools::Tags.definitions +
37
+ McpTools::Calibrations.definitions
37
38
  end
38
39
 
39
40
  def self.call_tool(name, arguments)
@@ -46,6 +47,7 @@ module CompletionKit
46
47
  when /\Ametric_groups_/ then McpTools::MetricGroups.call(name, arguments)
47
48
  when /\Aprovider_credentials_/ then McpTools::ProviderCredentials.call(name, arguments)
48
49
  when /\Atags_/ then McpTools::Tags.call(name, arguments)
50
+ when /\Acalibrations_/ then McpTools::Calibrations.call(name, arguments)
49
51
  else raise MethodNotFound, "Unknown tool: #{name}"
50
52
  end
51
53
  end
@@ -0,0 +1,73 @@
module CompletionKit
  module McpTools
    # MCP tools for listing and upserting judge calibrations.
    #
    # Both handlers honour CompletionKit.config.judge_calibration_enabled, the
    # same switch that gates the web and JSON API calibration controllers, so
    # turning the feature off disables it on every surface.
    module Calibrations
      extend Base

      TOOLS = {
        "calibrations_list" => {
          description: "List calibrations. Filter by run_id, response_id, metric_id, or created_by.",
          inputSchema: {
            type: "object",
            properties: {
              run_id: { type: "integer" },
              response_id: { type: "integer" },
              metric_id: { type: "integer" },
              created_by: { type: "string" }
            },
            required: []
          },
          handler: :list
        },
        "calibrations_create" => {
          description: "Upsert a calibration for (run, response, metric, created_by). Verdict is one of agree, disagree, borderline. corrected_score (1..5) is required when verdict is 'disagree'.",
          inputSchema: {
            type: "object",
            properties: {
              run_id: { type: "integer" },
              response_id: { type: "integer" },
              metric_id: { type: "integer" },
              # Mirrors CompletionKit::Calibration::VERDICTS; keep the two in sync.
              verdict: { type: "string", enum: %w[agree disagree borderline] },
              corrected_score: { type: "number" },
              note: { type: "string" },
              created_by: { type: "string" }
            },
            required: ["run_id", "response_id", "metric_id", "verdict"]
          },
          handler: :create
        }
      }.freeze

      # Lists calibrations ordered by created_at, narrowed by whichever of
      # run_id / response_id / metric_id / created_by are present in `args`.
      # Returns an error result when calibration is disabled.
      def self.list(args)
        return error_result("Calibration disabled") unless CompletionKit.config.judge_calibration_enabled

        scope = CompletionKit::Calibration.all
        %w[run_id response_id metric_id created_by].each do |key|
          scope = scope.where(key => args[key]) if args[key]
        end
        text_result(scope.order(:created_at).map(&:as_json))
      end

      # Upserts the calibration for (run, response, metric, created_by), with
      # created_by defaulting to "mcp". Returns the saved calibration, or an
      # error result carrying the validation messages (or the disabled notice).
      def self.create(args)
        return error_result("Calibration disabled") unless CompletionKit.config.judge_calibration_enabled

        run = CompletionKit::Run.find(args["run_id"])
        # Looked up through the run so a response from another run is rejected.
        response = run.responses.find(args["response_id"])
        metric = CompletionKit::Metric.find(args["metric_id"])
        created_by = args["created_by"].presence || "mcp"

        calibration = CompletionKit::Calibration.find_or_initialize_by(
          run_id: run.id, response_id: response.id, metric_id: metric.id, created_by: created_by
        )
        calibration.assign_attributes(
          judge_version: CompletionKit::JudgeVersion.ensure_current_for(metric),
          verdict: args["verdict"],
          corrected_score: args["corrected_score"],
          note: args["note"]
        )

        if calibration.save
          text_result(calibration.as_json)
        else
          error_result(calibration.errors.full_messages.join(", "))
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
<div id="calibration_<%= response_row.id %>_<%= metric.id %>" class="ck-calibration">
  <%# Calibration widget for one response/metric pair.
      Locals: run, response_row, metric, review (may be nil), calibration (may be nil). %>
  <%
    current_verdict = calibration&.verdict
    verdict_count = CompletionKit::Calibration.where(response_id: response_row.id, metric_id: metric.id).count
    pill_labels = { "agree" => "👍 Agree", "disagree" => "👎 Disagree" }
    calibrations_url = run_response_calibrations_path(run, response_row)
  %>
  <p class="ck-calibration__prompt">
    How does this score feel?
    <% if verdict_count.positive? %>
      <span class="ck-calibration__count"><%= pluralize(verdict_count, "verdict") %> collected</span>
    <% end %>
  </p>

  <%# One pill per verdict; the pill matching the stored verdict is marked active. %>
  <div class="ck-calibration__buttons">
    <% CompletionKit::Calibration::VERDICTS.each do |verdict| %>
      <% active = verdict == current_verdict %>
      <%= button_to run_response_calibrations_path(run, response_row, metric_id: metric.id, verdict: verdict),
            method: :post,
            form: { data: { turbo: "true" } },
            class: "ck-calibration__pill ck-calibration__pill--#{verdict}#{' is-active' if active}",
            "aria-pressed": active.to_s do %>
        <%= pill_labels.fetch(verdict, "🤔 Borderline") %>
      <% end %>
    <% end %>
  </div>

  <% if current_verdict == "disagree" %>
    <%# Slider starts at the stored correction, else the judge's score, else 3. %>
    <% score_value = calibration.corrected_score || review&.ai_score || 3 %>
    <%= form_with url: calibrations_url,
          method: :post, local: false,
          class: "ck-calibration__detail" do |f| %>
      <%= hidden_field_tag :metric_id, metric.id %>
      <%= hidden_field_tag :verdict, "disagree" %>
      <label class="ck-label">
        Your score
        <span class="ck-calibration__value" data-calibration-value><%= score_value %></span>
      </label>
      <input type="range" name="corrected_score" min="1" max="5" step="0.5"
             value="<%= score_value %>"
             oninput="this.closest('.ck-calibration__detail').querySelector('[data-calibration-value]').textContent = this.value"
             class="ck-slider">
      <textarea name="note" rows="2" class="ck-input ck-input--area" placeholder="Why? (optional)"><%= calibration.note %></textarea>
      <%= f.submit "Save", class: ck_button_classes(:dark) %>
    <% end %>
  <% elsif current_verdict == "borderline" %>
    <%= form_with url: calibrations_url,
          method: :post, local: false,
          class: "ck-calibration__detail" do |f| %>
      <%= hidden_field_tag :metric_id, metric.id %>
      <%= hidden_field_tag :verdict, "borderline" %>
      <textarea name="note" rows="2" class="ck-input ck-input--area" placeholder="What made this borderline? (optional)"><%= calibration.note %></textarea>
      <%= f.submit "Save", class: ck_button_classes(:dark) %>
    <% end %>
  <% end %>
</div>
@@ -116,6 +116,15 @@
116
116
  <div class="ck-note-box"><%= review.ai_feedback %></div>
117
117
  </div>
118
118
  <% end %>
119
+ <% if CompletionKit.config.judge_calibration_enabled && review.metric && review.ai_score %>
120
+ <% existing = CompletionKit::Calibration.find_by(
121
+ response_id: @response.id, metric_id: review.metric_id,
122
+ created_by: CompletionKit.config.username.presence || "operator"
123
+ ) %>
124
+ <%= render "completion_kit/calibrations/buttons",
125
+ review: review, calibration: existing, run: @run,
126
+ response_row: @response, metric: review.metric %>
127
+ <% end %>
119
128
  </div>
120
129
  <% end %>
121
130
  </div>
data/config/routes.rb CHANGED
@@ -26,7 +26,9 @@ CompletionKit::Engine.routes.draw do
26
26
  post :rerun
27
27
  get :refresh_status
28
28
  end
29
- resources :responses, only: [:show]
29
+ resources :responses, only: [:show] do
30
+ resources :calibrations, only: [:create]
31
+ end
30
32
  end
31
33
 
32
34
  resources :suggestions, only: [:show] do
@@ -54,7 +56,11 @@ CompletionKit::Engine.routes.draw do
54
56
  post :generate
55
57
  post :retry_failures
56
58
  end
57
- resources :responses, only: [:index, :show]
59
+ resources :responses, only: [:index, :show] do
60
+ resources :metrics, only: [] do
61
+ resources :calibrations, only: [:index, :create]
62
+ end
63
+ end
58
64
  end
59
65
  resources :datasets
60
66
  resources :metrics
@@ -0,0 +1,28 @@
# Creates completion_kit_judge_versions and, when migrating up, seeds one
# current version per existing metric.
class CreateCompletionKitJudgeVersions < ActiveRecord::Migration[8.1]
  def change
    create_table :completion_kit_judge_versions do |t|
      t.references :metric,
                   null: false,
                   foreign_key: { to_table: :completion_kit_metrics, on_delete: :cascade },
                   index: { name: "index_ck_judge_versions_on_metric_id" }
      t.text :instruction
      t.text :rubric_bands
      t.boolean :current, null: false, default: true
      t.timestamps
    end

    add_index :completion_kit_judge_versions,
              %i[metric_id current],
              name: "index_ck_judge_versions_on_metric_current"

    # Data backfill only runs on the way up; rolling back drops the table.
    reversible do |direction|
      direction.up { backfill_current_versions }
    end
  end

  private

  # Copies each metric's instruction and rubric_bands column values into a new
  # judge version row flagged current. Uses throwaway table-bound classes
  # rather than the application models.
  def backfill_current_versions
    metrics = Class.new(ActiveRecord::Base) { self.table_name = "completion_kit_metrics" }
    versions = Class.new(ActiveRecord::Base) { self.table_name = "completion_kit_judge_versions" }

    metrics.find_each do |metric|
      versions.create!(
        metric_id: metric.id,
        instruction: metric["instruction"],
        rubric_bands: metric["rubric_bands"],
        current: true
      )
    end
  end
end
@@ -0,0 +1,32 @@
# Creates completion_kit_calibrations: one row per (response, metric,
# created_by), linked to its run and to the judge version it was made against.
class CreateCompletionKitCalibrations < ActiveRecord::Migration[8.1]
  def change
    # Parent reference => table its foreign key points at. Column order in the
    # table follows the order of this hash.
    parent_tables = {
      run: :completion_kit_runs,
      response: :completion_kit_responses,
      metric: :completion_kit_metrics,
      judge_version: :completion_kit_judge_versions
    }

    create_table :completion_kit_calibrations do |t|
      parent_tables.each do |parent, table|
        t.references parent,
                     null: false,
                     foreign_key: { to_table: table, on_delete: :cascade },
                     index: { name: "index_ck_calibrations_on_#{parent}_id" }
      end

      t.string :verdict, null: false
      t.string :created_by
      t.decimal :corrected_score, precision: 4, scale: 1
      t.text :note
      t.timestamps
    end

    # Backs the upsert key used by the controllers and MCP tool.
    add_index :completion_kit_calibrations,
              %i[response_id metric_id created_by],
              unique: true,
              name: "index_ck_calibrations_on_response_metric_user"
  end
end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.34"
2
+ VERSION = "0.5.35"
3
3
  end
@@ -12,6 +12,7 @@ module CompletionKit
12
12
  attr_accessor :api_reference_authentication_partial
13
13
  attr_accessor :api_rate_limit, :web_rate_limit
14
14
  attr_accessor :allow_loopback_endpoints
15
+ attr_accessor :judge_calibration_enabled
15
16
 
16
17
  def initialize
17
18
  @openai_api_key = ENV['OPENAI_API_KEY']
@@ -27,6 +28,7 @@ module CompletionKit
27
28
  @web_rate_limit = 300
28
29
 
29
30
  @allow_loopback_endpoints = true
31
+ @judge_calibration_enabled = true
30
32
 
31
33
  @api_reference_authentication_partial = "completion_kit/api_reference/authentication"
32
34
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.34
4
+ version: 0.5.35
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -235,6 +235,7 @@ files:
235
235
  - app/assets/javascripts/completion_kit/application.js
236
236
  - app/assets/stylesheets/completion_kit/application.css.erb
237
237
  - app/controllers/completion_kit/api/v1/base_controller.rb
238
+ - app/controllers/completion_kit/api/v1/calibrations_controller.rb
238
239
  - app/controllers/completion_kit/api/v1/datasets_controller.rb
239
240
  - app/controllers/completion_kit/api/v1/metric_groups_controller.rb
240
241
  - app/controllers/completion_kit/api/v1/metrics_controller.rb
@@ -245,6 +246,7 @@ files:
245
246
  - app/controllers/completion_kit/api/v1/tags_controller.rb
246
247
  - app/controllers/completion_kit/api_reference_controller.rb
247
248
  - app/controllers/completion_kit/application_controller.rb
249
+ - app/controllers/completion_kit/calibrations_controller.rb
248
250
  - app/controllers/completion_kit/dashboard_controller.rb
249
251
  - app/controllers/completion_kit/dashboard_dismissals_controller.rb
250
252
  - app/controllers/completion_kit/datasets_controller.rb
@@ -267,8 +269,10 @@ files:
267
269
  - app/jobs/completion_kit/run_completion_check_job.rb
268
270
  - app/mailers/completion_kit/application_mailer.rb
269
271
  - app/models/completion_kit/application_record.rb
272
+ - app/models/completion_kit/calibration.rb
270
273
  - app/models/completion_kit/dashboard_dismissal.rb
271
274
  - app/models/completion_kit/dataset.rb
275
+ - app/models/completion_kit/judge_version.rb
272
276
  - app/models/completion_kit/mcp_session.rb
273
277
  - app/models/completion_kit/metric.rb
274
278
  - app/models/completion_kit/metric_group.rb
@@ -292,6 +296,7 @@ files:
292
296
  - app/services/completion_kit/llm_client.rb
293
297
  - app/services/completion_kit/mcp_dispatcher.rb
294
298
  - app/services/completion_kit/mcp_tools/base.rb
299
+ - app/services/completion_kit/mcp_tools/calibrations.rb
295
300
  - app/services/completion_kit/mcp_tools/datasets.rb
296
301
  - app/services/completion_kit/mcp_tools/metric_groups.rb
297
302
  - app/services/completion_kit/mcp_tools/metrics.rb
@@ -317,6 +322,7 @@ files:
317
322
  - app/views/completion_kit/api_reference/_resource_card.html.erb
318
323
  - app/views/completion_kit/api_reference/_resource_list.html.erb
319
324
  - app/views/completion_kit/api_reference/index.html.erb
325
+ - app/views/completion_kit/calibrations/_buttons.html.erb
320
326
  - app/views/completion_kit/dashboard/_eye_icon.html.erb
321
327
  - app/views/completion_kit/dashboard/_eye_off_icon.html.erb
322
328
  - app/views/completion_kit/dashboard/_failures_card.html.erb
@@ -399,6 +405,8 @@ files:
399
405
  - db/migrate/20260513000001_create_completion_kit_mcp_sessions.rb
400
406
  - db/migrate/20260514000001_allow_judge_only_runs.rb
401
407
  - db/migrate/20260516000001_create_completion_kit_dashboard_dismissals.rb
408
+ - db/migrate/20260522000001_create_completion_kit_judge_versions.rb
409
+ - db/migrate/20260522000002_create_completion_kit_calibrations.rb
402
410
  - lib/completion-kit.rb
403
411
  - lib/completion_kit.rb
404
412
  - lib/completion_kit/concurrency_check.rb