completion-kit 0.5.33 → 0.5.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/app/assets/stylesheets/completion_kit/application.css.erb +85 -0
  3. data/app/controllers/completion_kit/api/v1/calibrations_controller.rb +57 -0
  4. data/app/controllers/completion_kit/calibrations_controller.rb +50 -0
  5. data/app/helpers/completion_kit/application_helper.rb +14 -0
  6. data/app/models/completion_kit/calibration.rb +47 -0
  7. data/app/models/completion_kit/judge_version.rb +32 -0
  8. data/app/models/completion_kit/provider_credential.rb +4 -24
  9. data/app/services/completion_kit/api_config.rb +20 -14
  10. data/app/services/completion_kit/mcp_dispatcher.rb +3 -1
  11. data/app/services/completion_kit/mcp_tools/calibrations.rb +73 -0
  12. data/app/services/completion_kit/ollama_client.rb +5 -1
  13. data/app/services/completion_kit/provider_endpoint.rb +47 -0
  14. data/app/views/completion_kit/calibrations/_buttons.html.erb +50 -0
  15. data/app/views/completion_kit/datasets/_form.html.erb +5 -3
  16. data/app/views/completion_kit/datasets/index.html.erb +6 -6
  17. data/app/views/completion_kit/metric_groups/index.html.erb +5 -5
  18. data/app/views/completion_kit/metrics/_form.html.erb +5 -3
  19. data/app/views/completion_kit/metrics/index.html.erb +5 -5
  20. data/app/views/completion_kit/prompts/_form.html.erb +7 -4
  21. data/app/views/completion_kit/prompts/index.html.erb +7 -7
  22. data/app/views/completion_kit/provider_credentials/_form.html.erb +5 -3
  23. data/app/views/completion_kit/responses/show.html.erb +10 -1
  24. data/app/views/completion_kit/runs/_form.html.erb +5 -3
  25. data/app/views/completion_kit/runs/_response_row.html.erb +1 -1
  26. data/app/views/completion_kit/runs/_row.html.erb +1 -1
  27. data/app/views/completion_kit/runs/_status_header.html.erb +2 -2
  28. data/app/views/completion_kit/runs/_status_panel.html.erb +1 -1
  29. data/app/views/completion_kit/runs/_table.html.erb +6 -6
  30. data/app/views/completion_kit/suggestions/show.html.erb +1 -1
  31. data/app/views/completion_kit/tags/_picker.html.erb +2 -2
  32. data/app/views/completion_kit/tags/index.html.erb +4 -4
  33. data/config/routes.rb +8 -2
  34. data/db/migrate/20260522000001_create_completion_kit_judge_versions.rb +28 -0
  35. data/db/migrate/20260522000002_create_completion_kit_calibrations.rb +32 -0
  36. data/lib/completion_kit/version.rb +1 -1
  37. data/lib/completion_kit.rb +5 -0
  38. metadata +10 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df835ff1e701f30b2d0b2889040f1d5fb0f09e5414ce4b50b8c2aeb0729a09b0
4
- data.tar.gz: b104c726a3aacac06bfaf7332da69845a5d441b60c6d97cb70a3cf69ce6d50ed
3
+ metadata.gz: 7ec9f216056f47d007b8a512a009dea02aeaad328a9c4832ac0c1e122b816b15
4
+ data.tar.gz: 413f1b8e8ca28ed2c14e55210299a28bef42acb1a437a3ee9c3bd88b62d03bf9
5
5
  SHA512:
6
- metadata.gz: 63a12da9635ce4bda787581edd983c4544a51ee0d2dd5816e0473107dc5a46e1b04d751e93dbb8452536daaf4c53961576996fe478fc2f59354fc4d4935dc749
7
- data.tar.gz: b7a6b8b01d28e6dbbd0511ae85f9b3e19c813d623c6e0e8470ec82eae14310b30bf9bae3a0fd22adfac7a3414dda9bdb2c0e220d6a4bfc4952eb1c1acc4bfe38
6
+ metadata.gz: c837fd6ddc33c5799145bac2a0b1dba4ca1df807365c621330b619c47607e666a30325b47ce6d34e2ecb93e13d4e14a9538d597e0d56e47b9f7a1a836432be0d
7
+ data.tar.gz: 06b9ac7200883cd19d11ff127898d52d940b3f7a110cd5e7744bfcb2abab797fa05e923ba76121f6c00c6aeb8ac5d7b5d5e8b79b07d773ad0c0d5650ec41b90d
@@ -5088,3 +5088,88 @@ a.tag-mark {
5088
5088
  letter-spacing: 0.04em;
5089
5089
  text-transform: uppercase;
5090
5090
  }
5091
+
5092
+ .ck-record-name {
5093
+ color: inherit;
5094
+ text-decoration: none;
5095
+ }
5096
+ .ck-record-name:hover,
5097
+ .ck-record-name:focus-visible {
5098
+ color: var(--ck-accent);
5099
+ }
5100
+
5101
+ .ck-field-error {
5102
+ color: var(--ck-error, #d93232);
5103
+ font-size: 0.85rem;
5104
+ margin: 4px 0 0;
5105
+ }
5106
+
5107
+ .ck-visually-hidden {
5108
+ position: absolute;
5109
+ width: 1px;
5110
+ height: 1px;
5111
+ padding: 0;
5112
+ margin: -1px;
5113
+ overflow: hidden;
5114
+ clip: rect(0 0 0 0);
5115
+ white-space: nowrap;
5116
+ border: 0;
5117
+ }
5118
+ .tag-mark:focus-within {
5119
+ outline: 2px solid var(--ck-accent);
5120
+ outline-offset: 2px;
5121
+ }
5122
+
5123
+ .ck-calibration {
5124
+ margin-top: 12px;
5125
+ padding-top: 12px;
5126
+ border-top: 1px dashed rgba(255, 255, 255, 0.08);
5127
+ }
5128
+ .ck-calibration__prompt {
5129
+ font-size: 0.8rem;
5130
+ color: var(--ck-dim);
5131
+ margin: 0 0 8px;
5132
+ display: flex;
5133
+ align-items: center;
5134
+ gap: 8px;
5135
+ }
5136
+ .ck-calibration__count {
5137
+ font-size: 0.75rem;
5138
+ color: var(--ck-accent);
5139
+ }
5140
+ .ck-calibration__buttons {
5141
+ display: flex;
5142
+ gap: 8px;
5143
+ flex-wrap: wrap;
5144
+ }
5145
+ .ck-calibration__pill {
5146
+ display: inline-flex;
5147
+ align-items: center;
5148
+ gap: 6px;
5149
+ padding: 6px 12px;
5150
+ border-radius: 999px;
5151
+ font-size: 0.85rem;
5152
+ background: transparent;
5153
+ border: 1px solid rgba(255, 255, 255, 0.18);
5154
+ color: inherit;
5155
+ cursor: pointer;
5156
+ }
5157
+ .ck-calibration__pill:hover,
5158
+ .ck-calibration__pill:focus-visible {
5159
+ border-color: var(--ck-accent);
5160
+ }
5161
+ .ck-calibration__pill.is-active {
5162
+ background: var(--ck-accent);
5163
+ color: #0b1320;
5164
+ border-color: var(--ck-accent);
5165
+ }
5166
+ .ck-calibration__detail {
5167
+ margin-top: 10px;
5168
+ display: flex;
5169
+ flex-direction: column;
5170
+ gap: 8px;
5171
+ }
5172
+ .ck-calibration__value {
5173
+ color: var(--ck-accent);
5174
+ font-weight: 600;
5175
+ }
@@ -0,0 +1,57 @@
1
module CompletionKit
  module Api
    module V1
      # JSON endpoints for the calibrations attached to one
      # (run, response, metric) triple.
      #
      # Both actions are guarded twice: first by the
      # +judge_calibration_enabled+ config flag, then by resolving the run,
      # the response (looked up through the run) and the metric from the
      # request parameters.
      class CalibrationsController < BaseController
        before_action :ensure_calibration_enabled
        before_action :set_scope

        # GET: renders every calibration stored for the scoped triple.
        def index
          render json: scope_calibrations
        end

        # POST: upserts the calibration identified by the scoped triple plus
        # +created_by+. Responds 201 when a row was inserted, 200 when an
        # existing row was updated, and 422 with the model errors otherwise.
        def create
          calibration = scope_calibrations.find_or_initialize_by(created_by: created_by_param)

          attributes = {
            run: @run,
            response: @response,
            metric: @metric,
            judge_version: JudgeVersion.ensure_current_for(@metric)
          }.merge(calibration_params)
          calibration.assign_attributes(attributes)

          unless calibration.save
            return render(json: { errors: calibration.errors }, status: :unprocessable_entity)
          end

          status = calibration.previously_new_record? ? :created : :ok
          render json: calibration, status: status
        end

        private

        # Halts the request with a JSON 404 while the feature flag is off.
        def ensure_calibration_enabled
          return if CompletionKit.config.judge_calibration_enabled

          render(json: { error: "Calibration disabled" }, status: :not_found)
        end

        # Loads @run, @response and @metric. Any missing record is routed to
        # +not_found+ (not defined in this class; presumably provided by
        # BaseController — confirm).
        def set_scope
          @run = Run.find(params[:run_id])
          @response = @run.responses.find(params[:response_id])
          @metric = Metric.find(params[:metric_id])
        rescue ActiveRecord::RecordNotFound
          not_found
        end

        # Relation of calibrations belonging to the scoped triple.
        def scope_calibrations
          Calibration.where(run_id: @run.id, response_id: @response.id, metric_id: @metric.id)
        end

        # Whitelisted, symbol-keyed attributes taken from the top-level params.
        def calibration_params
          permitted = params.permit(:verdict, :corrected_score, :note)
          permitted.to_h.symbolize_keys
        end

        # Author label for the calibration; "api" when the client sends none.
        def created_by_param
          params[:created_by].presence || "api"
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module CompletionKit
  # HTML/Turbo endpoint behind the calibration pills and detail forms shown
  # next to a judged response.
  #
  # The request is rejected with 404 unless
  # CompletionKit.config.judge_calibration_enabled is truthy.
  class CalibrationsController < ApplicationController
    before_action :ensure_calibration_enabled
    before_action :set_scope

    # Upserts the current operator's calibration for (run, response, metric).
    #
    # Outcomes:
    # * saved                       -> Turbo Stream replacing the calibration widget
    # * "disagree" without a score  -> the same Turbo Stream, rendered from the
    #                                  unsaved record with status 422
    # * any other validation error  -> flash alert + redirect to the response page
    #
    # Why the middle branch exists: the "Disagree" pill posts only
    # +verdict=disagree+, while the model requires +corrected_score+ for that
    # verdict, and the score form is only rendered when the calibration passed
    # to the partial already has verdict "disagree". Redirecting here would
    # therefore leave the score form unreachable; rendering the partial with
    # the in-memory record lets the operator pick a score and submit again.
    def create
      calibration = Calibration.find_or_initialize_by(
        run_id: @run.id, response_id: @response.id, metric_id: @metric.id, created_by: calibration_creator
      )
      calibration.assign_attributes(
        judge_version: JudgeVersion.ensure_current_for(@metric),
        verdict: params[:verdict],
        corrected_score: params[:corrected_score].presence,
        note: params[:note].presence
      )

      if calibration.save
        render_calibration_widget(calibration)
      elsif awaiting_corrected_score?(calibration)
        render_calibration_widget(calibration, status: :unprocessable_entity)
      else
        flash[:alert] = calibration.errors.full_messages.to_sentence
        redirect_to run_response_path(@run, @response)
      end
    end

    private

    # Responds with an empty 404 while the feature flag is off.
    def ensure_calibration_enabled
      head :not_found unless CompletionKit.config.judge_calibration_enabled
    end

    # Loads @run, @response (through the run) and @metric from the params.
    # ActiveRecord::RecordNotFound is not rescued here.
    def set_scope
      @run = Run.find(params[:run_id])
      @response = @run.responses.find(params[:response_id])
      @metric = Metric.find(params[:metric_id])
    end

    # Replaces the widget whose DOM id is "calibration_<response>_<metric>"
    # with a fresh render of the buttons partial.
    def render_calibration_widget(calibration, status: :ok)
      render turbo_stream: turbo_stream.replace(
        "calibration_#{@response.id}_#{@metric.id}",
        partial: "completion_kit/calibrations/buttons",
        locals: { review: review_for_metric, calibration: calibration, run: @run, response_row: @response, metric: @metric }
      ), status: status
    end

    # True when the only thing wrong with the submission is that a "disagree"
    # verdict arrived without a corrected score.
    def awaiting_corrected_score?(calibration)
      calibration.verdict == "disagree" &&
        params[:corrected_score].blank? &&
        calibration.errors.details.keys == [:corrected_score]
    end

    # The judge's review of this response for the scoped metric, if any.
    def review_for_metric
      @response.reviews.find_by(metric_id: @metric.id)
    end

    # Author label: the X-Remote-User request header, then the configured
    # username, then "operator".
    # NOTE(review): X-Remote-User is only trustworthy when a fronting proxy
    # sets or strips it — confirm the deployment guarantees that.
    def calibration_creator
      request.env["HTTP_X_REMOTE_USER"].presence || CompletionKit.config.username.presence || "operator"
    end
  end
end
@@ -218,6 +218,20 @@ module CompletionKit
218
218
  "#{base_path}?#{{ tag: next_set.map(&:name) }.to_query}"
219
219
  end
220
220
 
221
# ARIA attributes for a form control bound to +field+.
#
# Returns an empty hash when the form object has no errors on +field+;
# otherwise marks the control invalid and points aria-describedby at the id
# produced by ck_field_error_id.
def ck_field_aria(form, field)
  if form.object.errors[field].any?
    {
      "aria-invalid" => "true",
      "aria-describedby" => ck_field_error_id(form, field)
    }
  else
    {}
  end
end
225
+
226
# Inline error paragraph for +field+.
#
# Returns nil when the form object has no errors on +field+; otherwise a
# <p class="ck-field-error"> holding the first message, with the id produced
# by ck_field_error_id.
def ck_field_error(form, field)
  messages = form.object.errors[field]
  return nil unless messages.any?

  content_tag(
    :p,
    form.object.errors[field].first,
    class: "ck-field-error",
    id: ck_field_error_id(form, field)
  )
end
230
+
231
# DOM id for the inline error of +field+: "<param_key>_<field>_error", where
# param_key comes from the form object's model_name.
def ck_field_error_id(form, field)
  model_key = form.object.model_name.param_key
  format("%s_%s_error", model_key, field)
end
234
+
221
235
  private
222
236
 
223
237
  def diff_tokens(old_text, new_text, side)
@@ -0,0 +1,47 @@
1
module CompletionKit
  # A human verdict on one judge score: whether the reviewer agrees with,
  # disagrees with, or is on the fence about the score a metric gave a response.
  class Calibration < ApplicationRecord
    # Accepted values for +verdict+.
    VERDICTS = %w[agree disagree borderline].freeze

    belongs_to :run
    belongs_to :response
    belongs_to :metric
    belongs_to :judge_version

    validates :verdict, presence: true, inclusion: { in: VERDICTS }
    # One calibration per (response, metric, author).
    validates :response_id, uniqueness: { scope: [:metric_id, :created_by] }
    validate :corrected_score_required_when_disagreeing
    validate :corrected_score_within_rubric

    scope :for_run, ->(run_id) { where(run_id: run_id) }
    scope :for_metric, ->(metric_id) { where(metric_id: metric_id) }

    # Fixed JSON shape; +options+ is accepted for signature compatibility
    # and otherwise unused.
    def as_json(options = {})
      %i[
        id run_id response_id metric_id judge_version_id
        verdict corrected_score note created_by created_at
      ].to_h { |attribute| [attribute, public_send(attribute)] }
    end

    private

    # A "disagree" verdict must come with the score the reviewer would give.
    def corrected_score_required_when_disagreeing
      return unless verdict == "disagree"
      return unless corrected_score.blank?

      errors.add(:corrected_score, "must be set when disagreeing with the judge")
    end

    # When present, the corrected score has to fall inside 1..5 (inclusive).
    def corrected_score_within_rubric
      return if corrected_score.blank?

      numeric = corrected_score.to_f
      within_bounds = numeric >= 1 && numeric <= 5
      errors.add(:corrected_score, "must be between 1 and 5") unless within_bounds
    end
  end
end
@@ -0,0 +1,32 @@
1
module CompletionKit
  # Snapshot of a metric's judge configuration (instruction + rubric bands)
  # that calibrations are recorded against.
  class JudgeVersion < ApplicationRecord
    belongs_to :metric
    has_many :calibrations, dependent: :destroy

    # rubric_bands is persisted as JSON text.
    serialize :rubric_bands, coder: JSON

    validates :metric_id, presence: true

    scope :current, -> { where(current: true) }

    # Returns the version flagged +current+ for +metric+. When none exists,
    # creates one from the metric's present instruction and rubric bands
    # (raising if that record is invalid, via create!).
    def self.ensure_current_for(metric)
      existing = current.find_by(metric_id: metric.id)
      return existing if existing

      create!(
        metric: metric,
        instruction: metric.instruction,
        rubric_bands: metric.rubric_bands,
        current: true
      )
    end

    # Fixed JSON shape; +options+ is accepted for signature compatibility
    # and otherwise unused.
    def as_json(options = {})
      %i[id metric_id instruction rubric_bands current created_at]
        .to_h { |attribute| [attribute, public_send(attribute)] }
    end
  end
end
@@ -1,6 +1,3 @@
1
- require "ipaddr"
2
- require "resolv"
3
-
4
1
  module CompletionKit
5
2
  class ProviderCredential < ApplicationRecord
6
3
  include Turbo::Broadcastable
@@ -8,7 +5,7 @@ module CompletionKit
8
5
  PROVIDER_LABELS = {
9
6
  "openai" => "OpenAI",
10
7
  "anthropic" => "Anthropic",
11
- "ollama" => "Ollama / local endpoint",
8
+ "ollama" => "Ollama / OpenAI-compatible endpoint",
12
9
  "openrouter" => "OpenRouter"
13
10
  }.freeze
14
11
 
@@ -139,29 +136,12 @@ module CompletionKit
139
136
  def api_endpoint_not_internal
140
137
  return if api_endpoint.blank?
141
138
 
142
- uri = safe_http_uri(api_endpoint)
143
- unless uri
139
+ issues = ProviderEndpoint.validate(api_endpoint)
140
+ if issues.include?(:invalid_url)
144
141
  errors.add(:api_endpoint, "must be a valid http or https URL")
145
- return
146
- end
147
-
148
- if endpoint_addresses(uri.host).any? { |ip| ip.private? || ip.link_local? }
142
+ elsif issues.include?(:unsafe_host)
149
143
  errors.add(:api_endpoint, "must not point at a private or internal address")
150
144
  end
151
145
  end
152
-
153
- def safe_http_uri(value)
154
- uri = URI.parse(value.to_s.strip)
155
- uri if uri.is_a?(URI::HTTP) && uri.host.present?
156
- rescue URI::InvalidURIError
157
- nil
158
- end
159
-
160
- def endpoint_addresses(host)
161
- bare = host.delete_prefix("[").delete_suffix("]")
162
- [IPAddr.new(bare)]
163
- rescue IPAddr::InvalidAddressError
164
- Resolv.getaddresses(host).map { |addr| IPAddr.new(addr) }
165
- end
166
146
  end
167
147
  end
@@ -1,5 +1,7 @@
1
1
  module CompletionKit
2
2
  class ApiConfig
3
+ PROVIDERS = %w[openai anthropic ollama openrouter].freeze
4
+
3
5
  def self.for_model(model_name)
4
6
  provider = provider_for_model(model_name)
5
7
  provider ? for_provider(provider) : {}
@@ -9,21 +11,25 @@ module CompletionKit
9
11
  provider = provider_name.to_s
10
12
  stored = ProviderCredential.find_by(provider: provider)&.config_hash || {}
11
13
 
12
- defaults = case provider
13
- when "openai"
14
- { provider: "openai", api_key: CompletionKit.config.openai_api_key || ENV["OPENAI_API_KEY"] }
15
- when "anthropic"
16
- { provider: "anthropic", api_key: CompletionKit.config.anthropic_api_key || ENV["ANTHROPIC_API_KEY"] }
17
- when "ollama"
18
- {
19
- provider: "ollama",
20
- api_key: CompletionKit.config.ollama_api_key || ENV["OLLAMA_API_KEY"],
21
- api_endpoint: CompletionKit.config.ollama_api_endpoint || ENV["OLLAMA_API_ENDPOINT"]
22
- }
23
- when "openrouter"
24
- { provider: "openrouter", api_key: ENV["OPENROUTER_API_KEY"] }
14
+ defaults = if CompletionKit.config.tenant_scope
15
+ PROVIDERS.include?(provider) ? { provider: provider } : {}
25
16
  else
26
- {}
17
+ case provider
18
+ when "openai"
19
+ { provider: "openai", api_key: CompletionKit.config.openai_api_key || ENV["OPENAI_API_KEY"] }
20
+ when "anthropic"
21
+ { provider: "anthropic", api_key: CompletionKit.config.anthropic_api_key || ENV["ANTHROPIC_API_KEY"] }
22
+ when "ollama"
23
+ {
24
+ provider: "ollama",
25
+ api_key: CompletionKit.config.ollama_api_key || ENV["OLLAMA_API_KEY"],
26
+ api_endpoint: CompletionKit.config.ollama_api_endpoint || ENV["OLLAMA_API_ENDPOINT"]
27
+ }
28
+ when "openrouter"
29
+ { provider: "openrouter", api_key: ENV["OPENROUTER_API_KEY"] }
30
+ else
31
+ {}
32
+ end
27
33
  end
28
34
 
29
35
  defaults.merge(stored.compact)
@@ -33,7 +33,8 @@ module CompletionKit
33
33
  McpTools::Metrics.definitions +
34
34
  McpTools::MetricGroups.definitions +
35
35
  McpTools::ProviderCredentials.definitions +
36
- McpTools::Tags.definitions
36
+ McpTools::Tags.definitions +
37
+ McpTools::Calibrations.definitions
37
38
  end
38
39
 
39
40
  def self.call_tool(name, arguments)
@@ -46,6 +47,7 @@ module CompletionKit
46
47
  when /\Ametric_groups_/ then McpTools::MetricGroups.call(name, arguments)
47
48
  when /\Aprovider_credentials_/ then McpTools::ProviderCredentials.call(name, arguments)
48
49
  when /\Atags_/ then McpTools::Tags.call(name, arguments)
50
+ when /\Acalibrations_/ then McpTools::Calibrations.call(name, arguments)
49
51
  else raise MethodNotFound, "Unknown tool: #{name}"
50
52
  end
51
53
  end
@@ -0,0 +1,73 @@
1
module CompletionKit
  module McpTools
    # MCP tool definitions and handlers for calibrations.
    #
    # +text_result+ and +error_result+ are not defined in this module;
    # presumably they come from Base — confirm.
    module Calibrations
      extend Base

      TOOLS = {
        "calibrations_list" => {
          description: "List calibrations. Filter by run_id, response_id, metric_id, or created_by.",
          inputSchema: {
            type: "object",
            properties: {
              run_id: { type: "integer" },
              response_id: { type: "integer" },
              metric_id: { type: "integer" },
              created_by: { type: "string" }
            },
            required: []
          },
          handler: :list
        },
        "calibrations_create" => {
          description: "Upsert a calibration for (run, response, metric, created_by). Verdict is one of agree, disagree, borderline. corrected_score (1..5) is required when verdict is 'disagree'.",
          inputSchema: {
            type: "object",
            properties: {
              run_id: { type: "integer" },
              response_id: { type: "integer" },
              metric_id: { type: "integer" },
              verdict: { type: "string", enum: %w[agree disagree borderline] },
              corrected_score: { type: "number" },
              note: { type: "string" },
              created_by: { type: "string" }
            },
            required: %w[run_id response_id metric_id verdict]
          },
          handler: :create
        }
      }.freeze

      # Lists calibrations oldest-first, narrowed by whichever of the four
      # filter arguments is present (truthy) in +args+.
      def self.list(args)
        filtered = %w[run_id response_id metric_id created_by].reduce(CompletionKit::Calibration.all) do |relation, key|
          args[key] ? relation.where(key => args[key]) : relation
        end

        text_result(filtered.order(:created_at).map(&:as_json))
      end

      # Upserts the calibration identified by (run, response, metric,
      # created_by); the author defaults to "mcp". The response is looked up
      # through the run. Validation failures are returned as a joined message
      # via error_result.
      def self.create(args)
        run = CompletionKit::Run.find(args["run_id"])
        response = run.responses.find(args["response_id"])
        metric = CompletionKit::Metric.find(args["metric_id"])
        author = args["created_by"].presence || "mcp"

        identity = { run_id: run.id, response_id: response.id, metric_id: metric.id, created_by: author }
        calibration = CompletionKit::Calibration.find_or_initialize_by(identity)
        calibration.assign_attributes(
          judge_version: CompletionKit::JudgeVersion.ensure_current_for(metric),
          verdict: args["verdict"],
          corrected_score: args["corrected_score"],
          note: args["note"]
        )

        return text_result(calibration.as_json) if calibration.save

        error_result(calibration.errors.full_messages.join(", "))
      end
    end
  end
end
@@ -7,6 +7,7 @@ module CompletionKit
7
7
  def generate_completion(prompt, options = {})
8
8
  @temperature_dropped = false
9
9
  return "Error: API endpoint not configured" unless configured?
10
+ return "Error: API endpoint resolves to a private address" unless ProviderEndpoint.safe?(api_endpoint)
10
11
 
11
12
  model = options[:model]
12
13
  max_tokens = options[:max_tokens] || 1000
@@ -44,6 +45,7 @@ module CompletionKit
44
45
 
45
46
  def available_models
46
47
  return [] unless configured?
48
+ return [] unless ProviderEndpoint.safe?(api_endpoint)
47
49
 
48
50
  response = build_connection(api_endpoint).get("/v1/models") do |req|
49
51
  req.headers["Authorization"] = "Bearer #{api_key}" if api_key.present?
@@ -74,7 +76,9 @@ module CompletionKit
74
76
  end
75
77
 
76
78
  def api_endpoint
77
- (@config[:api_endpoint] || ENV["OLLAMA_API_ENDPOINT"] || "http://localhost:11434/v1").to_s.delete_suffix("/")
79
+ raw = @config[:api_endpoint] || ENV["OLLAMA_API_ENDPOINT"]
80
+ raw ||= "http://localhost:11434/v1" if CompletionKit.config.allow_loopback_endpoints
81
+ raw.to_s.delete_suffix("/")
78
82
  end
79
83
 
80
84
  def post_completion(model:, prompt:, max_tokens:, temperature:)
@@ -0,0 +1,47 @@
1
require "ipaddr"
require "resolv"
require "uri"

module CompletionKit
  # Classifies provider endpoint URLs so that requests are not sent to
  # private, link-local, unspecified or (unless explicitly allowed) loopback
  # addresses.
  #
  # NOTE(review): the check resolves the host at validation time; whether the
  # HTTP client later connects to the same address is outside this module.
  module ProviderEndpoint
    # 0.0.0.0/8 ("this network"); not covered by IPAddr#private?.
    ZERO_NET = IPAddr.new("0.0.0.0/8").freeze

    module_function

    # Returns a list with at most one issue symbol for +url+:
    # * [:invalid_url]  - not an http(s) URL with a host
    # * [:unresolvable] - the host produced no addresses
    # * [:unsafe_host]  - at least one address is rejected by unsafe?
    # * []              - no issue found
    def validate(url)
      uri = parse(url)
      return [:invalid_url] unless uri
      addrs = addresses(uri.host)
      return [:unresolvable] if addrs.empty?
      return [:unsafe_host] if addrs.any? { |ip| unsafe?(ip) }
      []
    end

    # True when +url+ has no issue, or its only issue is that the host did
    # not resolve.
    def safe?(url)
      errors = validate(url)
      errors.empty? || errors == [:unresolvable]
    end

    # Parses +value+ and returns the URI only when it is http/https (URI::HTTPS
    # is a URI::HTTP) and has a host; nil otherwise, including on parse errors.
    def parse(value)
      uri = URI.parse(value.to_s.strip)
      uri if uri.is_a?(URI::HTTP) && uri.host.present?
    rescue URI::InvalidURIError
      nil
    end

    # Returns the IPAddr list for +host+: the literal itself when +host+ is an
    # IP (URI brackets around IPv6 literals are removed first), otherwise
    # whatever Resolv returns for the name.
    def addresses(host)
      bare = host.delete_prefix("[").delete_suffix("]")
      [IPAddr.new(bare)]
    rescue IPAddr::InvalidAddressError
      # Resolved literals can carry an IPv6 zone id ("fe80::1%lo0"), which some
      # ipaddr versions refuse to parse. The zone does not affect
      # classification, so it is dropped before parsing.
      Resolv.getaddresses(host).map { |addr| IPAddr.new(addr.to_s.sub(/%.*\z/, "")) }
    end

    # True when +ip+ must not be contacted.
    def unsafe?(ip)
      # Unwrap IPv4-mapped / IPv4-compatible IPv6 (e.g. ::ffff:10.0.0.1) so the
      # checks below see the embedded IPv4 address regardless of how the
      # installed ipaddr treats mapped addresses.
      ip = ip.native
      return true if ip.private?
      return true if ip.link_local?
      return true if ip.to_i.zero?
      return true if ip.ipv4? && ZERO_NET.include?(ip)
      return true if ip.loopback? && !CompletionKit.config.allow_loopback_endpoints
      false
    end
  end
end
@@ -0,0 +1,50 @@
<%# Calibration widget for one (response, metric) pair.
    Locals: review, calibration (may be nil), run, response_row, metric.
    The wrapper id is the target that the calibrations controller replaces via Turbo Stream. %>
<div id="calibration_<%= response_row.id %>_<%= metric.id %>" class="ck-calibration">
  <% current_verdict = calibration&.verdict %>
  <% verdict_count = CompletionKit::Calibration.where(response_id: response_row.id, metric_id: metric.id).count %>
  <%# Unique per widget so the slider label can reference its input. %>
  <% score_input_id = "calibration_score_#{response_row.id}_#{metric.id}" %>
  <p class="ck-calibration__prompt">
    How does this score feel?
    <% if verdict_count > 0 %>
      <span class="ck-calibration__count"><%= pluralize(verdict_count, "verdict") %> collected</span>
    <% end %>
  </p>
  <div class="ck-calibration__buttons">
    <% CompletionKit::Calibration::VERDICTS.each do |verdict| %>
      <%= button_to run_response_calibrations_path(run, response_row, metric_id: metric.id, verdict: verdict),
            method: :post,
            form: { data: { turbo: "true" } },
            class: "ck-calibration__pill ck-calibration__pill--#{verdict}#{' is-active' if verdict == current_verdict}",
            "aria-pressed": (verdict == current_verdict).to_s do %>
        <% case verdict
           when "agree" %>👍 Agree<% when "disagree" %>👎 Disagree<% else %>🤔 Borderline<% end %>
      <% end %>
    <% end %>
  </div>

  <% if current_verdict == "disagree" %>
    <%# Slider starts at the saved correction, else the judge's score, else the midpoint. %>
    <% score_value = calibration.corrected_score || review&.ai_score || 3 %>
    <%= form_with url: run_response_calibrations_path(run, response_row),
          method: :post, local: false,
          class: "ck-calibration__detail" do |f| %>
      <%= hidden_field_tag :metric_id, metric.id %>
      <%= hidden_field_tag :verdict, "disagree" %>
      <%# "for" ties the label to the range input so the control has an accessible name. %>
      <label class="ck-label" for="<%= score_input_id %>">
        Your score
        <span class="ck-calibration__value" data-calibration-value><%= score_value %></span>
      </label>
      <input type="range" id="<%= score_input_id %>" name="corrected_score" min="1" max="5" step="0.5"
             value="<%= score_value %>"
             oninput="this.closest('.ck-calibration__detail').querySelector('[data-calibration-value]').textContent = this.value"
             class="ck-slider">
      <%# A placeholder alone is not an accessible name; aria-label supplies one. %>
      <textarea name="note" rows="2" class="ck-input ck-input--area" aria-label="Why? (optional)" placeholder="Why? (optional)"><%= calibration.note %></textarea>
      <%= f.submit "Save", class: ck_button_classes(:dark) %>
    <% end %>
  <% elsif current_verdict == "borderline" %>
    <%= form_with url: run_response_calibrations_path(run, response_row),
          method: :post, local: false,
          class: "ck-calibration__detail" do |f| %>
      <%= hidden_field_tag :metric_id, metric.id %>
      <%= hidden_field_tag :verdict, "borderline" %>
      <textarea name="note" rows="2" class="ck-input ck-input--area" aria-label="What made this borderline? (optional)" placeholder="What made this borderline? (optional)"><%= calibration.note %></textarea>
      <%= f.submit "Save", class: ck_button_classes(:dark) %>
    <% end %>
  <% end %>
</div>
@@ -1,6 +1,6 @@
1
1
  <%= form_with(model: dataset, local: true) do |form| %>
2
2
  <% if dataset.errors.any? %>
3
- <div class="ck-flash ck-flash--alert">
3
+ <div class="ck-flash ck-flash--alert" role="alert">
4
4
  <p class="ck-flash__title"><%= pluralize(dataset.errors.count, "problem") %> prevented this dataset from being saved.</p>
5
5
  <ul class="ck-error-list">
6
6
  <% dataset.errors.full_messages.each do |message| %>
@@ -13,12 +13,14 @@
13
13
  <div class="ck-card ck-form-card">
14
14
  <div class="ck-field">
15
15
  <%= form.label :name, "Name", class: "ck-label" %>
16
- <%= form.text_field :name, class: "ck-input", placeholder: "Customer support tickets" %>
16
+ <%= form.text_field :name, class: "ck-input", placeholder: "Customer support tickets", **ck_field_aria(form, :name) %>
17
+ <%= ck_field_error(form, :name) %>
17
18
  </div>
18
19
 
19
20
  <div class="ck-field">
20
21
  <%= form.label :csv_data, "CSV data", class: "ck-label" %>
21
- <%= form.text_area :csv_data, rows: 12, class: "ck-input ck-input--area ck-input--code", placeholder: "content,audience\nFirst ticket text,internal\nSecond ticket text,customer" %>
22
+ <%= form.text_area :csv_data, rows: 12, class: "ck-input ck-input--area ck-input--code", placeholder: "content,audience\nFirst ticket text,internal\nSecond ticket text,customer", **ck_field_aria(form, :csv_data) %>
23
+ <%= ck_field_error(form, :csv_data) %>
22
24
  </div>
23
25
 
24
26
  <%= render "completion_kit/tags/picker", record: dataset, param_namespace: :dataset %>
@@ -17,18 +17,18 @@
17
17
  <table class="ck-results-table ck-datasets-table">
18
18
  <thead>
19
19
  <tr>
20
- <th>Name</th>
21
- <th>Rows</th>
22
- <th>Used in</th>
23
- <th>Created</th>
24
- <th></th>
20
+ <th scope="col">Name</th>
21
+ <th scope="col">Rows</th>
22
+ <th scope="col">Used in</th>
23
+ <th scope="col">Created</th>
24
+ <th scope="col"></th>
25
25
  </tr>
26
26
  </thead>
27
27
  <tbody>
28
28
  <% @datasets.each do |dataset| %>
29
29
  <tr onclick="window.location='<%= dataset_path(dataset) %>'" style="cursor: pointer;">
30
30
  <td>
31
- <strong><%= dataset.name %></strong>
31
+ <%= link_to dataset_path(dataset), class: "ck-record-name" do %><strong><%= dataset.name %></strong><% end %>
32
32
  <% if dataset.tags.any? %>
33
33
  <div class="tag-marks-row">
34
34
  <%= render "completion_kit/tags/marks", tags: dataset.tags %>
@@ -22,17 +22,17 @@
22
22
  <table class="ck-results-table ck-metric-groups-table">
23
23
  <thead>
24
24
  <tr>
25
- <th>Name</th>
26
- <th>Description</th>
27
- <th>Members</th>
28
- <th></th>
25
+ <th scope="col">Name</th>
26
+ <th scope="col">Description</th>
27
+ <th scope="col">Members</th>
28
+ <th scope="col"></th>
29
29
  </tr>
30
30
  </thead>
31
31
  <tbody>
32
32
  <% @metric_groups.each do |metric_group| %>
33
33
  <tr onclick="window.location='<%= metric_group_path(metric_group) %>'" style="cursor: pointer;">
34
34
  <td>
35
- <strong><%= metric_group.name %></strong>
35
+ <%= link_to metric_group_path(metric_group), class: "ck-record-name" do %><strong><%= metric_group.name %></strong><% end %>
36
36
  <% if metric_group.tags.any? %>
37
37
  <div class="tag-marks-row">
38
38
  <%= render "completion_kit/tags/marks", tags: metric_group.tags %>
@@ -1,6 +1,6 @@
1
1
  <%= form_with(model: metric, local: true) do |form| %>
2
2
  <% if metric.errors.any? %>
3
- <div class="ck-flash ck-flash--alert">
3
+ <div class="ck-flash ck-flash--alert" role="alert">
4
4
  <p class="ck-flash__title"><%= pluralize(metric.errors.count, "problem") %> prevented this metric from being saved.</p>
5
5
  <ul class="ck-error-list">
6
6
  <% metric.errors.full_messages.each do |message| %>
@@ -13,13 +13,15 @@
13
13
  <div class="ck-card ck-form-card">
14
14
  <div class="ck-field">
15
15
  <%= form.label :name, "Metric name", class: "ck-label" %>
16
- <%= form.text_field :name, class: "ck-input", placeholder: "Helpfulness" %>
16
+ <%= form.text_field :name, class: "ck-input", placeholder: "Helpfulness", **ck_field_aria(form, :name) %>
17
+ <%= ck_field_error(form, :name) %>
17
18
  </div>
18
19
 
19
20
  <div class="ck-field ck-field--spacious">
20
21
  <p class="ck-section-title">Instruction</p>
21
22
  <p class="ck-hint">What should the judge assess? This instruction is sent to the LLM judge when scoring outputs.</p>
22
- <%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output..." %>
23
+ <%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output...", **ck_field_aria(form, :instruction) %>
24
+ <%= ck_field_error(form, :instruction) %>
23
25
  </div>
24
26
 
25
27
  <div class="ck-field ck-field--spacious">
@@ -17,17 +17,17 @@
17
17
  <table class="ck-results-table ck-metrics-table">
18
18
  <thead>
19
19
  <tr>
20
- <th>Name</th>
21
- <th>Instruction</th>
22
- <th>In groups</th>
23
- <th></th>
20
+ <th scope="col">Name</th>
21
+ <th scope="col">Instruction</th>
22
+ <th scope="col">In groups</th>
23
+ <th scope="col"></th>
24
24
  </tr>
25
25
  </thead>
26
26
  <tbody>
27
27
  <% @metrics.each do |metric| %>
28
28
  <tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
29
29
  <td>
30
- <strong><%= metric.name %></strong>
30
+ <%= link_to metric_path(metric), class: "ck-record-name" do %><strong><%= metric.name %></strong><% end %>
31
31
  <% if metric.tags.any? %>
32
32
  <div class="tag-marks-row">
33
33
  <%= render "completion_kit/tags/marks", tags: metric.tags %>
@@ -1,6 +1,6 @@
1
1
  <%= form_with(model: prompt, local: true) do |form| %>
2
2
  <% if prompt.errors.any? %>
3
- <div class="ck-flash ck-flash--alert">
3
+ <div class="ck-flash ck-flash--alert" role="alert">
4
4
  <p class="ck-flash__title"><%= pluralize(prompt.errors.count, "problem") %> prevented this prompt from being saved.</p>
5
5
  <ul class="ck-error-list">
6
6
  <% prompt.errors.full_messages.each do |message| %>
@@ -17,17 +17,20 @@
17
17
  <div class="ck-card ck-form-card">
18
18
  <div class="ck-field">
19
19
  <%= form.label :name, "Name", class: "ck-label" %>
20
- <%= form.text_field :name, class: "ck-input", placeholder: "Support summary", autocomplete: "off", data: { "1p-ignore": "" } %>
20
+ <%= form.text_field :name, class: "ck-input", placeholder: "Support summary", autocomplete: "off", data: { "1p-ignore": "" }, **ck_field_aria(form, :name) %>
21
+ <%= ck_field_error(form, :name) %>
21
22
  </div>
22
23
 
23
24
  <div class="ck-field">
24
25
  <%= form.label :description, class: "ck-label" %>
25
- <%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "Short note about this prompt." %>
26
+ <%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "Short note about this prompt.", **ck_field_aria(form, :description) %>
27
+ <%= ck_field_error(form, :description) %>
26
28
  </div>
27
29
 
28
30
  <div class="ck-field">
29
31
  <%= form.label :template, "Prompt text", class: "ck-label" %>
30
- <%= form.text_area :template, rows: 12, class: "ck-input ck-input--area ck-input--code", placeholder: "Summarize {{content}} for {{audience}}" %>
32
+ <%= form.text_area :template, rows: 12, class: "ck-input ck-input--area ck-input--code", placeholder: "Summarize {{content}} for {{audience}}", **ck_field_aria(form, :template) %>
33
+ <%= ck_field_error(form, :template) %>
31
34
  <p class="ck-hint">Use <code>{{variable}}</code>. Match your dataset column names.</p>
32
35
  </div>
33
36
 
@@ -17,19 +17,19 @@
17
17
  <table class="ck-results-table ck-prompts-table">
18
18
  <thead>
19
19
  <tr>
20
- <th>Name</th>
21
- <th>Version</th>
22
- <th>Model</th>
23
- <th>Best score</th>
24
- <th>Runs</th>
25
- <th></th>
20
+ <th scope="col">Name</th>
21
+ <th scope="col">Version</th>
22
+ <th scope="col">Model</th>
23
+ <th scope="col">Best score</th>
24
+ <th scope="col">Runs</th>
25
+ <th scope="col"></th>
26
26
  </tr>
27
27
  </thead>
28
28
  <tbody>
29
29
  <% @prompts.each do |prompt| %>
30
30
  <tr onclick="window.location='<%= prompt_path(prompt) %>'" style="cursor: pointer;">
31
31
  <td>
32
- <strong><%= prompt.name %></strong>
32
+ <%= link_to prompt_path(prompt), class: "ck-record-name" do %><strong><%= prompt.name %></strong><% end %>
33
33
  <% if prompt.description.present? %>
34
34
  <p class="ck-prompts-table__desc"><%= truncate(prompt.description, length: 120) %></p>
35
35
  <% end %>
@@ -1,6 +1,6 @@
1
1
  <%= form_with(model: provider_credential, local: true) do |form| %>
2
2
  <% if provider_credential.errors.any? %>
3
- <div class="ck-flash ck-flash--alert">
3
+ <div class="ck-flash ck-flash--alert" role="alert">
4
4
  <p class="ck-flash__title"><%= pluralize(provider_credential.errors.count, "problem") %> prevented this provider credential from being saved.</p>
5
5
  <ul class="ck-error-list">
6
6
  <% provider_credential.errors.full_messages.each do |message| %>
@@ -18,12 +18,14 @@
18
18
 
19
19
  <div class="ck-field">
20
20
  <%= form.label :api_key, "API key", class: "ck-label" %>
21
- <%= form.text_area :api_key, rows: 3, class: "ck-input ck-input--area ck-input--code", placeholder: "Paste the provider API key" %>
21
+ <%= form.text_area :api_key, rows: 3, class: "ck-input ck-input--area ck-input--code", placeholder: "Paste the provider API key", **ck_field_aria(form, :api_key) %>
22
+ <%= ck_field_error(form, :api_key) %>
22
23
  </div>
23
24
 
24
25
  <div class="ck-field">
25
26
  <%= form.label :api_endpoint, "API endpoint", class: "ck-label" %>
26
- <%= form.text_field :api_endpoint, class: "ck-input", placeholder: "Only needed for Ollama or custom OpenAI-compatible endpoints" %>
27
+ <%= form.text_field :api_endpoint, class: "ck-input", placeholder: "Only needed for Ollama or custom OpenAI-compatible endpoints", **ck_field_aria(form, :api_endpoint) %>
28
+ <%= ck_field_error(form, :api_endpoint) %>
27
29
  </div>
28
30
 
29
31
  <div class="ck-actions">
@@ -1,4 +1,4 @@
1
- <ol class="ck-breadcrumb">
1
+ <ol class="ck-breadcrumb" aria-label="Breadcrumb">
2
2
  <% if @run.prompt %>
3
3
  <li><%= link_to "Prompts", prompts_path %></li>
4
4
  <li><%= link_to @run.prompt.name, prompt_path(@run.prompt) %></li>
@@ -116,6 +116,15 @@
116
116
  <div class="ck-note-box"><%= review.ai_feedback %></div>
117
117
  </div>
118
118
  <% end %>
119
+ <% if CompletionKit.config.judge_calibration_enabled && review.metric && review.ai_score %>
120
+ <% existing = CompletionKit::Calibration.find_by(
121
+ response_id: @response.id, metric_id: review.metric_id,
122
+ created_by: CompletionKit.config.username.presence || "operator"
123
+ ) %>
124
+ <%= render "completion_kit/calibrations/buttons",
125
+ review: review, calibration: existing, run: @run,
126
+ response_row: @response, metric: review.metric %>
127
+ <% end %>
119
128
  </div>
120
129
  <% end %>
121
130
  </div>
@@ -1,6 +1,6 @@
1
1
  <%= form_with(model: run, local: true) do |form| %>
2
2
  <% if run.errors.any? %>
3
- <div class="ck-flash ck-flash--alert">
3
+ <div class="ck-flash ck-flash--alert" role="alert">
4
4
  <p class="ck-flash__title"><%= pluralize(run.errors.count, "problem") %> prevented this run from being saved.</p>
5
5
  <ul class="ck-error-list">
6
6
  <% run.errors.full_messages.each do |message| %>
@@ -13,7 +13,8 @@
13
13
  <div class="ck-card ck-form-card">
14
14
  <div class="ck-field">
15
15
  <%= form.label :name, "Name (auto-generated if blank)", class: "ck-label" %>
16
- <%= form.text_field :name, class: "ck-input", placeholder: run.name.presence || "Auto-generated from prompt + version + timestamp" %>
16
+ <%= form.text_field :name, class: "ck-input", placeholder: run.name.presence || "Auto-generated from prompt + version + timestamp", **ck_field_aria(form, :name) %>
17
+ <%= ck_field_error(form, :name) %>
17
18
  </div>
18
19
 
19
20
  <div class="ck-field">
@@ -56,7 +57,8 @@
56
57
 
57
58
  <div class="ck-field" id="output-column-field" hidden>
58
59
  <%= form.label :output_column, "Output column", class: "ck-label" %>
59
- <%= form.text_field :output_column, value: run.output_column.presence || "actual_output", class: "ck-input", id: "run_output_column", placeholder: "actual_output" %>
60
+ <%= form.text_field :output_column, value: run.output_column.presence || "actual_output", class: "ck-input", id: "run_output_column", placeholder: "actual_output", **ck_field_aria(form, :output_column) %>
61
+ <%= ck_field_error(form, :output_column) %>
60
62
  <p class="ck-field-hint">Name of the dataset column whose value will be graded as the response. Defaults to <code>actual_output</code>.</p>
61
63
  </div>
62
64
 
@@ -1,6 +1,6 @@
1
1
  <% clickable = response.succeeded? %>
2
2
  <tr id="response_<%= response.id %>"<% if clickable %> onclick="window.location='<%= run_response_path(run, response, sort: params[:sort]) %>'" style="cursor: pointer;"<% end %>>
3
- <td class="ck-response-cell__index"><%= index %></td>
3
+ <td class="ck-response-cell__index"><% if clickable %><%= link_to index, run_response_path(run, response, sort: params[:sort]), class: "ck-record-name" %><% else %><%= index %><% end %></td>
4
4
  <td class="ck-response-cell__text">
5
5
  <% if response.status == "failed" %>
6
6
  <% err = response.error_payload %>
@@ -3,7 +3,7 @@
3
3
  <div class="ck-runs-table__identity">
4
4
  <span class="ck-run-name">
5
5
  <span class="<%= ck_run_dot(run) %>"></span>
6
- <strong><%= run.name %></strong>
6
+ <%= link_to ck_run_path(run), class: "ck-record-name" do %><strong><%= run.name %></strong><% end %>
7
7
  </span>
8
8
  <div class="ck-runs-table__config">
9
9
  <% if run.prompt %>
@@ -1,6 +1,6 @@
1
- <div id="run_status_header">
1
+ <div id="run_status_header" aria-live="polite">
2
2
  <% if run.status == "failed" %>
3
- <div class="ck-flash ck-flash--alert">
3
+ <div class="ck-flash ck-flash--alert" role="alert">
4
4
  <%= run.failure_summary.presence || run.error_message.presence || "Run failed." %>
5
5
  </div>
6
6
  <% end %>
@@ -1,5 +1,5 @@
1
1
  <% snap = run.progress_snapshot %>
2
- <div id="run_status_panel">
2
+ <div id="run_status_panel" aria-live="polite" aria-atomic="true">
3
3
  <% if run.status.in?(%w[running completed]) && snap[:generated_total] > 0 %>
4
4
  <% failed_count = snap[:generated_failed] + snap[:judged_failed] %>
5
5
  <% has_judge = snap[:judged_total] > 0 || run.judge_configured? %>
@@ -3,12 +3,12 @@
3
3
  <table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
4
4
  <thead>
5
5
  <tr>
6
- <th>Run</th>
7
- <th>Responses</th>
8
- <th>Metrics</th>
9
- <th>Avg score</th>
10
- <th>When</th>
11
- <th></th>
6
+ <th scope="col">Run</th>
7
+ <th scope="col">Responses</th>
8
+ <th scope="col">Metrics</th>
9
+ <th scope="col">Avg score</th>
10
+ <th scope="col">When</th>
11
+ <th scope="col"></th>
12
12
  </tr>
13
13
  </thead>
14
14
  <tbody>
@@ -1,4 +1,4 @@
1
- <ol class="ck-breadcrumb">
1
+ <ol class="ck-breadcrumb" aria-label="Breadcrumb">
2
2
  <% if @from == "run" %>
3
3
  <li><%= link_to "Runs", runs_path %></li>
4
4
  <li><%= link_to @run.name, run_path(@run) %></li>
@@ -7,8 +7,8 @@
7
7
  <% all_tags.each do |tag| %>
8
8
  <% checked = selected_ids.include?(tag.id) %>
9
9
  <label class="tag-mark" style="--mark-color: var(--tag-<%= tag.color %>);">
10
- <%= check_box_tag "#{param_namespace}[tag_names][]", tag.name, checked, hidden: true %>
11
- <%= tag.name %>
10
+ <%= check_box_tag "#{param_namespace}[tag_names][]", tag.name, checked, class: "ck-visually-hidden", "aria-label": "Tag: #{tag.name}" %>
11
+ <span aria-hidden="true"><%= tag.name %></span>
12
12
  </label>
13
13
  <% end %>
14
14
  <%= text_field_tag "#{param_namespace}[tag_names][]", "",
@@ -14,9 +14,9 @@
14
14
  <table class="ck-results-table ck-tags-table">
15
15
  <thead>
16
16
  <tr>
17
- <th>Tag</th>
18
- <th>Applied to</th>
19
- <th></th>
17
+ <th scope="col">Tag</th>
18
+ <th scope="col">Applied to</th>
19
+ <th scope="col"></th>
20
20
  </tr>
21
21
  </thead>
22
22
  <tbody>
@@ -25,7 +25,7 @@
25
25
  <% by_type = @tagging_by_type.select { |(tid, _), _| tid == tag.id } %>
26
26
  <% breakdown = by_type.map { |(_, type), n| pluralize(n, type.demodulize.titleize.downcase) }.join(" · ") %>
27
27
  <tr onclick="window.location='<%= edit_tag_path(tag) %>'" style="cursor: pointer;">
28
- <td><span class="tag-mark tag-mark--lg" style="--mark-color: var(--tag-<%= tag.color %>);"><%= tag.name %></span></td>
28
+ <td><%= link_to edit_tag_path(tag), class: "ck-record-name" do %><span class="tag-mark tag-mark--lg" style="--mark-color: var(--tag-<%= tag.color %>);"><%= tag.name %></span><% end %></td>
29
29
  <td data-label="Applied to" class="ck-meta-copy">
30
30
  <% if count.zero? %>
31
31
  <span class="ck-tags-table__unused">Not used yet</span>
data/config/routes.rb CHANGED
@@ -26,7 +26,9 @@ CompletionKit::Engine.routes.draw do
26
26
  post :rerun
27
27
  get :refresh_status
28
28
  end
29
- resources :responses, only: [:show]
29
+ resources :responses, only: [:show] do
30
+ resources :calibrations, only: [:create]
31
+ end
30
32
  end
31
33
 
32
34
  resources :suggestions, only: [:show] do
@@ -54,7 +56,11 @@ CompletionKit::Engine.routes.draw do
54
56
  post :generate
55
57
  post :retry_failures
56
58
  end
57
- resources :responses, only: [:index, :show]
59
+ resources :responses, only: [:index, :show] do
60
+ resources :metrics, only: [] do
61
+ resources :calibrations, only: [:index, :create]
62
+ end
63
+ end
58
64
  end
59
65
  resources :datasets
60
66
  resources :metrics
@@ -0,0 +1,28 @@
1
+ class CreateCompletionKitJudgeVersions < ActiveRecord::Migration[8.1]
# Adds the completion_kit_judge_versions table and, when migrating up,
# seeds it with one row per existing completion_kit_metrics row.
2
+ def change
# Each judge version belongs to exactly one metric (NOT NULL reference).
# The foreign key uses ON DELETE CASCADE, so deleting a metric row also
# deletes its judge version rows at the database level.
3
+ create_table :completion_kit_judge_versions do |t|
4
+ t.references :metric,
5
+ null: false,
6
+ foreign_key: { to_table: :completion_kit_metrics, on_delete: :cascade },
7
+ index: { name: "index_ck_judge_versions_on_metric_id" }
8
+ t.text :instruction
9
+ t.text :rubric_bands
# `current` defaults to true for newly inserted rows.
10
+ t.boolean :current, null: false, default: true
11
+ t.timestamps
12
+ end
13
+
# Composite lookup index on (metric_id, current). No `unique:` option is
# passed, so the index itself does not limit a metric to a single row
# with current = true.
14
+ add_index :completion_kit_judge_versions,
15
+ [:metric_id, :current],
16
+ name: "index_ck_judge_versions_on_metric_current"
17
+
# Data backfill. Only a `dir.up` branch is given, so this block does
# nothing when the migration is rolled back.
18
+ reversible do |dir|
19
+ dir.up do
# Anonymous ActiveRecord classes bound directly to the table names, so
# the backfill does not reference the application's model classes.
20
+ metric_model = Class.new(ActiveRecord::Base) { self.table_name = "completion_kit_metrics" }
21
+ jv_model = Class.new(ActiveRecord::Base) { self.table_name = "completion_kit_judge_versions" }
# One judge version per metric, copying the metric's instruction and
# rubric_bands values and marking the row as current.
# NOTE(review): presumes completion_kit_metrics has `instruction` and
# `rubric_bands` columns at this point in the schema history - confirm.
22
+ metric_model.find_each do |m|
23
+ jv_model.create!(metric_id: m.id, instruction: m["instruction"], rubric_bands: m["rubric_bands"], current: true)
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,32 @@
1
+ class CreateCompletionKitCalibrations < ActiveRecord::Migration[8.1]
# Adds the completion_kit_calibrations table: one row records a verdict
# (and optionally a corrected score and a note) tied to a run, a response,
# a metric and a judge version.
2
+ def change
# All four references are NOT NULL and use ON DELETE CASCADE, so deleting
# the referenced run, response, metric or judge version row also deletes
# the calibration rows that point at it.
3
+ create_table :completion_kit_calibrations do |t|
4
+ t.references :run,
5
+ null: false,
6
+ foreign_key: { to_table: :completion_kit_runs, on_delete: :cascade },
7
+ index: { name: "index_ck_calibrations_on_run_id" }
8
+ t.references :response,
9
+ null: false,
10
+ foreign_key: { to_table: :completion_kit_responses, on_delete: :cascade },
11
+ index: { name: "index_ck_calibrations_on_response_id" }
12
+ t.references :metric,
13
+ null: false,
14
+ foreign_key: { to_table: :completion_kit_metrics, on_delete: :cascade },
15
+ index: { name: "index_ck_calibrations_on_metric_id" }
16
+ t.references :judge_version,
17
+ null: false,
18
+ foreign_key: { to_table: :completion_kit_judge_versions, on_delete: :cascade },
19
+ index: { name: "index_ck_calibrations_on_judge_version_id" }
# `verdict` is the only required non-reference column; the set of allowed
# values is not constrained at the database level here.
20
+ t.string :verdict, null: false
# NOTE(review): `created_by` is nullable, yet it is part of the unique
# index below. Most SQL databases treat NULLs as distinct in a unique
# index, so rows with a NULL created_by would not be deduplicated -
# confirm that the model or its callers always populate this column.
21
+ t.string :created_by
# precision 4 / scale 1 stores at most three integer digits and one
# decimal digit (up to 999.9).
22
+ t.decimal :corrected_score, precision: 4, scale: 1
23
+ t.text :note
24
+ t.timestamps
25
+ end
26
+
# At most one calibration per (response, metric, created_by) combination,
# enforced by the database for non-NULL created_by values.
27
+ add_index :completion_kit_calibrations,
28
+ [:response_id, :metric_id, :created_by],
29
+ unique: true,
30
+ name: "index_ck_calibrations_on_response_metric_user"
31
+ end
32
+ end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.33"
2
+ VERSION = "0.5.35"
3
3
  end
@@ -11,6 +11,8 @@ module CompletionKit
11
11
  attr_accessor :tenant_scope, :tenant_scope_columns
12
12
  attr_accessor :api_reference_authentication_partial
13
13
  attr_accessor :api_rate_limit, :web_rate_limit
14
+ attr_accessor :allow_loopback_endpoints
15
+ attr_accessor :judge_calibration_enabled
14
16
 
15
17
  def initialize
16
18
  @openai_api_key = ENV['OPENAI_API_KEY']
@@ -25,6 +27,9 @@ module CompletionKit
25
27
  @api_rate_limit = 120
26
28
  @web_rate_limit = 300
27
29
 
30
+ @allow_loopback_endpoints = true
31
+ @judge_calibration_enabled = true
32
+
28
33
  @api_reference_authentication_partial = "completion_kit/api_reference/authentication"
29
34
  end
30
35
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.33
4
+ version: 0.5.35
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -235,6 +235,7 @@ files:
235
235
  - app/assets/javascripts/completion_kit/application.js
236
236
  - app/assets/stylesheets/completion_kit/application.css.erb
237
237
  - app/controllers/completion_kit/api/v1/base_controller.rb
238
+ - app/controllers/completion_kit/api/v1/calibrations_controller.rb
238
239
  - app/controllers/completion_kit/api/v1/datasets_controller.rb
239
240
  - app/controllers/completion_kit/api/v1/metric_groups_controller.rb
240
241
  - app/controllers/completion_kit/api/v1/metrics_controller.rb
@@ -245,6 +246,7 @@ files:
245
246
  - app/controllers/completion_kit/api/v1/tags_controller.rb
246
247
  - app/controllers/completion_kit/api_reference_controller.rb
247
248
  - app/controllers/completion_kit/application_controller.rb
249
+ - app/controllers/completion_kit/calibrations_controller.rb
248
250
  - app/controllers/completion_kit/dashboard_controller.rb
249
251
  - app/controllers/completion_kit/dashboard_dismissals_controller.rb
250
252
  - app/controllers/completion_kit/datasets_controller.rb
@@ -267,8 +269,10 @@ files:
267
269
  - app/jobs/completion_kit/run_completion_check_job.rb
268
270
  - app/mailers/completion_kit/application_mailer.rb
269
271
  - app/models/completion_kit/application_record.rb
272
+ - app/models/completion_kit/calibration.rb
270
273
  - app/models/completion_kit/dashboard_dismissal.rb
271
274
  - app/models/completion_kit/dataset.rb
275
+ - app/models/completion_kit/judge_version.rb
272
276
  - app/models/completion_kit/mcp_session.rb
273
277
  - app/models/completion_kit/metric.rb
274
278
  - app/models/completion_kit/metric_group.rb
@@ -292,6 +296,7 @@ files:
292
296
  - app/services/completion_kit/llm_client.rb
293
297
  - app/services/completion_kit/mcp_dispatcher.rb
294
298
  - app/services/completion_kit/mcp_tools/base.rb
299
+ - app/services/completion_kit/mcp_tools/calibrations.rb
295
300
  - app/services/completion_kit/mcp_tools/datasets.rb
296
301
  - app/services/completion_kit/mcp_tools/metric_groups.rb
297
302
  - app/services/completion_kit/mcp_tools/metrics.rb
@@ -308,6 +313,7 @@ files:
308
313
  - app/services/completion_kit/open_ai_client.rb
309
314
  - app/services/completion_kit/open_router_client.rb
310
315
  - app/services/completion_kit/prompt_improvement_service.rb
316
+ - app/services/completion_kit/provider_endpoint.rb
311
317
  - app/services/completion_kit/worker_health.rb
312
318
  - app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb
313
319
  - app/views/completion_kit/api_reference/_authentication.html.erb
@@ -316,6 +322,7 @@ files:
316
322
  - app/views/completion_kit/api_reference/_resource_card.html.erb
317
323
  - app/views/completion_kit/api_reference/_resource_list.html.erb
318
324
  - app/views/completion_kit/api_reference/index.html.erb
325
+ - app/views/completion_kit/calibrations/_buttons.html.erb
319
326
  - app/views/completion_kit/dashboard/_eye_icon.html.erb
320
327
  - app/views/completion_kit/dashboard/_eye_off_icon.html.erb
321
328
  - app/views/completion_kit/dashboard/_failures_card.html.erb
@@ -398,6 +405,8 @@ files:
398
405
  - db/migrate/20260513000001_create_completion_kit_mcp_sessions.rb
399
406
  - db/migrate/20260514000001_allow_judge_only_runs.rb
400
407
  - db/migrate/20260516000001_create_completion_kit_dashboard_dismissals.rb
408
+ - db/migrate/20260522000001_create_completion_kit_judge_versions.rb
409
+ - db/migrate/20260522000002_create_completion_kit_calibrations.rb
401
410
  - lib/completion-kit.rb
402
411
  - lib/completion_kit.rb
403
412
  - lib/completion_kit/concurrency_check.rb