completion-kit 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/app/assets/stylesheets/completion_kit/application.css +26 -4
  3. data/app/controllers/completion_kit/api/v1/base_controller.rb +36 -4
  4. data/app/controllers/completion_kit/api/v1/calibrations_controller.rb +24 -5
  5. data/app/controllers/completion_kit/api/v1/datasets_controller.rb +5 -3
  6. data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +5 -3
  7. data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb +51 -0
  8. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -4
  9. data/app/controllers/completion_kit/api/v1/prompts_controller.rb +5 -3
  10. data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +3 -3
  11. data/app/controllers/completion_kit/api/v1/responses_controller.rb +3 -1
  12. data/app/controllers/completion_kit/api/v1/runs_controller.rb +79 -6
  13. data/app/controllers/completion_kit/api/v1/tags_controller.rb +3 -3
  14. data/app/controllers/completion_kit/metrics_controller.rb +3 -3
  15. data/app/controllers/completion_kit/runs_controller.rb +1 -1
  16. data/app/helpers/completion_kit/application_helper.rb +0 -14
  17. data/app/jobs/completion_kit/generate_row_job.rb +5 -12
  18. data/app/jobs/completion_kit/judge_review_job.rb +10 -16
  19. data/app/models/completion_kit/metric.rb +1 -0
  20. data/app/models/completion_kit/metric_version.rb +16 -0
  21. data/app/models/completion_kit/response.rb +13 -17
  22. data/app/models/completion_kit/review.rb +18 -22
  23. data/app/models/completion_kit/run.rb +27 -24
  24. data/app/models/concerns/completion_kit/has_job_status.rb +31 -0
  25. data/app/services/completion_kit/mcp_dispatcher.rb +3 -1
  26. data/app/services/completion_kit/mcp_tools/metric_versions.rb +67 -0
  27. data/app/services/completion_kit/starter_metrics.rb +5 -5
  28. data/app/views/completion_kit/api_reference/_body.html.erb +91 -6
  29. data/app/views/completion_kit/api_reference/index.html.erb +12 -0
  30. data/app/views/completion_kit/metrics/index.html.erb +3 -3
  31. data/app/views/completion_kit/metrics/show.html.erb +1 -0
  32. data/config/routes.rb +16 -1
  33. data/lib/completion_kit/version.rb +1 -1
  34. metadata +4 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d8454bbb11d5064ca0c6d4355c780425a28198280dffe7dd424d266fbeef6a09
4
- data.tar.gz: 24c1da76e1e9118d5e2a732e8e45b684f588f553ad4d8bac89e239bc22c953c3
3
+ metadata.gz: 4625f4e3f1afceb34f7603ee7a5025c78cb2499f6c2c287d83a9d02397f407b2
4
+ data.tar.gz: 740fecb69351c418aaececb19b9cfe3579bf9124a5df52a23d6db5851ce63fce
5
5
  SHA512:
6
- metadata.gz: 6fbc5b8047a20240897e19c389bb3f6104d3e2a219794d190183b5433e14d524bb692eb0a27b36ab6471e596c2b9b8af2d70a4f56ae81aa327726fe92f092eb9
7
- data.tar.gz: a3399003a48836fd457a8c8b488305fad6d006596c6f940a82b232e2a731dfbc3df5ded4ba8bc16b94690a88d56baaaff6edc6801f47f2bbf422ca8fb74270df
6
+ metadata.gz: 548cc9666d2cf744babbe6a62047f2c4a44f79fe63490b795d84017742f3db4baff9313dfa4392e4b09c740645224493787d6af4208994d43a02a6610275e3c7
7
+ data.tar.gz: d8f405649cc42b70e4849f5e0fb9bfe733507baf91e47fdd3650c1ca1b3cab0099a337b038777799853a74cca8bc767baa6cf90107fd737eb519678d0dd4bdd0
@@ -298,6 +298,11 @@ form.button_to {
298
298
  margin-right: 0.75rem;
299
299
  }
300
300
 
301
+ .ck-title--sm {
302
+ font-size: clamp(1.1rem, 1.6vw, 1.4rem);
303
+ line-height: 1.25;
304
+ }
305
+
301
306
  .ck-section-title {
302
307
  font-size: 1.35rem;
303
308
  }
@@ -3117,6 +3122,7 @@ select.ck-input {
3117
3122
  #ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
3118
3123
  #ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
3119
3124
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
3125
+ #ck-tab-calibrations:checked ~ .ck-api-tabs__nav label[for="ck-tab-calibrations"],
3120
3126
  #ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
3121
3127
  #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
3122
3128
  color: var(--ck-accent);
@@ -3131,8 +3137,9 @@ select.ck-input {
3131
3137
  #ck-tab-datasets:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(5),
3132
3138
  #ck-tab-metrics:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(6),
3133
3139
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(7),
3134
- #ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
3135
- #ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9) {
3140
+ #ck-tab-calibrations:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
3141
+ #ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9),
3142
+ #ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(10) {
3136
3143
  display: block;
3137
3144
  }
3138
3145
 
@@ -3172,6 +3179,7 @@ select.ck-input {
3172
3179
  #ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
3173
3180
  #ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
3174
3181
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
3182
+ #ck-tab-calibrations:checked ~ .ck-api-tabs__nav label[for="ck-tab-calibrations"],
3175
3183
  #ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
3176
3184
  #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
3177
3185
  border-left-color: transparent;
@@ -3603,6 +3611,11 @@ select.ck-input {
3603
3611
  border-color: var(--ck-line);
3604
3612
  color: var(--ck-dim);
3605
3613
  }
3614
+ .ck-source-chip--revert {
3615
+ border-color: rgba(245, 158, 11, 0.35);
3616
+ background: rgba(245, 158, 11, 0.08);
3617
+ color: rgb(217, 119, 6);
3618
+ }
3606
3619
  .ck-source-chip--current {
3607
3620
  border-color: var(--ck-line-strong);
3608
3621
  color: var(--ck-text);
@@ -6021,8 +6034,17 @@ a.tag-mark {
6021
6034
  }
6022
6035
  .ck-starter-grid {
6023
6036
  display: grid;
6024
- grid-template-columns: repeat(auto-fill, minmax(240px, 1fr));
6025
- gap: 10px;
6037
+ grid-template-columns: repeat(5, 1fr);
6038
+ gap: 12px;
6039
+ }
6040
+ @media (max-width: 1100px) {
6041
+ .ck-starter-grid { grid-template-columns: repeat(3, 1fr); }
6042
+ }
6043
+ @media (max-width: 700px) {
6044
+ .ck-starter-grid { grid-template-columns: repeat(2, 1fr); }
6045
+ }
6046
+ @media (max-width: 500px) {
6047
+ .ck-starter-grid { grid-template-columns: 1fr; }
6026
6048
  }
6027
6049
  .ck-starter-card {
6028
6050
  display: flex;
@@ -3,26 +3,58 @@ module CompletionKit
3
3
  module V1
4
4
  class BaseController < ActionController::API
5
5
  rate_limit to: CompletionKit.config.api_rate_limit, within: 1.minute,
6
- with: -> { render json: {error: "Rate limit exceeded"}, status: :too_many_requests }
6
+ with: -> { render_error("Rate limit exceeded", status: :too_many_requests) }
7
7
  before_action :authenticate_api!
8
8
 
9
9
  private
10
10
 
11
+ def render_error(message, status:, details: nil)
12
+ payload = { error: message }
13
+ payload[:details] = details if details.present?
14
+ render json: payload, status: status
15
+ end
16
+
17
+ def render_validation_errors(record, status: :unprocessable_entity)
18
+ render_error("Validation failed", status: status, details: record.errors.as_json)
19
+ end
20
+
11
21
  def authenticate_api!
12
22
  token = CompletionKit.config.api_token
13
23
  unless token
14
- render json: {error: "API token not configured"}, status: :unauthorized
24
+ render_error("API token not configured", status: :unauthorized)
15
25
  return
16
26
  end
17
27
 
18
28
  provided = request.headers["Authorization"]&.match(/\ABearer (.+)\z/)&.[](1)
19
29
  unless provided && ActiveSupport::SecurityUtils.secure_compare(provided, token)
20
- render json: {error: "Unauthorized"}, status: :unauthorized
30
+ render_error("Unauthorized", status: :unauthorized)
21
31
  end
22
32
  end
23
33
 
24
34
  def not_found
25
- render json: {error: "Record not found"}, status: :not_found
35
+ render_error("Record not found", status: :not_found)
36
+ end
37
+
38
+ PAGINATION_DEFAULT_LIMIT = 50
39
+ PAGINATION_MAX_LIMIT = 500
40
+
41
+ def paginate(scope)
42
+ total = scope.count
43
+ limit = (params[:limit].presence || PAGINATION_DEFAULT_LIMIT).to_i
44
+ limit = PAGINATION_DEFAULT_LIMIT if limit <= 0
45
+ limit = PAGINATION_MAX_LIMIT if limit > PAGINATION_MAX_LIMIT
46
+ offset = params[:offset].to_i
47
+ offset = 0 if offset < 0
48
+ response.set_header("X-Total-Count", total.to_s)
49
+ response.set_header("X-Limit", limit.to_s)
50
+ response.set_header("X-Offset", offset.to_s)
51
+ scope.limit(limit).offset(offset)
52
+ end
53
+
54
+ def filter_by_tags(scope)
55
+ names = Array(params[:tag]).map(&:to_s).reject(&:blank?)
56
+ return scope if names.empty?
57
+ scope.joins(:tags).where(completion_kit_tags: { name: names }).distinct
26
58
  end
27
59
 
28
60
  end
@@ -3,10 +3,18 @@ module CompletionKit
3
3
  module V1
4
4
  class CalibrationsController < BaseController
5
5
  before_action :ensure_calibration_enabled
6
- before_action :set_scope
6
+ before_action :set_nested_scope, only: [:create]
7
+ before_action :load_calibration, only: [:destroy]
7
8
 
8
9
  def index
9
- render json: scope_calibrations
10
+ scope = Calibration.all
11
+ scope = scope.where(run_id: params[:run_id]) if params[:run_id].present?
12
+ scope = scope.where(response_id: params[:response_id]) if params[:response_id].present?
13
+ scope = scope.where(metric_id: params[:metric_id]) if params[:metric_id].present?
14
+ scope = scope.where(metric_version_id: params[:metric_version_id]) if params[:metric_version_id].present?
15
+ scope = scope.where(created_by: params[:created_by]) if params[:created_by].present?
16
+ scope = scope.where(verdict: params[:verdict]) if params[:verdict].present?
17
+ render json: paginate(scope.order(:created_at))
10
18
  end
11
19
 
12
20
  def create
@@ -22,17 +30,22 @@ module CompletionKit
22
30
  if calibration.save
23
31
  render json: calibration, status: calibration.previously_new_record? ? :created : :ok
24
32
  else
25
- render json: { errors: calibration.errors }, status: :unprocessable_entity
33
+ render_validation_errors(calibration)
26
34
  end
27
35
  end
28
36
 
37
+ def destroy
38
+ @calibration.destroy!
39
+ head :no_content
40
+ end
41
+
29
42
  private
30
43
 
31
44
  def ensure_calibration_enabled
32
- render(json: { error: "Calibration disabled" }, status: :not_found) unless CompletionKit.config.judge_calibration_enabled
45
+ render_error("Calibration disabled", status: :not_found) unless CompletionKit.config.judge_calibration_enabled
33
46
  end
34
47
 
35
- def set_scope
48
+ def set_nested_scope
36
49
  @run = Run.find(params[:run_id])
37
50
  @response = @run.responses.find(params[:response_id])
38
51
  @metric = Metric.find(params[:metric_id])
@@ -40,6 +53,12 @@ module CompletionKit
40
53
  not_found
41
54
  end
42
55
 
56
+ def load_calibration
57
+ @calibration = Calibration.find(params[:id])
58
+ rescue ActiveRecord::RecordNotFound
59
+ not_found
60
+ end
61
+
43
62
  def scope_calibrations
44
63
  Calibration.where(run_id: @run.id, response_id: @response.id, metric_id: @metric.id)
45
64
  end
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  before_action :set_dataset, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: Dataset.includes(:tags).order(created_at: :desc)
8
+ scope = Dataset.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -17,7 +19,7 @@ module CompletionKit
17
19
  if dataset.save
18
20
  render json: dataset, status: :created
19
21
  else
20
- render json: {errors: dataset.errors}, status: :unprocessable_entity
22
+ render_validation_errors(dataset)
21
23
  end
22
24
  end
23
25
 
@@ -25,7 +27,7 @@ module CompletionKit
25
27
  if @dataset.update(dataset_params)
26
28
  render json: @dataset
27
29
  else
28
- render json: {errors: @dataset.errors}, status: :unprocessable_entity
30
+ render_validation_errors(@dataset)
29
31
  end
30
32
  end
31
33
 
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  before_action :set_metric_group, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: MetricGroup.includes(:tags).order(created_at: :desc)
8
+ scope = MetricGroup.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -18,7 +20,7 @@ module CompletionKit
18
20
  metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
19
21
  render json: metric_group.reload, status: :created
20
22
  else
21
- render json: {errors: metric_group.errors}, status: :unprocessable_entity
23
+ render_validation_errors(metric_group)
22
24
  end
23
25
  end
24
26
 
@@ -27,7 +29,7 @@ module CompletionKit
27
29
  @metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
28
30
  render json: @metric_group.reload
29
31
  else
30
- render json: {errors: @metric_group.errors}, status: :unprocessable_entity
32
+ render_validation_errors(@metric_group)
31
33
  end
32
34
  end
33
35
 
@@ -0,0 +1,51 @@
1
+ module CompletionKit
2
+ module Api
3
+ module V1
4
+ class MetricVersionsController < BaseController
5
+ before_action :set_metric
6
+ before_action :set_version, only: [:show, :publish, :destroy]
7
+
8
+ def index
9
+ render json: paginate(@metric.metric_versions.order(version_number: :desc))
10
+ end
11
+
12
+ def show
13
+ render json: @version
14
+ end
15
+
16
+ def publish
17
+ if @version.published? && !@version.current?
18
+ audit = @version.revert!
19
+ render json: audit
20
+ else
21
+ @version.publish!
22
+ render json: @version.reload
23
+ end
24
+ end
25
+
26
+ def destroy
27
+ if @version.published?
28
+ render_error("Cannot dismiss a published version. Publish a different version as current instead.", status: :conflict)
29
+ return
30
+ end
31
+ @version.destroy!
32
+ head :no_content
33
+ end
34
+
35
+ private
36
+
37
+ def set_metric
38
+ @metric = Metric.find(params[:metric_id])
39
+ rescue ActiveRecord::RecordNotFound
40
+ not_found
41
+ end
42
+
43
+ def set_version
44
+ @version = @metric.metric_versions.find(params[:id])
45
+ rescue ActiveRecord::RecordNotFound
46
+ not_found
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -2,10 +2,12 @@ module CompletionKit
2
2
  module Api
3
3
  module V1
4
4
  class MetricsController < BaseController
5
- before_action :set_metric, only: [:show, :update, :destroy]
5
+ before_action :set_metric, only: [:show, :update, :destroy, :suggest_variants, :add_few_shot, :remove_few_shot]
6
6
 
7
7
  def index
8
- render json: Metric.includes(:tags).order(created_at: :desc)
8
+ scope = Metric.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -17,7 +19,7 @@ module CompletionKit
17
19
  if metric.save
18
20
  render json: metric, status: :created
19
21
  else
20
- render json: {errors: metric.errors}, status: :unprocessable_entity
22
+ render_validation_errors(metric)
21
23
  end
22
24
  end
23
25
 
@@ -25,7 +27,7 @@ module CompletionKit
25
27
  if @metric.update(metric_params)
26
28
  render json: @metric
27
29
  else
28
- render json: {errors: @metric.errors}, status: :unprocessable_entity
30
+ render_validation_errors(@metric)
29
31
  end
30
32
  end
31
33
 
@@ -34,6 +36,51 @@ module CompletionKit
34
36
  head :no_content
35
37
  end
36
38
 
39
+ def suggest_variants
40
+ disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree").count
41
+ if disagreement_count.zero?
42
+ render_error("Mark at least one case as Disagree before asking the model to suggest a change.", status: :unprocessable_entity)
43
+ return
44
+ end
45
+
46
+ MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").destroy_all
47
+ generator = MetricVariantGenerator.new(@metric, count: params[:count].to_i, model: params[:model])
48
+ variants = generator.call
49
+ if variants.empty?
50
+ render_error("The model returned no usable variants. Try again with a different model.", status: :unprocessable_entity)
51
+ return
52
+ end
53
+ versions = generator.persist!(variants)
54
+ render json: versions, status: :created
55
+ end
56
+
57
+ def add_few_shot
58
+ calibration = Calibration.where(metric_id: @metric.id, verdict: "disagree").find(params[:calibration_id])
59
+ review = calibration.response.reviews.find_by(metric_id: @metric.id)
60
+ examples = Array(@metric.few_shot_examples)
61
+ examples << {
62
+ "input" => calibration.response.input_data.to_s.truncate(2000),
63
+ "response" => calibration.response.response_text.to_s.truncate(2000),
64
+ "judge_score" => review&.ai_score&.to_f,
65
+ "judge_feedback" => review&.ai_feedback.to_s.truncate(1000),
66
+ "human_score" => calibration.corrected_score&.to_f,
67
+ "human_note" => calibration.note.to_s.truncate(1000),
68
+ "calibration_id" => calibration.id,
69
+ "added_at" => Time.current.utc.iso8601
70
+ }
71
+ @metric.update!(few_shot_examples: examples)
72
+ render json: @metric.reload
73
+ rescue ActiveRecord::RecordNotFound
74
+ render_error("Calibration not found or not a disagree on this metric.", status: :not_found)
75
+ end
76
+
77
+ def remove_few_shot
78
+ cal_id = params[:calibration_id].to_i
79
+ remaining = Array(@metric.few_shot_examples).reject { |fs| fs["calibration_id"].to_i == cal_id }
80
+ @metric.update!(few_shot_examples: remaining)
81
+ render json: @metric.reload
82
+ end
83
+
37
84
  private
38
85
 
39
86
  def set_metric
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  before_action :set_prompt, only: [:show, :update, :destroy, :publish]
6
6
 
7
7
  def index
8
- render json: Prompt.includes(:tags).order(created_at: :desc)
8
+ scope = Prompt.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -17,7 +19,7 @@ module CompletionKit
17
19
  if prompt.save
18
20
  render json: prompt, status: :created
19
21
  else
20
- render json: {errors: prompt.errors}, status: :unprocessable_entity
22
+ render_validation_errors(prompt)
21
23
  end
22
24
  end
23
25
 
@@ -30,7 +32,7 @@ module CompletionKit
30
32
  elsif @prompt.update(prompt_params)
31
33
  render json: @prompt
32
34
  else
33
- render json: {errors: @prompt.errors}, status: :unprocessable_entity
35
+ render_validation_errors(@prompt)
34
36
  end
35
37
  end
36
38
 
@@ -5,7 +5,7 @@ module CompletionKit
5
5
  before_action :set_credential, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: ProviderCredential.order(created_at: :desc)
8
+ render json: paginate(ProviderCredential.order(created_at: :desc))
9
9
  end
10
10
 
11
11
  def show
@@ -17,7 +17,7 @@ module CompletionKit
17
17
  if credential.save
18
18
  render json: credential, status: :created
19
19
  else
20
- render json: {errors: credential.errors}, status: :unprocessable_entity
20
+ render_validation_errors(credential)
21
21
  end
22
22
  end
23
23
 
@@ -25,7 +25,7 @@ module CompletionKit
25
25
  if @credential.update(credential_params)
26
26
  render json: @credential
27
27
  else
28
- render json: {errors: @credential.errors}, status: :unprocessable_entity
28
+ render_validation_errors(@credential)
29
29
  end
30
30
  end
31
31
 
@@ -6,7 +6,9 @@ module CompletionKit
6
6
  before_action :set_response, only: [:show]
7
7
 
8
8
  def index
9
- render json: @run.responses.includes(:reviews)
9
+ scope = @run.responses.includes(:reviews)
10
+ scope = scope.where(status: params[:status]) if params[:status].present?
11
+ render json: paginate(scope.order(:id))
10
12
  end
11
13
 
12
14
  def show
@@ -2,10 +2,15 @@ module CompletionKit
2
2
  module Api
3
3
  module V1
4
4
  class RunsController < BaseController
5
- before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures]
5
+ before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures, :rerun, :regrade, :compare]
6
6
 
7
7
  def index
8
- render json: Run.includes(:tags).order(created_at: :desc)
8
+ scope = Run.includes(:tags)
9
+ scope = scope.where(status: params[:status]) if params[:status].present?
10
+ scope = scope.where(prompt_id: params[:prompt_id]) if params[:prompt_id].present?
11
+ scope = scope.where(dataset_id: params[:dataset_id]) if params[:dataset_id].present?
12
+ scope = filter_by_tags(scope)
13
+ render json: paginate(scope.order(created_at: :desc))
9
14
  end
10
15
 
11
16
  def show
@@ -18,7 +23,7 @@ module CompletionKit
18
23
  run.replace_metrics!(params[:metric_ids])
19
24
  render json: run.reload, status: :created
20
25
  else
21
- render json: {errors: run.errors}, status: :unprocessable_entity
26
+ render_validation_errors(run)
22
27
  end
23
28
  end
24
29
 
@@ -27,7 +32,7 @@ module CompletionKit
27
32
  @run.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
28
33
  render json: @run.reload
29
34
  else
30
- render json: {errors: @run.errors}, status: :unprocessable_entity
35
+ render_validation_errors(@run)
31
36
  end
32
37
  end
33
38
 
@@ -40,13 +45,13 @@ module CompletionKit
40
45
  if @run.start!
41
46
  render json: @run.reload, status: :accepted
42
47
  else
43
- render json: { errors: [@run.failure_summary || @run.errors.full_messages.to_sentence] }, status: :unprocessable_entity
48
+ render_error(@run.failure_summary || @run.errors.full_messages.to_sentence, status: :unprocessable_entity)
44
49
  end
45
50
  end
46
51
 
47
52
  def retry_failures
48
53
  if @run.stale_review_summary.any?
49
- return render(json: { error: "Judge has changed since this run executed. Retry would mix versions in the same run; use POST /api/v1/runs/:id/rerun instead." }, status: :conflict)
54
+ return render_error("Judge has changed since this run executed. Retry would mix versions in the same run; use POST /api/v1/runs/:id/rerun instead.", status: :conflict)
50
55
  end
51
56
 
52
57
  scope = @run.responses.where(status: "failed")
@@ -71,8 +76,76 @@ module CompletionKit
71
76
  render json: @run.reload, status: :accepted
72
77
  end
73
78
 
79
+ def rerun
80
+ new_run = Run.create!(
81
+ prompt_id: @run.prompt_id,
82
+ dataset_id: @run.dataset_id,
83
+ judge_model: @run.judge_model,
84
+ temperature: @run.temperature,
85
+ output_column: @run.output_column,
86
+ tag_names: @run.tag_names,
87
+ status: "pending"
88
+ )
89
+ new_run.replace_metrics!(@run.metric_ids)
90
+ if new_run.start!
91
+ render json: new_run.reload, status: :accepted
92
+ else
93
+ render_error(new_run.failure_summary || "Could not start the new run.", status: :unprocessable_entity)
94
+ end
95
+ end
96
+
97
+ def regrade
98
+ if @run.regrade!
99
+ render json: @run.reload, status: :accepted
100
+ else
101
+ render_error("Nothing to re-grade. The run has no succeeded responses or no metrics attached.", status: :unprocessable_entity)
102
+ end
103
+ end
104
+
105
+ def compare
106
+ other = Run.find(params[:with])
107
+ comparison = build_run_comparison(@run, other)
108
+ render json: { left_run_id: @run.id, right_run_id: other.id, metric_ids: comparison[:metric_ids], rows: comparison[:rows] }
109
+ rescue ActiveRecord::RecordNotFound
110
+ render_error("Other run not found. Pass ?with=<run_id>.", status: :not_found)
111
+ end
112
+
74
113
  private
75
114
 
115
+ def build_run_comparison(left, right)
116
+ left_responses = left.responses.includes(:reviews).order(:row_index, :id)
117
+ right_responses = right.responses.includes(:reviews).order(:row_index, :id)
118
+ right_by_input = right_responses.each_with_object({}) { |r, h| h[r.input_data.to_s] ||= r }
119
+ all_reviews = left_responses.flat_map(&:reviews) + right_responses.flat_map(&:reviews)
120
+ metric_ids = all_reviews.map(&:metric_id).compact.uniq
121
+ metric_versions = MetricVersion.where(id: all_reviews.map(&:metric_version_id).compact.uniq).index_by(&:id)
122
+
123
+ rows = left_responses.map do |lr|
124
+ rr = right_by_input[lr.input_data.to_s]
125
+ {
126
+ left_response_id: lr.id,
127
+ right_response_id: rr&.id,
128
+ row_index: lr.row_index,
129
+ per_metric: metric_ids.map do |mid|
130
+ l_review = lr.reviews.find { |r| r.metric_id == mid }
131
+ r_review = rr && rr.reviews.find { |r| r.metric_id == mid }
132
+ next nil if l_review.nil? && r_review.nil?
133
+ anchor = l_review || r_review
134
+ {
135
+ metric_id: mid,
136
+ metric_name: anchor.metric_name,
137
+ left_score: l_review ? l_review.ai_score : nil,
138
+ right_score: r_review ? r_review.ai_score : nil,
139
+ left_metric_version_id: l_review&.metric_version_id,
140
+ right_metric_version_id: r_review&.metric_version_id,
141
+ delta: (l_review&.ai_score && r_review&.ai_score) ? (r_review.ai_score.to_f - l_review.ai_score.to_f).round(2) : nil
142
+ }
143
+ end.compact
144
+ }
145
+ end
146
+ { rows: rows, metric_ids: metric_ids }
147
+ end
148
+
76
149
  def set_run
77
150
  @run = Run.find(params[:id])
78
151
  rescue ActiveRecord::RecordNotFound
@@ -5,7 +5,7 @@ module CompletionKit
5
5
  before_action :set_tag, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: Tag.order(:name)
8
+ render json: paginate(Tag.order(:name))
9
9
  end
10
10
 
11
11
  def show
@@ -17,7 +17,7 @@ module CompletionKit
17
17
  if tag.save
18
18
  render json: tag, status: :created
19
19
  else
20
- render json: {errors: tag.errors}, status: :unprocessable_entity
20
+ render_validation_errors(tag)
21
21
  end
22
22
  end
23
23
 
@@ -25,7 +25,7 @@ module CompletionKit
25
25
  if @tag.update(tag_params)
26
26
  render json: @tag
27
27
  else
28
- render json: {errors: @tag.errors}, status: :unprocessable_entity
28
+ render_validation_errors(@tag)
29
29
  end
30
30
  end
31
31
 
@@ -160,13 +160,13 @@ module CompletionKit
160
160
  reverting = was_published_already && !version.current?
161
161
  previously_current = MetricVersion.current.find_by(metric_id: @metric.id)
162
162
 
163
- version.publish!
164
-
165
163
  if reverting
164
+ audit = version.revert!
166
165
  prior_label = previously_current.version_label
167
166
  redirect_to metric_path(@metric),
168
- notice: "Reverted to #{@metric.name} #{version.version_label}. Pinned cases still flow to the judge, and calibration verdicts collected against #{prior_label} stay tied to it."
167
+ notice: "Reverted to #{@metric.name} #{version.version_label} (now logged as #{audit.version_label}). Pinned cases still flow to the judge, and calibration verdicts collected against #{prior_label} stay tied to it."
169
168
  else
169
+ version.publish!
170
170
  redirect_to metric_path(@metric),
171
171
  notice: "#{@metric.name} #{version.version_label} is now the published version."
172
172
  end
@@ -176,7 +176,7 @@ module CompletionKit
176
176
  failed_response_ids.each { |rid| GenerateRowJob.perform_later(@run.id, rid) }
177
177
  end
178
178
 
179
- @run.send(:broadcast_ui)
179
+ @run.broadcast_ui
180
180
  redirect_to run_path(@run)
181
181
  end
182
182
 
@@ -53,20 +53,6 @@ module CompletionKit
53
53
  end
54
54
  end
55
55
 
56
- def ck_run_status_label(run)
57
- case run.status
58
- when "pending" then "Ready to run"
59
- when "running"
60
- if run.progress_total.to_i > 0
61
- "Running (#{run.progress_current}/#{run.progress_total})"
62
- else
63
- "Running…"
64
- end
65
- when "completed" then "Completed"
66
- when "failed" then "Failed"
67
- else run.status.capitalize
68
- end
69
- end
70
56
 
71
57
  def ck_provider_label(provider)
72
58
  CompletionKit::ProviderCredential::PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize