completion-kit 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/stylesheets/completion_kit/application.css +18 -4
  3. data/app/controllers/completion_kit/api/v1/base_controller.rb +22 -0
  4. data/app/controllers/completion_kit/api/v1/calibrations_controller.rb +22 -3
  5. data/app/controllers/completion_kit/api/v1/datasets_controller.rb +3 -1
  6. data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +3 -1
  7. data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb +51 -0
  8. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +49 -2
  9. data/app/controllers/completion_kit/api/v1/prompts_controller.rb +3 -1
  10. data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +1 -1
  11. data/app/controllers/completion_kit/api/v1/responses_controller.rb +3 -1
  12. data/app/controllers/completion_kit/api/v1/runs_controller.rb +75 -2
  13. data/app/controllers/completion_kit/api/v1/tags_controller.rb +1 -1
  14. data/app/controllers/completion_kit/metrics_controller.rb +3 -3
  15. data/app/controllers/completion_kit/runs_controller.rb +1 -1
  16. data/app/helpers/completion_kit/application_helper.rb +0 -14
  17. data/app/jobs/completion_kit/generate_row_job.rb +3 -8
  18. data/app/jobs/completion_kit/judge_review_job.rb +6 -9
  19. data/app/models/completion_kit/metric.rb +1 -0
  20. data/app/models/completion_kit/metric_version.rb +16 -0
  21. data/app/models/completion_kit/response.rb +13 -17
  22. data/app/models/completion_kit/review.rb +18 -22
  23. data/app/models/completion_kit/run.rb +27 -23
  24. data/app/models/concerns/completion_kit/has_job_status.rb +31 -0
  25. data/app/services/completion_kit/mcp_dispatcher.rb +3 -1
  26. data/app/services/completion_kit/mcp_tools/metric_versions.rb +67 -0
  27. data/app/services/completion_kit/starter_metrics.rb +5 -5
  28. data/app/views/completion_kit/api_reference/_body.html.erb +91 -6
  29. data/app/views/completion_kit/api_reference/index.html.erb +8 -0
  30. data/app/views/completion_kit/metrics/index.html.erb +3 -3
  31. data/app/views/completion_kit/metrics/show.html.erb +1 -0
  32. data/config/routes.rb +16 -1
  33. data/lib/completion_kit/version.rb +1 -1
  34. metadata +4 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d8454bbb11d5064ca0c6d4355c780425a28198280dffe7dd424d266fbeef6a09
4
- data.tar.gz: 24c1da76e1e9118d5e2a732e8e45b684f588f553ad4d8bac89e239bc22c953c3
3
+ metadata.gz: 285fba79d665c4fe077b42f0e8ce888ce84a314b95698a561eea9fb48c75b045
4
+ data.tar.gz: a984971e294bda341824e3696cab11662f82fcee2c50974798356754ba55126c
5
5
  SHA512:
6
- metadata.gz: 6fbc5b8047a20240897e19c389bb3f6104d3e2a219794d190183b5433e14d524bb692eb0a27b36ab6471e596c2b9b8af2d70a4f56ae81aa327726fe92f092eb9
7
- data.tar.gz: a3399003a48836fd457a8c8b488305fad6d006596c6f940a82b232e2a731dfbc3df5ded4ba8bc16b94690a88d56baaaff6edc6801f47f2bbf422ca8fb74270df
6
+ metadata.gz: 1a12beaf77a9d8071949bede78336910eb8da43598609ac6307e09493d1c088a82cc3056ccaf63b3ae4eeb4ea022d19c182a05c0f037bc8a31ba21246cc5bd56
7
+ data.tar.gz: e007e6eeb9f7e89f3aa5ba8397338ce19778b5040f184b8cb6c012faf3f6ea464f6c3d5423fd7f7b36882494fbb90d70e068da7fe15bb76e67e9992585ba80c6
@@ -3117,6 +3117,7 @@ select.ck-input {
3117
3117
  #ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
3118
3118
  #ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
3119
3119
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
3120
+ #ck-tab-calibrations:checked ~ .ck-api-tabs__nav label[for="ck-tab-calibrations"],
3120
3121
  #ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
3121
3122
  #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
3122
3123
  color: var(--ck-accent);
@@ -3131,8 +3132,9 @@ select.ck-input {
3131
3132
  #ck-tab-datasets:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(5),
3132
3133
  #ck-tab-metrics:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(6),
3133
3134
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(7),
3134
- #ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
3135
- #ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9) {
3135
+ #ck-tab-calibrations:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
3136
+ #ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9),
3137
+ #ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(10) {
3136
3138
  display: block;
3137
3139
  }
3138
3140
 
@@ -3172,6 +3174,7 @@ select.ck-input {
3172
3174
  #ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
3173
3175
  #ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
3174
3176
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
3177
+ #ck-tab-calibrations:checked ~ .ck-api-tabs__nav label[for="ck-tab-calibrations"],
3175
3178
  #ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
3176
3179
  #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
3177
3180
  border-left-color: transparent;
@@ -3603,6 +3606,11 @@ select.ck-input {
3603
3606
  border-color: var(--ck-line);
3604
3607
  color: var(--ck-dim);
3605
3608
  }
3609
+ .ck-source-chip--revert {
3610
+ border-color: rgba(245, 158, 11, 0.35);
3611
+ background: rgba(245, 158, 11, 0.08);
3612
+ color: rgb(217, 119, 6);
3613
+ }
3606
3614
  .ck-source-chip--current {
3607
3615
  border-color: var(--ck-line-strong);
3608
3616
  color: var(--ck-text);
@@ -6021,8 +6029,14 @@ a.tag-mark {
6021
6029
  }
6022
6030
  .ck-starter-grid {
6023
6031
  display: grid;
6024
- grid-template-columns: repeat(auto-fill, minmax(240px, 1fr));
6025
- gap: 10px;
6032
+ grid-template-columns: repeat(4, 1fr);
6033
+ gap: 12px;
6034
+ }
6035
+ @media (max-width: 1000px) {
6036
+ .ck-starter-grid { grid-template-columns: repeat(2, 1fr); }
6037
+ }
6038
+ @media (max-width: 600px) {
6039
+ .ck-starter-grid { grid-template-columns: 1fr; }
6026
6040
  }
6027
6041
  .ck-starter-card {
6028
6042
  display: flex;
@@ -25,6 +25,28 @@ module CompletionKit
25
25
  render json: {error: "Record not found"}, status: :not_found
26
26
  end
27
27
 
28
+ PAGINATION_DEFAULT_LIMIT = 50
29
+ PAGINATION_MAX_LIMIT = 500
30
+
31
+ def paginate(scope)
32
+ total = scope.count
33
+ limit = (params[:limit].presence || PAGINATION_DEFAULT_LIMIT).to_i
34
+ limit = PAGINATION_DEFAULT_LIMIT if limit <= 0
35
+ limit = PAGINATION_MAX_LIMIT if limit > PAGINATION_MAX_LIMIT
36
+ offset = params[:offset].to_i
37
+ offset = 0 if offset < 0
38
+ response.set_header("X-Total-Count", total.to_s)
39
+ response.set_header("X-Limit", limit.to_s)
40
+ response.set_header("X-Offset", offset.to_s)
41
+ scope.limit(limit).offset(offset)
42
+ end
43
+
44
+ def filter_by_tags(scope)
45
+ names = Array(params[:tag]).map(&:to_s).reject(&:blank?)
46
+ return scope if names.empty?
47
+ scope.joins(:tags).where(completion_kit_tags: { name: names }).distinct
48
+ end
49
+
28
50
  end
29
51
  end
30
52
  end
@@ -3,10 +3,18 @@ module CompletionKit
3
3
  module V1
4
4
  class CalibrationsController < BaseController
5
5
  before_action :ensure_calibration_enabled
6
- before_action :set_scope
6
+ before_action :set_nested_scope, only: [:create]
7
+ before_action :load_calibration, only: [:destroy]
7
8
 
8
9
  def index
9
- render json: scope_calibrations
10
+ scope = Calibration.all
11
+ scope = scope.where(run_id: params[:run_id]) if params[:run_id].present?
12
+ scope = scope.where(response_id: params[:response_id]) if params[:response_id].present?
13
+ scope = scope.where(metric_id: params[:metric_id]) if params[:metric_id].present?
14
+ scope = scope.where(metric_version_id: params[:metric_version_id]) if params[:metric_version_id].present?
15
+ scope = scope.where(created_by: params[:created_by]) if params[:created_by].present?
16
+ scope = scope.where(verdict: params[:verdict]) if params[:verdict].present?
17
+ render json: paginate(scope.order(:created_at))
10
18
  end
11
19
 
12
20
  def create
@@ -26,13 +34,18 @@ module CompletionKit
26
34
  end
27
35
  end
28
36
 
37
+ def destroy
38
+ @calibration.destroy!
39
+ head :no_content
40
+ end
41
+
29
42
  private
30
43
 
31
44
  def ensure_calibration_enabled
32
45
  render(json: { error: "Calibration disabled" }, status: :not_found) unless CompletionKit.config.judge_calibration_enabled
33
46
  end
34
47
 
35
- def set_scope
48
+ def set_nested_scope
36
49
  @run = Run.find(params[:run_id])
37
50
  @response = @run.responses.find(params[:response_id])
38
51
  @metric = Metric.find(params[:metric_id])
@@ -40,6 +53,12 @@ module CompletionKit
40
53
  not_found
41
54
  end
42
55
 
56
+ def load_calibration
57
+ @calibration = Calibration.find(params[:id])
58
+ rescue ActiveRecord::RecordNotFound
59
+ not_found
60
+ end
61
+
43
62
  def scope_calibrations
44
63
  Calibration.where(run_id: @run.id, response_id: @response.id, metric_id: @metric.id)
45
64
  end
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  before_action :set_dataset, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: Dataset.includes(:tags).order(created_at: :desc)
8
+ scope = Dataset.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  before_action :set_metric_group, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: MetricGroup.includes(:tags).order(created_at: :desc)
8
+ scope = MetricGroup.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -0,0 +1,51 @@
1
+ module CompletionKit
2
+ module Api
3
+ module V1
4
+ class MetricVersionsController < BaseController
5
+ before_action :set_metric
6
+ before_action :set_version, only: [:show, :publish, :destroy]
7
+
8
+ def index
9
+ render json: paginate(@metric.metric_versions.order(version_number: :desc))
10
+ end
11
+
12
+ def show
13
+ render json: @version
14
+ end
15
+
16
+ def publish
17
+ if @version.published? && !@version.current?
18
+ audit = @version.revert!
19
+ render json: audit
20
+ else
21
+ @version.publish!
22
+ render json: @version.reload
23
+ end
24
+ end
25
+
26
+ def destroy
27
+ if @version.published?
28
+ render json: { error: "Cannot dismiss a published version. Publish a different version as current instead." }, status: :conflict
29
+ return
30
+ end
31
+ @version.destroy!
32
+ head :no_content
33
+ end
34
+
35
+ private
36
+
37
+ def set_metric
38
+ @metric = Metric.find(params[:metric_id])
39
+ rescue ActiveRecord::RecordNotFound
40
+ not_found
41
+ end
42
+
43
+ def set_version
44
+ @version = @metric.metric_versions.find(params[:id])
45
+ rescue ActiveRecord::RecordNotFound
46
+ not_found
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -2,10 +2,12 @@ module CompletionKit
2
2
  module Api
3
3
  module V1
4
4
  class MetricsController < BaseController
5
- before_action :set_metric, only: [:show, :update, :destroy]
5
+ before_action :set_metric, only: [:show, :update, :destroy, :suggest_variants, :add_few_shot, :remove_few_shot]
6
6
 
7
7
  def index
8
- render json: Metric.includes(:tags).order(created_at: :desc)
8
+ scope = Metric.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -34,6 +36,51 @@ module CompletionKit
34
36
  head :no_content
35
37
  end
36
38
 
39
+ def suggest_variants
40
+ disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree").count
41
+ if disagreement_count.zero?
42
+ render json: { error: "Mark at least one case as Disagree before asking the model to suggest a change." }, status: :unprocessable_entity
43
+ return
44
+ end
45
+
46
+ MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").destroy_all
47
+ generator = MetricVariantGenerator.new(@metric, count: params[:count].to_i, model: params[:model])
48
+ variants = generator.call
49
+ if variants.empty?
50
+ render json: { error: "The model returned no usable variants. Try again with a different model." }, status: :unprocessable_entity
51
+ return
52
+ end
53
+ versions = generator.persist!(variants)
54
+ render json: versions, status: :created
55
+ end
56
+
57
+ def add_few_shot
58
+ calibration = Calibration.where(metric_id: @metric.id, verdict: "disagree").find(params[:calibration_id])
59
+ review = calibration.response.reviews.find_by(metric_id: @metric.id)
60
+ examples = Array(@metric.few_shot_examples)
61
+ examples << {
62
+ "input" => calibration.response.input_data.to_s.truncate(2000),
63
+ "response" => calibration.response.response_text.to_s.truncate(2000),
64
+ "judge_score" => review&.ai_score&.to_f,
65
+ "judge_feedback" => review&.ai_feedback.to_s.truncate(1000),
66
+ "human_score" => calibration.corrected_score&.to_f,
67
+ "human_note" => calibration.note.to_s.truncate(1000),
68
+ "calibration_id" => calibration.id,
69
+ "added_at" => Time.current.utc.iso8601
70
+ }
71
+ @metric.update!(few_shot_examples: examples)
72
+ render json: @metric.reload
73
+ rescue ActiveRecord::RecordNotFound
74
+ render json: { error: "Calibration not found or not a disagree on this metric." }, status: :not_found
75
+ end
76
+
77
+ def remove_few_shot
78
+ cal_id = params[:calibration_id].to_i
79
+ remaining = Array(@metric.few_shot_examples).reject { |fs| fs["calibration_id"].to_i == cal_id }
80
+ @metric.update!(few_shot_examples: remaining)
81
+ render json: @metric.reload
82
+ end
83
+
37
84
  private
38
85
 
39
86
  def set_metric
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  before_action :set_prompt, only: [:show, :update, :destroy, :publish]
6
6
 
7
7
  def index
8
- render json: Prompt.includes(:tags).order(created_at: :desc)
8
+ scope = Prompt.includes(:tags)
9
+ scope = filter_by_tags(scope)
10
+ render json: paginate(scope.order(created_at: :desc))
9
11
  end
10
12
 
11
13
  def show
@@ -5,7 +5,7 @@ module CompletionKit
5
5
  before_action :set_credential, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: ProviderCredential.order(created_at: :desc)
8
+ render json: paginate(ProviderCredential.order(created_at: :desc))
9
9
  end
10
10
 
11
11
  def show
@@ -6,7 +6,9 @@ module CompletionKit
6
6
  before_action :set_response, only: [:show]
7
7
 
8
8
  def index
9
- render json: @run.responses.includes(:reviews)
9
+ scope = @run.responses.includes(:reviews)
10
+ scope = scope.where(status: params[:status]) if params[:status].present?
11
+ render json: paginate(scope.order(:id))
10
12
  end
11
13
 
12
14
  def show
@@ -2,10 +2,15 @@ module CompletionKit
2
2
  module Api
3
3
  module V1
4
4
  class RunsController < BaseController
5
- before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures]
5
+ before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures, :rerun, :regrade, :compare]
6
6
 
7
7
  def index
8
- render json: Run.includes(:tags).order(created_at: :desc)
8
+ scope = Run.includes(:tags)
9
+ scope = scope.where(status: params[:status]) if params[:status].present?
10
+ scope = scope.where(prompt_id: params[:prompt_id]) if params[:prompt_id].present?
11
+ scope = scope.where(dataset_id: params[:dataset_id]) if params[:dataset_id].present?
12
+ scope = filter_by_tags(scope)
13
+ render json: paginate(scope.order(created_at: :desc))
9
14
  end
10
15
 
11
16
  def show
@@ -71,8 +76,76 @@ module CompletionKit
71
76
  render json: @run.reload, status: :accepted
72
77
  end
73
78
 
79
+ def rerun
80
+ new_run = Run.create!(
81
+ prompt_id: @run.prompt_id,
82
+ dataset_id: @run.dataset_id,
83
+ judge_model: @run.judge_model,
84
+ temperature: @run.temperature,
85
+ output_column: @run.output_column,
86
+ tag_names: @run.tag_names,
87
+ status: "pending"
88
+ )
89
+ new_run.replace_metrics!(@run.metric_ids)
90
+ if new_run.start!
91
+ render json: new_run.reload, status: :accepted
92
+ else
93
+ render json: { errors: [new_run.failure_summary || "Could not start the new run."] }, status: :unprocessable_entity
94
+ end
95
+ end
96
+
97
+ def regrade
98
+ if @run.regrade!
99
+ render json: @run.reload, status: :accepted
100
+ else
101
+ render json: { error: "Nothing to re-grade. The run has no succeeded responses or no metrics attached." }, status: :unprocessable_entity
102
+ end
103
+ end
104
+
105
+ def compare
106
+ other = Run.find(params[:with])
107
+ comparison = build_run_comparison(@run, other)
108
+ render json: { left_run_id: @run.id, right_run_id: other.id, metric_ids: comparison[:metric_ids], rows: comparison[:rows] }
109
+ rescue ActiveRecord::RecordNotFound
110
+ render json: { error: "Other run not found. Pass ?with=<run_id>." }, status: :not_found
111
+ end
112
+
74
113
  private
75
114
 
115
+ def build_run_comparison(left, right)
116
+ left_responses = left.responses.includes(:reviews).order(:row_index, :id)
117
+ right_responses = right.responses.includes(:reviews).order(:row_index, :id)
118
+ right_by_input = right_responses.each_with_object({}) { |r, h| h[r.input_data.to_s] ||= r }
119
+ all_reviews = left_responses.flat_map(&:reviews) + right_responses.flat_map(&:reviews)
120
+ metric_ids = all_reviews.map(&:metric_id).compact.uniq
121
+ metric_versions = MetricVersion.where(id: all_reviews.map(&:metric_version_id).compact.uniq).index_by(&:id)
122
+
123
+ rows = left_responses.map do |lr|
124
+ rr = right_by_input[lr.input_data.to_s]
125
+ {
126
+ left_response_id: lr.id,
127
+ right_response_id: rr&.id,
128
+ row_index: lr.row_index,
129
+ per_metric: metric_ids.map do |mid|
130
+ l_review = lr.reviews.find { |r| r.metric_id == mid }
131
+ r_review = rr && rr.reviews.find { |r| r.metric_id == mid }
132
+ next nil if l_review.nil? && r_review.nil?
133
+ anchor = l_review || r_review
134
+ {
135
+ metric_id: mid,
136
+ metric_name: anchor.metric_name,
137
+ left_score: l_review ? l_review.ai_score : nil,
138
+ right_score: r_review ? r_review.ai_score : nil,
139
+ left_metric_version_id: l_review&.metric_version_id,
140
+ right_metric_version_id: r_review&.metric_version_id,
141
+ delta: (l_review&.ai_score && r_review&.ai_score) ? (r_review.ai_score.to_f - l_review.ai_score.to_f).round(2) : nil
142
+ }
143
+ end.compact
144
+ }
145
+ end
146
+ { rows: rows, metric_ids: metric_ids }
147
+ end
148
+
76
149
  def set_run
77
150
  @run = Run.find(params[:id])
78
151
  rescue ActiveRecord::RecordNotFound
@@ -5,7 +5,7 @@ module CompletionKit
5
5
  before_action :set_tag, only: [:show, :update, :destroy]
6
6
 
7
7
  def index
8
- render json: Tag.order(:name)
8
+ render json: paginate(Tag.order(:name))
9
9
  end
10
10
 
11
11
  def show
@@ -160,13 +160,13 @@ module CompletionKit
160
160
  reverting = was_published_already && !version.current?
161
161
  previously_current = MetricVersion.current.find_by(metric_id: @metric.id)
162
162
 
163
- version.publish!
164
-
165
163
  if reverting
164
+ audit = version.revert!
166
165
  prior_label = previously_current.version_label
167
166
  redirect_to metric_path(@metric),
168
- notice: "Reverted to #{@metric.name} #{version.version_label}. Pinned cases still flow to the judge, and calibration verdicts collected against #{prior_label} stay tied to it."
167
+ notice: "Reverted to #{@metric.name} #{version.version_label} (now logged as #{audit.version_label}). Pinned cases still flow to the judge, and calibration verdicts collected against #{prior_label} stay tied to it."
169
168
  else
169
+ version.publish!
170
170
  redirect_to metric_path(@metric),
171
171
  notice: "#{@metric.name} #{version.version_label} is now the published version."
172
172
  end
@@ -176,7 +176,7 @@ module CompletionKit
176
176
  failed_response_ids.each { |rid| GenerateRowJob.perform_later(@run.id, rid) }
177
177
  end
178
178
 
179
- @run.send(:broadcast_ui)
179
+ @run.broadcast_ui
180
180
  redirect_to run_path(@run)
181
181
  end
182
182
 
@@ -53,20 +53,6 @@ module CompletionKit
53
53
  end
54
54
  end
55
55
 
56
- def ck_run_status_label(run)
57
- case run.status
58
- when "pending" then "Ready to run"
59
- when "running"
60
- if run.progress_total.to_i > 0
61
- "Running (#{run.progress_current}/#{run.progress_total})"
62
- else
63
- "Running…"
64
- end
65
- when "completed" then "Completed"
66
- when "failed" then "Failed"
67
- else run.status.capitalize
68
- end
69
- end
70
56
 
71
57
  def ck_provider_label(provider)
72
58
  CompletionKit::ProviderCredential::PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
@@ -31,8 +31,7 @@ module CompletionKit
31
31
  before_perform do |job|
32
32
  response = Response.find_by(id: job.arguments.last)
33
33
  next unless response
34
- response.update_columns(status: "retrying", attempts: response.attempts + 1)
35
- response.run.send(:broadcast_response_update, response) if response.run
34
+ response.update!(status: "retrying", attempts: response.attempts + 1)
36
35
  end
37
36
 
38
37
  def perform(run_id, response_id)
@@ -61,12 +60,10 @@ module CompletionKit
61
60
  response_text: text,
62
61
  error_provider: nil, error_class: nil, error_status: nil, error_message: nil
63
62
  )
64
- run.send(:broadcast_response_update, response)
65
- run.send(:broadcast_progress)
66
63
 
67
64
  if run.judge_configured?
68
65
  run.metrics.each do |metric|
69
- JudgeReviewJob.perform_later(response.id, metric.id)
66
+ JudgeReviewJob.perform_later(response.id, metric.id, run.id)
70
67
  end
71
68
  end
72
69
 
@@ -87,15 +84,13 @@ module CompletionKit
87
84
  response = Response.find_by(id: response_id)
88
85
  return unless response
89
86
 
90
- response.update_columns(
87
+ response.update!(
91
88
  status: "failed",
92
89
  error_provider: provider_for(response),
93
90
  error_class: error.class.name,
94
91
  error_status: error.respond_to?(:status) ? error.status : nil,
95
92
  error_message: error.message.to_s.truncate(2000)
96
93
  )
97
- response.run&.send(:broadcast_response_update, response)
98
- response.run&.send(:broadcast_progress)
99
94
  end
100
95
 
101
96
  def provider_for(response)
@@ -5,7 +5,9 @@ module CompletionKit
5
5
  queue_as :llm
6
6
 
7
7
  limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
8
- key: ->(response_id, _) { "run:#{Response.find_by(id: response_id)&.run_id}" },
8
+ key: ->(response_id, _metric_id, run_id = nil) {
9
+ "run:#{run_id || Response.where(id: response_id).pick(:run_id)}"
10
+ },
9
11
  duration: 10.minutes
10
12
 
11
13
  def self.rate_limit_wait(executions)
@@ -29,7 +31,7 @@ module CompletionKit
29
31
  end
30
32
 
31
33
  before_perform do |job|
32
- response_id, metric_id = job.arguments
34
+ response_id, metric_id, _run_id = job.arguments
33
35
  response = Response.find_by(id: response_id)
34
36
  next unless response
35
37
  review = response.reviews.find_or_initialize_by(metric_id: metric_id)
@@ -37,10 +39,9 @@ module CompletionKit
37
39
  review.attempts = (review.attempts || 0) + 1
38
40
  review.status = "retrying"
39
41
  review.save!(validate: false)
40
- response.run.send(:broadcast_response_update, response) if response.run
41
42
  end
42
43
 
43
- def perform(response_id, metric_id)
44
+ def perform(response_id, metric_id, _run_id = nil)
44
45
  @response_id = response_id
45
46
  @metric_id = metric_id
46
47
 
@@ -75,8 +76,6 @@ module CompletionKit
75
76
  review.save!
76
77
 
77
78
  confirm_judging_capability(run.judge_model)
78
- run.send(:broadcast_response_update, response)
79
- run.send(:broadcast_progress)
80
79
  enqueue_completion_check
81
80
  end
82
81
 
@@ -107,13 +106,11 @@ module CompletionKit
107
106
  error_message: error.message.to_s.truncate(2000)
108
107
  )
109
108
  review.save!(validate: false)
110
- response.run&.send(:broadcast_response_update, response)
111
- response.run&.send(:broadcast_progress)
112
109
  end
113
110
 
114
111
  def provider_for(response)
115
112
  run = response.run
116
- return nil unless run&.judge_model
113
+ return nil unless run.judge_model
117
114
  ApiConfig.provider_for_model(run.judge_model)
118
115
  end
119
116
 
@@ -12,6 +12,7 @@ module CompletionKit
12
12
 
13
13
  has_many :metric_group_memberships, dependent: :destroy
14
14
  has_many :metric_groups, through: :metric_group_memberships, source: :metric_group
15
+ has_many :metric_versions, dependent: :destroy
15
16
  has_many :reviews, dependent: :nullify
16
17
  has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
17
18
 
@@ -53,6 +53,22 @@ module CompletionKit
53
53
  self
54
54
  end
55
55
 
56
+ def revert!
57
+ raise ArgumentError, "only a published version can be reverted to" unless published?
58
+ audit = nil
59
+ MetricVersion.transaction do
60
+ audit = self.class.create!(
61
+ metric: metric,
62
+ instruction: instruction,
63
+ rubric_bands: rubric_bands,
64
+ state: "draft",
65
+ source: "revert"
66
+ )
67
+ audit.publish!
68
+ end
69
+ audit
70
+ end
71
+
56
72
  def as_json(options = {})
57
73
  {
58
74
  id: id,
@@ -1,7 +1,6 @@
1
1
  module CompletionKit
2
2
  class Response < ApplicationRecord
3
- STATUSES = %w[pending retrying succeeded failed].freeze
4
- TERMINAL_STATUSES = %w[succeeded failed].freeze
3
+ include HasJobStatus
5
4
 
6
5
  belongs_to :run
7
6
  has_many :reviews, dependent: :destroy
@@ -10,17 +9,11 @@ module CompletionKit
10
9
  delegate :prompt, to: :run
11
10
 
12
11
  validates :response_text, presence: true, if: :succeeded?
13
- validates :status, inclusion: { in: STATUSES }
14
12
 
15
13
  before_validation :set_default_status, on: :create
16
14
 
17
- def terminal?
18
- TERMINAL_STATUSES.include?(status)
19
- end
20
-
21
- def succeeded?
22
- status == "succeeded"
23
- end
15
+ after_save_commit :broadcast_row_update, unless: :destroyed?
16
+ after_save_commit :broadcast_run_progress, if: :should_broadcast_progress?
24
17
 
25
18
  def as_json(options = {})
26
19
  {
@@ -47,19 +40,22 @@ module CompletionKit
47
40
  def fully_reviewed?
48
41
  metric_ids = run.metric_ids
49
42
  return true if metric_ids.empty?
50
- reviewed_metric_ids = reviews.where(status: Review::TERMINAL_STATUSES).pluck(:metric_id).uniq
43
+ reviewed_metric_ids = reviews.where(status: HasJobStatus::TERMINAL_STATUSES).pluck(:metric_id).uniq
51
44
  (metric_ids - reviewed_metric_ids).empty?
52
45
  end
53
46
 
54
- def error_payload
55
- return nil if error_class.blank?
56
- { provider: error_provider, class: error_class, status: error_status, message: error_message }
47
+ private
48
+
49
+ def broadcast_row_update
50
+ run.broadcast_response_update(self)
57
51
  end
58
52
 
59
- private
53
+ def broadcast_run_progress
54
+ run.broadcast_progress
55
+ end
60
56
 
61
- def set_default_status
62
- self.status ||= "pending"
57
+ def should_broadcast_progress?
58
+ saved_change_to_status? && terminal?
63
59
  end
64
60
  end
65
61
  end