completion-kit 0.7.0 → 0.8.0
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +11 -3
- data/app/controllers/completion_kit/api/v1/base_controller.rb +14 -4
- data/app/controllers/completion_kit/api/v1/calibrations_controller.rb +2 -2
- data/app/controllers/completion_kit/api/v1/datasets_controller.rb +2 -2
- data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +2 -2
- data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb +1 -1
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +5 -5
- data/app/controllers/completion_kit/api/v1/prompts_controller.rb +2 -2
- data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +2 -2
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +7 -7
- data/app/controllers/completion_kit/api/v1/tags_controller.rb +2 -2
- data/app/jobs/completion_kit/generate_row_job.rb +2 -4
- data/app/jobs/completion_kit/judge_review_job.rb +4 -7
- data/app/models/completion_kit/run.rb +0 -1
- data/app/views/completion_kit/api_reference/index.html.erb +4 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4625f4e3f1afceb34f7603ee7a5025c78cb2499f6c2c287d83a9d02397f407b2
|
|
4
|
+
data.tar.gz: 740fecb69351c418aaececb19b9cfe3579bf9124a5df52a23d6db5851ce63fce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 548cc9666d2cf744babbe6a62047f2c4a44f79fe63490b795d84017742f3db4baff9313dfa4392e4b09c740645224493787d6af4208994d43a02a6610275e3c7
|
|
7
|
+
data.tar.gz: d8f405649cc42b70e4849f5e0fb9bfe733507baf91e47fdd3650c1ca1b3cab0099a337b038777799853a74cca8bc767baa6cf90107fd737eb519678d0dd4bdd0
|
|
@@ -298,6 +298,11 @@ form.button_to {
|
|
|
298
298
|
margin-right: 0.75rem;
|
|
299
299
|
}
|
|
300
300
|
|
|
301
|
+
.ck-title--sm {
|
|
302
|
+
font-size: clamp(1.1rem, 1.6vw, 1.4rem);
|
|
303
|
+
line-height: 1.25;
|
|
304
|
+
}
|
|
305
|
+
|
|
301
306
|
.ck-section-title {
|
|
302
307
|
font-size: 1.35rem;
|
|
303
308
|
}
|
|
@@ -6029,13 +6034,16 @@ a.tag-mark {
|
|
|
6029
6034
|
}
|
|
6030
6035
|
.ck-starter-grid {
|
|
6031
6036
|
display: grid;
|
|
6032
|
-
grid-template-columns: repeat(
|
|
6037
|
+
grid-template-columns: repeat(5, 1fr);
|
|
6033
6038
|
gap: 12px;
|
|
6034
6039
|
}
|
|
6035
|
-
@media (max-width:
|
|
6040
|
+
@media (max-width: 1100px) {
|
|
6041
|
+
.ck-starter-grid { grid-template-columns: repeat(3, 1fr); }
|
|
6042
|
+
}
|
|
6043
|
+
@media (max-width: 700px) {
|
|
6036
6044
|
.ck-starter-grid { grid-template-columns: repeat(2, 1fr); }
|
|
6037
6045
|
}
|
|
6038
|
-
@media (max-width:
|
|
6046
|
+
@media (max-width: 500px) {
|
|
6039
6047
|
.ck-starter-grid { grid-template-columns: 1fr; }
|
|
6040
6048
|
}
|
|
6041
6049
|
.ck-starter-card {
|
|
@@ -3,26 +3,36 @@ module CompletionKit
|
|
|
3
3
|
module V1
|
|
4
4
|
class BaseController < ActionController::API
|
|
5
5
|
rate_limit to: CompletionKit.config.api_rate_limit, within: 1.minute,
|
|
6
|
-
with: -> {
|
|
6
|
+
with: -> { render_error("Rate limit exceeded", status: :too_many_requests) }
|
|
7
7
|
before_action :authenticate_api!
|
|
8
8
|
|
|
9
9
|
private
|
|
10
10
|
|
|
11
|
+
def render_error(message, status:, details: nil)
|
|
12
|
+
payload = { error: message }
|
|
13
|
+
payload[:details] = details if details.present?
|
|
14
|
+
render json: payload, status: status
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def render_validation_errors(record, status: :unprocessable_entity)
|
|
18
|
+
render_error("Validation failed", status: status, details: record.errors.as_json)
|
|
19
|
+
end
|
|
20
|
+
|
|
11
21
|
def authenticate_api!
|
|
12
22
|
token = CompletionKit.config.api_token
|
|
13
23
|
unless token
|
|
14
|
-
|
|
24
|
+
render_error("API token not configured", status: :unauthorized)
|
|
15
25
|
return
|
|
16
26
|
end
|
|
17
27
|
|
|
18
28
|
provided = request.headers["Authorization"]&.match(/\ABearer (.+)\z/)&.[](1)
|
|
19
29
|
unless provided && ActiveSupport::SecurityUtils.secure_compare(provided, token)
|
|
20
|
-
|
|
30
|
+
render_error("Unauthorized", status: :unauthorized)
|
|
21
31
|
end
|
|
22
32
|
end
|
|
23
33
|
|
|
24
34
|
def not_found
|
|
25
|
-
|
|
35
|
+
render_error("Record not found", status: :not_found)
|
|
26
36
|
end
|
|
27
37
|
|
|
28
38
|
PAGINATION_DEFAULT_LIMIT = 50
|
|
@@ -30,7 +30,7 @@ module CompletionKit
|
|
|
30
30
|
if calibration.save
|
|
31
31
|
render json: calibration, status: calibration.previously_new_record? ? :created : :ok
|
|
32
32
|
else
|
|
33
|
-
|
|
33
|
+
render_validation_errors(calibration)
|
|
34
34
|
end
|
|
35
35
|
end
|
|
36
36
|
|
|
@@ -42,7 +42,7 @@ module CompletionKit
|
|
|
42
42
|
private
|
|
43
43
|
|
|
44
44
|
def ensure_calibration_enabled
|
|
45
|
-
|
|
45
|
+
render_error("Calibration disabled", status: :not_found) unless CompletionKit.config.judge_calibration_enabled
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def set_nested_scope
|
|
@@ -19,7 +19,7 @@ module CompletionKit
|
|
|
19
19
|
if dataset.save
|
|
20
20
|
render json: dataset, status: :created
|
|
21
21
|
else
|
|
22
|
-
|
|
22
|
+
render_validation_errors(dataset)
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
|
|
@@ -27,7 +27,7 @@ module CompletionKit
|
|
|
27
27
|
if @dataset.update(dataset_params)
|
|
28
28
|
render json: @dataset
|
|
29
29
|
else
|
|
30
|
-
|
|
30
|
+
render_validation_errors(@dataset)
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
33
|
|
|
@@ -20,7 +20,7 @@ module CompletionKit
|
|
|
20
20
|
metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
21
21
|
render json: metric_group.reload, status: :created
|
|
22
22
|
else
|
|
23
|
-
|
|
23
|
+
render_validation_errors(metric_group)
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
26
|
|
|
@@ -29,7 +29,7 @@ module CompletionKit
|
|
|
29
29
|
@metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
30
30
|
render json: @metric_group.reload
|
|
31
31
|
else
|
|
32
|
-
|
|
32
|
+
render_validation_errors(@metric_group)
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
|
|
@@ -25,7 +25,7 @@ module CompletionKit
|
|
|
25
25
|
|
|
26
26
|
def destroy
|
|
27
27
|
if @version.published?
|
|
28
|
-
|
|
28
|
+
render_error("Cannot dismiss a published version. Publish a different version as current instead.", status: :conflict)
|
|
29
29
|
return
|
|
30
30
|
end
|
|
31
31
|
@version.destroy!
|
|
@@ -19,7 +19,7 @@ module CompletionKit
|
|
|
19
19
|
if metric.save
|
|
20
20
|
render json: metric, status: :created
|
|
21
21
|
else
|
|
22
|
-
|
|
22
|
+
render_validation_errors(metric)
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
|
|
@@ -27,7 +27,7 @@ module CompletionKit
|
|
|
27
27
|
if @metric.update(metric_params)
|
|
28
28
|
render json: @metric
|
|
29
29
|
else
|
|
30
|
-
|
|
30
|
+
render_validation_errors(@metric)
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
33
|
|
|
@@ -39,7 +39,7 @@ module CompletionKit
|
|
|
39
39
|
def suggest_variants
|
|
40
40
|
disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree").count
|
|
41
41
|
if disagreement_count.zero?
|
|
42
|
-
|
|
42
|
+
render_error("Mark at least one case as Disagree before asking the model to suggest a change.", status: :unprocessable_entity)
|
|
43
43
|
return
|
|
44
44
|
end
|
|
45
45
|
|
|
@@ -47,7 +47,7 @@ module CompletionKit
|
|
|
47
47
|
generator = MetricVariantGenerator.new(@metric, count: params[:count].to_i, model: params[:model])
|
|
48
48
|
variants = generator.call
|
|
49
49
|
if variants.empty?
|
|
50
|
-
|
|
50
|
+
render_error("The model returned no usable variants. Try again with a different model.", status: :unprocessable_entity)
|
|
51
51
|
return
|
|
52
52
|
end
|
|
53
53
|
versions = generator.persist!(variants)
|
|
@@ -71,7 +71,7 @@ module CompletionKit
|
|
|
71
71
|
@metric.update!(few_shot_examples: examples)
|
|
72
72
|
render json: @metric.reload
|
|
73
73
|
rescue ActiveRecord::RecordNotFound
|
|
74
|
-
|
|
74
|
+
render_error("Calibration not found or not a disagree on this metric.", status: :not_found)
|
|
75
75
|
end
|
|
76
76
|
|
|
77
77
|
def remove_few_shot
|
|
@@ -19,7 +19,7 @@ module CompletionKit
|
|
|
19
19
|
if prompt.save
|
|
20
20
|
render json: prompt, status: :created
|
|
21
21
|
else
|
|
22
|
-
|
|
22
|
+
render_validation_errors(prompt)
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
|
|
@@ -32,7 +32,7 @@ module CompletionKit
|
|
|
32
32
|
elsif @prompt.update(prompt_params)
|
|
33
33
|
render json: @prompt
|
|
34
34
|
else
|
|
35
|
-
|
|
35
|
+
render_validation_errors(@prompt)
|
|
36
36
|
end
|
|
37
37
|
end
|
|
38
38
|
|
|
@@ -17,7 +17,7 @@ module CompletionKit
|
|
|
17
17
|
if credential.save
|
|
18
18
|
render json: credential, status: :created
|
|
19
19
|
else
|
|
20
|
-
|
|
20
|
+
render_validation_errors(credential)
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
@@ -25,7 +25,7 @@ module CompletionKit
|
|
|
25
25
|
if @credential.update(credential_params)
|
|
26
26
|
render json: @credential
|
|
27
27
|
else
|
|
28
|
-
|
|
28
|
+
render_validation_errors(@credential)
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
|
|
@@ -23,7 +23,7 @@ module CompletionKit
|
|
|
23
23
|
run.replace_metrics!(params[:metric_ids])
|
|
24
24
|
render json: run.reload, status: :created
|
|
25
25
|
else
|
|
26
|
-
|
|
26
|
+
render_validation_errors(run)
|
|
27
27
|
end
|
|
28
28
|
end
|
|
29
29
|
|
|
@@ -32,7 +32,7 @@ module CompletionKit
|
|
|
32
32
|
@run.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
33
33
|
render json: @run.reload
|
|
34
34
|
else
|
|
35
|
-
|
|
35
|
+
render_validation_errors(@run)
|
|
36
36
|
end
|
|
37
37
|
end
|
|
38
38
|
|
|
@@ -45,13 +45,13 @@ module CompletionKit
|
|
|
45
45
|
if @run.start!
|
|
46
46
|
render json: @run.reload, status: :accepted
|
|
47
47
|
else
|
|
48
|
-
|
|
48
|
+
render_error(@run.failure_summary || @run.errors.full_messages.to_sentence, status: :unprocessable_entity)
|
|
49
49
|
end
|
|
50
50
|
end
|
|
51
51
|
|
|
52
52
|
def retry_failures
|
|
53
53
|
if @run.stale_review_summary.any?
|
|
54
|
-
return
|
|
54
|
+
return render_error("Judge has changed since this run executed. Retry would mix versions in the same run; use POST /api/v1/runs/:id/rerun instead.", status: :conflict)
|
|
55
55
|
end
|
|
56
56
|
|
|
57
57
|
scope = @run.responses.where(status: "failed")
|
|
@@ -90,7 +90,7 @@ module CompletionKit
|
|
|
90
90
|
if new_run.start!
|
|
91
91
|
render json: new_run.reload, status: :accepted
|
|
92
92
|
else
|
|
93
|
-
|
|
93
|
+
render_error(new_run.failure_summary || "Could not start the new run.", status: :unprocessable_entity)
|
|
94
94
|
end
|
|
95
95
|
end
|
|
96
96
|
|
|
@@ -98,7 +98,7 @@ module CompletionKit
|
|
|
98
98
|
if @run.regrade!
|
|
99
99
|
render json: @run.reload, status: :accepted
|
|
100
100
|
else
|
|
101
|
-
|
|
101
|
+
render_error("Nothing to re-grade. The run has no succeeded responses or no metrics attached.", status: :unprocessable_entity)
|
|
102
102
|
end
|
|
103
103
|
end
|
|
104
104
|
|
|
@@ -107,7 +107,7 @@ module CompletionKit
|
|
|
107
107
|
comparison = build_run_comparison(@run, other)
|
|
108
108
|
render json: { left_run_id: @run.id, right_run_id: other.id, metric_ids: comparison[:metric_ids], rows: comparison[:rows] }
|
|
109
109
|
rescue ActiveRecord::RecordNotFound
|
|
110
|
-
|
|
110
|
+
render_error("Other run not found. Pass ?with=<run_id>.", status: :not_found)
|
|
111
111
|
end
|
|
112
112
|
|
|
113
113
|
private
|
|
@@ -17,7 +17,7 @@ module CompletionKit
|
|
|
17
17
|
if tag.save
|
|
18
18
|
render json: tag, status: :created
|
|
19
19
|
else
|
|
20
|
-
|
|
20
|
+
render_validation_errors(tag)
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
@@ -25,7 +25,7 @@ module CompletionKit
|
|
|
25
25
|
if @tag.update(tag_params)
|
|
26
26
|
render json: @tag
|
|
27
27
|
else
|
|
28
|
-
|
|
28
|
+
render_validation_errors(@tag)
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
|
|
@@ -80,8 +80,7 @@ module CompletionKit
|
|
|
80
80
|
end
|
|
81
81
|
|
|
82
82
|
def record_terminal_failure!(error)
|
|
83
|
-
|
|
84
|
-
response = Response.find_by(id: response_id)
|
|
83
|
+
response = Response.find_by(id: @response_id)
|
|
85
84
|
return unless response
|
|
86
85
|
|
|
87
86
|
response.update!(
|
|
@@ -98,8 +97,7 @@ module CompletionKit
|
|
|
98
97
|
end
|
|
99
98
|
|
|
100
99
|
def enqueue_completion_check
|
|
101
|
-
|
|
102
|
-
RunCompletionCheckJob.perform_later(run_id)
|
|
100
|
+
RunCompletionCheckJob.perform_later(@run_id)
|
|
103
101
|
end
|
|
104
102
|
end
|
|
105
103
|
end
|
|
@@ -91,14 +91,12 @@ module CompletionKit
|
|
|
91
91
|
end
|
|
92
92
|
|
|
93
93
|
def record_terminal_failure!(error)
|
|
94
|
-
|
|
95
|
-
metric_id = @metric_id || arguments.last
|
|
96
|
-
response = Response.find_by(id: response_id)
|
|
94
|
+
response = Response.find_by(id: @response_id)
|
|
97
95
|
return unless response
|
|
98
96
|
|
|
99
|
-
review = response.reviews.find_or_initialize_by(metric_id: metric_id)
|
|
97
|
+
review = response.reviews.find_or_initialize_by(metric_id: @metric_id)
|
|
100
98
|
review.assign_attributes(
|
|
101
|
-
metric_name: review.metric_name || Metric.find_by(id: metric_id)&.name || "(deleted metric)",
|
|
99
|
+
metric_name: review.metric_name || Metric.find_by(id: @metric_id)&.name || "(deleted metric)",
|
|
102
100
|
status: "failed",
|
|
103
101
|
error_provider: provider_for(response),
|
|
104
102
|
error_class: error.class.name,
|
|
@@ -115,8 +113,7 @@ module CompletionKit
|
|
|
115
113
|
end
|
|
116
114
|
|
|
117
115
|
def enqueue_completion_check
|
|
118
|
-
|
|
119
|
-
response = Response.find_by(id: response_id)
|
|
116
|
+
response = Response.find_by(id: @response_id)
|
|
120
117
|
RunCompletionCheckJob.perform_later(response.run_id) if response
|
|
121
118
|
end
|
|
122
119
|
|
|
@@ -20,6 +20,10 @@
|
|
|
20
20
|
<p class="ck-kicker">Tag filtering</p>
|
|
21
21
|
<p class="ck-meta-copy">Prompts, runs, metrics, datasets, and metric groups accept <code>?tag[]=name</code> (repeat for OR semantics).</p>
|
|
22
22
|
</div>
|
|
23
|
+
<div>
|
|
24
|
+
<p class="ck-kicker">Error shape</p>
|
|
25
|
+
<p class="ck-meta-copy">Every error response carries a top-level <code>error</code> string. Validation failures (422) add a <code>details</code> object keyed by field: <code>{ "error": "Validation failed", "details": { "name": ["can't be blank"] } }</code>.</p>
|
|
26
|
+
</div>
|
|
23
27
|
</div>
|
|
24
28
|
</div>
|
|
25
29
|
</div>
|