completion-kit 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +26 -4
- data/app/controllers/completion_kit/api/v1/base_controller.rb +36 -4
- data/app/controllers/completion_kit/api/v1/calibrations_controller.rb +24 -5
- data/app/controllers/completion_kit/api/v1/datasets_controller.rb +5 -3
- data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +5 -3
- data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -4
- data/app/controllers/completion_kit/api/v1/prompts_controller.rb +5 -3
- data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +3 -3
- data/app/controllers/completion_kit/api/v1/responses_controller.rb +3 -1
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +79 -6
- data/app/controllers/completion_kit/api/v1/tags_controller.rb +3 -3
- data/app/controllers/completion_kit/metrics_controller.rb +3 -3
- data/app/controllers/completion_kit/runs_controller.rb +1 -1
- data/app/helpers/completion_kit/application_helper.rb +0 -14
- data/app/jobs/completion_kit/generate_row_job.rb +5 -12
- data/app/jobs/completion_kit/judge_review_job.rb +10 -16
- data/app/models/completion_kit/metric.rb +1 -0
- data/app/models/completion_kit/metric_version.rb +16 -0
- data/app/models/completion_kit/response.rb +13 -17
- data/app/models/completion_kit/review.rb +18 -22
- data/app/models/completion_kit/run.rb +27 -24
- data/app/models/concerns/completion_kit/has_job_status.rb +31 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +3 -1
- data/app/services/completion_kit/mcp_tools/metric_versions.rb +67 -0
- data/app/services/completion_kit/starter_metrics.rb +5 -5
- data/app/views/completion_kit/api_reference/_body.html.erb +91 -6
- data/app/views/completion_kit/api_reference/index.html.erb +12 -0
- data/app/views/completion_kit/metrics/index.html.erb +3 -3
- data/app/views/completion_kit/metrics/show.html.erb +1 -0
- data/config/routes.rb +16 -1
- data/lib/completion_kit/version.rb +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4625f4e3f1afceb34f7603ee7a5025c78cb2499f6c2c287d83a9d02397f407b2
|
|
4
|
+
data.tar.gz: 740fecb69351c418aaececb19b9cfe3579bf9124a5df52a23d6db5851ce63fce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 548cc9666d2cf744babbe6a62047f2c4a44f79fe63490b795d84017742f3db4baff9313dfa4392e4b09c740645224493787d6af4208994d43a02a6610275e3c7
|
|
7
|
+
data.tar.gz: d8f405649cc42b70e4849f5e0fb9bfe733507baf91e47fdd3650c1ca1b3cab0099a337b038777799853a74cca8bc767baa6cf90107fd737eb519678d0dd4bdd0
|
|
@@ -298,6 +298,11 @@ form.button_to {
|
|
|
298
298
|
margin-right: 0.75rem;
|
|
299
299
|
}
|
|
300
300
|
|
|
301
|
+
.ck-title--sm {
|
|
302
|
+
font-size: clamp(1.1rem, 1.6vw, 1.4rem);
|
|
303
|
+
line-height: 1.25;
|
|
304
|
+
}
|
|
305
|
+
|
|
301
306
|
.ck-section-title {
|
|
302
307
|
font-size: 1.35rem;
|
|
303
308
|
}
|
|
@@ -3117,6 +3122,7 @@ select.ck-input {
|
|
|
3117
3122
|
#ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
|
|
3118
3123
|
#ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
|
|
3119
3124
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
|
|
3125
|
+
#ck-tab-calibrations:checked ~ .ck-api-tabs__nav label[for="ck-tab-calibrations"],
|
|
3120
3126
|
#ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
|
|
3121
3127
|
#ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
|
|
3122
3128
|
color: var(--ck-accent);
|
|
@@ -3131,8 +3137,9 @@ select.ck-input {
|
|
|
3131
3137
|
#ck-tab-datasets:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(5),
|
|
3132
3138
|
#ck-tab-metrics:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(6),
|
|
3133
3139
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(7),
|
|
3134
|
-
#ck-tab-
|
|
3135
|
-
#ck-tab-
|
|
3140
|
+
#ck-tab-calibrations:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
|
|
3141
|
+
#ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9),
|
|
3142
|
+
#ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(10) {
|
|
3136
3143
|
display: block;
|
|
3137
3144
|
}
|
|
3138
3145
|
|
|
@@ -3172,6 +3179,7 @@ select.ck-input {
|
|
|
3172
3179
|
#ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
|
|
3173
3180
|
#ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
|
|
3174
3181
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
|
|
3182
|
+
#ck-tab-calibrations:checked ~ .ck-api-tabs__nav label[for="ck-tab-calibrations"],
|
|
3175
3183
|
#ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
|
|
3176
3184
|
#ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
|
|
3177
3185
|
border-left-color: transparent;
|
|
@@ -3603,6 +3611,11 @@ select.ck-input {
|
|
|
3603
3611
|
border-color: var(--ck-line);
|
|
3604
3612
|
color: var(--ck-dim);
|
|
3605
3613
|
}
|
|
3614
|
+
.ck-source-chip--revert {
|
|
3615
|
+
border-color: rgba(245, 158, 11, 0.35);
|
|
3616
|
+
background: rgba(245, 158, 11, 0.08);
|
|
3617
|
+
color: rgb(217, 119, 6);
|
|
3618
|
+
}
|
|
3606
3619
|
.ck-source-chip--current {
|
|
3607
3620
|
border-color: var(--ck-line-strong);
|
|
3608
3621
|
color: var(--ck-text);
|
|
@@ -6021,8 +6034,17 @@ a.tag-mark {
|
|
|
6021
6034
|
}
|
|
6022
6035
|
.ck-starter-grid {
|
|
6023
6036
|
display: grid;
|
|
6024
|
-
grid-template-columns: repeat(
|
|
6025
|
-
gap:
|
|
6037
|
+
grid-template-columns: repeat(5, 1fr);
|
|
6038
|
+
gap: 12px;
|
|
6039
|
+
}
|
|
6040
|
+
@media (max-width: 1100px) {
|
|
6041
|
+
.ck-starter-grid { grid-template-columns: repeat(3, 1fr); }
|
|
6042
|
+
}
|
|
6043
|
+
@media (max-width: 700px) {
|
|
6044
|
+
.ck-starter-grid { grid-template-columns: repeat(2, 1fr); }
|
|
6045
|
+
}
|
|
6046
|
+
@media (max-width: 500px) {
|
|
6047
|
+
.ck-starter-grid { grid-template-columns: 1fr; }
|
|
6026
6048
|
}
|
|
6027
6049
|
.ck-starter-card {
|
|
6028
6050
|
display: flex;
|
|
@@ -3,26 +3,58 @@ module CompletionKit
|
|
|
3
3
|
module V1
|
|
4
4
|
class BaseController < ActionController::API
|
|
5
5
|
rate_limit to: CompletionKit.config.api_rate_limit, within: 1.minute,
|
|
6
|
-
with: -> {
|
|
6
|
+
with: -> { render_error("Rate limit exceeded", status: :too_many_requests) }
|
|
7
7
|
before_action :authenticate_api!
|
|
8
8
|
|
|
9
9
|
private
|
|
10
10
|
|
|
11
|
+
def render_error(message, status:, details: nil)
|
|
12
|
+
payload = { error: message }
|
|
13
|
+
payload[:details] = details if details.present?
|
|
14
|
+
render json: payload, status: status
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def render_validation_errors(record, status: :unprocessable_entity)
|
|
18
|
+
render_error("Validation failed", status: status, details: record.errors.as_json)
|
|
19
|
+
end
|
|
20
|
+
|
|
11
21
|
def authenticate_api!
|
|
12
22
|
token = CompletionKit.config.api_token
|
|
13
23
|
unless token
|
|
14
|
-
|
|
24
|
+
render_error("API token not configured", status: :unauthorized)
|
|
15
25
|
return
|
|
16
26
|
end
|
|
17
27
|
|
|
18
28
|
provided = request.headers["Authorization"]&.match(/\ABearer (.+)\z/)&.[](1)
|
|
19
29
|
unless provided && ActiveSupport::SecurityUtils.secure_compare(provided, token)
|
|
20
|
-
|
|
30
|
+
render_error("Unauthorized", status: :unauthorized)
|
|
21
31
|
end
|
|
22
32
|
end
|
|
23
33
|
|
|
24
34
|
def not_found
|
|
25
|
-
|
|
35
|
+
render_error("Record not found", status: :not_found)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
PAGINATION_DEFAULT_LIMIT = 50
|
|
39
|
+
PAGINATION_MAX_LIMIT = 500
|
|
40
|
+
|
|
41
|
+
def paginate(scope)
|
|
42
|
+
total = scope.count
|
|
43
|
+
limit = (params[:limit].presence || PAGINATION_DEFAULT_LIMIT).to_i
|
|
44
|
+
limit = PAGINATION_DEFAULT_LIMIT if limit <= 0
|
|
45
|
+
limit = PAGINATION_MAX_LIMIT if limit > PAGINATION_MAX_LIMIT
|
|
46
|
+
offset = params[:offset].to_i
|
|
47
|
+
offset = 0 if offset < 0
|
|
48
|
+
response.set_header("X-Total-Count", total.to_s)
|
|
49
|
+
response.set_header("X-Limit", limit.to_s)
|
|
50
|
+
response.set_header("X-Offset", offset.to_s)
|
|
51
|
+
scope.limit(limit).offset(offset)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def filter_by_tags(scope)
|
|
55
|
+
names = Array(params[:tag]).map(&:to_s).reject(&:blank?)
|
|
56
|
+
return scope if names.empty?
|
|
57
|
+
scope.joins(:tags).where(completion_kit_tags: { name: names }).distinct
|
|
26
58
|
end
|
|
27
59
|
|
|
28
60
|
end
|
|
@@ -3,10 +3,18 @@ module CompletionKit
|
|
|
3
3
|
module V1
|
|
4
4
|
class CalibrationsController < BaseController
|
|
5
5
|
before_action :ensure_calibration_enabled
|
|
6
|
-
before_action :
|
|
6
|
+
before_action :set_nested_scope, only: [:create]
|
|
7
|
+
before_action :load_calibration, only: [:destroy]
|
|
7
8
|
|
|
8
9
|
def index
|
|
9
|
-
|
|
10
|
+
scope = Calibration.all
|
|
11
|
+
scope = scope.where(run_id: params[:run_id]) if params[:run_id].present?
|
|
12
|
+
scope = scope.where(response_id: params[:response_id]) if params[:response_id].present?
|
|
13
|
+
scope = scope.where(metric_id: params[:metric_id]) if params[:metric_id].present?
|
|
14
|
+
scope = scope.where(metric_version_id: params[:metric_version_id]) if params[:metric_version_id].present?
|
|
15
|
+
scope = scope.where(created_by: params[:created_by]) if params[:created_by].present?
|
|
16
|
+
scope = scope.where(verdict: params[:verdict]) if params[:verdict].present?
|
|
17
|
+
render json: paginate(scope.order(:created_at))
|
|
10
18
|
end
|
|
11
19
|
|
|
12
20
|
def create
|
|
@@ -22,17 +30,22 @@ module CompletionKit
|
|
|
22
30
|
if calibration.save
|
|
23
31
|
render json: calibration, status: calibration.previously_new_record? ? :created : :ok
|
|
24
32
|
else
|
|
25
|
-
|
|
33
|
+
render_validation_errors(calibration)
|
|
26
34
|
end
|
|
27
35
|
end
|
|
28
36
|
|
|
37
|
+
def destroy
|
|
38
|
+
@calibration.destroy!
|
|
39
|
+
head :no_content
|
|
40
|
+
end
|
|
41
|
+
|
|
29
42
|
private
|
|
30
43
|
|
|
31
44
|
def ensure_calibration_enabled
|
|
32
|
-
|
|
45
|
+
render_error("Calibration disabled", status: :not_found) unless CompletionKit.config.judge_calibration_enabled
|
|
33
46
|
end
|
|
34
47
|
|
|
35
|
-
def
|
|
48
|
+
def set_nested_scope
|
|
36
49
|
@run = Run.find(params[:run_id])
|
|
37
50
|
@response = @run.responses.find(params[:response_id])
|
|
38
51
|
@metric = Metric.find(params[:metric_id])
|
|
@@ -40,6 +53,12 @@ module CompletionKit
|
|
|
40
53
|
not_found
|
|
41
54
|
end
|
|
42
55
|
|
|
56
|
+
def load_calibration
|
|
57
|
+
@calibration = Calibration.find(params[:id])
|
|
58
|
+
rescue ActiveRecord::RecordNotFound
|
|
59
|
+
not_found
|
|
60
|
+
end
|
|
61
|
+
|
|
43
62
|
def scope_calibrations
|
|
44
63
|
Calibration.where(run_id: @run.id, response_id: @response.id, metric_id: @metric.id)
|
|
45
64
|
end
|
|
@@ -5,7 +5,9 @@ module CompletionKit
|
|
|
5
5
|
before_action :set_dataset, only: [:show, :update, :destroy]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
|
|
8
|
+
scope = Dataset.includes(:tags)
|
|
9
|
+
scope = filter_by_tags(scope)
|
|
10
|
+
render json: paginate(scope.order(created_at: :desc))
|
|
9
11
|
end
|
|
10
12
|
|
|
11
13
|
def show
|
|
@@ -17,7 +19,7 @@ module CompletionKit
|
|
|
17
19
|
if dataset.save
|
|
18
20
|
render json: dataset, status: :created
|
|
19
21
|
else
|
|
20
|
-
|
|
22
|
+
render_validation_errors(dataset)
|
|
21
23
|
end
|
|
22
24
|
end
|
|
23
25
|
|
|
@@ -25,7 +27,7 @@ module CompletionKit
|
|
|
25
27
|
if @dataset.update(dataset_params)
|
|
26
28
|
render json: @dataset
|
|
27
29
|
else
|
|
28
|
-
|
|
30
|
+
render_validation_errors(@dataset)
|
|
29
31
|
end
|
|
30
32
|
end
|
|
31
33
|
|
|
@@ -5,7 +5,9 @@ module CompletionKit
|
|
|
5
5
|
before_action :set_metric_group, only: [:show, :update, :destroy]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
|
|
8
|
+
scope = MetricGroup.includes(:tags)
|
|
9
|
+
scope = filter_by_tags(scope)
|
|
10
|
+
render json: paginate(scope.order(created_at: :desc))
|
|
9
11
|
end
|
|
10
12
|
|
|
11
13
|
def show
|
|
@@ -18,7 +20,7 @@ module CompletionKit
|
|
|
18
20
|
metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
19
21
|
render json: metric_group.reload, status: :created
|
|
20
22
|
else
|
|
21
|
-
|
|
23
|
+
render_validation_errors(metric_group)
|
|
22
24
|
end
|
|
23
25
|
end
|
|
24
26
|
|
|
@@ -27,7 +29,7 @@ module CompletionKit
|
|
|
27
29
|
@metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
28
30
|
render json: @metric_group.reload
|
|
29
31
|
else
|
|
30
|
-
|
|
32
|
+
render_validation_errors(@metric_group)
|
|
31
33
|
end
|
|
32
34
|
end
|
|
33
35
|
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
module Api
|
|
3
|
+
module V1
|
|
4
|
+
class MetricVersionsController < BaseController
|
|
5
|
+
before_action :set_metric
|
|
6
|
+
before_action :set_version, only: [:show, :publish, :destroy]
|
|
7
|
+
|
|
8
|
+
def index
|
|
9
|
+
render json: paginate(@metric.metric_versions.order(version_number: :desc))
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def show
|
|
13
|
+
render json: @version
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def publish
|
|
17
|
+
if @version.published? && !@version.current?
|
|
18
|
+
audit = @version.revert!
|
|
19
|
+
render json: audit
|
|
20
|
+
else
|
|
21
|
+
@version.publish!
|
|
22
|
+
render json: @version.reload
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def destroy
|
|
27
|
+
if @version.published?
|
|
28
|
+
render_error("Cannot dismiss a published version. Publish a different version as current instead.", status: :conflict)
|
|
29
|
+
return
|
|
30
|
+
end
|
|
31
|
+
@version.destroy!
|
|
32
|
+
head :no_content
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
private
|
|
36
|
+
|
|
37
|
+
def set_metric
|
|
38
|
+
@metric = Metric.find(params[:metric_id])
|
|
39
|
+
rescue ActiveRecord::RecordNotFound
|
|
40
|
+
not_found
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def set_version
|
|
44
|
+
@version = @metric.metric_versions.find(params[:id])
|
|
45
|
+
rescue ActiveRecord::RecordNotFound
|
|
46
|
+
not_found
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -2,10 +2,12 @@ module CompletionKit
|
|
|
2
2
|
module Api
|
|
3
3
|
module V1
|
|
4
4
|
class MetricsController < BaseController
|
|
5
|
-
before_action :set_metric, only: [:show, :update, :destroy]
|
|
5
|
+
before_action :set_metric, only: [:show, :update, :destroy, :suggest_variants, :add_few_shot, :remove_few_shot]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
|
|
8
|
+
scope = Metric.includes(:tags)
|
|
9
|
+
scope = filter_by_tags(scope)
|
|
10
|
+
render json: paginate(scope.order(created_at: :desc))
|
|
9
11
|
end
|
|
10
12
|
|
|
11
13
|
def show
|
|
@@ -17,7 +19,7 @@ module CompletionKit
|
|
|
17
19
|
if metric.save
|
|
18
20
|
render json: metric, status: :created
|
|
19
21
|
else
|
|
20
|
-
|
|
22
|
+
render_validation_errors(metric)
|
|
21
23
|
end
|
|
22
24
|
end
|
|
23
25
|
|
|
@@ -25,7 +27,7 @@ module CompletionKit
|
|
|
25
27
|
if @metric.update(metric_params)
|
|
26
28
|
render json: @metric
|
|
27
29
|
else
|
|
28
|
-
|
|
30
|
+
render_validation_errors(@metric)
|
|
29
31
|
end
|
|
30
32
|
end
|
|
31
33
|
|
|
@@ -34,6 +36,51 @@ module CompletionKit
|
|
|
34
36
|
head :no_content
|
|
35
37
|
end
|
|
36
38
|
|
|
39
|
+
def suggest_variants
|
|
40
|
+
disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree").count
|
|
41
|
+
if disagreement_count.zero?
|
|
42
|
+
render_error("Mark at least one case as Disagree before asking the model to suggest a change.", status: :unprocessable_entity)
|
|
43
|
+
return
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").destroy_all
|
|
47
|
+
generator = MetricVariantGenerator.new(@metric, count: params[:count].to_i, model: params[:model])
|
|
48
|
+
variants = generator.call
|
|
49
|
+
if variants.empty?
|
|
50
|
+
render_error("The model returned no usable variants. Try again with a different model.", status: :unprocessable_entity)
|
|
51
|
+
return
|
|
52
|
+
end
|
|
53
|
+
versions = generator.persist!(variants)
|
|
54
|
+
render json: versions, status: :created
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def add_few_shot
|
|
58
|
+
calibration = Calibration.where(metric_id: @metric.id, verdict: "disagree").find(params[:calibration_id])
|
|
59
|
+
review = calibration.response.reviews.find_by(metric_id: @metric.id)
|
|
60
|
+
examples = Array(@metric.few_shot_examples)
|
|
61
|
+
examples << {
|
|
62
|
+
"input" => calibration.response.input_data.to_s.truncate(2000),
|
|
63
|
+
"response" => calibration.response.response_text.to_s.truncate(2000),
|
|
64
|
+
"judge_score" => review&.ai_score&.to_f,
|
|
65
|
+
"judge_feedback" => review&.ai_feedback.to_s.truncate(1000),
|
|
66
|
+
"human_score" => calibration.corrected_score&.to_f,
|
|
67
|
+
"human_note" => calibration.note.to_s.truncate(1000),
|
|
68
|
+
"calibration_id" => calibration.id,
|
|
69
|
+
"added_at" => Time.current.utc.iso8601
|
|
70
|
+
}
|
|
71
|
+
@metric.update!(few_shot_examples: examples)
|
|
72
|
+
render json: @metric.reload
|
|
73
|
+
rescue ActiveRecord::RecordNotFound
|
|
74
|
+
render_error("Calibration not found or not a disagree on this metric.", status: :not_found)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def remove_few_shot
|
|
78
|
+
cal_id = params[:calibration_id].to_i
|
|
79
|
+
remaining = Array(@metric.few_shot_examples).reject { |fs| fs["calibration_id"].to_i == cal_id }
|
|
80
|
+
@metric.update!(few_shot_examples: remaining)
|
|
81
|
+
render json: @metric.reload
|
|
82
|
+
end
|
|
83
|
+
|
|
37
84
|
private
|
|
38
85
|
|
|
39
86
|
def set_metric
|
|
@@ -5,7 +5,9 @@ module CompletionKit
|
|
|
5
5
|
before_action :set_prompt, only: [:show, :update, :destroy, :publish]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
|
|
8
|
+
scope = Prompt.includes(:tags)
|
|
9
|
+
scope = filter_by_tags(scope)
|
|
10
|
+
render json: paginate(scope.order(created_at: :desc))
|
|
9
11
|
end
|
|
10
12
|
|
|
11
13
|
def show
|
|
@@ -17,7 +19,7 @@ module CompletionKit
|
|
|
17
19
|
if prompt.save
|
|
18
20
|
render json: prompt, status: :created
|
|
19
21
|
else
|
|
20
|
-
|
|
22
|
+
render_validation_errors(prompt)
|
|
21
23
|
end
|
|
22
24
|
end
|
|
23
25
|
|
|
@@ -30,7 +32,7 @@ module CompletionKit
|
|
|
30
32
|
elsif @prompt.update(prompt_params)
|
|
31
33
|
render json: @prompt
|
|
32
34
|
else
|
|
33
|
-
|
|
35
|
+
render_validation_errors(@prompt)
|
|
34
36
|
end
|
|
35
37
|
end
|
|
36
38
|
|
|
@@ -5,7 +5,7 @@ module CompletionKit
|
|
|
5
5
|
before_action :set_credential, only: [:show, :update, :destroy]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
render json: ProviderCredential.order(created_at: :desc)
|
|
8
|
+
render json: paginate(ProviderCredential.order(created_at: :desc))
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
def show
|
|
@@ -17,7 +17,7 @@ module CompletionKit
|
|
|
17
17
|
if credential.save
|
|
18
18
|
render json: credential, status: :created
|
|
19
19
|
else
|
|
20
|
-
|
|
20
|
+
render_validation_errors(credential)
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
@@ -25,7 +25,7 @@ module CompletionKit
|
|
|
25
25
|
if @credential.update(credential_params)
|
|
26
26
|
render json: @credential
|
|
27
27
|
else
|
|
28
|
-
|
|
28
|
+
render_validation_errors(@credential)
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
|
|
@@ -6,7 +6,9 @@ module CompletionKit
|
|
|
6
6
|
before_action :set_response, only: [:show]
|
|
7
7
|
|
|
8
8
|
def index
|
|
9
|
-
|
|
9
|
+
scope = @run.responses.includes(:reviews)
|
|
10
|
+
scope = scope.where(status: params[:status]) if params[:status].present?
|
|
11
|
+
render json: paginate(scope.order(:id))
|
|
10
12
|
end
|
|
11
13
|
|
|
12
14
|
def show
|
|
@@ -2,10 +2,15 @@ module CompletionKit
|
|
|
2
2
|
module Api
|
|
3
3
|
module V1
|
|
4
4
|
class RunsController < BaseController
|
|
5
|
-
before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures]
|
|
5
|
+
before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures, :rerun, :regrade, :compare]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
|
|
8
|
+
scope = Run.includes(:tags)
|
|
9
|
+
scope = scope.where(status: params[:status]) if params[:status].present?
|
|
10
|
+
scope = scope.where(prompt_id: params[:prompt_id]) if params[:prompt_id].present?
|
|
11
|
+
scope = scope.where(dataset_id: params[:dataset_id]) if params[:dataset_id].present?
|
|
12
|
+
scope = filter_by_tags(scope)
|
|
13
|
+
render json: paginate(scope.order(created_at: :desc))
|
|
9
14
|
end
|
|
10
15
|
|
|
11
16
|
def show
|
|
@@ -18,7 +23,7 @@ module CompletionKit
|
|
|
18
23
|
run.replace_metrics!(params[:metric_ids])
|
|
19
24
|
render json: run.reload, status: :created
|
|
20
25
|
else
|
|
21
|
-
|
|
26
|
+
render_validation_errors(run)
|
|
22
27
|
end
|
|
23
28
|
end
|
|
24
29
|
|
|
@@ -27,7 +32,7 @@ module CompletionKit
|
|
|
27
32
|
@run.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
28
33
|
render json: @run.reload
|
|
29
34
|
else
|
|
30
|
-
|
|
35
|
+
render_validation_errors(@run)
|
|
31
36
|
end
|
|
32
37
|
end
|
|
33
38
|
|
|
@@ -40,13 +45,13 @@ module CompletionKit
|
|
|
40
45
|
if @run.start!
|
|
41
46
|
render json: @run.reload, status: :accepted
|
|
42
47
|
else
|
|
43
|
-
|
|
48
|
+
render_error(@run.failure_summary || @run.errors.full_messages.to_sentence, status: :unprocessable_entity)
|
|
44
49
|
end
|
|
45
50
|
end
|
|
46
51
|
|
|
47
52
|
def retry_failures
|
|
48
53
|
if @run.stale_review_summary.any?
|
|
49
|
-
return
|
|
54
|
+
return render_error("Judge has changed since this run executed. Retry would mix versions in the same run; use POST /api/v1/runs/:id/rerun instead.", status: :conflict)
|
|
50
55
|
end
|
|
51
56
|
|
|
52
57
|
scope = @run.responses.where(status: "failed")
|
|
@@ -71,8 +76,76 @@ module CompletionKit
|
|
|
71
76
|
render json: @run.reload, status: :accepted
|
|
72
77
|
end
|
|
73
78
|
|
|
79
|
+
def rerun
|
|
80
|
+
new_run = Run.create!(
|
|
81
|
+
prompt_id: @run.prompt_id,
|
|
82
|
+
dataset_id: @run.dataset_id,
|
|
83
|
+
judge_model: @run.judge_model,
|
|
84
|
+
temperature: @run.temperature,
|
|
85
|
+
output_column: @run.output_column,
|
|
86
|
+
tag_names: @run.tag_names,
|
|
87
|
+
status: "pending"
|
|
88
|
+
)
|
|
89
|
+
new_run.replace_metrics!(@run.metric_ids)
|
|
90
|
+
if new_run.start!
|
|
91
|
+
render json: new_run.reload, status: :accepted
|
|
92
|
+
else
|
|
93
|
+
render_error(new_run.failure_summary || "Could not start the new run.", status: :unprocessable_entity)
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def regrade
|
|
98
|
+
if @run.regrade!
|
|
99
|
+
render json: @run.reload, status: :accepted
|
|
100
|
+
else
|
|
101
|
+
render_error("Nothing to re-grade. The run has no succeeded responses or no metrics attached.", status: :unprocessable_entity)
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def compare
|
|
106
|
+
other = Run.find(params[:with])
|
|
107
|
+
comparison = build_run_comparison(@run, other)
|
|
108
|
+
render json: { left_run_id: @run.id, right_run_id: other.id, metric_ids: comparison[:metric_ids], rows: comparison[:rows] }
|
|
109
|
+
rescue ActiveRecord::RecordNotFound
|
|
110
|
+
render_error("Other run not found. Pass ?with=<run_id>.", status: :not_found)
|
|
111
|
+
end
|
|
112
|
+
|
|
74
113
|
private
|
|
75
114
|
|
|
115
|
+
def build_run_comparison(left, right)
|
|
116
|
+
left_responses = left.responses.includes(:reviews).order(:row_index, :id)
|
|
117
|
+
right_responses = right.responses.includes(:reviews).order(:row_index, :id)
|
|
118
|
+
right_by_input = right_responses.each_with_object({}) { |r, h| h[r.input_data.to_s] ||= r }
|
|
119
|
+
all_reviews = left_responses.flat_map(&:reviews) + right_responses.flat_map(&:reviews)
|
|
120
|
+
metric_ids = all_reviews.map(&:metric_id).compact.uniq
|
|
121
|
+
metric_versions = MetricVersion.where(id: all_reviews.map(&:metric_version_id).compact.uniq).index_by(&:id)
|
|
122
|
+
|
|
123
|
+
rows = left_responses.map do |lr|
|
|
124
|
+
rr = right_by_input[lr.input_data.to_s]
|
|
125
|
+
{
|
|
126
|
+
left_response_id: lr.id,
|
|
127
|
+
right_response_id: rr&.id,
|
|
128
|
+
row_index: lr.row_index,
|
|
129
|
+
per_metric: metric_ids.map do |mid|
|
|
130
|
+
l_review = lr.reviews.find { |r| r.metric_id == mid }
|
|
131
|
+
r_review = rr && rr.reviews.find { |r| r.metric_id == mid }
|
|
132
|
+
next nil if l_review.nil? && r_review.nil?
|
|
133
|
+
anchor = l_review || r_review
|
|
134
|
+
{
|
|
135
|
+
metric_id: mid,
|
|
136
|
+
metric_name: anchor.metric_name,
|
|
137
|
+
left_score: l_review ? l_review.ai_score : nil,
|
|
138
|
+
right_score: r_review ? r_review.ai_score : nil,
|
|
139
|
+
left_metric_version_id: l_review&.metric_version_id,
|
|
140
|
+
right_metric_version_id: r_review&.metric_version_id,
|
|
141
|
+
delta: (l_review&.ai_score && r_review&.ai_score) ? (r_review.ai_score.to_f - l_review.ai_score.to_f).round(2) : nil
|
|
142
|
+
}
|
|
143
|
+
end.compact
|
|
144
|
+
}
|
|
145
|
+
end
|
|
146
|
+
{ rows: rows, metric_ids: metric_ids }
|
|
147
|
+
end
|
|
148
|
+
|
|
76
149
|
def set_run
|
|
77
150
|
@run = Run.find(params[:id])
|
|
78
151
|
rescue ActiveRecord::RecordNotFound
|
|
@@ -5,7 +5,7 @@ module CompletionKit
|
|
|
5
5
|
before_action :set_tag, only: [:show, :update, :destroy]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
render json: Tag.order(:name)
|
|
8
|
+
render json: paginate(Tag.order(:name))
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
def show
|
|
@@ -17,7 +17,7 @@ module CompletionKit
|
|
|
17
17
|
if tag.save
|
|
18
18
|
render json: tag, status: :created
|
|
19
19
|
else
|
|
20
|
-
|
|
20
|
+
render_validation_errors(tag)
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
23
|
|
|
@@ -25,7 +25,7 @@ module CompletionKit
|
|
|
25
25
|
if @tag.update(tag_params)
|
|
26
26
|
render json: @tag
|
|
27
27
|
else
|
|
28
|
-
|
|
28
|
+
render_validation_errors(@tag)
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
|
|
@@ -160,13 +160,13 @@ module CompletionKit
|
|
|
160
160
|
reverting = was_published_already && !version.current?
|
|
161
161
|
previously_current = MetricVersion.current.find_by(metric_id: @metric.id)
|
|
162
162
|
|
|
163
|
-
version.publish!
|
|
164
|
-
|
|
165
163
|
if reverting
|
|
164
|
+
audit = version.revert!
|
|
166
165
|
prior_label = previously_current.version_label
|
|
167
166
|
redirect_to metric_path(@metric),
|
|
168
|
-
notice: "Reverted to #{@metric.name} #{version.version_label}. Pinned cases still flow to the judge, and calibration verdicts collected against #{prior_label} stay tied to it."
|
|
167
|
+
notice: "Reverted to #{@metric.name} #{version.version_label} (now logged as #{audit.version_label}). Pinned cases still flow to the judge, and calibration verdicts collected against #{prior_label} stay tied to it."
|
|
169
168
|
else
|
|
169
|
+
version.publish!
|
|
170
170
|
redirect_to metric_path(@metric),
|
|
171
171
|
notice: "#{@metric.name} #{version.version_label} is now the published version."
|
|
172
172
|
end
|
|
@@ -53,20 +53,6 @@ module CompletionKit
|
|
|
53
53
|
end
|
|
54
54
|
end
|
|
55
55
|
|
|
56
|
-
def ck_run_status_label(run)
|
|
57
|
-
case run.status
|
|
58
|
-
when "pending" then "Ready to run"
|
|
59
|
-
when "running"
|
|
60
|
-
if run.progress_total.to_i > 0
|
|
61
|
-
"Running (#{run.progress_current}/#{run.progress_total})"
|
|
62
|
-
else
|
|
63
|
-
"Running…"
|
|
64
|
-
end
|
|
65
|
-
when "completed" then "Completed"
|
|
66
|
-
when "failed" then "Failed"
|
|
67
|
-
else run.status.capitalize
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
56
|
|
|
71
57
|
def ck_provider_label(provider)
|
|
72
58
|
CompletionKit::ProviderCredential::PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
|