completion-kit 0.5.39 → 0.5.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +128 -50
- data/app/controllers/completion_kit/calibrations_controller.rb +4 -3
- data/app/controllers/completion_kit/metrics_controller.rb +21 -6
- data/app/helpers/completion_kit/application_helper.rb +73 -2
- data/app/services/completion_kit/judge_variant_generator.rb +30 -12
- data/app/services/completion_kit/mcp_tools/judges.rb +2 -4
- data/app/services/completion_kit/onboarding/sample_data.rb +3 -3
- data/app/views/completion_kit/calibrations/_buttons.html.erb +19 -14
- data/app/views/completion_kit/calibrations/_trust_panel.html.erb +1 -1
- data/app/views/completion_kit/metrics/index.html.erb +2 -2
- data/app/views/completion_kit/metrics/show.html.erb +90 -38
- data/app/views/completion_kit/responses/show.html.erb +1 -3
- data/config/routes.rb +1 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d8875fa22de9c8626e401706818b271f3cb26bffa7faae1f583b29503228cead
|
|
4
|
+
data.tar.gz: 5bf39fad883b2eed2f505b11403ab60fddf9efc5735073ff61c290d53f59a36d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a8f2a7f14235c1214b567b891defaf523a645a21a2409ed81df964893a260cccb1fc9bf63903794f952c91bc9c91f3c1e3850db751a08ce0edc49b360ad9642d
|
|
7
|
+
data.tar.gz: e148b500e498a00dc370bf203fea3e2618b9f1c8fccd1dc5f220ae77a4988a8934198ad32bbf25a29bc84f948c687fab9a0ae16b9c03fe575fe9fca4cf98a0ea
|
|
@@ -1541,6 +1541,25 @@ tr:hover .ck-chip--publish {
|
|
|
1541
1541
|
border: 0;
|
|
1542
1542
|
border-radius: 0;
|
|
1543
1543
|
background: transparent;
|
|
1544
|
+
white-space: pre;
|
|
1545
|
+
color: #93c5fd;
|
|
1546
|
+
font-size: 0.86rem;
|
|
1547
|
+
line-height: 1.55;
|
|
1548
|
+
}
|
|
1549
|
+
.ck-code-scroll-wrap > .ck-code .ck-json-key {
|
|
1550
|
+
color: #c4b5fd;
|
|
1551
|
+
}
|
|
1552
|
+
.ck-code-scroll-wrap > .ck-code .ck-json-string {
|
|
1553
|
+
color: #93c5fd;
|
|
1554
|
+
}
|
|
1555
|
+
.ck-code-scroll-wrap > .ck-code .ck-json-number {
|
|
1556
|
+
color: #fcd34d;
|
|
1557
|
+
}
|
|
1558
|
+
.ck-code-scroll-wrap > .ck-code .ck-json-keyword {
|
|
1559
|
+
color: #f9a8d4;
|
|
1560
|
+
}
|
|
1561
|
+
.ck-code-scroll-wrap > .ck-code .ck-json-punct {
|
|
1562
|
+
color: var(--ck-dim);
|
|
1544
1563
|
}
|
|
1545
1564
|
|
|
1546
1565
|
.ck-note-box {
|
|
@@ -2755,7 +2774,7 @@ select.ck-input {
|
|
|
2755
2774
|
border: 1px solid var(--ck-line);
|
|
2756
2775
|
border-radius: var(--ck-radius-lg);
|
|
2757
2776
|
background: var(--ck-surface);
|
|
2758
|
-
padding: 1.
|
|
2777
|
+
padding: 1.25rem;
|
|
2759
2778
|
}
|
|
2760
2779
|
|
|
2761
2780
|
.ck-review-card__header {
|
|
@@ -2778,11 +2797,9 @@ select.ck-input {
|
|
|
2778
2797
|
}
|
|
2779
2798
|
|
|
2780
2799
|
.ck-review-card__feedback {
|
|
2781
|
-
margin
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
.ck-review-card__feedback .ck-note-box {
|
|
2785
|
-
margin-top: 0;
|
|
2800
|
+
margin: 0.6rem 0 0;
|
|
2801
|
+
color: var(--ck-muted);
|
|
2802
|
+
line-height: 1.55;
|
|
2786
2803
|
}
|
|
2787
2804
|
|
|
2788
2805
|
@media (max-width: 900px) {
|
|
@@ -2819,6 +2836,14 @@ select.ck-input {
|
|
|
2819
2836
|
width: 100%;
|
|
2820
2837
|
}
|
|
2821
2838
|
|
|
2839
|
+
/* button_to renders a form.inline-block wrapping the button. When the inner
|
|
2840
|
+
button is a full .ck-button (not an icon-button or chip), the form should
|
|
2841
|
+
stretch with it. */
|
|
2842
|
+
form.inline-block:has(> .ck-button) {
|
|
2843
|
+
width: 100%;
|
|
2844
|
+
display: block;
|
|
2845
|
+
}
|
|
2846
|
+
|
|
2822
2847
|
/* Page header stacks: title, then lead text full-width, then action. */
|
|
2823
2848
|
.ck-page-header {
|
|
2824
2849
|
flex-direction: column;
|
|
@@ -4584,9 +4609,8 @@ a.tag-mark {
|
|
|
4584
4609
|
}
|
|
4585
4610
|
|
|
4586
4611
|
.ck-launch__progress {
|
|
4587
|
-
padding-bottom:
|
|
4588
|
-
margin-bottom:
|
|
4589
|
-
border-bottom: 1px solid var(--ck-line);
|
|
4612
|
+
padding-bottom: 0;
|
|
4613
|
+
margin-bottom: 1.25rem;
|
|
4590
4614
|
}
|
|
4591
4615
|
.ck-launch__progress-head {
|
|
4592
4616
|
display: flex;
|
|
@@ -5151,22 +5175,37 @@ a.tag-mark {
|
|
|
5151
5175
|
border-top: 1px dashed var(--ck-line);
|
|
5152
5176
|
}
|
|
5153
5177
|
.ck-calibration__prompt {
|
|
5178
|
+
margin: 0 0 10px;
|
|
5179
|
+
display: flex;
|
|
5180
|
+
align-items: baseline;
|
|
5181
|
+
flex-wrap: wrap;
|
|
5182
|
+
gap: 8px 12px;
|
|
5183
|
+
}
|
|
5184
|
+
.ck-calibration__label {
|
|
5154
5185
|
font-family: var(--ck-mono);
|
|
5155
5186
|
font-size: 0.72rem;
|
|
5156
5187
|
letter-spacing: 0.06em;
|
|
5157
5188
|
text-transform: uppercase;
|
|
5158
5189
|
color: var(--ck-dim);
|
|
5159
|
-
|
|
5160
|
-
display: flex;
|
|
5161
|
-
align-items: center;
|
|
5162
|
-
gap: 10px;
|
|
5190
|
+
flex-shrink: 0;
|
|
5163
5191
|
}
|
|
5164
5192
|
.ck-calibration__count {
|
|
5165
5193
|
font-family: var(--ck-mono);
|
|
5166
5194
|
font-size: 0.72rem;
|
|
5167
5195
|
letter-spacing: 0.03em;
|
|
5168
5196
|
color: var(--ck-accent);
|
|
5169
|
-
|
|
5197
|
+
}
|
|
5198
|
+
.ck-calibration__hint {
|
|
5199
|
+
font-size: 0.82rem;
|
|
5200
|
+
color: var(--ck-dim);
|
|
5201
|
+
line-height: 1.4;
|
|
5202
|
+
}
|
|
5203
|
+
@media (max-width: 640px) {
|
|
5204
|
+
.ck-calibration__prompt {
|
|
5205
|
+
flex-direction: column;
|
|
5206
|
+
align-items: flex-start;
|
|
5207
|
+
gap: 4px;
|
|
5208
|
+
}
|
|
5170
5209
|
}
|
|
5171
5210
|
.ck-calibration__buttons {
|
|
5172
5211
|
display: flex;
|
|
@@ -5221,11 +5260,13 @@ a.tag-mark {
|
|
|
5221
5260
|
margin-top: 12px;
|
|
5222
5261
|
display: flex;
|
|
5223
5262
|
flex-direction: column;
|
|
5224
|
-
gap:
|
|
5225
|
-
|
|
5226
|
-
|
|
5227
|
-
|
|
5228
|
-
|
|
5263
|
+
gap: 12px;
|
|
5264
|
+
}
|
|
5265
|
+
.ck-calibration__detail > * {
|
|
5266
|
+
margin: 0;
|
|
5267
|
+
}
|
|
5268
|
+
.ck-calibration__detail .ck-button {
|
|
5269
|
+
align-self: flex-start;
|
|
5229
5270
|
}
|
|
5230
5271
|
.ck-calibration__value {
|
|
5231
5272
|
color: var(--ck-accent);
|
|
@@ -5347,44 +5388,28 @@ a.tag-mark {
|
|
|
5347
5388
|
text-transform: uppercase;
|
|
5348
5389
|
}
|
|
5349
5390
|
|
|
5350
|
-
.ck-draft-
|
|
5391
|
+
.ck-draft-pending {
|
|
5392
|
+
border-color: rgba(6, 182, 212, 0.45);
|
|
5393
|
+
background: linear-gradient(180deg, var(--ck-accent-soft), var(--ck-surface));
|
|
5394
|
+
}
|
|
5395
|
+
|
|
5396
|
+
.ck-suggestion-banner {
|
|
5351
5397
|
display: inline-flex;
|
|
5352
5398
|
align-items: center;
|
|
5353
5399
|
gap: 10px;
|
|
5354
5400
|
margin-top: 10px;
|
|
5355
|
-
padding: 8px
|
|
5401
|
+
padding: 8px 14px;
|
|
5356
5402
|
background: var(--ck-accent-soft);
|
|
5357
|
-
border: 1px
|
|
5358
|
-
border-radius: 6px;
|
|
5359
|
-
}
|
|
5360
|
-
|
|
5361
|
-
.ck-suggestion-list {
|
|
5362
|
-
display: flex;
|
|
5363
|
-
flex-direction: column;
|
|
5364
|
-
gap: 12px;
|
|
5365
|
-
}
|
|
5366
|
-
.ck-suggestion-card {
|
|
5367
|
-
padding: 12px 14px;
|
|
5368
|
-
background: var(--ck-surface-soft);
|
|
5369
|
-
border: 1px solid var(--ck-line);
|
|
5403
|
+
border: 1px solid rgba(6, 182, 212, 0.35);
|
|
5370
5404
|
border-radius: 6px;
|
|
5371
|
-
|
|
5372
|
-
|
|
5373
|
-
|
|
5374
|
-
|
|
5375
|
-
.ck-suggestion-card__header {
|
|
5376
|
-
display: flex;
|
|
5377
|
-
align-items: center;
|
|
5378
|
-
gap: 10px;
|
|
5405
|
+
color: var(--ck-accent);
|
|
5406
|
+
font-family: var(--ck-mono);
|
|
5407
|
+
font-size: 0.82rem;
|
|
5408
|
+
text-decoration: none;
|
|
5379
5409
|
}
|
|
5380
|
-
.ck-suggestion-
|
|
5381
|
-
|
|
5382
|
-
|
|
5383
|
-
font-size: 0.85rem;
|
|
5384
|
-
background: var(--ck-bg-strong);
|
|
5385
|
-
padding: 10px 12px;
|
|
5386
|
-
border-radius: 4px;
|
|
5387
|
-
border: 1px solid var(--ck-line);
|
|
5410
|
+
.ck-suggestion-banner:hover,
|
|
5411
|
+
.ck-suggestion-banner:focus-visible {
|
|
5412
|
+
border-color: var(--ck-accent);
|
|
5388
5413
|
}
|
|
5389
5414
|
|
|
5390
5415
|
.ck-metrics-table__trust {
|
|
@@ -5407,3 +5432,56 @@ a.tag-mark {
|
|
|
5407
5432
|
color: var(--ck-danger);
|
|
5408
5433
|
font-size: 0.82rem;
|
|
5409
5434
|
}
|
|
5435
|
+
|
|
5436
|
+
.ck-star-picker {
|
|
5437
|
+
border: 0;
|
|
5438
|
+
padding: 0;
|
|
5439
|
+
margin: 0;
|
|
5440
|
+
}
|
|
5441
|
+
.ck-star-picker__row {
|
|
5442
|
+
display: inline-flex;
|
|
5443
|
+
flex-direction: row-reverse;
|
|
5444
|
+
gap: 2px;
|
|
5445
|
+
}
|
|
5446
|
+
.ck-star-picker input {
|
|
5447
|
+
position: absolute;
|
|
5448
|
+
width: 1px;
|
|
5449
|
+
height: 1px;
|
|
5450
|
+
opacity: 0;
|
|
5451
|
+
pointer-events: none;
|
|
5452
|
+
}
|
|
5453
|
+
.ck-star-picker label {
|
|
5454
|
+
cursor: pointer;
|
|
5455
|
+
display: inline-flex;
|
|
5456
|
+
padding: 4px;
|
|
5457
|
+
border-radius: 4px;
|
|
5458
|
+
}
|
|
5459
|
+
.ck-star-picker label svg {
|
|
5460
|
+
fill: transparent;
|
|
5461
|
+
stroke: var(--ck-line-strong);
|
|
5462
|
+
transition: fill 0.08s, stroke 0.08s;
|
|
5463
|
+
}
|
|
5464
|
+
.ck-star-picker input:checked ~ label svg {
|
|
5465
|
+
fill: var(--ck-warning);
|
|
5466
|
+
stroke: var(--ck-warning);
|
|
5467
|
+
}
|
|
5468
|
+
.ck-star-picker__row:hover label svg {
|
|
5469
|
+
fill: transparent;
|
|
5470
|
+
stroke: var(--ck-line-strong);
|
|
5471
|
+
}
|
|
5472
|
+
.ck-star-picker__row:hover label:hover svg,
|
|
5473
|
+
.ck-star-picker__row:hover label:hover ~ label svg {
|
|
5474
|
+
fill: var(--ck-warning);
|
|
5475
|
+
stroke: var(--ck-warning);
|
|
5476
|
+
}
|
|
5477
|
+
.ck-star-picker input:focus-visible + label {
|
|
5478
|
+
outline: 2px solid var(--ck-accent);
|
|
5479
|
+
outline-offset: 2px;
|
|
5480
|
+
}
|
|
5481
|
+
|
|
5482
|
+
.ck-button--just-saved {
|
|
5483
|
+
animation: ck-saved-flash 1.4s ease-out;
|
|
5484
|
+
}
|
|
5485
|
+
@keyframes ck-saved-flash {
|
|
5486
|
+
0% { background: var(--ck-success); border-color: var(--ck-success); }
|
|
5487
|
+
}
|
|
@@ -25,7 +25,7 @@ module CompletionKit
|
|
|
25
25
|
)
|
|
26
26
|
|
|
27
27
|
if calibration.save
|
|
28
|
-
render_calibration(calibration: calibration)
|
|
28
|
+
render_calibration(calibration: calibration, just_saved: true)
|
|
29
29
|
else
|
|
30
30
|
render_calibration(
|
|
31
31
|
calibration: existing,
|
|
@@ -38,7 +38,7 @@ module CompletionKit
|
|
|
38
38
|
|
|
39
39
|
private
|
|
40
40
|
|
|
41
|
-
def render_calibration(calibration:, pending_verdict: nil, error: nil, status: :ok)
|
|
41
|
+
def render_calibration(calibration:, pending_verdict: nil, error: nil, just_saved: false, status: :ok)
|
|
42
42
|
locals = {
|
|
43
43
|
review: review_for_metric,
|
|
44
44
|
calibration: calibration,
|
|
@@ -46,7 +46,8 @@ module CompletionKit
|
|
|
46
46
|
response_row: @response,
|
|
47
47
|
metric: @metric,
|
|
48
48
|
pending_verdict: pending_verdict,
|
|
49
|
-
error: error
|
|
49
|
+
error: error,
|
|
50
|
+
just_saved: just_saved
|
|
50
51
|
}
|
|
51
52
|
render turbo_stream: turbo_stream.replace(
|
|
52
53
|
"calibration_#{@response.id}_#{@metric.id}",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class MetricsController < ApplicationController
|
|
3
3
|
include CompletionKit::TagFiltering
|
|
4
|
-
before_action :set_metric, only: [:show, :edit, :update, :destroy, :add_few_shot, :publish_draft, :suggest_variants]
|
|
4
|
+
before_action :set_metric, only: [:show, :edit, :update, :destroy, :add_few_shot, :publish_draft, :suggest_variants, :dismiss_suggestion]
|
|
5
5
|
|
|
6
6
|
def index
|
|
7
7
|
@metrics = apply_tag_filter(Metric.includes(:metric_groups, :tags).order(:name))
|
|
@@ -12,8 +12,10 @@ module CompletionKit
|
|
|
12
12
|
.includes(response: [:reviews, :run])
|
|
13
13
|
.order(created_at: :desc)
|
|
14
14
|
.limit(50)
|
|
15
|
-
@
|
|
16
|
-
@
|
|
15
|
+
@edit_draft = JudgeVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
|
|
16
|
+
@published_judge_version = JudgeVersion.published.where(metric_id: @metric.id, current: true).first
|
|
17
|
+
@suggestion_draft = JudgeVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
|
|
18
|
+
@improve_disagreement_count = @disagreements.size
|
|
17
19
|
end
|
|
18
20
|
|
|
19
21
|
def new
|
|
@@ -47,15 +49,28 @@ module CompletionKit
|
|
|
47
49
|
end
|
|
48
50
|
|
|
49
51
|
def suggest_variants
|
|
50
|
-
|
|
52
|
+
disagreement_count = Calibration.where(metric_id: @metric.id, verdict: "disagree").count
|
|
53
|
+
if disagreement_count.zero?
|
|
54
|
+
redirect_to metric_path(@metric), alert: "Mark at least one row as Disagree before asking the model to suggest a change."
|
|
55
|
+
return
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
JudgeVersion.drafts.where(metric_id: @metric.id, source: "suggestion").destroy_all
|
|
59
|
+
|
|
60
|
+
generator = JudgeVariantGenerator.new(@metric, count: 1)
|
|
51
61
|
variants = generator.call
|
|
52
62
|
if variants.empty?
|
|
53
63
|
redirect_to metric_path(@metric), alert: "The model returned no usable variants. Try again with a different model."
|
|
54
64
|
return
|
|
55
65
|
end
|
|
56
66
|
generator.persist!(variants)
|
|
57
|
-
|
|
58
|
-
|
|
67
|
+
redirect_to metric_path(@metric), notice: "Drafted a new version. Review it below."
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def dismiss_suggestion
|
|
71
|
+
draft = JudgeVersion.drafts.where(metric_id: @metric.id, source: "suggestion").find_by(id: params[:draft_id])
|
|
72
|
+
draft&.destroy
|
|
73
|
+
redirect_to metric_path(@metric), notice: "Dismissed."
|
|
59
74
|
end
|
|
60
75
|
|
|
61
76
|
def publish_draft
|
|
@@ -202,15 +202,86 @@ module CompletionKit
|
|
|
202
202
|
def ck_format_maybe_json(text)
|
|
203
203
|
s = text.to_s
|
|
204
204
|
return s if s.strip.empty?
|
|
205
|
-
|
|
205
|
+
payload = ck_unwrap_json_fence(s.strip)
|
|
206
|
+
first = payload[0]
|
|
206
207
|
return s unless first == "{" || first == "["
|
|
207
208
|
begin
|
|
208
|
-
JSON.pretty_generate(JSON.parse(
|
|
209
|
+
ck_highlight_json(JSON.pretty_generate(JSON.parse(payload)))
|
|
209
210
|
rescue JSON::ParserError
|
|
210
211
|
s
|
|
211
212
|
end
|
|
212
213
|
end
|
|
213
214
|
|
|
215
|
+
def ck_unwrap_json_fence(text)
|
|
216
|
+
m = text.match(/\A```(?:json|JSON)?\s*\n(.*?)\n?```\s*\z/m)
|
|
217
|
+
m ? m[1].strip : text
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
def ck_highlight_json(text)
|
|
221
|
+
tokens = ck_tokenize_json(text)
|
|
222
|
+
is_key = ck_mark_json_keys(tokens)
|
|
223
|
+
parts = tokens.each_with_index.map do |(type, value), idx|
|
|
224
|
+
escaped = ERB::Util.html_escape(value)
|
|
225
|
+
case type
|
|
226
|
+
when :punct then %(<span class="ck-json-punct">#{escaped}</span>)
|
|
227
|
+
when :string
|
|
228
|
+
%(<span class="#{is_key[idx] ? "ck-json-key" : "ck-json-string"}">#{escaped}</span>)
|
|
229
|
+
when :number then %(<span class="ck-json-number">#{escaped}</span>)
|
|
230
|
+
when :keyword then %(<span class="ck-json-keyword">#{escaped}</span>)
|
|
231
|
+
else escaped
|
|
232
|
+
end
|
|
233
|
+
end
|
|
234
|
+
parts.join.html_safe
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
def ck_tokenize_json(text)
|
|
238
|
+
tokens = []
|
|
239
|
+
i = 0
|
|
240
|
+
len = text.length
|
|
241
|
+
while i < len
|
|
242
|
+
ch = text[i]
|
|
243
|
+
if ch == " " || ch == "\n" || ch == "\t"
|
|
244
|
+
tokens << [:ws, ch]
|
|
245
|
+
i += 1
|
|
246
|
+
elsif "{}[]:,".include?(ch)
|
|
247
|
+
tokens << [:punct, ch]
|
|
248
|
+
i += 1
|
|
249
|
+
elsif ch == '"'
|
|
250
|
+
j = i + 1
|
|
251
|
+
while j < len && text[j] != '"'
|
|
252
|
+
j += text[j] == "\\" ? 2 : 1
|
|
253
|
+
end
|
|
254
|
+
j = len - 1 if j >= len
|
|
255
|
+
tokens << [:string, text[i..j]]
|
|
256
|
+
i = j + 1
|
|
257
|
+
elsif ch == "-" || (ch >= "0" && ch <= "9")
|
|
258
|
+
j = i + 1
|
|
259
|
+
j += 1 while j < len && "0123456789.eE+-".include?(text[j])
|
|
260
|
+
tokens << [:number, text[i...j]]
|
|
261
|
+
i = j
|
|
262
|
+
elsif text[i, 4] == "true" || text[i, 4] == "null"
|
|
263
|
+
tokens << [:keyword, text[i, 4]]
|
|
264
|
+
i += 4
|
|
265
|
+
elsif text[i, 5] == "false"
|
|
266
|
+
tokens << [:keyword, "false"]
|
|
267
|
+
i += 5
|
|
268
|
+
else
|
|
269
|
+
tokens << [:other, ch]
|
|
270
|
+
i += 1
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
tokens
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def ck_mark_json_keys(tokens)
|
|
277
|
+
tokens.each_with_index.map do |(type, _), idx|
|
|
278
|
+
next false unless type == :string
|
|
279
|
+
j = idx + 1
|
|
280
|
+
j += 1 while j < tokens.length && tokens[j][0] == :ws
|
|
281
|
+
j < tokens.length && tokens[j] == [:punct, ":"]
|
|
282
|
+
end
|
|
283
|
+
end
|
|
284
|
+
|
|
214
285
|
def tag_filter_url(base_path, selected, toggling)
|
|
215
286
|
remaining = selected.reject { |t| t.id == toggling.id }
|
|
216
287
|
next_set = selected.include?(toggling) ? remaining : remaining + [toggling]
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class JudgeVariantGenerator
|
|
3
|
-
DEFAULT_VARIANT_COUNT =
|
|
3
|
+
DEFAULT_VARIANT_COUNT = 1
|
|
4
|
+
MAX_VARIANT_COUNT = 3
|
|
4
5
|
DEFAULT_TEMPERATURE = 0.4
|
|
5
6
|
|
|
6
|
-
Variant = Struct.new(:reasoning, :instruction, keyword_init: true)
|
|
7
|
+
Variant = Struct.new(:reasoning, :instruction, :rubric_bands, keyword_init: true)
|
|
7
8
|
|
|
8
9
|
def initialize(metric, count: DEFAULT_VARIANT_COUNT, model: nil)
|
|
9
10
|
@metric = metric
|
|
10
|
-
|
|
11
|
+
n = count.to_i
|
|
12
|
+
@count = n < 1 ? DEFAULT_VARIANT_COUNT : [n, MAX_VARIANT_COUNT].min
|
|
11
13
|
@model = model || CompletionKit.config.judge_model
|
|
12
14
|
end
|
|
13
15
|
|
|
@@ -23,7 +25,7 @@ module CompletionKit
|
|
|
23
25
|
JudgeVersion.create!(
|
|
24
26
|
metric: @metric,
|
|
25
27
|
instruction: variant.instruction,
|
|
26
|
-
rubric_bands: @metric.rubric_bands,
|
|
28
|
+
rubric_bands: variant.rubric_bands.presence || @metric.rubric_bands,
|
|
27
29
|
state: "draft",
|
|
28
30
|
source: "suggestion",
|
|
29
31
|
current: false
|
|
@@ -42,14 +44,14 @@ module CompletionKit
|
|
|
42
44
|
disagreements = JudgeCalibrationExamples.disagreements_for(@metric)
|
|
43
45
|
borderlines = JudgeCalibrationExamples.borderlines_for(@metric)
|
|
44
46
|
sections = []
|
|
45
|
-
sections << "You are an expert evaluator.
|
|
47
|
+
sections << "You are an expert evaluator. The judge below is misaligned with humans. Propose #{@count == 1 ? "a single" : "#{@count}"} concrete rewrite that closes the gap."
|
|
46
48
|
sections << ""
|
|
47
49
|
sections << "## Current instruction"
|
|
48
50
|
sections << "```"
|
|
49
51
|
sections << @metric.instruction.to_s
|
|
50
52
|
sections << "```"
|
|
51
53
|
sections << ""
|
|
52
|
-
sections << "##
|
|
54
|
+
sections << "## Current rubric (5 to 1)"
|
|
53
55
|
sections << @metric.display_rubric_text
|
|
54
56
|
sections << ""
|
|
55
57
|
if disagreements.any?
|
|
@@ -65,7 +67,7 @@ module CompletionKit
|
|
|
65
67
|
end
|
|
66
68
|
if borderlines.any?
|
|
67
69
|
sections << "## Rubric-ambiguous cases (humans marked these borderline)"
|
|
68
|
-
sections << "
|
|
70
|
+
sections << "These are cases where a human said the rubric itself was unclear. If the rubric needs sharpening, rewrite it."
|
|
69
71
|
borderlines.each_with_index do |ex, i|
|
|
70
72
|
sections << "### Borderline #{i + 1}"
|
|
71
73
|
sections << "Input: #{ex[:input].to_s.truncate(200)}"
|
|
@@ -76,14 +78,20 @@ module CompletionKit
|
|
|
76
78
|
end
|
|
77
79
|
end
|
|
78
80
|
sections << "## Task"
|
|
79
|
-
sections << "
|
|
81
|
+
sections << "Make one substantive change. Don't just reword. If the disagreements look like instruction problems, rewrite the instruction. If they look like rubric problems (overlapping bands, undefined edge cases), rewrite the rubric. Rewrite both if both are wrong."
|
|
80
82
|
sections << ""
|
|
81
|
-
sections << "Respond in EXACTLY this format, repeated #{@count}
|
|
83
|
+
sections << "Respond in EXACTLY this format, repeated #{@count} time#{@count == 1 ? "" : "s"}:"
|
|
82
84
|
sections << ""
|
|
83
85
|
sections << "VARIANT:"
|
|
84
|
-
sections << "REASONING: <one sentence
|
|
86
|
+
sections << "REASONING: <one short sentence: what changes and why>"
|
|
85
87
|
sections << "INSTRUCTION:"
|
|
86
88
|
sections << "<the rewritten instruction>"
|
|
89
|
+
sections << "RUBRIC: # optional — omit this block if the rubric is unchanged"
|
|
90
|
+
sections << "5: <description for 5 stars>"
|
|
91
|
+
sections << "4: <description for 4 stars>"
|
|
92
|
+
sections << "3: <description for 3 stars>"
|
|
93
|
+
sections << "2: <description for 2 stars>"
|
|
94
|
+
sections << "1: <description for 1 star>"
|
|
87
95
|
sections << "END_VARIANT"
|
|
88
96
|
sections.join("\n")
|
|
89
97
|
end
|
|
@@ -92,11 +100,21 @@ module CompletionKit
|
|
|
92
100
|
blocks = text.to_s.scan(/VARIANT:(.*?)END_VARIANT/m).flatten
|
|
93
101
|
blocks.filter_map do |raw|
|
|
94
102
|
reasoning = raw[/REASONING:\s*(.*?)(?=INSTRUCTION:|\z)/m, 1].to_s.strip
|
|
95
|
-
instruction = raw[/INSTRUCTION:\s*(
|
|
103
|
+
instruction = raw[/INSTRUCTION:\s*(.*?)(?=RUBRIC:|\z)/m, 1].to_s.strip
|
|
96
104
|
next if instruction.empty?
|
|
97
|
-
|
|
105
|
+
rubric_block = raw[/RUBRIC:\s*(.*)/m, 1].to_s
|
|
106
|
+
Variant.new(reasoning: reasoning, instruction: instruction, rubric_bands: parse_rubric(rubric_block))
|
|
98
107
|
end
|
|
99
108
|
end
|
|
109
|
+
|
|
110
|
+
def parse_rubric(block)
|
|
111
|
+
return nil if block.strip.empty?
|
|
112
|
+
bands = block.scan(/^\s*([1-5])\s*[:\-]\s*(.+?)\s*$/).map do |stars, description|
|
|
113
|
+
{ "stars" => stars.to_i, "description" => description.strip }
|
|
114
|
+
end
|
|
115
|
+
return nil if bands.length != 5
|
|
116
|
+
bands.sort_by { |b| -b["stars"] }
|
|
117
|
+
end
|
|
100
118
|
end
|
|
101
119
|
|
|
102
120
|
module JudgeCalibrationExamples
|
|
@@ -10,7 +10,7 @@ module CompletionKit
|
|
|
10
10
|
type: "object",
|
|
11
11
|
properties: {
|
|
12
12
|
metric_id: { type: "integer" },
|
|
13
|
-
count: { type: "integer", description: "How many variants to request (default
|
|
13
|
+
count: { type: "integer", description: "How many variants to request (default 1, max 3). One focused rewrite beats five reworded copies." },
|
|
14
14
|
model: { type: "string", description: "Override the model used to generate variants. Defaults to CompletionKit.config.judge_model." }
|
|
15
15
|
},
|
|
16
16
|
required: ["metric_id"]
|
|
@@ -49,9 +49,7 @@ module CompletionKit
|
|
|
49
49
|
|
|
50
50
|
def self.suggest(args)
|
|
51
51
|
metric = CompletionKit::Metric.find(args["metric_id"])
|
|
52
|
-
|
|
53
|
-
count = CompletionKit::JudgeVariantGenerator::DEFAULT_VARIANT_COUNT if count <= 0
|
|
54
|
-
generator = CompletionKit::JudgeVariantGenerator.new(metric, count: count, model: args["model"])
|
|
52
|
+
generator = CompletionKit::JudgeVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
|
|
55
53
|
variants = generator.call
|
|
56
54
|
return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
|
|
57
55
|
versions = generator.persist!(variants)
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module Onboarding
|
|
3
3
|
# Opt-in starter data for the onboarding page: one dataset + one prompt so a
|
|
4
|
-
# brand-new install has something to poke at. Idempotent
|
|
4
|
+
# brand-new install has something to poke at. Idempotent. A no-op once the
|
|
5
5
|
# workspace already has any prompt or dataset. Deliberately does NOT create a
|
|
6
6
|
# provider credential (needs a real API key) or a run (user-initiated).
|
|
7
7
|
module SampleData
|
|
8
8
|
SAMPLE_CSV = <<~CSV.freeze
|
|
9
9
|
ticket
|
|
10
10
|
"My order #4827 arrived with a dented panel. I emailed photos 11 days ago and heard nothing. Today I was told the return window 'closed'. I paid $749. I want a refund or replacement, not store credit."
|
|
11
|
-
"Tracking says delivered to my porch Tuesday 3:47pm. I was home all day, nothing arrived, neighbours' cameras show no van. Order #5102
|
|
11
|
+
"Tracking says delivered to my porch Tuesday 3:47pm. I was home all day, nothing arrived, neighbours' cameras show no van. Order #5102. A $315 mixer, wedding gift, wedding is Saturday. Can someone look today?"
|
|
12
12
|
"WELCOME20 says 'invalid' at checkout but the promo email says it's good through May 31. Same email I'm signed in with. Tried Chrome and Safari. Cart is $186 waiting on you."
|
|
13
13
|
CSV
|
|
14
14
|
|
|
15
15
|
SAMPLE_PROMPT = {
|
|
16
16
|
name: "Sample: Support reply",
|
|
17
|
-
description: "A starter prompt
|
|
17
|
+
description: "A starter prompt. Drafts a warm, professional reply to a customer support ticket. Edit it or delete it; it's just here to get you going.",
|
|
18
18
|
template: "You are a senior customer-support specialist. Write a warm, professional reply to this ticket. Acknowledge the customer's situation, be specific about next steps, and don't be defensive.\n\nTicket:\n{{ticket}}",
|
|
19
19
|
llm_model: "gpt-4o-mini"
|
|
20
20
|
}.freeze
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
<% error = local_assigns[:error] %>
|
|
6
6
|
<% verdict_count = CompletionKit::Calibration.where(response_id: response_row.id, metric_id: metric.id).count %>
|
|
7
7
|
<p class="ck-calibration__prompt">
|
|
8
|
-
Your verdict
|
|
8
|
+
<span class="ck-calibration__label">Your verdict</span>
|
|
9
9
|
<% if verdict_count > 0 %>
|
|
10
|
-
<span class="ck-calibration__count"><%= pluralize(verdict_count, "verdict") %> on this score · <%= link_to "trust
|
|
10
|
+
<span class="ck-calibration__count"><%= pluralize(verdict_count, "verdict") %> on this score · <%= link_to "trust level →", metric_path(metric), class: "ck-link" %></span>
|
|
11
11
|
<% else %>
|
|
12
|
-
<span class="ck-
|
|
12
|
+
<span class="ck-calibration__hint">Tell us what you think — was the score right? Verdicts roll up into the metric's <%= link_to "trust level", metric_path(metric), class: "ck-link" %>.</span>
|
|
13
13
|
<% end %>
|
|
14
14
|
</p>
|
|
15
15
|
<div class="ck-calibration__buttons">
|
|
@@ -37,22 +37,27 @@
|
|
|
37
37
|
<% end %>
|
|
38
38
|
|
|
39
39
|
<% if active_verdict == "disagree" %>
|
|
40
|
+
<% existing_score = (calibration&.corrected_score || review&.ai_score)&.round %>
|
|
40
41
|
<%= form_with url: run_response_calibrations_path(run, response_row),
|
|
41
42
|
method: :post, local: false,
|
|
42
43
|
class: "ck-calibration__detail" do |f| %>
|
|
43
44
|
<%= hidden_field_tag :metric_id, metric.id %>
|
|
44
45
|
<%= hidden_field_tag :verdict, "disagree" %>
|
|
45
|
-
<
|
|
46
|
-
|
|
47
|
-
<
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
46
|
+
<p class="ck-label">What should the score have been?</p>
|
|
47
|
+
<fieldset class="ck-star-picker">
|
|
48
|
+
<legend class="ck-visually-hidden">Pick a score from 1 to 5 stars</legend>
|
|
49
|
+
<div class="ck-star-picker__row">
|
|
50
|
+
<% [5, 4, 3, 2, 1].each do |n| %>
|
|
51
|
+
<% radio_id = "ck-star-#{response_row.id}-#{metric.id}-#{n}" %>
|
|
52
|
+
<input type="radio" name="corrected_score" id="<%= radio_id %>" value="<%= n %>" <%= "checked" if existing_score == n %> required>
|
|
53
|
+
<label for="<%= radio_id %>" title="<%= pluralize(n, 'star') %>" aria-label="<%= pluralize(n, 'star') %>">
|
|
54
|
+
<svg viewBox="0 0 24 24" width="28" height="28" stroke-width="1.5" aria-hidden="true"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
|
|
55
|
+
</label>
|
|
56
|
+
<% end %>
|
|
57
|
+
</div>
|
|
58
|
+
</fieldset>
|
|
54
59
|
<textarea name="note" rows="2" class="ck-input ck-input--area" placeholder="Why? (optional)"><%= calibration&.note %></textarea>
|
|
55
|
-
<%= f.submit
|
|
60
|
+
<%= f.submit "Save", class: "#{ck_button_classes(:dark)}#{' ck-button--just-saved' if local_assigns[:just_saved]}" %>
|
|
56
61
|
<% end %>
|
|
57
62
|
<% elsif active_verdict == "borderline" %>
|
|
58
63
|
<%= form_with url: run_response_calibrations_path(run, response_row),
|
|
@@ -61,7 +66,7 @@
|
|
|
61
66
|
<%= hidden_field_tag :metric_id, metric.id %>
|
|
62
67
|
<%= hidden_field_tag :verdict, "borderline" %>
|
|
63
68
|
<textarea name="note" rows="2" class="ck-input ck-input--area" placeholder="What made this borderline? (optional)"><%= calibration&.note %></textarea>
|
|
64
|
-
<%= f.submit
|
|
69
|
+
<%= f.submit "Save", class: "#{ck_button_classes(:dark)}#{' ck-button--just-saved' if local_assigns[:just_saved]}" %>
|
|
65
70
|
<% end %>
|
|
66
71
|
<% end %>
|
|
67
72
|
</div>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<% stats = local_assigns[:stats] %>
|
|
2
2
|
<div class="ck-trust-panel ck-trust-panel--<%= stats.gate %>">
|
|
3
|
-
<p class="ck-trust-panel__label" title="How often this metric's scores match the humans who reviewed them.">Trust
|
|
3
|
+
<p class="ck-trust-panel__label" title="How often this metric's scores match the humans who reviewed them.">Trust level</p>
|
|
4
4
|
<% if stats.counter_only? %>
|
|
5
5
|
<div class="ck-trust-panel__body">
|
|
6
6
|
<span class="ck-trust-panel__counter"><%= stats.sample_size %><span class="ck-trust-panel__counter-of">/ <%= CompletionKit::MetricCalibrationStats::PROVISIONAL_MIN %></span></span>
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
<tr>
|
|
20
20
|
<th scope="col">Name</th>
|
|
21
21
|
<th scope="col">Instruction</th>
|
|
22
|
-
<th scope="col" title="How often this metric's scores match the humans who reviewed them.">Trust
|
|
22
|
+
<th scope="col" title="How often this metric's scores match the humans who reviewed them.">Trust level</th>
|
|
23
23
|
<th scope="col">In groups</th>
|
|
24
24
|
<th scope="col"></th>
|
|
25
25
|
</tr>
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
<% end %>
|
|
37
37
|
</td>
|
|
38
38
|
<td data-label="Instruction" class="ck-meta-copy"><div class="ck-clamp-2"><%= metric.instruction.presence || "—" %></div></td>
|
|
39
|
-
<td data-label="Trust
|
|
39
|
+
<td data-label="Trust level" class="ck-metrics-table__trust">
|
|
40
40
|
<% if CompletionKit.config.judge_calibration_enabled %>
|
|
41
41
|
<% s = CompletionKit::MetricCalibrationStats.for(metric) %>
|
|
42
42
|
<% if s.counter_only? %>
|
|
@@ -9,24 +9,58 @@
|
|
|
9
9
|
<% if CompletionKit.config.judge_calibration_enabled %>
|
|
10
10
|
<%= render "completion_kit/calibrations/trust_panel",
|
|
11
11
|
stats: CompletionKit::MetricCalibrationStats.for(@metric) %>
|
|
12
|
-
<% if @
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
12
|
+
<% if @edit_draft %>
|
|
13
|
+
<% pub_instr = @published_judge_version&.instruction.to_s %>
|
|
14
|
+
<% draft_instr = @edit_draft.instruction.to_s %>
|
|
15
|
+
<% instruction_changed = pub_instr != draft_instr %>
|
|
16
|
+
<% rubric_changed = @published_judge_version && @published_judge_version.rubric_bands != @edit_draft.rubric_bands %>
|
|
17
|
+
<section class="ck-card ck-card--spaced ck-draft-pending">
|
|
18
|
+
<div class="ck-prompt-preview__header">
|
|
19
|
+
<p class="ck-kicker">Draft pending</p>
|
|
20
|
+
<%= button_to "Publish this version", publish_draft_metric_path(@metric, draft_id: @edit_draft.id),
|
|
21
|
+
method: :post, form_class: "inline-block",
|
|
22
|
+
class: ck_button_classes(:dark) %>
|
|
23
|
+
</div>
|
|
24
|
+
<p class="ck-meta-copy">A draft of this metric is saved. Publishing it replaces the live instruction<%= ", rubric," if rubric_changed %> for future runs. Here's what changes.</p>
|
|
25
|
+
|
|
26
|
+
<% if instruction_changed %>
|
|
27
|
+
<div class="ck-suggest-diff">
|
|
28
|
+
<div class="ck-suggest-diff__pane">
|
|
29
|
+
<div class="ck-suggest-diff__header">
|
|
30
|
+
<span class="ck-suggest-diff__label ck-suggest-diff__label--before">Currently live</span>
|
|
31
|
+
</div>
|
|
32
|
+
<pre class="ck-suggest-diff__code"><%= ck_word_diff_old(pub_instr, draft_instr) %></pre>
|
|
33
|
+
</div>
|
|
34
|
+
<div class="ck-suggest-diff__pane">
|
|
35
|
+
<div class="ck-suggest-diff__header">
|
|
36
|
+
<span class="ck-suggest-diff__label ck-suggest-diff__label--after">Draft</span>
|
|
37
|
+
</div>
|
|
38
|
+
<pre class="ck-suggest-diff__code"><%= ck_word_diff_new(pub_instr, draft_instr) %></pre>
|
|
39
|
+
</div>
|
|
40
|
+
</div>
|
|
41
|
+
<% else %>
|
|
42
|
+
<p class="ck-meta-copy">The instruction is unchanged.</p>
|
|
43
|
+
<% end %>
|
|
44
|
+
|
|
45
|
+
<% if rubric_changed %>
|
|
46
|
+
<p class="ck-meta-copy"><strong>Rubric also changed.</strong> Edit the metric to inspect each band, or publish to apply the new wording.</p>
|
|
47
|
+
<% end %>
|
|
48
|
+
</section>
|
|
20
49
|
<% end %>
|
|
21
50
|
<% end %>
|
|
22
51
|
</div>
|
|
23
52
|
<div class="ck-actions">
|
|
24
53
|
<% if CompletionKit.config.judge_calibration_enabled %>
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
54
|
+
<% if @improve_disagreement_count.positive? %>
|
|
55
|
+
<%= button_to "Improve the metric", suggest_variants_metric_path(@metric),
|
|
56
|
+
method: :post, form_class: "inline-block",
|
|
57
|
+
class: ck_button_classes(:light, variant: :outline),
|
|
58
|
+
title: "Rewrite this metric based on the disagreements collected so far.",
|
|
59
|
+
data: { turbo_confirm: "Rewrite this metric based on the disagreements collected so far?" } %>
|
|
60
|
+
<% else %>
|
|
61
|
+
<button type="button" class="<%= ck_button_classes(:light, variant: :outline) %>" disabled
|
|
62
|
+
title="Mark at least one row as Disagree before the model can suggest a change.">Improve the metric</button>
|
|
63
|
+
<% end %>
|
|
30
64
|
<% end %>
|
|
31
65
|
<%= link_to "Edit", edit_metric_path(@metric), class: ck_button_classes(:light, variant: :outline) %>
|
|
32
66
|
</div>
|
|
@@ -63,6 +97,49 @@
|
|
|
63
97
|
</div>
|
|
64
98
|
</section>
|
|
65
99
|
|
|
100
|
+
<% if CompletionKit.config.judge_calibration_enabled && @suggestion_draft %>
|
|
101
|
+
<% sd_current_instr = @published_judge_version&.instruction.to_s %>
|
|
102
|
+
<% sd_draft_instr = @suggestion_draft.instruction.to_s %>
|
|
103
|
+
<% sd_current_rubric = @published_judge_version&.rubric_bands || [] %>
|
|
104
|
+
<% sd_rubric_changed = @suggestion_draft.rubric_bands != sd_current_rubric %>
|
|
105
|
+
<section class="ck-card ck-card--spaced ck-draft-pending">
|
|
106
|
+
<div class="ck-prompt-preview__header">
|
|
107
|
+
<p class="ck-kicker">Suggested change</p>
|
|
108
|
+
<time class="ck-meta-copy" data-relative-time datetime="<%= @suggestion_draft.created_at.utc.iso8601 %>"><%= time_ago_in_words(@suggestion_draft.created_at) %> ago</time>
|
|
109
|
+
</div>
|
|
110
|
+
<p class="ck-meta-copy">Based on your disagreements, the model proposed this rewrite. Use it to replace the live version, or discard.</p>
|
|
111
|
+
|
|
112
|
+
<div class="ck-suggest-diff">
|
|
113
|
+
<div class="ck-suggest-diff__pane">
|
|
114
|
+
<div class="ck-suggest-diff__header">
|
|
115
|
+
<span class="ck-suggest-diff__label ck-suggest-diff__label--before">Currently live</span>
|
|
116
|
+
</div>
|
|
117
|
+
<pre class="ck-suggest-diff__code"><%= ck_word_diff_old(sd_current_instr, sd_draft_instr) %></pre>
|
|
118
|
+
</div>
|
|
119
|
+
<div class="ck-suggest-diff__pane">
|
|
120
|
+
<div class="ck-suggest-diff__header">
|
|
121
|
+
<span class="ck-suggest-diff__label ck-suggest-diff__label--after">Proposed</span>
|
|
122
|
+
</div>
|
|
123
|
+
<pre class="ck-suggest-diff__code"><%= ck_word_diff_new(sd_current_instr, sd_draft_instr) %></pre>
|
|
124
|
+
</div>
|
|
125
|
+
</div>
|
|
126
|
+
|
|
127
|
+
<% if sd_rubric_changed %>
|
|
128
|
+
<p class="ck-meta-copy"><strong>Rubric also changed.</strong> Publishing applies the new rubric too.</p>
|
|
129
|
+
<% end %>
|
|
130
|
+
|
|
131
|
+
<div class="ck-actions">
|
|
132
|
+
<%= button_to "Discard", dismiss_suggestion_metric_path(@metric, draft_id: @suggestion_draft.id),
|
|
133
|
+
method: :delete, form_class: "inline-block",
|
|
134
|
+
class: ck_button_classes(:light, variant: :outline),
|
|
135
|
+
data: { turbo_confirm: "Drop this suggestion?" } %>
|
|
136
|
+
<%= button_to "Use this version", publish_draft_metric_path(@metric, draft_id: @suggestion_draft.id),
|
|
137
|
+
method: :post, form_class: "inline-block",
|
|
138
|
+
class: ck_button_classes(:dark) %>
|
|
139
|
+
</div>
|
|
140
|
+
</section>
|
|
141
|
+
<% end %>
|
|
142
|
+
|
|
66
143
|
<% if CompletionKit.config.judge_calibration_enabled %>
|
|
67
144
|
<section class="ck-card ck-card--spaced">
|
|
68
145
|
<div class="ck-prompt-preview__header">
|
|
@@ -131,31 +208,6 @@
|
|
|
131
208
|
<% end %>
|
|
132
209
|
</section>
|
|
133
210
|
|
|
134
|
-
<% if @suggestion_drafts.any? %>
|
|
135
|
-
<section class="ck-card ck-card--spaced">
|
|
136
|
-
<div class="ck-prompt-preview__header">
|
|
137
|
-
<p class="ck-kicker">Suggested improvements</p>
|
|
138
|
-
<span class="ck-chip"><%= @suggestion_drafts.size %> option<%= @suggestion_drafts.size == 1 ? "" : "s" %></span>
|
|
139
|
-
</div>
|
|
140
|
-
<p class="ck-meta-copy">Based on your verdicts, the model proposed these alternative instructions for this metric. Pick one to make it live — the previous version stays in history.</p>
|
|
141
|
-
<div class="ck-suggestion-list">
|
|
142
|
-
<% @suggestion_drafts.each do |draft| %>
|
|
143
|
-
<article class="ck-suggestion-card">
|
|
144
|
-
<header class="ck-suggestion-card__header">
|
|
145
|
-
<span class="ck-chip ck-chip--soft">Option #<%= draft.id %></span>
|
|
146
|
-
<time class="ck-meta-copy" data-relative-time datetime="<%= draft.created_at.utc.iso8601 %>"><%= time_ago_in_words(draft.created_at) %> ago</time>
|
|
147
|
-
</header>
|
|
148
|
-
<pre class="ck-code ck-suggestion-card__instruction"><%= draft.instruction %></pre>
|
|
149
|
-
<div class="ck-actions">
|
|
150
|
-
<%= button_to "Use this version", publish_draft_metric_path(@metric, draft_id: draft.id),
|
|
151
|
-
method: :post, form_class: "inline-block",
|
|
152
|
-
class: ck_button_classes(:dark) %>
|
|
153
|
-
</div>
|
|
154
|
-
</article>
|
|
155
|
-
<% end %>
|
|
156
|
-
</div>
|
|
157
|
-
</section>
|
|
158
|
-
<% end %>
|
|
159
211
|
|
|
160
212
|
<% if Array(@metric.few_shot_examples).any? %>
|
|
161
213
|
<section class="ck-card ck-card--spaced">
|
|
@@ -112,9 +112,7 @@
|
|
|
112
112
|
</div>
|
|
113
113
|
</div>
|
|
114
114
|
<% if review.ai_feedback.present? %>
|
|
115
|
-
<
|
|
116
|
-
<div class="ck-note-box"><%= review.ai_feedback %></div>
|
|
117
|
-
</div>
|
|
115
|
+
<p class="ck-review-card__feedback"><%= review.ai_feedback %></p>
|
|
118
116
|
<% end %>
|
|
119
117
|
<% if CompletionKit.config.judge_calibration_enabled && review.metric && review.ai_score %>
|
|
120
118
|
<% existing = CompletionKit::Calibration.find_by(
|
data/config/routes.rb
CHANGED