completion-kit 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +51 -51
- data/app/controllers/completion_kit/{calibrations_controller.rb → agreements_controller.rb} +19 -19
- data/app/controllers/completion_kit/api/v1/{calibrations_controller.rb → agreements_controller.rb} +18 -18
- data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb +2 -7
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
- data/app/controllers/completion_kit/metrics_controller.rb +10 -11
- data/app/jobs/completion_kit/judge_review_job.rb +2 -2
- data/app/models/completion_kit/{calibration.rb → agreement.rb} +1 -1
- data/app/models/completion_kit/metric_version.rb +1 -17
- data/app/models/completion_kit/review.rb +1 -0
- data/app/services/completion_kit/{calibration_math.rb → agreement_math.rb} +1 -1
- data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
- data/app/services/completion_kit/mcp_tools/{calibrations.rb → agreements.rb} +11 -11
- data/app/services/completion_kit/mcp_tools/judges.rb +3 -3
- data/app/services/completion_kit/mcp_tools/metric_versions.rb +2 -7
- data/app/services/completion_kit/{metric_calibration_examples.rb → metric_agreement_examples.rb} +6 -6
- data/app/services/completion_kit/{metric_calibration_stats.rb → metric_agreement_stats.rb} +6 -6
- data/app/services/completion_kit/metric_improvement_validator.rb +1 -1
- data/app/services/completion_kit/metric_variant_generator.rb +2 -2
- data/app/views/completion_kit/{calibrations → agreements}/_buttons.html.erb +33 -33
- data/app/views/completion_kit/{calibrations → agreements}/_trust_panel.html.erb +5 -5
- data/app/views/completion_kit/api_reference/_body.html.erb +15 -15
- data/app/views/completion_kit/metrics/_guiding_examples.html.erb +1 -1
- data/app/views/completion_kit/metrics/edit.html.erb +1 -1
- data/app/views/completion_kit/metrics/show.html.erb +6 -6
- data/app/views/completion_kit/responses/show.html.erb +4 -4
- data/app/views/completion_kit/runs/show.html.erb +1 -1
- data/config/routes.rb +3 -3
- data/db/migrate/20260531000002_backfill_review_metric_versions.rb +33 -0
- data/db/migrate/20260531000003_add_metric_version_fk_to_reviews.rb +6 -0
- data/db/migrate/20260531000004_rename_calibrations_to_agreements.rb +19 -0
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +2 -2
- metadata +13 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cdcc8d4cdaf4b7aa4b3cff7cb0dd3fe65ce213bbce7b8ab1ba52cba304bff19a
|
|
4
|
+
data.tar.gz: d2b25e3b12b187b3df15e9b8347668a3dcf529b765fd5427a1dc2579665ee664
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 81921860b28c13076623a462e9cc82569a3721f035f5eb8dbe236c177fa02f9277aa7425659ae583da6713dfcc9467fc03683b3305f65fd8eaf29525cc93e143
|
|
7
|
+
data.tar.gz: c03aa7c4ad395228e4268ee166a908779a6c50e614c1a0591cf52569712236ffb4ccd9b6d8dc918d40e2ebb253fc48760fdd4e95d3e374e4cf68fad7b1b7ee19
|
|
data/app/assets/stylesheets/completion_kit/application.css
CHANGED
|
@@ -3158,7 +3158,7 @@ select.ck-input {
|
|
|
3158
3158
|
#ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
|
|
3159
3159
|
#ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
|
|
3160
3160
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
|
|
3161
|
-
#ck-tab-
|
|
3161
|
+
#ck-tab-agreements:checked ~ .ck-api-tabs__nav label[for="ck-tab-agreements"],
|
|
3162
3162
|
#ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
|
|
3163
3163
|
#ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
|
|
3164
3164
|
color: var(--ck-accent);
|
|
@@ -3173,7 +3173,7 @@ select.ck-input {
|
|
|
3173
3173
|
#ck-tab-datasets:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(5),
|
|
3174
3174
|
#ck-tab-metrics:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(6),
|
|
3175
3175
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(7),
|
|
3176
|
-
#ck-tab-
|
|
3176
|
+
#ck-tab-agreements:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
|
|
3177
3177
|
#ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9),
|
|
3178
3178
|
#ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(10) {
|
|
3179
3179
|
display: block;
|
|
@@ -3215,7 +3215,7 @@ select.ck-input {
|
|
|
3215
3215
|
#ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
|
|
3216
3216
|
#ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
|
|
3217
3217
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
|
|
3218
|
-
#ck-tab-
|
|
3218
|
+
#ck-tab-agreements:checked ~ .ck-api-tabs__nav label[for="ck-tab-agreements"],
|
|
3219
3219
|
#ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
|
|
3220
3220
|
#ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
|
|
3221
3221
|
border-left-color: transparent;
|
|
@@ -5377,33 +5377,33 @@ a.tag-mark {
|
|
|
5377
5377
|
outline-offset: 2px;
|
|
5378
5378
|
}
|
|
5379
5379
|
|
|
5380
|
-
.ck-
|
|
5380
|
+
.ck-agreement {
|
|
5381
5381
|
margin-top: 12px;
|
|
5382
5382
|
padding-top: 12px;
|
|
5383
5383
|
border-top: 1px dashed var(--ck-line);
|
|
5384
5384
|
}
|
|
5385
|
-
.ck-
|
|
5385
|
+
.ck-agreement__prompt {
|
|
5386
5386
|
margin: 0 0 10px;
|
|
5387
5387
|
font-family: var(--ck-mono);
|
|
5388
5388
|
font-size: 0.72rem;
|
|
5389
5389
|
letter-spacing: 0.04em;
|
|
5390
5390
|
color: var(--ck-dim);
|
|
5391
5391
|
}
|
|
5392
|
-
.ck-
|
|
5392
|
+
.ck-agreement__prompt > * + * {
|
|
5393
5393
|
margin-left: 8px;
|
|
5394
5394
|
}
|
|
5395
|
-
.ck-
|
|
5395
|
+
.ck-agreement__label {
|
|
5396
5396
|
letter-spacing: 0.08em;
|
|
5397
5397
|
text-transform: uppercase;
|
|
5398
5398
|
color: var(--ck-dim);
|
|
5399
5399
|
}
|
|
5400
|
-
.ck-
|
|
5400
|
+
.ck-agreement__meta {
|
|
5401
5401
|
color: var(--ck-muted);
|
|
5402
5402
|
}
|
|
5403
|
-
.ck-
|
|
5403
|
+
.ck-agreement__sep {
|
|
5404
5404
|
color: var(--ck-line-strong);
|
|
5405
5405
|
}
|
|
5406
|
-
.ck-
|
|
5406
|
+
.ck-agreement__meta-link {
|
|
5407
5407
|
color: var(--ck-accent);
|
|
5408
5408
|
text-decoration: none;
|
|
5409
5409
|
white-space: nowrap;
|
|
@@ -5411,12 +5411,12 @@ a.tag-mark {
|
|
|
5411
5411
|
letter-spacing: 0.08em;
|
|
5412
5412
|
}
|
|
5413
5413
|
|
|
5414
|
-
.ck-
|
|
5414
|
+
.ck-agreement__others {
|
|
5415
5415
|
margin: 10px 0 0;
|
|
5416
5416
|
font-family: var(--ck-mono);
|
|
5417
5417
|
font-size: 0.78rem;
|
|
5418
5418
|
}
|
|
5419
|
-
.ck-
|
|
5419
|
+
.ck-agreement__others-summary {
|
|
5420
5420
|
display: inline-flex;
|
|
5421
5421
|
align-items: center;
|
|
5422
5422
|
gap: 6px;
|
|
@@ -5426,20 +5426,20 @@ a.tag-mark {
|
|
|
5426
5426
|
user-select: none;
|
|
5427
5427
|
list-style: none;
|
|
5428
5428
|
}
|
|
5429
|
-
.ck-
|
|
5430
|
-
.ck-
|
|
5429
|
+
.ck-agreement__others-summary:hover,
|
|
5430
|
+
.ck-agreement__others-summary:focus-visible {
|
|
5431
5431
|
color: var(--ck-accent-hover);
|
|
5432
5432
|
}
|
|
5433
|
-
.ck-
|
|
5434
|
-
.ck-
|
|
5433
|
+
.ck-agreement__others-summary::-webkit-details-marker { display: none; }
|
|
5434
|
+
.ck-agreement__others-summary svg {
|
|
5435
5435
|
width: 12px;
|
|
5436
5436
|
height: 12px;
|
|
5437
5437
|
transition: transform 0.15s;
|
|
5438
5438
|
}
|
|
5439
|
-
.ck-
|
|
5439
|
+
.ck-agreement__others[open] .ck-agreement__others-summary svg {
|
|
5440
5440
|
transform: rotate(90deg);
|
|
5441
5441
|
}
|
|
5442
|
-
.ck-
|
|
5442
|
+
.ck-agreement__others-list {
|
|
5443
5443
|
list-style: none;
|
|
5444
5444
|
padding: 8px 0 0;
|
|
5445
5445
|
margin: 0;
|
|
@@ -5447,24 +5447,24 @@ a.tag-mark {
|
|
|
5447
5447
|
flex-direction: column;
|
|
5448
5448
|
gap: 6px;
|
|
5449
5449
|
}
|
|
5450
|
-
.ck-
|
|
5450
|
+
.ck-agreement__others-item {
|
|
5451
5451
|
padding: 8px 10px;
|
|
5452
5452
|
background: var(--ck-surface-soft);
|
|
5453
5453
|
border: 1px solid var(--ck-line);
|
|
5454
5454
|
border-radius: 4px;
|
|
5455
5455
|
color: var(--ck-dim);
|
|
5456
5456
|
}
|
|
5457
|
-
.ck-
|
|
5458
|
-
.ck-
|
|
5459
|
-
.ck-
|
|
5460
|
-
.ck-
|
|
5457
|
+
.ck-agreement__others-item--agree { border-left: 2px solid var(--ck-success); }
|
|
5458
|
+
.ck-agreement__others-item--disagree { border-left: 2px solid var(--ck-danger); }
|
|
5459
|
+
.ck-agreement__others-item--borderline { border-left: 2px solid var(--ck-warning); }
|
|
5460
|
+
.ck-agreement__others-row {
|
|
5461
5461
|
display: flex;
|
|
5462
5462
|
flex-wrap: wrap;
|
|
5463
5463
|
align-items: center;
|
|
5464
5464
|
gap: 10px;
|
|
5465
5465
|
line-height: 1;
|
|
5466
5466
|
}
|
|
5467
|
-
.ck-
|
|
5467
|
+
.ck-agreement__others-verdict {
|
|
5468
5468
|
display: inline-flex;
|
|
5469
5469
|
align-items: center;
|
|
5470
5470
|
gap: 4px;
|
|
@@ -5473,24 +5473,24 @@ a.tag-mark {
|
|
|
5473
5473
|
font-weight: 500;
|
|
5474
5474
|
color: var(--ck-text);
|
|
5475
5475
|
}
|
|
5476
|
-
.ck-
|
|
5477
|
-
.ck-
|
|
5478
|
-
.ck-
|
|
5479
|
-
.ck-
|
|
5476
|
+
.ck-agreement__others-item--agree .ck-agreement__others-verdict { color: var(--ck-success); }
|
|
5477
|
+
.ck-agreement__others-item--disagree .ck-agreement__others-verdict { color: var(--ck-danger); }
|
|
5478
|
+
.ck-agreement__others-item--borderline .ck-agreement__others-verdict { color: var(--ck-warning); }
|
|
5479
|
+
.ck-agreement__others-by {
|
|
5480
5480
|
color: var(--ck-muted);
|
|
5481
5481
|
}
|
|
5482
|
-
.ck-
|
|
5482
|
+
.ck-agreement__others-stars {
|
|
5483
5483
|
display: inline-flex;
|
|
5484
5484
|
align-items: center;
|
|
5485
5485
|
gap: 2px;
|
|
5486
5486
|
}
|
|
5487
|
-
.ck-
|
|
5488
|
-
.ck-
|
|
5487
|
+
.ck-agreement__others-stars svg { display: block; }
|
|
5488
|
+
.ck-agreement__others-note {
|
|
5489
5489
|
margin: 6px 0 0;
|
|
5490
5490
|
color: var(--ck-dim);
|
|
5491
5491
|
line-height: 1.5;
|
|
5492
5492
|
}
|
|
5493
|
-
.ck-
|
|
5493
|
+
.ck-agreement__meta-link svg {
|
|
5494
5494
|
display: inline-block;
|
|
5495
5495
|
width: 12px;
|
|
5496
5496
|
height: 12px;
|
|
@@ -5499,16 +5499,16 @@ a.tag-mark {
|
|
|
5499
5499
|
position: relative;
|
|
5500
5500
|
top: -1px;
|
|
5501
5501
|
}
|
|
5502
|
-
.ck-
|
|
5503
|
-
.ck-
|
|
5502
|
+
.ck-agreement__meta-link:hover,
|
|
5503
|
+
.ck-agreement__meta-link:focus-visible {
|
|
5504
5504
|
color: var(--ck-accent-hover);
|
|
5505
5505
|
}
|
|
5506
|
-
.ck-
|
|
5506
|
+
.ck-agreement__buttons {
|
|
5507
5507
|
display: flex;
|
|
5508
5508
|
gap: 6px;
|
|
5509
5509
|
flex-wrap: wrap;
|
|
5510
5510
|
}
|
|
5511
|
-
.ck-
|
|
5511
|
+
.ck-agreement__pill {
|
|
5512
5512
|
display: inline-flex;
|
|
5513
5513
|
align-items: center;
|
|
5514
5514
|
gap: 0.4rem;
|
|
@@ -5525,50 +5525,50 @@ a.tag-mark {
|
|
|
5525
5525
|
cursor: pointer;
|
|
5526
5526
|
transition: background 0.12s, border-color 0.12s, color 0.12s;
|
|
5527
5527
|
}
|
|
5528
|
-
.ck-
|
|
5528
|
+
.ck-agreement__pill svg {
|
|
5529
5529
|
width: 14px;
|
|
5530
5530
|
height: 14px;
|
|
5531
5531
|
}
|
|
5532
|
-
.ck-
|
|
5533
|
-
.ck-
|
|
5532
|
+
.ck-agreement__pill:hover,
|
|
5533
|
+
.ck-agreement__pill:focus-visible {
|
|
5534
5534
|
color: var(--ck-text);
|
|
5535
5535
|
border-color: var(--ck-dim);
|
|
5536
5536
|
}
|
|
5537
|
-
.ck-
|
|
5537
|
+
.ck-agreement__pill--agree.is-active {
|
|
5538
5538
|
background: var(--ck-success-soft);
|
|
5539
5539
|
border-color: rgba(45, 212, 168, 0.35);
|
|
5540
5540
|
color: var(--ck-success);
|
|
5541
5541
|
}
|
|
5542
|
-
.ck-
|
|
5542
|
+
.ck-agreement__pill--disagree.is-active {
|
|
5543
5543
|
background: var(--ck-danger-soft);
|
|
5544
5544
|
border-color: rgba(248, 113, 113, 0.35);
|
|
5545
5545
|
color: var(--ck-danger);
|
|
5546
5546
|
}
|
|
5547
|
-
.ck-
|
|
5547
|
+
.ck-agreement__pill--borderline.is-active {
|
|
5548
5548
|
background: var(--ck-warning-soft);
|
|
5549
5549
|
border-color: rgba(224, 164, 88, 0.35);
|
|
5550
5550
|
color: var(--ck-warning);
|
|
5551
5551
|
}
|
|
5552
|
-
.ck-
|
|
5553
|
-
.ck-
|
|
5554
|
-
.ck-
|
|
5555
|
-
.ck-
|
|
5552
|
+
.ck-agreement__pill--agree:hover { border-color: rgba(45, 212, 168, 0.45); color: var(--ck-success); }
|
|
5553
|
+
.ck-agreement__pill--disagree:hover { border-color: rgba(248, 113, 113, 0.45); color: var(--ck-danger); }
|
|
5554
|
+
.ck-agreement__pill--borderline:hover { border-color: rgba(224, 164, 88, 0.45); color: var(--ck-warning); }
|
|
5555
|
+
.ck-agreement__detail {
|
|
5556
5556
|
margin-top: 12px;
|
|
5557
5557
|
display: flex;
|
|
5558
5558
|
flex-direction: column;
|
|
5559
5559
|
gap: 12px;
|
|
5560
5560
|
}
|
|
5561
|
-
.ck-
|
|
5561
|
+
.ck-agreement__detail > * {
|
|
5562
5562
|
margin: 0;
|
|
5563
5563
|
}
|
|
5564
|
-
.ck-
|
|
5564
|
+
.ck-agreement__detail .ck-button {
|
|
5565
5565
|
align-self: flex-start;
|
|
5566
5566
|
}
|
|
5567
|
-
.ck-
|
|
5567
|
+
.ck-agreement__detail textarea {
|
|
5568
5568
|
font-family: var(--ck-mono);
|
|
5569
5569
|
font-size: 0.82rem;
|
|
5570
5570
|
}
|
|
5571
|
-
.ck-
|
|
5571
|
+
.ck-agreement__value {
|
|
5572
5572
|
color: var(--ck-accent);
|
|
5573
5573
|
font-family: var(--ck-mono);
|
|
5574
5574
|
font-weight: 600;
|
|
@@ -5660,7 +5660,7 @@ a.tag-mark {
|
|
|
5660
5660
|
background: linear-gradient(180deg, var(--ck-accent-soft), var(--ck-surface));
|
|
5661
5661
|
}
|
|
5662
5662
|
|
|
5663
|
-
.ck-
|
|
5663
|
+
.ck-agreement__error {
|
|
5664
5664
|
margin: 8px 0 0;
|
|
5665
5665
|
padding: 8px 10px;
|
|
5666
5666
|
background: var(--ck-danger-soft);
|
|
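data/app/controllers/completion_kit/{calibrations_controller.rb → agreements_controller.rb}
RENAMED
|
@@ -1,36 +1,36 @@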
|
|
|
1
1
|
module CompletionKit
|
|
2
|
-
class
|
|
3
|
-
before_action :
|
|
2
|
+
class AgreementsController < ApplicationController
|
|
3
|
+
before_action :ensure_agreement_enabled
|
|
4
4
|
before_action :set_scope
|
|
5
5
|
|
|
6
6
|
def create
|
|
7
|
-
created_by =
|
|
8
|
-
existing =
|
|
7
|
+
created_by = agreement_creator
|
|
8
|
+
existing = Agreement.find_by(
|
|
9
9
|
run_id: @run.id, response_id: @response.id, metric_id: @metric.id, created_by: created_by
|
|
10
10
|
)
|
|
11
11
|
|
|
12
12
|
if params[:verdict] == "disagree" && params[:corrected_score].blank?
|
|
13
|
-
|
|
13
|
+
render_agreement(agreement: existing, pending_verdict: "disagree")
|
|
14
14
|
return
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
agreement = existing || Agreement.new(
|
|
18
18
|
run: @run, response: @response, metric: @metric, created_by: created_by
|
|
19
19
|
)
|
|
20
|
-
|
|
20
|
+
agreement.assign_attributes(
|
|
21
21
|
metric_version: MetricVersion.ensure_current_for(@metric),
|
|
22
22
|
verdict: params[:verdict],
|
|
23
23
|
corrected_score: params[:corrected_score].presence,
|
|
24
24
|
note: params[:note].presence
|
|
25
25
|
)
|
|
26
26
|
|
|
27
|
-
if
|
|
28
|
-
|
|
27
|
+
if agreement.save
|
|
28
|
+
render_agreement(agreement: agreement, just_saved: true)
|
|
29
29
|
else
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
render_agreement(
|
|
31
|
+
agreement: existing,
|
|
32
32
|
pending_verdict: params[:verdict],
|
|
33
|
-
error:
|
|
33
|
+
error: agreement.errors.full_messages.to_sentence,
|
|
34
34
|
status: :unprocessable_entity
|
|
35
35
|
)
|
|
36
36
|
end
|
|
@@ -38,10 +38,10 @@ module CompletionKit
|
|
|
38
38
|
|
|
39
39
|
private
|
|
40
40
|
|
|
41
|
-
def
|
|
41
|
+
def render_agreement(agreement:, pending_verdict: nil, error: nil, just_saved: false, status: :ok)
|
|
42
42
|
locals = {
|
|
43
43
|
review: review_for_metric,
|
|
44
|
-
|
|
44
|
+
agreement: agreement,
|
|
45
45
|
run: @run,
|
|
46
46
|
response_row: @response,
|
|
47
47
|
metric: @metric,
|
|
@@ -50,14 +50,14 @@ module CompletionKit
|
|
|
50
50
|
just_saved: just_saved
|
|
51
51
|
}
|
|
52
52
|
render turbo_stream: turbo_stream.replace(
|
|
53
|
-
"
|
|
54
|
-
partial: "completion_kit/
|
|
53
|
+
"agreement_#{@response.id}_#{@metric.id}",
|
|
54
|
+
partial: "completion_kit/agreements/buttons",
|
|
55
55
|
locals: locals
|
|
56
56
|
), status: status
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
def
|
|
60
|
-
head :not_found unless CompletionKit.config.
|
|
59
|
+
def ensure_agreement_enabled
|
|
60
|
+
head :not_found unless CompletionKit.config.judge_agreement_enabled
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
def set_scope
|
|
@@ -70,7 +70,7 @@ module CompletionKit
|
|
|
70
70
|
@response.reviews.find_by(metric_id: @metric.id)
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
def
|
|
73
|
+
def agreement_creator
|
|
74
74
|
request.env["HTTP_X_REMOTE_USER"].presence || CompletionKit.config.username.presence || "operator"
|
|
75
75
|
end
|
|
76
76
|
end
|
data/app/controllers/completion_kit/api/v1/{calibrations_controller.rb → agreements_controller.rb}
RENAMED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module Api
|
|
3
3
|
module V1
|
|
4
|
-
class
|
|
5
|
-
before_action :
|
|
4
|
+
class AgreementsController < BaseController
|
|
5
|
+
before_action :ensure_agreement_enabled
|
|
6
6
|
before_action :set_nested_scope, only: [:create]
|
|
7
|
-
before_action :
|
|
7
|
+
before_action :load_agreement, only: [:destroy]
|
|
8
8
|
|
|
9
9
|
def index
|
|
10
|
-
scope =
|
|
10
|
+
scope = Agreement.all
|
|
11
11
|
scope = scope.where(run_id: params[:run_id]) if params[:run_id].present?
|
|
12
12
|
scope = scope.where(response_id: params[:response_id]) if params[:response_id].present?
|
|
13
13
|
scope = scope.where(metric_id: params[:metric_id]) if params[:metric_id].present?
|
|
@@ -18,31 +18,31 @@ module CompletionKit
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def create
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
agreement = scope_agreements.find_or_initialize_by(created_by: created_by_param)
|
|
22
|
+
agreement.assign_attributes(
|
|
23
23
|
run: @run,
|
|
24
24
|
response: @response,
|
|
25
25
|
metric: @metric,
|
|
26
26
|
metric_version: MetricVersion.ensure_current_for(@metric),
|
|
27
|
-
**
|
|
27
|
+
**agreement_params
|
|
28
28
|
)
|
|
29
29
|
|
|
30
|
-
if
|
|
31
|
-
render json:
|
|
30
|
+
if agreement.save
|
|
31
|
+
render json: agreement, status: agreement.previously_new_record? ? :created : :ok
|
|
32
32
|
else
|
|
33
|
-
render_validation_errors(
|
|
33
|
+
render_validation_errors(agreement)
|
|
34
34
|
end
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
def destroy
|
|
38
|
-
@
|
|
38
|
+
@agreement.destroy!
|
|
39
39
|
head :no_content
|
|
40
40
|
end
|
|
41
41
|
|
|
42
42
|
private
|
|
43
43
|
|
|
44
|
-
def
|
|
45
|
-
render_error("
|
|
44
|
+
def ensure_agreement_enabled
|
|
45
|
+
render_error("Agreement disabled", status: :not_found) unless CompletionKit.config.judge_agreement_enabled
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def set_nested_scope
|
|
@@ -53,17 +53,17 @@ module CompletionKit
|
|
|
53
53
|
not_found
|
|
54
54
|
end
|
|
55
55
|
|
|
56
|
-
def
|
|
57
|
-
@
|
|
56
|
+
def load_agreement
|
|
57
|
+
@agreement = Agreement.find(params[:id])
|
|
58
58
|
rescue ActiveRecord::RecordNotFound
|
|
59
59
|
not_found
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
-
def
|
|
63
|
-
|
|
62
|
+
def scope_agreements
|
|
63
|
+
Agreement.where(run_id: @run.id, response_id: @response.id, metric_id: @metric.id)
|
|
64
64
|
end
|
|
65
65
|
|
|
66
|
-
def
|
|
66
|
+
def agreement_params
|
|
67
67
|
params.permit(:verdict, :corrected_score, :note).to_h.symbolize_keys
|
|
68
68
|
end
|
|
69
69
|
|
|
data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb
CHANGED
|
@@ -14,13 +14,8 @@ module CompletionKit
|
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
def publish
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
render json: audit
|
|
20
|
-
else
|
|
21
|
-
@version.publish!
|
|
22
|
-
render json: @version.reload
|
|
23
|
-
end
|
|
17
|
+
@version.publish!
|
|
18
|
+
render json: @version.reload
|
|
24
19
|
end
|
|
25
20
|
|
|
26
21
|
def destroy
|
|
data/app/controllers/completion_kit/api/v1/metrics_controller.rb
CHANGED
|
@@ -37,7 +37,7 @@ module CompletionKit
|
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
def suggest_variants
|
|
40
|
-
disagreement_count =
|
|
40
|
+
disagreement_count = Agreement.where(metric_id: @metric.id, verdict: "disagree").count
|
|
41
41
|
if disagreement_count.zero?
|
|
42
42
|
render_error("Mark at least one case as Disagree before asking the model to suggest a change.", status: :unprocessable_entity)
|
|
43
43
|
return
|
|
data/app/controllers/completion_kit/metrics_controller.rb
CHANGED
|
@@ -39,9 +39,9 @@ module CompletionKit
|
|
|
39
39
|
def show
|
|
40
40
|
@edit_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
|
|
41
41
|
@suggestion_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
|
|
42
|
-
@improve_disagreement_count =
|
|
42
|
+
@improve_disagreement_count = Agreement.where(metric_id: @metric.id, verdict: "disagree").count
|
|
43
43
|
@versions = MetricVersion.where(metric_id: @metric.id).order(version_number: :desc).to_a
|
|
44
|
-
@guiding_examples = CompletionKit.config.judge_examples_from_reviews ?
|
|
44
|
+
@guiding_examples = CompletionKit.config.judge_examples_from_reviews ? MetricAgreementExamples.judge_examples_for(@metric) : []
|
|
45
45
|
end
|
|
46
46
|
|
|
47
47
|
def new
|
|
@@ -52,7 +52,7 @@ module CompletionKit
|
|
|
52
52
|
@suggestion_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
|
|
53
53
|
@edit_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
|
|
54
54
|
@published_metric_version = MetricVersion.published.where(metric_id: @metric.id, current: true).first
|
|
55
|
-
@improve_disagreement_count =
|
|
55
|
+
@improve_disagreement_count = Agreement.where(metric_id: @metric.id, verdict: "disagree").count
|
|
56
56
|
|
|
57
57
|
if @edit_draft
|
|
58
58
|
@metric.instruction = @edit_draft.instruction
|
|
@@ -117,7 +117,7 @@ module CompletionKit
|
|
|
117
117
|
|
|
118
118
|
def suggest_variants
|
|
119
119
|
target = params[:back_to] == "edit" ? edit_metric_path(@metric) : metric_path(@metric)
|
|
120
|
-
counts =
|
|
120
|
+
counts = Agreement.where(metric_id: @metric.id, verdict: %w[agree disagree]).group(:verdict).count
|
|
121
121
|
if counts["disagree"].to_i.zero?
|
|
122
122
|
redirect_to target, alert: "Mark at least one case as Disagree before asking the model to suggest a change."
|
|
123
123
|
return
|
|
@@ -145,12 +145,12 @@ module CompletionKit
|
|
|
145
145
|
end
|
|
146
146
|
|
|
147
147
|
def exclude_example
|
|
148
|
-
|
|
149
|
-
|
|
148
|
+
agreement = Agreement.where(metric_id: @metric.id).find(params[:agreement_id])
|
|
149
|
+
agreement.update!(excluded_from_examples: true)
|
|
150
150
|
render turbo_stream: turbo_stream.replace(
|
|
151
151
|
"ck-guiding-#{@metric.id}",
|
|
152
152
|
partial: "completion_kit/metrics/guiding_examples",
|
|
153
|
-
locals: { metric: @metric, examples:
|
|
153
|
+
locals: { metric: @metric, examples: MetricAgreementExamples.judge_examples_for(@metric) }
|
|
154
154
|
)
|
|
155
155
|
end
|
|
156
156
|
|
|
@@ -171,13 +171,12 @@ module CompletionKit
|
|
|
171
171
|
reverting = was_published_already && !version.current?
|
|
172
172
|
previously_current = MetricVersion.current.find_by(metric_id: @metric.id)
|
|
173
173
|
|
|
174
|
+
version.publish!
|
|
175
|
+
|
|
174
176
|
if reverting
|
|
175
|
-
audit = version.revert!
|
|
176
|
-
prior_label = previously_current.version_label
|
|
177
177
|
redirect_to metric_path(@metric),
|
|
178
|
-
notice: "
|
|
178
|
+
notice: "#{@metric.name} is back on #{version.version_label}. Its reviews count again; the ones you gave on #{previously_current.version_label} stay with that version."
|
|
179
179
|
else
|
|
180
|
-
version.publish!
|
|
181
180
|
redirect_to metric_path(@metric),
|
|
182
181
|
notice: "#{@metric.name} #{version.version_label} is now the published version."
|
|
183
182
|
end
|
|
data/app/jobs/completion_kit/judge_review_job.rb
CHANGED
|
@@ -82,10 +82,10 @@ module CompletionKit
|
|
|
82
82
|
private
|
|
83
83
|
|
|
84
84
|
def review_examples_for(metric, response)
|
|
85
|
-
return nil unless CompletionKit.config.
|
|
85
|
+
return nil unless CompletionKit.config.judge_agreement_enabled
|
|
86
86
|
return nil unless CompletionKit.config.judge_examples_from_reviews
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
MetricAgreementExamples.judge_examples_for(metric, exclude_response_id: response.id)
|
|
89
89
|
end
|
|
90
90
|
|
|
91
91
|
def confirm_judging_capability(judge_model_id)
|
|
data/app/models/completion_kit/metric_version.rb
CHANGED
|
@@ -3,7 +3,7 @@ module CompletionKit
|
|
|
3
3
|
STATES = %w[draft published].freeze
|
|
4
4
|
|
|
5
5
|
belongs_to :metric
|
|
6
|
-
has_many :
|
|
6
|
+
has_many :agreements, dependent: :destroy
|
|
7
7
|
|
|
8
8
|
serialize :rubric_bands, coder: JSON
|
|
9
9
|
serialize :validation_summary, coder: JSON
|
|
@@ -83,22 +83,6 @@ module CompletionKit
|
|
|
83
83
|
self
|
|
84
84
|
end
|
|
85
85
|
|
|
86
|
-
def revert!
|
|
87
|
-
raise ArgumentError, "only a published version can be reverted to" unless published?
|
|
88
|
-
audit = nil
|
|
89
|
-
MetricVersion.transaction do
|
|
90
|
-
audit = self.class.create!(
|
|
91
|
-
metric: metric,
|
|
92
|
-
instruction: instruction,
|
|
93
|
-
rubric_bands: rubric_bands,
|
|
94
|
-
state: "draft",
|
|
95
|
-
source: "revert"
|
|
96
|
-
)
|
|
97
|
-
audit.publish!
|
|
98
|
-
end
|
|
99
|
-
audit
|
|
100
|
-
end
|
|
101
|
-
|
|
102
86
|
def as_json(options = {})
|
|
103
87
|
{
|
|
104
88
|
id: id,
|
|
data/app/models/completion_kit/review.rb
CHANGED
|
@@ -8,6 +8,7 @@ module CompletionKit
|
|
|
8
8
|
has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
|
|
9
9
|
|
|
10
10
|
validates :metric_name, presence: true
|
|
11
|
+
validates :metric_version, presence: true
|
|
11
12
|
validates :ai_score, numericality: { greater_than_or_equal_to: 1, less_than_or_equal_to: 5 }, allow_nil: true
|
|
12
13
|
|
|
13
14
|
before_validation :set_default_status
|
|
data/app/services/completion_kit/mcp_dispatcher.rb
CHANGED
|
@@ -35,7 +35,7 @@ module CompletionKit
|
|
|
35
35
|
McpTools::MetricVersions.definitions +
|
|
36
36
|
McpTools::ProviderCredentials.definitions +
|
|
37
37
|
McpTools::Tags.definitions +
|
|
38
|
-
McpTools::
|
|
38
|
+
McpTools::Agreements.definitions +
|
|
39
39
|
McpTools::Judges.definitions
|
|
40
40
|
end
|
|
41
41
|
|
|
@@ -50,7 +50,7 @@ module CompletionKit
|
|
|
50
50
|
when /\Ametrics_/ then McpTools::Metrics.call(name, arguments)
|
|
51
51
|
when /\Aprovider_credentials_/ then McpTools::ProviderCredentials.call(name, arguments)
|
|
52
52
|
when /\Atags_/ then McpTools::Tags.call(name, arguments)
|
|
53
|
-
when /\
|
|
53
|
+
when /\Aagreements_/ then McpTools::Agreements.call(name, arguments)
|
|
54
54
|
when /\Ajudges_/ then McpTools::Judges.call(name, arguments)
|
|
55
55
|
else raise MethodNotFound, "Unknown tool: #{name}"
|
|
56
56
|
end
|