completion-kit 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +118 -55
- data/app/controllers/completion_kit/{calibrations_controller.rb → agreements_controller.rb} +19 -19
- data/app/controllers/completion_kit/api/v1/{calibrations_controller.rb → agreements_controller.rb} +18 -18
- data/app/controllers/completion_kit/api/v1/metric_versions_controller.rb +2 -7
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
- data/app/controllers/completion_kit/metrics_controller.rb +18 -23
- data/app/jobs/completion_kit/judge_review_job.rb +2 -2
- data/app/jobs/completion_kit/metric_suggestion_job.rb +46 -0
- data/app/models/completion_kit/{calibration.rb → agreement.rb} +1 -1
- data/app/models/completion_kit/metric_version.rb +2 -17
- data/app/models/completion_kit/review.rb +1 -0
- data/app/services/completion_kit/{calibration_math.rb → agreement_math.rb} +1 -1
- data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
- data/app/services/completion_kit/mcp_tools/{calibrations.rb → agreements.rb} +11 -11
- data/app/services/completion_kit/mcp_tools/judges.rb +3 -3
- data/app/services/completion_kit/mcp_tools/metric_versions.rb +2 -7
- data/app/services/completion_kit/{metric_calibration_examples.rb → metric_agreement_examples.rb} +6 -6
- data/app/services/completion_kit/{metric_calibration_stats.rb → metric_agreement_stats.rb} +6 -6
- data/app/services/completion_kit/metric_improvement_validator.rb +101 -0
- data/app/services/completion_kit/metric_variant_generator.rb +2 -2
- data/app/views/completion_kit/{calibrations → agreements}/_buttons.html.erb +33 -33
- data/app/views/completion_kit/{calibrations → agreements}/_trust_panel.html.erb +6 -9
- data/app/views/completion_kit/api_reference/_body.html.erb +15 -15
- data/app/views/completion_kit/metrics/_guiding_examples.html.erb +1 -1
- data/app/views/completion_kit/metrics/_suggestion_failed.html.erb +3 -0
- data/app/views/completion_kit/metrics/_suggestion_pending.html.erb +3 -0
- data/app/views/completion_kit/metrics/_suggestion_ready.html.erb +4 -0
- data/app/views/completion_kit/metrics/_validation_scoreboard.html.erb +12 -0
- data/app/views/completion_kit/metrics/edit.html.erb +1 -1
- data/app/views/completion_kit/metrics/show.html.erb +25 -11
- data/app/views/completion_kit/responses/show.html.erb +4 -4
- data/app/views/completion_kit/runs/show.html.erb +1 -1
- data/config/routes.rb +3 -3
- data/db/migrate/20260531000001_add_validation_summary_to_completion_kit_metric_versions.rb +5 -0
- data/db/migrate/20260531000002_backfill_review_metric_versions.rb +33 -0
- data/db/migrate/20260531000003_add_metric_version_fk_to_reviews.rb +6 -0
- data/db/migrate/20260531000004_rename_calibrations_to_agreements.rb +19 -0
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +2 -2
- metadata +20 -10
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cdcc8d4cdaf4b7aa4b3cff7cb0dd3fe65ce213bbce7b8ab1ba52cba304bff19a
|
|
4
|
+
data.tar.gz: d2b25e3b12b187b3df15e9b8347668a3dcf529b765fd5427a1dc2579665ee664
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 81921860b28c13076623a462e9cc82569a3721f035f5eb8dbe236c177fa02f9277aa7425659ae583da6713dfcc9467fc03683b3305f65fd8eaf29525cc93e143
|
|
7
|
+
data.tar.gz: c03aa7c4ad395228e4268ee166a908779a6c50e614c1a0591cf52569712236ffb4ccd9b6d8dc918d40e2ebb253fc48760fdd4e95d3e374e4cf68fad7b1b7ee19
|
|
@@ -3158,7 +3158,7 @@ select.ck-input {
|
|
|
3158
3158
|
#ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
|
|
3159
3159
|
#ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
|
|
3160
3160
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
|
|
3161
|
-
#ck-tab-
|
|
3161
|
+
#ck-tab-agreements:checked ~ .ck-api-tabs__nav label[for="ck-tab-agreements"],
|
|
3162
3162
|
#ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
|
|
3163
3163
|
#ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
|
|
3164
3164
|
color: var(--ck-accent);
|
|
@@ -3173,7 +3173,7 @@ select.ck-input {
|
|
|
3173
3173
|
#ck-tab-datasets:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(5),
|
|
3174
3174
|
#ck-tab-metrics:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(6),
|
|
3175
3175
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(7),
|
|
3176
|
-
#ck-tab-
|
|
3176
|
+
#ck-tab-agreements:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
|
|
3177
3177
|
#ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9),
|
|
3178
3178
|
#ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(10) {
|
|
3179
3179
|
display: block;
|
|
@@ -3215,7 +3215,7 @@ select.ck-input {
|
|
|
3215
3215
|
#ck-tab-datasets:checked ~ .ck-api-tabs__nav label[for="ck-tab-datasets"],
|
|
3216
3216
|
#ck-tab-metrics:checked ~ .ck-api-tabs__nav label[for="ck-tab-metrics"],
|
|
3217
3217
|
#ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
|
|
3218
|
-
#ck-tab-
|
|
3218
|
+
#ck-tab-agreements:checked ~ .ck-api-tabs__nav label[for="ck-tab-agreements"],
|
|
3219
3219
|
#ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
|
|
3220
3220
|
#ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
|
|
3221
3221
|
border-left-color: transparent;
|
|
@@ -3619,10 +3619,9 @@ select.ck-input {
|
|
|
3619
3619
|
.ck-prompt-versions-table th:nth-child(3), .ck-prompt-versions-table td:nth-child(3) { width: 8rem; white-space: nowrap; }
|
|
3620
3620
|
.ck-prompt-versions-table th:nth-child(4), .ck-prompt-versions-table td:nth-child(4) { width: auto; }
|
|
3621
3621
|
|
|
3622
|
-
.ck-metric-versions-table th:nth-child(1), .ck-metric-versions-table td:nth-child(1) { width:
|
|
3623
|
-
.ck-metric-versions-table th:nth-child(2), .ck-metric-versions-table td:nth-child(2) { width:
|
|
3624
|
-
.ck-metric-versions-table th:nth-child(3), .ck-metric-versions-table td:nth-child(3) { width:
|
|
3625
|
-
.ck-metric-versions-table .ck-version-cell { justify-content: flex-start; gap: 0.75rem; }
|
|
3622
|
+
.ck-metric-versions-table th:nth-child(1), .ck-metric-versions-table td:nth-child(1) { width: 18rem; }
|
|
3623
|
+
.ck-metric-versions-table th:nth-child(2), .ck-metric-versions-table td:nth-child(2) { width: 16rem; white-space: nowrap; }
|
|
3624
|
+
.ck-metric-versions-table th:nth-child(3), .ck-metric-versions-table td:nth-child(3) { width: auto; white-space: nowrap; }
|
|
3626
3625
|
|
|
3627
3626
|
|
|
3628
3627
|
.ck-source-chip {
|
|
@@ -5378,33 +5377,33 @@ a.tag-mark {
|
|
|
5378
5377
|
outline-offset: 2px;
|
|
5379
5378
|
}
|
|
5380
5379
|
|
|
5381
|
-
.ck-
|
|
5380
|
+
.ck-agreement {
|
|
5382
5381
|
margin-top: 12px;
|
|
5383
5382
|
padding-top: 12px;
|
|
5384
5383
|
border-top: 1px dashed var(--ck-line);
|
|
5385
5384
|
}
|
|
5386
|
-
.ck-
|
|
5385
|
+
.ck-agreement__prompt {
|
|
5387
5386
|
margin: 0 0 10px;
|
|
5388
5387
|
font-family: var(--ck-mono);
|
|
5389
5388
|
font-size: 0.72rem;
|
|
5390
5389
|
letter-spacing: 0.04em;
|
|
5391
5390
|
color: var(--ck-dim);
|
|
5392
5391
|
}
|
|
5393
|
-
.ck-
|
|
5392
|
+
.ck-agreement__prompt > * + * {
|
|
5394
5393
|
margin-left: 8px;
|
|
5395
5394
|
}
|
|
5396
|
-
.ck-
|
|
5395
|
+
.ck-agreement__label {
|
|
5397
5396
|
letter-spacing: 0.08em;
|
|
5398
5397
|
text-transform: uppercase;
|
|
5399
5398
|
color: var(--ck-dim);
|
|
5400
5399
|
}
|
|
5401
|
-
.ck-
|
|
5400
|
+
.ck-agreement__meta {
|
|
5402
5401
|
color: var(--ck-muted);
|
|
5403
5402
|
}
|
|
5404
|
-
.ck-
|
|
5403
|
+
.ck-agreement__sep {
|
|
5405
5404
|
color: var(--ck-line-strong);
|
|
5406
5405
|
}
|
|
5407
|
-
.ck-
|
|
5406
|
+
.ck-agreement__meta-link {
|
|
5408
5407
|
color: var(--ck-accent);
|
|
5409
5408
|
text-decoration: none;
|
|
5410
5409
|
white-space: nowrap;
|
|
@@ -5412,12 +5411,12 @@ a.tag-mark {
|
|
|
5412
5411
|
letter-spacing: 0.08em;
|
|
5413
5412
|
}
|
|
5414
5413
|
|
|
5415
|
-
.ck-
|
|
5414
|
+
.ck-agreement__others {
|
|
5416
5415
|
margin: 10px 0 0;
|
|
5417
5416
|
font-family: var(--ck-mono);
|
|
5418
5417
|
font-size: 0.78rem;
|
|
5419
5418
|
}
|
|
5420
|
-
.ck-
|
|
5419
|
+
.ck-agreement__others-summary {
|
|
5421
5420
|
display: inline-flex;
|
|
5422
5421
|
align-items: center;
|
|
5423
5422
|
gap: 6px;
|
|
@@ -5427,20 +5426,20 @@ a.tag-mark {
|
|
|
5427
5426
|
user-select: none;
|
|
5428
5427
|
list-style: none;
|
|
5429
5428
|
}
|
|
5430
|
-
.ck-
|
|
5431
|
-
.ck-
|
|
5429
|
+
.ck-agreement__others-summary:hover,
|
|
5430
|
+
.ck-agreement__others-summary:focus-visible {
|
|
5432
5431
|
color: var(--ck-accent-hover);
|
|
5433
5432
|
}
|
|
5434
|
-
.ck-
|
|
5435
|
-
.ck-
|
|
5433
|
+
.ck-agreement__others-summary::-webkit-details-marker { display: none; }
|
|
5434
|
+
.ck-agreement__others-summary svg {
|
|
5436
5435
|
width: 12px;
|
|
5437
5436
|
height: 12px;
|
|
5438
5437
|
transition: transform 0.15s;
|
|
5439
5438
|
}
|
|
5440
|
-
.ck-
|
|
5439
|
+
.ck-agreement__others[open] .ck-agreement__others-summary svg {
|
|
5441
5440
|
transform: rotate(90deg);
|
|
5442
5441
|
}
|
|
5443
|
-
.ck-
|
|
5442
|
+
.ck-agreement__others-list {
|
|
5444
5443
|
list-style: none;
|
|
5445
5444
|
padding: 8px 0 0;
|
|
5446
5445
|
margin: 0;
|
|
@@ -5448,24 +5447,24 @@ a.tag-mark {
|
|
|
5448
5447
|
flex-direction: column;
|
|
5449
5448
|
gap: 6px;
|
|
5450
5449
|
}
|
|
5451
|
-
.ck-
|
|
5450
|
+
.ck-agreement__others-item {
|
|
5452
5451
|
padding: 8px 10px;
|
|
5453
5452
|
background: var(--ck-surface-soft);
|
|
5454
5453
|
border: 1px solid var(--ck-line);
|
|
5455
5454
|
border-radius: 4px;
|
|
5456
5455
|
color: var(--ck-dim);
|
|
5457
5456
|
}
|
|
5458
|
-
.ck-
|
|
5459
|
-
.ck-
|
|
5460
|
-
.ck-
|
|
5461
|
-
.ck-
|
|
5457
|
+
.ck-agreement__others-item--agree { border-left: 2px solid var(--ck-success); }
|
|
5458
|
+
.ck-agreement__others-item--disagree { border-left: 2px solid var(--ck-danger); }
|
|
5459
|
+
.ck-agreement__others-item--borderline { border-left: 2px solid var(--ck-warning); }
|
|
5460
|
+
.ck-agreement__others-row {
|
|
5462
5461
|
display: flex;
|
|
5463
5462
|
flex-wrap: wrap;
|
|
5464
5463
|
align-items: center;
|
|
5465
5464
|
gap: 10px;
|
|
5466
5465
|
line-height: 1;
|
|
5467
5466
|
}
|
|
5468
|
-
.ck-
|
|
5467
|
+
.ck-agreement__others-verdict {
|
|
5469
5468
|
display: inline-flex;
|
|
5470
5469
|
align-items: center;
|
|
5471
5470
|
gap: 4px;
|
|
@@ -5474,24 +5473,24 @@ a.tag-mark {
|
|
|
5474
5473
|
font-weight: 500;
|
|
5475
5474
|
color: var(--ck-text);
|
|
5476
5475
|
}
|
|
5477
|
-
.ck-
|
|
5478
|
-
.ck-
|
|
5479
|
-
.ck-
|
|
5480
|
-
.ck-
|
|
5476
|
+
.ck-agreement__others-item--agree .ck-agreement__others-verdict { color: var(--ck-success); }
|
|
5477
|
+
.ck-agreement__others-item--disagree .ck-agreement__others-verdict { color: var(--ck-danger); }
|
|
5478
|
+
.ck-agreement__others-item--borderline .ck-agreement__others-verdict { color: var(--ck-warning); }
|
|
5479
|
+
.ck-agreement__others-by {
|
|
5481
5480
|
color: var(--ck-muted);
|
|
5482
5481
|
}
|
|
5483
|
-
.ck-
|
|
5482
|
+
.ck-agreement__others-stars {
|
|
5484
5483
|
display: inline-flex;
|
|
5485
5484
|
align-items: center;
|
|
5486
5485
|
gap: 2px;
|
|
5487
5486
|
}
|
|
5488
|
-
.ck-
|
|
5489
|
-
.ck-
|
|
5487
|
+
.ck-agreement__others-stars svg { display: block; }
|
|
5488
|
+
.ck-agreement__others-note {
|
|
5490
5489
|
margin: 6px 0 0;
|
|
5491
5490
|
color: var(--ck-dim);
|
|
5492
5491
|
line-height: 1.5;
|
|
5493
5492
|
}
|
|
5494
|
-
.ck-
|
|
5493
|
+
.ck-agreement__meta-link svg {
|
|
5495
5494
|
display: inline-block;
|
|
5496
5495
|
width: 12px;
|
|
5497
5496
|
height: 12px;
|
|
@@ -5500,16 +5499,16 @@ a.tag-mark {
|
|
|
5500
5499
|
position: relative;
|
|
5501
5500
|
top: -1px;
|
|
5502
5501
|
}
|
|
5503
|
-
.ck-
|
|
5504
|
-
.ck-
|
|
5502
|
+
.ck-agreement__meta-link:hover,
|
|
5503
|
+
.ck-agreement__meta-link:focus-visible {
|
|
5505
5504
|
color: var(--ck-accent-hover);
|
|
5506
5505
|
}
|
|
5507
|
-
.ck-
|
|
5506
|
+
.ck-agreement__buttons {
|
|
5508
5507
|
display: flex;
|
|
5509
5508
|
gap: 6px;
|
|
5510
5509
|
flex-wrap: wrap;
|
|
5511
5510
|
}
|
|
5512
|
-
.ck-
|
|
5511
|
+
.ck-agreement__pill {
|
|
5513
5512
|
display: inline-flex;
|
|
5514
5513
|
align-items: center;
|
|
5515
5514
|
gap: 0.4rem;
|
|
@@ -5526,50 +5525,50 @@ a.tag-mark {
|
|
|
5526
5525
|
cursor: pointer;
|
|
5527
5526
|
transition: background 0.12s, border-color 0.12s, color 0.12s;
|
|
5528
5527
|
}
|
|
5529
|
-
.ck-
|
|
5528
|
+
.ck-agreement__pill svg {
|
|
5530
5529
|
width: 14px;
|
|
5531
5530
|
height: 14px;
|
|
5532
5531
|
}
|
|
5533
|
-
.ck-
|
|
5534
|
-
.ck-
|
|
5532
|
+
.ck-agreement__pill:hover,
|
|
5533
|
+
.ck-agreement__pill:focus-visible {
|
|
5535
5534
|
color: var(--ck-text);
|
|
5536
5535
|
border-color: var(--ck-dim);
|
|
5537
5536
|
}
|
|
5538
|
-
.ck-
|
|
5537
|
+
.ck-agreement__pill--agree.is-active {
|
|
5539
5538
|
background: var(--ck-success-soft);
|
|
5540
5539
|
border-color: rgba(45, 212, 168, 0.35);
|
|
5541
5540
|
color: var(--ck-success);
|
|
5542
5541
|
}
|
|
5543
|
-
.ck-
|
|
5542
|
+
.ck-agreement__pill--disagree.is-active {
|
|
5544
5543
|
background: var(--ck-danger-soft);
|
|
5545
5544
|
border-color: rgba(248, 113, 113, 0.35);
|
|
5546
5545
|
color: var(--ck-danger);
|
|
5547
5546
|
}
|
|
5548
|
-
.ck-
|
|
5547
|
+
.ck-agreement__pill--borderline.is-active {
|
|
5549
5548
|
background: var(--ck-warning-soft);
|
|
5550
5549
|
border-color: rgba(224, 164, 88, 0.35);
|
|
5551
5550
|
color: var(--ck-warning);
|
|
5552
5551
|
}
|
|
5553
|
-
.ck-
|
|
5554
|
-
.ck-
|
|
5555
|
-
.ck-
|
|
5556
|
-
.ck-
|
|
5552
|
+
.ck-agreement__pill--agree:hover { border-color: rgba(45, 212, 168, 0.45); color: var(--ck-success); }
|
|
5553
|
+
.ck-agreement__pill--disagree:hover { border-color: rgba(248, 113, 113, 0.45); color: var(--ck-danger); }
|
|
5554
|
+
.ck-agreement__pill--borderline:hover { border-color: rgba(224, 164, 88, 0.45); color: var(--ck-warning); }
|
|
5555
|
+
.ck-agreement__detail {
|
|
5557
5556
|
margin-top: 12px;
|
|
5558
5557
|
display: flex;
|
|
5559
5558
|
flex-direction: column;
|
|
5560
5559
|
gap: 12px;
|
|
5561
5560
|
}
|
|
5562
|
-
.ck-
|
|
5561
|
+
.ck-agreement__detail > * {
|
|
5563
5562
|
margin: 0;
|
|
5564
5563
|
}
|
|
5565
|
-
.ck-
|
|
5564
|
+
.ck-agreement__detail .ck-button {
|
|
5566
5565
|
align-self: flex-start;
|
|
5567
5566
|
}
|
|
5568
|
-
.ck-
|
|
5567
|
+
.ck-agreement__detail textarea {
|
|
5569
5568
|
font-family: var(--ck-mono);
|
|
5570
5569
|
font-size: 0.82rem;
|
|
5571
5570
|
}
|
|
5572
|
-
.ck-
|
|
5571
|
+
.ck-agreement__value {
|
|
5573
5572
|
color: var(--ck-accent);
|
|
5574
5573
|
font-family: var(--ck-mono);
|
|
5575
5574
|
font-weight: 600;
|
|
@@ -5661,7 +5660,7 @@ a.tag-mark {
|
|
|
5661
5660
|
background: linear-gradient(180deg, var(--ck-accent-soft), var(--ck-surface));
|
|
5662
5661
|
}
|
|
5663
5662
|
|
|
5664
|
-
.ck-
|
|
5663
|
+
.ck-agreement__error {
|
|
5665
5664
|
margin: 8px 0 0;
|
|
5666
5665
|
padding: 8px 10px;
|
|
5667
5666
|
background: var(--ck-danger-soft);
|
|
@@ -6001,3 +6000,67 @@ a.tag-mark {
|
|
|
6001
6000
|
width: 2rem;
|
|
6002
6001
|
height: 2rem;
|
|
6003
6002
|
}
|
|
6003
|
+
|
|
6004
|
+
.ck-suggestion-status:empty { display: none; }
|
|
6005
|
+
.ck-suggestion-status {
|
|
6006
|
+
margin-top: 10px;
|
|
6007
|
+
display: flex;
|
|
6008
|
+
align-items: baseline;
|
|
6009
|
+
gap: 10px;
|
|
6010
|
+
flex-wrap: wrap;
|
|
6011
|
+
}
|
|
6012
|
+
|
|
6013
|
+
.ck-scoreboard {
|
|
6014
|
+
margin-bottom: 16px;
|
|
6015
|
+
padding-bottom: 14px;
|
|
6016
|
+
border-bottom: 1px solid var(--ck-line);
|
|
6017
|
+
}
|
|
6018
|
+
.ck-scoreboard__headline {
|
|
6019
|
+
margin: 0 0 8px;
|
|
6020
|
+
font-size: 0.95rem;
|
|
6021
|
+
color: var(--ck-text);
|
|
6022
|
+
}
|
|
6023
|
+
.ck-scoreboard__was {
|
|
6024
|
+
font-family: var(--ck-mono);
|
|
6025
|
+
font-size: 0.74rem;
|
|
6026
|
+
color: var(--ck-muted);
|
|
6027
|
+
margin-left: 6px;
|
|
6028
|
+
}
|
|
6029
|
+
.ck-scoreboard__tally {
|
|
6030
|
+
list-style: none;
|
|
6031
|
+
margin: 0;
|
|
6032
|
+
padding: 0;
|
|
6033
|
+
display: flex;
|
|
6034
|
+
gap: 18px;
|
|
6035
|
+
}
|
|
6036
|
+
.ck-scoreboard__stat {
|
|
6037
|
+
font-family: var(--ck-mono);
|
|
6038
|
+
font-size: 0.72rem;
|
|
6039
|
+
letter-spacing: 0.06em;
|
|
6040
|
+
text-transform: uppercase;
|
|
6041
|
+
color: var(--ck-muted);
|
|
6042
|
+
}
|
|
6043
|
+
.ck-scoreboard__stat strong { color: var(--ck-text); }
|
|
6044
|
+
.ck-scoreboard__stat--break strong { color: var(--ck-warning); }
|
|
6045
|
+
.ck-scoreboard__note {
|
|
6046
|
+
margin: 8px 0 0;
|
|
6047
|
+
font-size: 0.78rem;
|
|
6048
|
+
color: var(--ck-muted);
|
|
6049
|
+
}
|
|
6050
|
+
.ck-version-change {
|
|
6051
|
+
display: inline-flex;
|
|
6052
|
+
align-items: baseline;
|
|
6053
|
+
gap: 0.6rem;
|
|
6054
|
+
}
|
|
6055
|
+
.ck-version-score {
|
|
6056
|
+
font-family: var(--ck-mono);
|
|
6057
|
+
font-size: 0.74rem;
|
|
6058
|
+
color: var(--ck-dim);
|
|
6059
|
+
}
|
|
6060
|
+
.ck-version-score__label {
|
|
6061
|
+
font-size: 0.6rem;
|
|
6062
|
+
letter-spacing: 0.08em;
|
|
6063
|
+
text-transform: uppercase;
|
|
6064
|
+
color: var(--ck-muted);
|
|
6065
|
+
margin-right: 0.2rem;
|
|
6066
|
+
}
|
|
@@ -1,36 +1,36 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
|
-
class
|
|
3
|
-
before_action :
|
|
2
|
+
class AgreementsController < ApplicationController
|
|
3
|
+
before_action :ensure_agreement_enabled
|
|
4
4
|
before_action :set_scope
|
|
5
5
|
|
|
6
6
|
def create
|
|
7
|
-
created_by =
|
|
8
|
-
existing =
|
|
7
|
+
created_by = agreement_creator
|
|
8
|
+
existing = Agreement.find_by(
|
|
9
9
|
run_id: @run.id, response_id: @response.id, metric_id: @metric.id, created_by: created_by
|
|
10
10
|
)
|
|
11
11
|
|
|
12
12
|
if params[:verdict] == "disagree" && params[:corrected_score].blank?
|
|
13
|
-
|
|
13
|
+
render_agreement(agreement: existing, pending_verdict: "disagree")
|
|
14
14
|
return
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
agreement = existing || Agreement.new(
|
|
18
18
|
run: @run, response: @response, metric: @metric, created_by: created_by
|
|
19
19
|
)
|
|
20
|
-
|
|
20
|
+
agreement.assign_attributes(
|
|
21
21
|
metric_version: MetricVersion.ensure_current_for(@metric),
|
|
22
22
|
verdict: params[:verdict],
|
|
23
23
|
corrected_score: params[:corrected_score].presence,
|
|
24
24
|
note: params[:note].presence
|
|
25
25
|
)
|
|
26
26
|
|
|
27
|
-
if
|
|
28
|
-
|
|
27
|
+
if agreement.save
|
|
28
|
+
render_agreement(agreement: agreement, just_saved: true)
|
|
29
29
|
else
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
render_agreement(
|
|
31
|
+
agreement: existing,
|
|
32
32
|
pending_verdict: params[:verdict],
|
|
33
|
-
error:
|
|
33
|
+
error: agreement.errors.full_messages.to_sentence,
|
|
34
34
|
status: :unprocessable_entity
|
|
35
35
|
)
|
|
36
36
|
end
|
|
@@ -38,10 +38,10 @@ module CompletionKit
|
|
|
38
38
|
|
|
39
39
|
private
|
|
40
40
|
|
|
41
|
-
def
|
|
41
|
+
def render_agreement(agreement:, pending_verdict: nil, error: nil, just_saved: false, status: :ok)
|
|
42
42
|
locals = {
|
|
43
43
|
review: review_for_metric,
|
|
44
|
-
|
|
44
|
+
agreement: agreement,
|
|
45
45
|
run: @run,
|
|
46
46
|
response_row: @response,
|
|
47
47
|
metric: @metric,
|
|
@@ -50,14 +50,14 @@ module CompletionKit
|
|
|
50
50
|
just_saved: just_saved
|
|
51
51
|
}
|
|
52
52
|
render turbo_stream: turbo_stream.replace(
|
|
53
|
-
"
|
|
54
|
-
partial: "completion_kit/
|
|
53
|
+
"agreement_#{@response.id}_#{@metric.id}",
|
|
54
|
+
partial: "completion_kit/agreements/buttons",
|
|
55
55
|
locals: locals
|
|
56
56
|
), status: status
|
|
57
57
|
end
|
|
58
58
|
|
|
59
|
-
def
|
|
60
|
-
head :not_found unless CompletionKit.config.
|
|
59
|
+
def ensure_agreement_enabled
|
|
60
|
+
head :not_found unless CompletionKit.config.judge_agreement_enabled
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
def set_scope
|
|
@@ -70,7 +70,7 @@ module CompletionKit
|
|
|
70
70
|
@response.reviews.find_by(metric_id: @metric.id)
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
def
|
|
73
|
+
def agreement_creator
|
|
74
74
|
request.env["HTTP_X_REMOTE_USER"].presence || CompletionKit.config.username.presence || "operator"
|
|
75
75
|
end
|
|
76
76
|
end
|
data/app/controllers/completion_kit/api/v1/{calibrations_controller.rb → agreements_controller.rb}
RENAMED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module Api
|
|
3
3
|
module V1
|
|
4
|
-
class
|
|
5
|
-
before_action :
|
|
4
|
+
class AgreementsController < BaseController
|
|
5
|
+
before_action :ensure_agreement_enabled
|
|
6
6
|
before_action :set_nested_scope, only: [:create]
|
|
7
|
-
before_action :
|
|
7
|
+
before_action :load_agreement, only: [:destroy]
|
|
8
8
|
|
|
9
9
|
def index
|
|
10
|
-
scope =
|
|
10
|
+
scope = Agreement.all
|
|
11
11
|
scope = scope.where(run_id: params[:run_id]) if params[:run_id].present?
|
|
12
12
|
scope = scope.where(response_id: params[:response_id]) if params[:response_id].present?
|
|
13
13
|
scope = scope.where(metric_id: params[:metric_id]) if params[:metric_id].present?
|
|
@@ -18,31 +18,31 @@ module CompletionKit
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def create
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
agreement = scope_agreements.find_or_initialize_by(created_by: created_by_param)
|
|
22
|
+
agreement.assign_attributes(
|
|
23
23
|
run: @run,
|
|
24
24
|
response: @response,
|
|
25
25
|
metric: @metric,
|
|
26
26
|
metric_version: MetricVersion.ensure_current_for(@metric),
|
|
27
|
-
**
|
|
27
|
+
**agreement_params
|
|
28
28
|
)
|
|
29
29
|
|
|
30
|
-
if
|
|
31
|
-
render json:
|
|
30
|
+
if agreement.save
|
|
31
|
+
render json: agreement, status: agreement.previously_new_record? ? :created : :ok
|
|
32
32
|
else
|
|
33
|
-
render_validation_errors(
|
|
33
|
+
render_validation_errors(agreement)
|
|
34
34
|
end
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
def destroy
|
|
38
|
-
@
|
|
38
|
+
@agreement.destroy!
|
|
39
39
|
head :no_content
|
|
40
40
|
end
|
|
41
41
|
|
|
42
42
|
private
|
|
43
43
|
|
|
44
|
-
def
|
|
45
|
-
render_error("
|
|
44
|
+
def ensure_agreement_enabled
|
|
45
|
+
render_error("Agreement disabled", status: :not_found) unless CompletionKit.config.judge_agreement_enabled
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def set_nested_scope
|
|
@@ -53,17 +53,17 @@ module CompletionKit
|
|
|
53
53
|
not_found
|
|
54
54
|
end
|
|
55
55
|
|
|
56
|
-
def
|
|
57
|
-
@
|
|
56
|
+
def load_agreement
|
|
57
|
+
@agreement = Agreement.find(params[:id])
|
|
58
58
|
rescue ActiveRecord::RecordNotFound
|
|
59
59
|
not_found
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
-
def
|
|
63
|
-
|
|
62
|
+
def scope_agreements
|
|
63
|
+
Agreement.where(run_id: @run.id, response_id: @response.id, metric_id: @metric.id)
|
|
64
64
|
end
|
|
65
65
|
|
|
66
|
-
def
|
|
66
|
+
def agreement_params
|
|
67
67
|
params.permit(:verdict, :corrected_score, :note).to_h.symbolize_keys
|
|
68
68
|
end
|
|
69
69
|
|
|
@@ -14,13 +14,8 @@ module CompletionKit
|
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
def publish
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
render json: audit
|
|
20
|
-
else
|
|
21
|
-
@version.publish!
|
|
22
|
-
render json: @version.reload
|
|
23
|
-
end
|
|
17
|
+
@version.publish!
|
|
18
|
+
render json: @version.reload
|
|
24
19
|
end
|
|
25
20
|
|
|
26
21
|
def destroy
|
|
@@ -37,7 +37,7 @@ module CompletionKit
|
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
def suggest_variants
|
|
40
|
-
disagreement_count =
|
|
40
|
+
disagreement_count = Agreement.where(metric_id: @metric.id, verdict: "disagree").count
|
|
41
41
|
if disagreement_count.zero?
|
|
42
42
|
render_error("Mark at least one case as Disagree before asking the model to suggest a change.", status: :unprocessable_entity)
|
|
43
43
|
return
|
|
@@ -39,9 +39,9 @@ module CompletionKit
|
|
|
39
39
|
def show
|
|
40
40
|
@edit_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
|
|
41
41
|
@suggestion_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
|
|
42
|
-
@improve_disagreement_count =
|
|
42
|
+
@improve_disagreement_count = Agreement.where(metric_id: @metric.id, verdict: "disagree").count
|
|
43
43
|
@versions = MetricVersion.where(metric_id: @metric.id).order(version_number: :desc).to_a
|
|
44
|
-
@guiding_examples = CompletionKit.config.judge_examples_from_reviews ?
|
|
44
|
+
@guiding_examples = CompletionKit.config.judge_examples_from_reviews ? MetricAgreementExamples.judge_examples_for(@metric) : []
|
|
45
45
|
end
|
|
46
46
|
|
|
47
47
|
def new
|
|
@@ -52,7 +52,7 @@ module CompletionKit
|
|
|
52
52
|
@suggestion_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "suggestion").order(created_at: :desc).first
|
|
53
53
|
@edit_draft = MetricVersion.drafts.where(metric_id: @metric.id, source: "edit").order(created_at: :desc).first
|
|
54
54
|
@published_metric_version = MetricVersion.published.where(metric_id: @metric.id, current: true).first
|
|
55
|
-
@improve_disagreement_count =
|
|
55
|
+
@improve_disagreement_count = Agreement.where(metric_id: @metric.id, verdict: "disagree").count
|
|
56
56
|
|
|
57
57
|
if @edit_draft
|
|
58
58
|
@metric.instruction = @edit_draft.instruction
|
|
@@ -117,26 +117,22 @@ module CompletionKit
|
|
|
117
117
|
|
|
118
118
|
def suggest_variants
|
|
119
119
|
target = params[:back_to] == "edit" ? edit_metric_path(@metric) : metric_path(@metric)
|
|
120
|
-
|
|
121
|
-
if
|
|
120
|
+
counts = Agreement.where(metric_id: @metric.id, verdict: %w[agree disagree]).group(:verdict).count
|
|
121
|
+
if counts["disagree"].to_i.zero?
|
|
122
122
|
redirect_to target, alert: "Mark at least one case as Disagree before asking the model to suggest a change."
|
|
123
123
|
return
|
|
124
124
|
end
|
|
125
125
|
|
|
126
|
-
|
|
126
|
+
MetricSuggestionJob.perform_later(@metric.id)
|
|
127
127
|
|
|
128
|
-
generator = MetricVariantGenerator.new(@metric, count: 1)
|
|
129
|
-
variants = generator.call
|
|
130
|
-
if variants.empty?
|
|
131
|
-
redirect_to target, alert: "The model returned no usable variants. Try again with a different model."
|
|
132
|
-
return
|
|
133
|
-
end
|
|
134
|
-
versions = generator.persist!(variants)
|
|
135
|
-
new_version = versions.max_by(&:version_number)
|
|
136
128
|
if params[:back_to] == "edit"
|
|
137
|
-
redirect_to
|
|
129
|
+
redirect_to metric_path(@metric), notice: "Drafting a change from your reviews. It will appear here once it's tested."
|
|
138
130
|
else
|
|
139
|
-
|
|
131
|
+
render turbo_stream: turbo_stream.replace(
|
|
132
|
+
"ck-suggestion-status-#{@metric.id}",
|
|
133
|
+
partial: "completion_kit/metrics/suggestion_pending",
|
|
134
|
+
locals: { metric: @metric, count: counts.values.sum }
|
|
135
|
+
)
|
|
140
136
|
end
|
|
141
137
|
end
|
|
142
138
|
|
|
@@ -149,12 +145,12 @@ module CompletionKit
|
|
|
149
145
|
end
|
|
150
146
|
|
|
151
147
|
def exclude_example
|
|
152
|
-
|
|
153
|
-
|
|
148
|
+
agreement = Agreement.where(metric_id: @metric.id).find(params[:agreement_id])
|
|
149
|
+
agreement.update!(excluded_from_examples: true)
|
|
154
150
|
render turbo_stream: turbo_stream.replace(
|
|
155
151
|
"ck-guiding-#{@metric.id}",
|
|
156
152
|
partial: "completion_kit/metrics/guiding_examples",
|
|
157
|
-
locals: { metric: @metric, examples:
|
|
153
|
+
locals: { metric: @metric, examples: MetricAgreementExamples.judge_examples_for(@metric) }
|
|
158
154
|
)
|
|
159
155
|
end
|
|
160
156
|
|
|
@@ -175,13 +171,12 @@ module CompletionKit
|
|
|
175
171
|
reverting = was_published_already && !version.current?
|
|
176
172
|
previously_current = MetricVersion.current.find_by(metric_id: @metric.id)
|
|
177
173
|
|
|
174
|
+
version.publish!
|
|
175
|
+
|
|
178
176
|
if reverting
|
|
179
|
-
audit = version.revert!
|
|
180
|
-
prior_label = previously_current.version_label
|
|
181
177
|
redirect_to metric_path(@metric),
|
|
182
|
-
notice: "
|
|
178
|
+
notice: "#{@metric.name} is back on #{version.version_label}. Its reviews count again; the ones you gave on #{previously_current.version_label} stay with that version."
|
|
183
179
|
else
|
|
184
|
-
version.publish!
|
|
185
180
|
redirect_to metric_path(@metric),
|
|
186
181
|
notice: "#{@metric.name} #{version.version_label} is now the published version."
|
|
187
182
|
end
|
|
@@ -82,10 +82,10 @@ module CompletionKit
|
|
|
82
82
|
private
|
|
83
83
|
|
|
84
84
|
def review_examples_for(metric, response)
|
|
85
|
-
return nil unless CompletionKit.config.
|
|
85
|
+
return nil unless CompletionKit.config.judge_agreement_enabled
|
|
86
86
|
return nil unless CompletionKit.config.judge_examples_from_reviews
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
MetricAgreementExamples.judge_examples_for(metric, exclude_response_id: response.id)
|
|
89
89
|
end
|
|
90
90
|
|
|
91
91
|
def confirm_judging_capability(judge_model_id)
|