completion-kit 0.5.18 → 0.5.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fe8be5a5838f6a270f5ed934e250f0dd55d673b51ba15ed29579429dadcaaa03
|
|
4
|
+
data.tar.gz: 6044e3c6805e697c1e8e80b28760ab5f749e2ba14bd976b67225f0d3cffb6eff
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: efc0e7b9a1b23eeec3b3ba91ab65d94d2454bec0554cdd8e36ab6d1286b9c208de2e1ea24fec1c19f73d01dc044097df6e2ac004a0de2565bc6e6b4965af1d85
|
|
7
|
+
data.tar.gz: e9fca34118e8a1b10107807e1243f946ad6148f7af8e92d23c3c9bfc20f10f2d91c6db0c4b285996f8267b6511593417a11a7f6c1002b9d8dd2a9f32f5793860
|
|
@@ -345,6 +345,176 @@ form.button_to {
|
|
|
345
345
|
grid-template-columns: repeat(2, minmax(0, 1fr));
|
|
346
346
|
}
|
|
347
347
|
|
|
348
|
+
.ck-grid--cards-3 {
|
|
349
|
+
grid-template-columns: repeat(3, minmax(0, 1fr));
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
/* ── Dashboard: workspace stat ribbon ─────────────────────────────────
|
|
353
|
+
Replaces the old oversized count cards. One thin instrument strip,
|
|
354
|
+
four navigable segments split by hairline dividers. */
|
|
355
|
+
.ck-statbar {
|
|
356
|
+
display: grid;
|
|
357
|
+
grid-template-columns: repeat(4, minmax(0, 1fr));
|
|
358
|
+
margin-top: 1.5rem;
|
|
359
|
+
border: 1px solid var(--ck-line);
|
|
360
|
+
border-radius: var(--ck-radius-lg);
|
|
361
|
+
background: var(--ck-surface);
|
|
362
|
+
overflow: hidden;
|
|
363
|
+
}
|
|
364
|
+
.ck-statbar__item {
|
|
365
|
+
display: flex;
|
|
366
|
+
flex-direction: column;
|
|
367
|
+
gap: 0.25rem;
|
|
368
|
+
padding: 0.95rem 1.3rem;
|
|
369
|
+
text-decoration: none;
|
|
370
|
+
border-left: 1px solid var(--ck-line);
|
|
371
|
+
transition: background 0.15s ease;
|
|
372
|
+
}
|
|
373
|
+
.ck-statbar__item:first-child { border-left: 0; }
|
|
374
|
+
.ck-statbar__item:hover { background: var(--ck-surface-hover); }
|
|
375
|
+
.ck-statbar__label {
|
|
376
|
+
font-family: var(--ck-mono);
|
|
377
|
+
font-size: 0.68rem;
|
|
378
|
+
letter-spacing: 0.14em;
|
|
379
|
+
text-transform: uppercase;
|
|
380
|
+
color: var(--ck-dim);
|
|
381
|
+
}
|
|
382
|
+
.ck-statbar__value {
|
|
383
|
+
font-family: var(--ck-mono);
|
|
384
|
+
font-size: 1.65rem;
|
|
385
|
+
line-height: 1;
|
|
386
|
+
color: var(--ck-text);
|
|
387
|
+
transition: color 0.15s ease;
|
|
388
|
+
}
|
|
389
|
+
.ck-statbar__item:hover .ck-statbar__value { color: var(--ck-accent); }
|
|
390
|
+
|
|
391
|
+
/* ── Dashboard: pulse cards (activity / worst metric / failed reviews) ── */
|
|
392
|
+
.ck-pulse-grid { margin-top: 0.75rem; }
|
|
393
|
+
|
|
394
|
+
.ck-stat-card {
|
|
395
|
+
display: flex;
|
|
396
|
+
flex-direction: column;
|
|
397
|
+
min-height: 10.5rem;
|
|
398
|
+
}
|
|
399
|
+
.ck-stat-card .ck-kicker { margin-bottom: auto; }
|
|
400
|
+
.ck-stat-card__foot {
|
|
401
|
+
margin-top: auto;
|
|
402
|
+
padding-top: 0.85rem;
|
|
403
|
+
font-family: var(--ck-mono);
|
|
404
|
+
font-size: 0.78rem;
|
|
405
|
+
color: var(--ck-muted);
|
|
406
|
+
}
|
|
407
|
+
.ck-stat-card__figure { color: var(--ck-text); }
|
|
408
|
+
|
|
409
|
+
.ck-stat-card__body {
|
|
410
|
+
display: flex;
|
|
411
|
+
align-items: baseline;
|
|
412
|
+
gap: 0.6rem;
|
|
413
|
+
margin: 0.9rem 0 0.3rem;
|
|
414
|
+
}
|
|
415
|
+
.ck-stat-card__metric {
|
|
416
|
+
font-size: 1.2rem;
|
|
417
|
+
line-height: 1.25;
|
|
418
|
+
color: var(--ck-text);
|
|
419
|
+
}
|
|
420
|
+
.ck-stat-card__metric--empty { color: var(--ck-dim); }
|
|
421
|
+
.ck-stat-card__score { align-self: center; }
|
|
422
|
+
.ck-stat-card__count {
|
|
423
|
+
font-family: var(--ck-mono);
|
|
424
|
+
font-size: 2.6rem;
|
|
425
|
+
line-height: 1;
|
|
426
|
+
}
|
|
427
|
+
.ck-stat-card__count.is-clean { color: var(--ck-success, #4ade80); }
|
|
428
|
+
.ck-stat-card__count.is-danger { color: var(--ck-danger, #f87171); }
|
|
429
|
+
|
|
430
|
+
/* Dashboard activity sparkline — fixed-height row of bars, height set
|
|
431
|
+
inline per day. Bars hug the bottom; quiet days collapse to a sliver.
|
|
432
|
+
The busiest day(s) get the bright accent so the peak reads instantly. */
|
|
433
|
+
.ck-sparkline {
|
|
434
|
+
display: flex;
|
|
435
|
+
align-items: flex-end;
|
|
436
|
+
gap: 3px;
|
|
437
|
+
height: 3.5rem;
|
|
438
|
+
margin: 0.9rem 0 0.3rem;
|
|
439
|
+
border-bottom: 1px solid var(--ck-line);
|
|
440
|
+
padding-bottom: 1px;
|
|
441
|
+
}
|
|
442
|
+
.ck-sparkline__bar {
|
|
443
|
+
flex: 1;
|
|
444
|
+
min-height: 2px;
|
|
445
|
+
background: var(--ck-line-strong);
|
|
446
|
+
border-radius: 2px 2px 0 0;
|
|
447
|
+
transition: background 0.15s ease;
|
|
448
|
+
}
|
|
449
|
+
.ck-sparkline__bar.is-peak { background: var(--ck-accent); }
|
|
450
|
+
.ck-sparkline__bar:hover { background: var(--ck-accent-hover); }
|
|
451
|
+
|
|
452
|
+
/* One orchestrated page-load reveal — the dashboard "boots up". */
|
|
453
|
+
@keyframes ck-rise {
|
|
454
|
+
from { opacity: 0; transform: translateY(8px); }
|
|
455
|
+
to { opacity: 1; transform: translateY(0); }
|
|
456
|
+
}
|
|
457
|
+
.ck-rise {
|
|
458
|
+
animation: ck-rise 0.32s ease both;
|
|
459
|
+
animation-delay: var(--rise-delay, 0ms);
|
|
460
|
+
}
|
|
461
|
+
@media (prefers-reduced-motion: reduce) {
|
|
462
|
+
.ck-rise { animation: none; }
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
/* ── Dashboard: prompt improvements list ──────────────────────────────
|
|
466
|
+
One row per family that improved version-over-version. Name takes the
|
|
467
|
+
slack; version transition, score transition, and delta sit to the right. */
|
|
468
|
+
.ck-improvements {
|
|
469
|
+
list-style: none;
|
|
470
|
+
margin: 0.85rem 0 0;
|
|
471
|
+
padding: 0;
|
|
472
|
+
}
|
|
473
|
+
.ck-improvement {
|
|
474
|
+
display: flex;
|
|
475
|
+
align-items: baseline;
|
|
476
|
+
gap: 1.1rem;
|
|
477
|
+
padding: 0.7rem 0;
|
|
478
|
+
border-top: 1px solid var(--ck-line);
|
|
479
|
+
font-family: var(--ck-mono);
|
|
480
|
+
font-size: 0.85rem;
|
|
481
|
+
}
|
|
482
|
+
.ck-improvement:first-child { border-top: 0; }
|
|
483
|
+
.ck-improvement__name {
|
|
484
|
+
flex: 1;
|
|
485
|
+
min-width: 0;
|
|
486
|
+
overflow: hidden;
|
|
487
|
+
text-overflow: ellipsis;
|
|
488
|
+
white-space: nowrap;
|
|
489
|
+
}
|
|
490
|
+
.ck-improvement__versions {
|
|
491
|
+
color: var(--ck-dim);
|
|
492
|
+
font-size: 0.78rem;
|
|
493
|
+
white-space: nowrap;
|
|
494
|
+
}
|
|
495
|
+
.ck-improvement__scores {
|
|
496
|
+
display: inline-flex;
|
|
497
|
+
align-items: baseline;
|
|
498
|
+
gap: 0.4rem;
|
|
499
|
+
color: var(--ck-muted);
|
|
500
|
+
white-space: nowrap;
|
|
501
|
+
}
|
|
502
|
+
.ck-improvement__arrow { color: var(--ck-dim); }
|
|
503
|
+
.ck-improvement__to { color: var(--ck-text); }
|
|
504
|
+
.ck-improvement__delta {
|
|
505
|
+
min-width: 5rem;
|
|
506
|
+
text-align: right;
|
|
507
|
+
}
|
|
508
|
+
.ck-improvement__delta.is-gain { color: var(--ck-success, #4ade80); }
|
|
509
|
+
.ck-improvement__delta.is-loss { color: var(--ck-danger, #f87171); }
|
|
510
|
+
.ck-improvements__empty {
|
|
511
|
+
margin: 0.85rem 0 0;
|
|
512
|
+
max-width: 46rem;
|
|
513
|
+
color: var(--ck-muted);
|
|
514
|
+
font-size: 0.85rem;
|
|
515
|
+
line-height: 1.6;
|
|
516
|
+
}
|
|
517
|
+
|
|
348
518
|
.ck-grid--spaced,
|
|
349
519
|
.ck-card--spaced,
|
|
350
520
|
.ck-empty--spaced {
|
|
@@ -608,15 +778,18 @@ tr:hover .ck-chip--publish {
|
|
|
608
778
|
max-width: 380px;
|
|
609
779
|
}
|
|
610
780
|
|
|
781
|
+
/* Login brand lockup — the standard puzzle logo + two-tone wordmark,
|
|
782
|
+
stacked and centred above the sign-in form. */
|
|
611
783
|
.ck-login__brand {
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
784
|
+
display: flex;
|
|
785
|
+
flex-direction: column;
|
|
786
|
+
align-items: center;
|
|
787
|
+
gap: 0.45rem;
|
|
788
|
+
margin: 0 auto 2rem;
|
|
789
|
+
}
|
|
790
|
+
.ck-login__brand .ck-brand__name {
|
|
791
|
+
padding-top: 0;
|
|
792
|
+
font-size: 1.4rem;
|
|
620
793
|
}
|
|
621
794
|
|
|
622
795
|
.ck-login__form {
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
module CompletionKit
  # Read-only aggregate queries powering the standalone dashboard cards.
  # Each method is a small, scoped query — nothing here writes or caches.
  class DashboardStats
    # Runs per calendar day for the trailing `days` window, oldest first.
    # Always returns one entry per day (count 0 for quiet days) so callers
    # can render a fixed-width sparkline.
    #
    # @param days [Integer] window length in days, today included
    # @return [Array<Hash>] `{ date:, count: }` rows, oldest first; empty
    #   when `days` is zero or negative
    def self.activity(days: 14)
      # An empty window has nothing to chart, so skip the query altogether.
      return [] unless days.positive?

      since = (days - 1).days.ago.to_date
      # NOTE(review): DATE(created_at) is evaluated by the database while
      # `since` is derived on the Ruby side. If the two use different time
      # zones, runs close to midnight may land in a neighbouring bucket —
      # confirm against the host app's configuration.
      counts = Run.where("created_at >= ?", since.beginning_of_day)
                  .group("DATE(created_at)")
                  .count
      (0...days).map do |offset|
        date = since + offset
        # The grouped key is looked up both as a Date and as its string
        # form — presumably because adapters differ in what DATE() returns.
        { date: date, count: counts[date] || counts[date.to_s] || 0 }
      end
    end

    # The metric with the lowest average judge score across succeeded reviews
    # in the window — the prompt-engineering target. `response` is the single
    # worst-scoring response for that metric, for a deep link.
    #
    # @param since [Time, Date] lower bound on the review's `created_at`
    # @return [Hash, nil] `{ name:, avg:, response:, score: }`, or nil when
    #   there are no scored reviews in the window
    def self.worst_metric(since:)
      averages = scored_reviews_since(since).group(:metric_name).average(:ai_score)
      return nil if averages.empty?

      name, avg = averages.min_by { |_, value| value }
      # averages is non-empty, so at least one review carries this
      # metric_name — worst is always present here. The secondary :id order
      # keeps the deep link stable when several reviews share the lowest
      # score.
      worst = scored_reviews_since(since)
              .where(metric_name: name)
              .order(:ai_score, :id)
              .first
      {
        name: name,
        avg: avg.to_f.round(2),
        response: worst.response,
        score: worst.ai_score.to_f
      }
    end

    # Reviews that terminally failed in the window — parse failures, judge
    # truncations, provider errors. Invisible on the dashboard otherwise.
    #
    # @param since [Time, Date] lower bound on the review's `created_at`
    # @return [Integer] number of reviews with status "failed"
    def self.failed_review_count(since:)
      Review.where(status: "failed").where("created_at >= ?", since).count
    end

    # The most recent measurable change per prompt family — gains and
    # regressions both. For each family the comparison is:
    #   * latest scored version vs the published version, when a draft sits
    #     ahead of what's live ("is my work-in-progress better?")
    #   * published vs the previous scored version, when the latest version
    #     IS the published one ("did my last publish help?")
    # Biggest movement first. Empty until something has been iterated and
    # re-judged on both sides of the comparison.
    #
    # @param limit [Integer] maximum number of rows to return
    # @return [Array<Hash>] rows of `{ prompt:, from_version:, to_version:,
    #   from_score:, to_score:, delta: }`
    def self.prompt_changes(limit: 5)
      # Average judge score per prompt id, over succeeded and scored reviews.
      scores = Review.joins(response: :run)
                     .where(status: "succeeded")
                     .where.not(ai_score: nil)
                     .group("completion_kit_runs.prompt_id")
                     .average(:ai_score)
      return [] if scores.empty?

      rows = Prompt.where(id: scores.keys).group_by(&:family_key).filter_map do |_key, versions|
        scored = versions.select { |v| scores[v.id] }.sort_by(&:version_number)
        # A family needs two scored versions before anything can be compared.
        next if scored.size < 2

        candidate = scored.last
        published = versions.find(&:current?)
        baseline =
          if published && published != candidate && scores[published.id]
            published
          else
            scored[-2]
          end

        delta = (scores[candidate.id] - scores[baseline.id]).to_f.round(2)
        # Movements that round away to zero are not worth a row.
        next if delta.zero?

        {
          prompt: candidate,
          from_version: baseline.version_number,
          to_version: candidate.version_number,
          from_score: scores[baseline.id].to_f.round(2),
          to_score: scores[candidate.id].to_f.round(2),
          delta: delta
        }
      end

      # sort_by is not stable, so equal movements are tie-broken on the
      # prompt id to keep the list order identical from one render to the next.
      rows.sort_by { |row| [-row[:delta].abs, row[:prompt].id] }.first(limit)
    end

    # Succeeded reviews created at or after `since` that carry a judge score,
    # inner-joined to their response.
    def self.scored_reviews_since(since)
      Review.joins(:response)
            .where(status: "succeeded")
            .where("completion_kit_reviews.created_at >= ?", since)
            .where.not(ai_score: nil)
    end
    private_class_method :scored_reviews_since
  end
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: completion-kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.18
|
|
4
|
+
version: 0.5.19
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Damien Bastin
|
|
@@ -281,6 +281,7 @@ files:
|
|
|
281
281
|
- app/services/completion_kit/anthropic_client.rb
|
|
282
282
|
- app/services/completion_kit/api_config.rb
|
|
283
283
|
- app/services/completion_kit/csv_processor.rb
|
|
284
|
+
- app/services/completion_kit/dashboard_stats.rb
|
|
284
285
|
- app/services/completion_kit/judge_service.rb
|
|
285
286
|
- app/services/completion_kit/llm_client.rb
|
|
286
287
|
- app/services/completion_kit/mcp_dispatcher.rb
|