completion-kit 0.5.18 → 0.5.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25e795a8d94b2f0984e65d312eb152ffabcda8e7929a52529906ff1fab6cfa4f
4
- data.tar.gz: 91110f1c5736d0c6d3c12ef5aa5a75b66ffe0d565cf5777bd9f2d322936e01d5
3
+ metadata.gz: fe8be5a5838f6a270f5ed934e250f0dd55d673b51ba15ed29579429dadcaaa03
4
+ data.tar.gz: 6044e3c6805e697c1e8e80b28760ab5f749e2ba14bd976b67225f0d3cffb6eff
5
5
  SHA512:
6
- metadata.gz: 87678468d49193e088d4b1c6c28abe5a63ef67de679cfeb894dd64238810d456e7505bae78224e2c330860f177deff8bf834fab0a1c9abdca4e2cafd81e5463b
7
- data.tar.gz: 2755a9885f3242017dce04a91680f4499b5cd8907f1043f9bab41d46651ec234ebe4c0f03886b72a0dac421a8787f56923da884b6d5112f85c409392dbfea969
6
+ metadata.gz: efc0e7b9a1b23eeec3b3ba91ab65d94d2454bec0554cdd8e36ab6d1286b9c208de2e1ea24fec1c19f73d01dc044097df6e2ac004a0de2565bc6e6b4965af1d85
7
+ data.tar.gz: e9fca34118e8a1b10107807e1243f946ad6148f7af8e92d23c3c9bfc20f10f2d91c6db0c4b285996f8267b6511593417a11a7f6c1002b9d8dd2a9f32f5793860
@@ -345,6 +345,176 @@ form.button_to {
345
345
  grid-template-columns: repeat(2, minmax(0, 1fr));
346
346
  }
347
347
 
348
+ .ck-grid--cards-3 {
349
+ grid-template-columns: repeat(3, minmax(0, 1fr));
350
+ }
351
+
352
+ /* ── Dashboard: workspace stat ribbon ─────────────────────────────────
353
+ Replaces the old oversized count cards. One thin instrument strip,
354
+ four navigable segments split by hairline dividers. */
355
+ .ck-statbar {
356
+ display: grid;
357
+ grid-template-columns: repeat(4, minmax(0, 1fr));
358
+ margin-top: 1.5rem;
359
+ border: 1px solid var(--ck-line);
360
+ border-radius: var(--ck-radius-lg);
361
+ background: var(--ck-surface);
362
+ overflow: hidden;
363
+ }
364
+ .ck-statbar__item {
365
+ display: flex;
366
+ flex-direction: column;
367
+ gap: 0.25rem;
368
+ padding: 0.95rem 1.3rem;
369
+ text-decoration: none;
370
+ border-left: 1px solid var(--ck-line);
371
+ transition: background 0.15s ease;
372
+ }
373
+ .ck-statbar__item:first-child { border-left: 0; }
374
+ .ck-statbar__item:hover { background: var(--ck-surface-hover); }
375
+ .ck-statbar__label {
376
+ font-family: var(--ck-mono);
377
+ font-size: 0.68rem;
378
+ letter-spacing: 0.14em;
379
+ text-transform: uppercase;
380
+ color: var(--ck-dim);
381
+ }
382
+ .ck-statbar__value {
383
+ font-family: var(--ck-mono);
384
+ font-size: 1.65rem;
385
+ line-height: 1;
386
+ color: var(--ck-text);
387
+ transition: color 0.15s ease;
388
+ }
389
+ .ck-statbar__item:hover .ck-statbar__value { color: var(--ck-accent); }
390
+
391
+ /* ── Dashboard: pulse cards (activity / worst metric / failed reviews) ── */
392
+ .ck-pulse-grid { margin-top: 0.75rem; }
393
+
394
+ .ck-stat-card {
395
+ display: flex;
396
+ flex-direction: column;
397
+ min-height: 10.5rem;
398
+ }
399
+ .ck-stat-card .ck-kicker { margin-bottom: auto; }
400
+ .ck-stat-card__foot {
401
+ margin-top: auto;
402
+ padding-top: 0.85rem;
403
+ font-family: var(--ck-mono);
404
+ font-size: 0.78rem;
405
+ color: var(--ck-muted);
406
+ }
407
+ .ck-stat-card__figure { color: var(--ck-text); }
408
+
409
+ .ck-stat-card__body {
410
+ display: flex;
411
+ align-items: baseline;
412
+ gap: 0.6rem;
413
+ margin: 0.9rem 0 0.3rem;
414
+ }
415
+ .ck-stat-card__metric {
416
+ font-size: 1.2rem;
417
+ line-height: 1.25;
418
+ color: var(--ck-text);
419
+ }
420
+ .ck-stat-card__metric--empty { color: var(--ck-dim); }
421
+ .ck-stat-card__score { align-self: center; }
422
+ .ck-stat-card__count {
423
+ font-family: var(--ck-mono);
424
+ font-size: 2.6rem;
425
+ line-height: 1;
426
+ }
427
+ .ck-stat-card__count.is-clean { color: var(--ck-success, #4ade80); }
428
+ .ck-stat-card__count.is-danger { color: var(--ck-danger, #f87171); }
429
+
430
+ /* Dashboard activity sparkline — fixed-height row of bars, height set
431
+ inline per day. Bars hug the bottom; quiet days collapse to a sliver.
432
+ The busiest day(s) get the bright accent so the peak reads instantly. */
433
+ .ck-sparkline {
434
+ display: flex;
435
+ align-items: flex-end;
436
+ gap: 3px;
437
+ height: 3.5rem;
438
+ margin: 0.9rem 0 0.3rem;
439
+ border-bottom: 1px solid var(--ck-line);
440
+ padding-bottom: 1px;
441
+ }
442
+ .ck-sparkline__bar {
443
+ flex: 1;
444
+ min-height: 2px;
445
+ background: var(--ck-line-strong);
446
+ border-radius: 2px 2px 0 0;
447
+ transition: background 0.15s ease;
448
+ }
449
+ .ck-sparkline__bar.is-peak { background: var(--ck-accent); }
450
+ .ck-sparkline__bar:hover { background: var(--ck-accent-hover); }
451
+
452
+ /* One orchestrated page-load reveal — the dashboard "boots up". */
453
+ @keyframes ck-rise {
454
+ from { opacity: 0; transform: translateY(8px); }
455
+ to { opacity: 1; transform: translateY(0); }
456
+ }
457
+ .ck-rise {
458
+ animation: ck-rise 0.32s ease both;
459
+ animation-delay: var(--rise-delay, 0ms);
460
+ }
461
+ @media (prefers-reduced-motion: reduce) {
462
+ .ck-rise { animation: none; }
463
+ }
464
+
465
+ /* ── Dashboard: prompt improvements list ──────────────────────────────
466
+ One row per family that improved version-over-version. Name takes the
467
+ slack; version transition, score transition, and delta sit to the right. */
468
+ .ck-improvements {
469
+ list-style: none;
470
+ margin: 0.85rem 0 0;
471
+ padding: 0;
472
+ }
473
+ .ck-improvement {
474
+ display: flex;
475
+ align-items: baseline;
476
+ gap: 1.1rem;
477
+ padding: 0.7rem 0;
478
+ border-top: 1px solid var(--ck-line);
479
+ font-family: var(--ck-mono);
480
+ font-size: 0.85rem;
481
+ }
482
+ .ck-improvement:first-child { border-top: 0; }
483
+ .ck-improvement__name {
484
+ flex: 1;
485
+ min-width: 0;
486
+ overflow: hidden;
487
+ text-overflow: ellipsis;
488
+ white-space: nowrap;
489
+ }
490
+ .ck-improvement__versions {
491
+ color: var(--ck-dim);
492
+ font-size: 0.78rem;
493
+ white-space: nowrap;
494
+ }
495
+ .ck-improvement__scores {
496
+ display: inline-flex;
497
+ align-items: baseline;
498
+ gap: 0.4rem;
499
+ color: var(--ck-muted);
500
+ white-space: nowrap;
501
+ }
502
+ .ck-improvement__arrow { color: var(--ck-dim); }
503
+ .ck-improvement__to { color: var(--ck-text); }
504
+ .ck-improvement__delta {
505
+ min-width: 5rem;
506
+ text-align: right;
507
+ }
508
+ .ck-improvement__delta.is-gain { color: var(--ck-success); }
509
+ .ck-improvement__delta.is-loss { color: var(--ck-danger); }
510
+ .ck-improvements__empty {
511
+ margin: 0.85rem 0 0;
512
+ max-width: 46rem;
513
+ color: var(--ck-muted);
514
+ font-size: 0.85rem;
515
+ line-height: 1.6;
516
+ }
517
+
348
518
  .ck-grid--spaced,
349
519
  .ck-card--spaced,
350
520
  .ck-empty--spaced {
@@ -608,15 +778,18 @@ tr:hover .ck-chip--publish {
608
778
  max-width: 380px;
609
779
  }
610
780
 
781
+ /* Login brand lockup — the standard puzzle logo + two-tone wordmark,
782
+ stacked and centred above the sign-in form. */
611
783
  .ck-login__brand {
612
- font-family: var(--ck-mono);
613
- font-size: 1.1rem;
614
- font-weight: 700;
615
- color: var(--ck-accent);
616
- text-transform: uppercase;
617
- letter-spacing: 0.04em;
618
- text-align: center;
619
- margin: 0 0 2rem;
784
+ display: flex;
785
+ flex-direction: column;
786
+ align-items: center;
787
+ gap: 0.45rem;
788
+ margin: 0 auto 2rem;
789
+ }
790
+ .ck-login__brand .ck-brand__name {
791
+ padding-top: 0;
792
+ font-size: 1.4rem;
620
793
  }
621
794
 
622
795
  .ck-login__form {
@@ -0,0 +1,99 @@
1
+ module CompletionKit
2
+ # Read-only aggregate queries powering the standalone dashboard cards.
3
+ # Each method is a small, scoped query — nothing here writes or caches.
4
+ class DashboardStats
5
+ # Runs per calendar day for the trailing `days` window, oldest first.
6
+ # Always returns one entry per day (count 0 for quiet days) so callers
7
+ # can render a fixed-width sparkline.
8
+ def self.activity(days: 14)
9
+ since = (days - 1).days.ago.to_date
10
+ counts = Run.where("created_at >= ?", since.beginning_of_day)
11
+ .group("DATE(created_at)")
12
+ .count
13
+ (0...days).map do |offset|
14
+ date = since + offset
15
+ { date: date, count: counts[date] || counts[date.to_s] || 0 }
16
+ end
17
+ end
18
+
19
+ # The metric with the lowest average judge score across succeeded reviews
20
+ # in the window — the prompt-engineering target. Returns nil when there
21
+ # are no scored reviews. `response` is the single worst-scoring response
22
+ # for that metric, for a deep link.
23
+ def self.worst_metric(since:)
24
+ averages = scored_reviews_since(since).group(:metric_name).average(:ai_score)
25
+ return nil if averages.empty?
26
+
27
+ name, avg = averages.min_by { |_, value| value }
28
+ # averages is non-empty, so at least one review carries this
29
+ # metric_name — worst is always present here.
30
+ worst = scored_reviews_since(since)
31
+ .where(metric_name: name)
32
+ .order(:ai_score)
33
+ .first
34
+ {
35
+ name: name,
36
+ avg: avg.to_f.round(2),
37
+ response: worst.response,
38
+ score: worst.ai_score.to_f
39
+ }
40
+ end
41
+
42
+ # Reviews that terminally failed in the window — parse failures, judge
43
+ # truncations, provider errors. Invisible on the dashboard otherwise.
44
+ def self.failed_review_count(since:)
45
+ Review.where(status: "failed").where("created_at >= ?", since).count
46
+ end
47
+
48
+ # The most recent measurable change per prompt family — gains and
49
+ # regressions both. For each family the comparison is:
50
+ # * latest scored version vs the published version, when a draft sits
51
+ # ahead of what's live ("is my work-in-progress better?")
52
+ # * published vs the previous scored version, when the latest version
53
+ # IS the published one ("did my last publish help?")
54
+ # Biggest movement first. Empty until something has been iterated and
55
+ # re-judged on both sides of the comparison.
56
+ def self.prompt_changes(limit: 5)
57
+ scores = Review.joins(response: :run)
58
+ .where(status: "succeeded")
59
+ .where.not(ai_score: nil)
60
+ .group("completion_kit_runs.prompt_id")
61
+ .average(:ai_score)
62
+ return [] if scores.empty?
63
+
64
+ Prompt.where(id: scores.keys).group_by(&:family_key).filter_map do |_key, versions|
65
+ scored = versions.select { |v| scores[v.id] }.sort_by(&:version_number)
66
+ next if scored.size < 2
67
+
68
+ candidate = scored.last
69
+ published = versions.find(&:current?)
70
+ baseline =
71
+ if published && published != candidate && scores[published.id]
72
+ published
73
+ else
74
+ scored[-2]
75
+ end
76
+
77
+ delta = (scores[candidate.id] - scores[baseline.id]).to_f.round(2)
78
+ next if delta.zero?
79
+
80
+ {
81
+ prompt: candidate,
82
+ from_version: baseline.version_number,
83
+ to_version: candidate.version_number,
84
+ from_score: scores[baseline.id].to_f.round(2),
85
+ to_score: scores[candidate.id].to_f.round(2),
86
+ delta: delta
87
+ }
88
+ end.sort_by { |row| -row[:delta].abs }.first(limit)
89
+ end
90
+
91
+ def self.scored_reviews_since(since)
92
+ Review.joins(:response)
93
+ .where(status: "succeeded")
94
+ .where("completion_kit_reviews.created_at >= ?", since)
95
+ .where.not(ai_score: nil)
96
+ end
97
+ private_class_method :scored_reviews_since
98
+ end
99
+ end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.18"
2
+ VERSION = "0.5.19"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.18
4
+ version: 0.5.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -281,6 +281,7 @@ files:
281
281
  - app/services/completion_kit/anthropic_client.rb
282
282
  - app/services/completion_kit/api_config.rb
283
283
  - app/services/completion_kit/csv_processor.rb
284
+ - app/services/completion_kit/dashboard_stats.rb
284
285
  - app/services/completion_kit/judge_service.rb
285
286
  - app/services/completion_kit/llm_client.rb
286
287
  - app/services/completion_kit/mcp_dispatcher.rb