completion-kit 0.5.18 → 0.5.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 25e795a8d94b2f0984e65d312eb152ffabcda8e7929a52529906ff1fab6cfa4f
4
- data.tar.gz: 91110f1c5736d0c6d3c12ef5aa5a75b66ffe0d565cf5777bd9f2d322936e01d5
3
+ metadata.gz: b8563167a68aab93caf0e05022c5f1216d096eaae162173e39c5061ba253f3e0
4
+ data.tar.gz: 7de1eb04e23d658744195330b7d3f0383d9c02c5e3d3a78788aaf96bc9a0d1b0
5
5
  SHA512:
6
- metadata.gz: 87678468d49193e088d4b1c6c28abe5a63ef67de679cfeb894dd64238810d456e7505bae78224e2c330860f177deff8bf834fab0a1c9abdca4e2cafd81e5463b
7
- data.tar.gz: 2755a9885f3242017dce04a91680f4499b5cd8907f1043f9bab41d46651ec234ebe4c0f03886b72a0dac421a8787f56923da884b6d5112f85c409392dbfea969
6
+ metadata.gz: 0ad34abea12f77b56701f313aa471193e384698a0a2add21724510f210018ee86de287d7fc449df5574a0d9ca12c90883c16d8170b53f254852659b9c4e76ca1
7
+ data.tar.gz: 3b4118ce8416f45bf42dff0a88dbe20f0d2061cf66f456f5070c3dc51c80dc81e400329ea342b476a77fc44aa461ad161d784e8fe3c50e6d8cfff9df3331b042
@@ -345,6 +345,185 @@ form.button_to {
345
345
  grid-template-columns: repeat(2, minmax(0, 1fr));
346
346
  }
347
347
 
348
+ .ck-grid--cards-3 {
349
+ grid-template-columns: repeat(3, minmax(0, 1fr));
350
+ }
351
+
352
+ /* ── Dashboard: workspace stat ribbon ─────────────────────────────────
353
+ Replaces the old oversized count cards. One thin instrument strip,
354
+ four navigable segments split by hairline dividers. */
355
+ .ck-statbar {
356
+ display: grid;
357
+ grid-template-columns: repeat(4, minmax(0, 1fr));
358
+ margin-top: 1.5rem;
359
+ border: 1px solid var(--ck-line);
360
+ border-radius: var(--ck-radius-lg);
361
+ background: var(--ck-surface);
362
+ overflow: hidden;
363
+ }
364
+ .ck-statbar__item {
365
+ display: flex;
366
+ flex-direction: column;
367
+ gap: 0.25rem;
368
+ padding: 0.95rem 1.3rem;
369
+ text-decoration: none;
370
+ border-left: 1px solid var(--ck-line);
371
+ transition: background 0.15s ease;
372
+ }
373
+ .ck-statbar__item:first-child { border-left: 0; }
374
+ .ck-statbar__item:hover { background: var(--ck-surface-hover); }
375
+ .ck-statbar__label {
376
+ font-family: var(--ck-mono);
377
+ font-size: 0.68rem;
378
+ letter-spacing: 0.14em;
379
+ text-transform: uppercase;
380
+ color: var(--ck-dim);
381
+ }
382
+ .ck-statbar__value {
383
+ font-family: var(--ck-mono);
384
+ font-size: 1.65rem;
385
+ line-height: 1;
386
+ color: var(--ck-text);
387
+ transition: color 0.15s ease;
388
+ }
389
+ .ck-statbar__item:hover .ck-statbar__value { color: var(--ck-accent); }
390
+
391
+ /* ── Dashboard: pulse cards (activity / worst metric / failed reviews) ── */
392
+ .ck-pulse-grid { margin-top: 0.75rem; }
393
+
394
+ .ck-stat-card {
395
+ display: flex;
396
+ flex-direction: column;
397
+ min-height: 10.5rem;
398
+ }
399
+ .ck-stat-card .ck-kicker { margin-bottom: auto; }
400
+ .ck-stat-card__foot {
401
+ margin-top: auto;
402
+ padding-top: 0.85rem;
403
+ font-family: var(--ck-mono);
404
+ font-size: 0.78rem;
405
+ color: var(--ck-muted);
406
+ }
407
+ .ck-stat-card__foot--split {
408
+ display: flex;
409
+ align-items: center;
410
+ gap: 0.55rem;
411
+ }
412
+ .ck-stat-card__foot--split > :first-child {
413
+ margin-right: auto;
414
+ min-width: 0;
415
+ }
416
+ .ck-stat-card__figure { color: var(--ck-text); }
417
+
418
+ .ck-stat-card__body {
419
+ display: flex;
420
+ align-items: baseline;
421
+ gap: 0.6rem;
422
+ margin: 0.9rem 0 0.3rem;
423
+ }
424
+ .ck-stat-card__metric {
425
+ font-size: 1.2rem;
426
+ line-height: 1.25;
427
+ color: var(--ck-text);
428
+ }
429
+ .ck-stat-card__metric--empty { color: var(--ck-dim); }
430
+ .ck-stat-card__score { align-self: center; }
431
+ .ck-stat-card__count {
432
+ font-family: var(--ck-mono);
433
+ font-size: 2.6rem;
434
+ line-height: 1;
435
+ }
436
+ .ck-stat-card__count.is-clean { color: var(--ck-success, #4ade80); }
437
+ .ck-stat-card__count.is-danger { color: var(--ck-danger, #f87171); }
438
+
439
+ /* Dashboard activity sparkline — fixed-height row of bars, height set
440
+ inline per day. Bars hug the bottom; quiet days collapse to a sliver.
441
+ The busiest day(s) get the bright accent so the peak reads instantly. */
442
+ .ck-sparkline {
443
+ display: flex;
444
+ align-items: flex-end;
445
+ gap: 3px;
446
+ height: 3.5rem;
447
+ margin: 0.9rem 0 0.3rem;
448
+ border-bottom: 1px solid var(--ck-line);
449
+ padding-bottom: 1px;
450
+ }
451
+ .ck-sparkline__bar {
452
+ flex: 1;
453
+ min-height: 2px;
454
+ background: var(--ck-line-strong);
455
+ border-radius: 2px 2px 0 0;
456
+ transition: background 0.15s ease;
457
+ }
458
+ .ck-sparkline__bar.is-peak { background: var(--ck-accent); }
459
+ .ck-sparkline__bar:hover { background: var(--ck-accent-hover); }
460
+
461
+ /* One orchestrated page-load reveal — the dashboard "boots up". */
462
+ @keyframes ck-rise {
463
+ from { opacity: 0; transform: translateY(8px); }
464
+ to { opacity: 1; transform: translateY(0); }
465
+ }
466
+ .ck-rise {
467
+ animation: ck-rise 0.32s ease both;
468
+ animation-delay: var(--rise-delay, 0ms);
469
+ }
470
+ @media (prefers-reduced-motion: reduce) {
471
+ .ck-rise { animation: none; }
472
+ }
473
+
474
+ /* ── Dashboard: prompt improvements list ──────────────────────────────
475
+ One row per family that improved version-over-version. Name takes the
476
+ slack; version transition, score transition, and delta sit to the right. */
477
+ .ck-improvements {
478
+ list-style: none;
479
+ margin: 0.85rem 0 0;
480
+ padding: 0;
481
+ }
482
+ .ck-improvement {
483
+ display: flex;
484
+ align-items: baseline;
485
+ gap: 1.1rem;
486
+ padding: 0.7rem 0;
487
+ border-top: 1px solid var(--ck-line);
488
+ font-family: var(--ck-mono);
489
+ font-size: 0.85rem;
490
+ }
491
+ .ck-improvement:first-child { border-top: 0; }
492
+ .ck-improvement__name {
493
+ flex: 1;
494
+ min-width: 0;
495
+ overflow: hidden;
496
+ text-overflow: ellipsis;
497
+ white-space: nowrap;
498
+ }
499
+ .ck-improvement__versions {
500
+ color: var(--ck-dim);
501
+ font-size: 0.78rem;
502
+ white-space: nowrap;
503
+ }
504
+ .ck-improvement__scores {
505
+ display: inline-flex;
506
+ align-items: baseline;
507
+ gap: 0.4rem;
508
+ color: var(--ck-muted);
509
+ white-space: nowrap;
510
+ }
511
+ .ck-improvement__arrow { color: var(--ck-dim); }
512
+ .ck-improvement__to { color: var(--ck-text); }
513
+ .ck-improvement__delta {
514
+ min-width: 5rem;
515
+ text-align: right;
516
+ }
517
+ .ck-improvement__delta.is-gain { color: var(--ck-success, #4ade80); }
518
+ .ck-improvement__delta.is-loss { color: var(--ck-danger, #f87171); }
519
+ .ck-improvements__empty {
520
+ margin: 0.85rem 0 0;
521
+ max-width: 46rem;
522
+ color: var(--ck-muted);
523
+ font-size: 0.85rem;
524
+ line-height: 1.6;
525
+ }
526
+
348
527
  .ck-grid--spaced,
349
528
  .ck-card--spaced,
350
529
  .ck-empty--spaced {
@@ -608,15 +787,18 @@ tr:hover .ck-chip--publish {
608
787
  max-width: 380px;
609
788
  }
610
789
 
790
+ /* Login brand lockup — the standard puzzle logo + two-tone wordmark,
791
+ stacked and centred above the sign-in form. */
611
792
  .ck-login__brand {
612
- font-family: var(--ck-mono);
613
- font-size: 1.1rem;
614
- font-weight: 700;
615
- color: var(--ck-accent);
616
- text-transform: uppercase;
617
- letter-spacing: 0.04em;
618
- text-align: center;
619
- margin: 0 0 2rem;
793
+ display: flex;
794
+ flex-direction: column;
795
+ align-items: center;
796
+ gap: 0.45rem;
797
+ margin: 0 auto 2rem;
798
+ }
799
+ .ck-login__brand .ck-brand__name {
800
+ padding-top: 0;
801
+ font-size: 1.4rem;
620
802
  }
621
803
 
622
804
  .ck-login__form {
@@ -4457,3 +4639,121 @@ a.tag-mark {
4457
4639
  .ck-launch__ready-panel,
4458
4640
  .ck-launch__step { animation: none; }
4459
4641
  }
4642
+
4643
+ .ck-icon-btn {
4644
+ display: inline-flex;
4645
+ align-items: center;
4646
+ justify-content: center;
4647
+ flex: none;
4648
+ width: 1.65rem;
4649
+ height: 1.65rem;
4650
+ padding: 0;
4651
+ color: var(--ck-dim);
4652
+ background: transparent;
4653
+ border: 1px solid var(--ck-line-strong);
4654
+ border-radius: 6px;
4655
+ cursor: pointer;
4656
+ transition: color 0.15s ease, border-color 0.15s ease;
4657
+ }
4658
+ .ck-icon-btn:hover {
4659
+ color: var(--ck-text);
4660
+ border-color: var(--ck-dim);
4661
+ }
4662
+ .ck-icon-btn svg { display: block; }
4663
+ .ck-icon-btn form,
4664
+ .ck-failure-list__item form,
4665
+ .ck-flyout__item form { display: inline-flex; margin: 0; }
4666
+
4667
+ .ck-failure-list {
4668
+ list-style: none;
4669
+ margin: 0.6rem 0 0;
4670
+ padding: 0;
4671
+ display: flex;
4672
+ flex-direction: column;
4673
+ gap: 0.35rem;
4674
+ }
4675
+ .ck-failure-list__item {
4676
+ display: flex;
4677
+ align-items: center;
4678
+ gap: 0.5rem;
4679
+ font-size: 0.8rem;
4680
+ }
4681
+ .ck-failure-list__surface {
4682
+ flex: none;
4683
+ padding: 0.06rem 0.45rem;
4684
+ font-size: 0.64rem;
4685
+ font-weight: 700;
4686
+ text-transform: uppercase;
4687
+ letter-spacing: 0.04em;
4688
+ border-radius: 4px;
4689
+ background: var(--ck-surface-soft);
4690
+ color: var(--ck-dim);
4691
+ }
4692
+ .ck-failure-list__surface--run { color: var(--ck-warning); }
4693
+ .ck-failure-list__surface--generation { color: var(--ck-danger); }
4694
+ .ck-failure-list__surface--judge { color: var(--ck-info); }
4695
+ .ck-failure-list__cause {
4696
+ overflow: hidden;
4697
+ text-overflow: ellipsis;
4698
+ white-space: nowrap;
4699
+ }
4700
+ .ck-failure-list__item .ck-icon-btn { margin-left: auto; }
4701
+
4702
+ /* Flyout: lives inline in the card's always-present footer row, so the
4703
+ toggle appearing or disappearing never changes the card's height. The
4704
+ panel is an absolute popover that opens upward over the card body. */
4705
+ .ck-flyout {
4706
+ position: relative;
4707
+ flex: none;
4708
+ }
4709
+ .ck-flyout__toggle {
4710
+ display: inline-flex;
4711
+ align-items: center;
4712
+ gap: 0.3rem;
4713
+ font-family: var(--ck-mono);
4714
+ font-size: 0.66rem;
4715
+ letter-spacing: 0.05em;
4716
+ text-transform: uppercase;
4717
+ color: var(--ck-dim);
4718
+ cursor: pointer;
4719
+ list-style: none;
4720
+ }
4721
+ .ck-flyout__toggle::-webkit-details-marker { display: none; }
4722
+ .ck-flyout__toggle::marker { content: ""; }
4723
+ .ck-flyout__toggle:hover { color: var(--ck-text); }
4724
+ .ck-flyout[open] .ck-flyout__toggle { color: var(--ck-text); }
4725
+ .ck-flyout__panel {
4726
+ position: absolute;
4727
+ bottom: calc(100% + 0.5rem);
4728
+ right: 0;
4729
+ z-index: 30;
4730
+ width: 16rem;
4731
+ max-height: 13rem;
4732
+ overflow-y: auto;
4733
+ margin: 0;
4734
+ padding: 0.45rem;
4735
+ list-style: none;
4736
+ display: flex;
4737
+ flex-direction: column;
4738
+ gap: 0.2rem;
4739
+ background: var(--ck-bg-strong);
4740
+ border: 1px solid var(--ck-line-strong);
4741
+ border-radius: var(--ck-radius);
4742
+ box-shadow: 0 -14px 30px rgba(0, 0, 0, 0.5);
4743
+ }
4744
+ .ck-flyout__item {
4745
+ display: flex;
4746
+ align-items: center;
4747
+ justify-content: space-between;
4748
+ gap: 0.6rem;
4749
+ padding: 0.3rem 0.4rem;
4750
+ font-size: 0.78rem;
4751
+ border-radius: 5px;
4752
+ }
4753
+ .ck-flyout__item:hover { background: var(--ck-surface-hover); }
4754
+ .ck-flyout__label { color: var(--ck-text); }
4755
+ .ck-flyout__meta {
4756
+ margin-left: 0.35rem;
4757
+ color: var(--ck-dim);
4758
+ font-size: 0.7rem;
4759
+ }
@@ -0,0 +1,41 @@
1
+ module CompletionKit
2
+ class DashboardDismissalsController < ApplicationController
3
+ WINDOW = 7.days
4
+
5
+ def create
6
+ record = resolve_dismissable
7
+ DashboardDismissal.create(dismissable: record, baseline_score: baseline_for(record))
8
+ render_cards
9
+ end
10
+
11
+ def destroy
12
+ DashboardDismissal.find(params[:id]).destroy
13
+ render_cards
14
+ end
15
+
16
+ private
17
+
18
+ def dismissal_params
19
+ params.require(:dashboard_dismissal).permit(:dismissable_type, :dismissable_id)
20
+ end
21
+
22
+ def resolve_dismissable
23
+ type = dismissal_params[:dismissable_type]
24
+ raise ActiveRecord::RecordNotFound unless DashboardDismissal::DISMISSABLE_TYPES.include?(type)
25
+ type.constantize.find(dismissal_params[:dismissable_id])
26
+ end
27
+
28
+ def baseline_for(record)
29
+ return nil unless record.is_a?(Metric)
30
+ DashboardStats.metric_average(record.id, since: WINDOW.ago)
31
+ end
32
+
33
+ def render_cards
34
+ @worst_metric = DashboardStats.worst_metric(since: WINDOW.ago)
35
+ @failures = DashboardStats.failures(since: WINDOW.ago)
36
+ @ignored_metrics = DashboardDismissal.metrics
37
+ @ignored_failures = DashboardDismissal.failures
38
+ render :refresh, formats: [:turbo_stream]
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,18 @@
1
+ module CompletionKit
2
+ class DashboardDismissal < ApplicationRecord
3
+ FAILURE_TYPES = %w[
4
+ CompletionKit::Run
5
+ CompletionKit::Response
6
+ CompletionKit::Review
7
+ ].freeze
8
+ DISMISSABLE_TYPES = (["CompletionKit::Metric"] + FAILURE_TYPES).freeze
9
+
10
+ belongs_to :dismissable, polymorphic: true
11
+
12
+ validates :dismissable_type, inclusion: { in: DISMISSABLE_TYPES }
13
+ validates :dismissable_id, uniqueness: { scope: :dismissable_type }
14
+
15
+ scope :metrics, -> { where(dismissable_type: "CompletionKit::Metric").includes(:dismissable) }
16
+ scope :failures, -> { where(dismissable_type: FAILURE_TYPES).includes(:dismissable) }
17
+ end
18
+ end
@@ -13,6 +13,7 @@ module CompletionKit
13
13
  has_many :metric_group_memberships, dependent: :destroy
14
14
  has_many :metric_groups, through: :metric_group_memberships, source: :metric_group
15
15
  has_many :reviews, dependent: :nullify
16
+ has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
16
17
 
17
18
  serialize :rubric_bands, coder: JSON
18
19
 
@@ -5,6 +5,7 @@ module CompletionKit
5
5
 
6
6
  belongs_to :run
7
7
  has_many :reviews, dependent: :destroy
8
+ has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
8
9
 
9
10
  delegate :prompt, to: :run
10
11
 
@@ -5,6 +5,7 @@ module CompletionKit
5
5
 
6
6
  belongs_to :response
7
7
  belongs_to :metric, optional: true
8
+ has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
8
9
 
9
10
  validates :metric_name, presence: true
10
11
  validates :status, inclusion: { in: STATUSES }
@@ -11,6 +11,7 @@ module CompletionKit
11
11
  has_many :run_metrics, -> { order(:position) }, dependent: :destroy
12
12
  has_many :metrics, through: :run_metrics
13
13
  has_many :suggestions, dependent: :destroy
14
+ has_many :dashboard_dismissals, as: :dismissable, dependent: :destroy
14
15
 
15
16
  validates :name, presence: true
16
17
  validates :status, inclusion: { in: STATUSES }
@@ -0,0 +1,167 @@
1
+ module CompletionKit
2
+ # Aggregate queries powering the standalone dashboard cards.
3
+ # Each method is a small, scoped query; nothing here caches, and the only write is `worst_metric` clearing a stale dismissal.
4
+ class DashboardStats
5
+ # Runs per calendar day for the trailing `days` window, oldest first.
6
+ # Always returns one entry per day (count 0 for quiet days) so callers
7
+ # can render a fixed-width sparkline.
8
+ def self.activity(days: 14)
9
+ since = (days - 1).days.ago.to_date
10
+ counts = Run.where("created_at >= ?", since.beginning_of_day)
11
+ .group("DATE(created_at)")
12
+ .count
13
+ (0...days).map do |offset|
14
+ date = since + offset
15
+ { date: date, count: counts[date] || counts[date.to_s] || 0 }
16
+ end
17
+ end
18
+
19
+ # The metric with the lowest average judge score across succeeded reviews
20
+ # in the window — the prompt-engineering target. Dismissed metrics are
21
+ # skipped while their average holds at or above the score snapshotted when
22
+ # they were dismissed; a metric that regresses below that baseline
23
+ # resurfaces and its stale dismissal is cleared. Returns nil when nothing
24
+ # qualifies. `response` is the single worst-scoring response, for a deep
25
+ # link.
26
+ def self.worst_metric(since:)
27
+ averages = scored_reviews_since(since)
28
+ .joins(:metric)
29
+ .group("completion_kit_metrics.id")
30
+ .average(:ai_score)
31
+ return nil if averages.empty?
32
+
33
+ dismissals = metric_dismissals
34
+ metrics = Metric.where(id: averages.keys).index_by(&:id)
35
+
36
+ averages.sort_by { |_id, avg| avg }.each do |metric_id, avg|
37
+ rounded = avg.to_f.round(2)
38
+ dismissal = dismissals[metric_id]
39
+ next if dismissal && rounded >= dismissal.baseline_score.to_f
40
+
41
+ dismissal&.destroy
42
+ worst = scored_reviews_since(since).where(metric_id: metric_id).order(:ai_score).first
43
+ metric = metrics[metric_id]
44
+ return {
45
+ metric: metric,
46
+ name: metric.name,
47
+ avg: rounded,
48
+ response: worst.response,
49
+ score: worst.ai_score.to_f
50
+ }
51
+ end
52
+ nil
53
+ end
54
+
55
+ # The rounded average judge score for one metric across the window, or nil
56
+ # when it has no scored reviews. Used to snapshot a dismissal's baseline.
57
+ def self.metric_average(metric_id, since:)
58
+ scored_reviews_since(since).where(metric_id: metric_id).average(:ai_score)&.to_f&.round(2)
59
+ end
60
+
61
+ # Everything that terminally failed in the window across all three
62
+ # surfaces — failed runs, failed generations, failed judge reviews —
63
+ # excluding any the user has dismissed. Returns a count and an items list
64
+ # ordered most-recent-first; each item carries its surface, the failing
65
+ # record, the run it belongs to (for a deep link), and a cause string.
66
+ def self.failures(since:)
67
+ dismissed = failure_dismissal_keys
68
+ items = []
69
+
70
+ Run.where(status: "failed").where("created_at >= ?", since).find_each do |run|
71
+ next if dismissed.include?(["CompletionKit::Run", run.id])
72
+ items << {
73
+ surface: "run", record: run, run: run,
74
+ cause: run.failure_summary.presence || "Run failed", at: run.updated_at
75
+ }
76
+ end
77
+
78
+ Response.where(status: "failed").where("created_at >= ?", since)
79
+ .includes(:run).find_each do |response|
80
+ next if dismissed.include?(["CompletionKit::Response", response.id])
81
+ items << {
82
+ surface: "generation", record: response, run: response.run,
83
+ cause: failure_cause(response), at: response.updated_at
84
+ }
85
+ end
86
+
87
+ Review.where(status: "failed").where("completion_kit_reviews.created_at >= ?", since)
88
+ .includes(response: :run).find_each do |review|
89
+ next if dismissed.include?(["CompletionKit::Review", review.id])
90
+ items << {
91
+ surface: "judge", record: review, run: review.response.run,
92
+ cause: failure_cause(review), at: review.updated_at
93
+ }
94
+ end
95
+
96
+ items.sort_by! { |item| item[:at] }
97
+ items.reverse!
98
+ { count: items.size, items: items }
99
+ end
100
+
101
+ # The most recent measurable change per prompt family — gains and
102
+ # regressions both. For each family the comparison is:
103
+ # * latest scored version vs the published version, when a draft sits
104
+ # ahead of what's live ("is my work-in-progress better?")
105
+ # * published vs the previous scored version, when the latest version
106
+ # IS the published one ("did my last publish help?")
107
+ # Biggest movement first. Empty until something has been iterated and
108
+ # re-judged on both sides of the comparison.
109
+ def self.prompt_changes(limit: 5)
110
+ scores = Review.joins(response: :run)
111
+ .where(status: "succeeded")
112
+ .where.not(ai_score: nil)
113
+ .group("completion_kit_runs.prompt_id")
114
+ .average(:ai_score)
115
+ return [] if scores.empty?
116
+
117
+ Prompt.where(id: scores.keys).group_by(&:family_key).filter_map do |_key, versions|
118
+ scored = versions.select { |v| scores[v.id] }.sort_by(&:version_number)
119
+ next if scored.size < 2
120
+
121
+ candidate = scored.last
122
+ published = versions.find(&:current?)
123
+ baseline =
124
+ if published && published != candidate && scores[published.id]
125
+ published
126
+ else
127
+ scored[-2]
128
+ end
129
+
130
+ delta = (scores[candidate.id] - scores[baseline.id]).to_f.round(2)
131
+ next if delta.zero?
132
+
133
+ {
134
+ prompt: candidate,
135
+ from_version: baseline.version_number,
136
+ to_version: candidate.version_number,
137
+ from_score: scores[baseline.id].to_f.round(2),
138
+ to_score: scores[candidate.id].to_f.round(2),
139
+ delta: delta
140
+ }
141
+ end.sort_by { |row| -row[:delta].abs }.first(limit)
142
+ end
143
+
144
+ def self.scored_reviews_since(since)
145
+ Review.joins(:response)
146
+ .where(status: "succeeded")
147
+ .where("completion_kit_reviews.created_at >= ?", since)
148
+ .where.not(ai_score: nil)
149
+ end
150
+ private_class_method :scored_reviews_since
151
+
152
+ def self.metric_dismissals
153
+ DashboardDismissal.where(dismissable_type: "CompletionKit::Metric").index_by(&:dismissable_id)
154
+ end
155
+ private_class_method :metric_dismissals
156
+
157
+ def self.failure_dismissal_keys
158
+ DashboardDismissal.failures.map { |d| [d.dismissable_type, d.dismissable_id] }.to_set
159
+ end
160
+ private_class_method :failure_dismissal_keys
161
+
162
+ def self.failure_cause(record)
163
+ record.error_class.presence || "Unknown error"
164
+ end
165
+ private_class_method :failure_cause
166
+ end
167
+ end
@@ -0,0 +1 @@
1
+ <svg viewBox="0 0 24 24" width="14" height="14" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M1 12s4-8 11-8 11 8 11 8-4 8-11 8-11-8-11-8z"/><circle cx="12" cy="12" r="3"/></svg>
@@ -0,0 +1 @@
1
+ <svg viewBox="0 0 24 24" width="14" height="14" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" aria-hidden="true"><path d="M17.94 17.94A10.07 10.07 0 0 1 12 20c-7 0-11-8-11-8a18.45 18.45 0 0 1 5.06-5.94M9.9 4.24A9.12 9.12 0 0 1 12 4c7 0 11 8 11 8a18.5 18.5 0 0 1-2.16 3.19m-6.72-1.07a3 3 0 1 1-4.24-4.24"/><line x1="1" y1="1" x2="23" y2="23"/></svg>
@@ -0,0 +1,47 @@
1
+ <div class="ck-card ck-stat-card ck-rise" id="ck-failures-card" style="--rise-delay: 180ms;">
2
+ <p class="ck-kicker">Failures · last 7 days</p>
3
+ <div class="ck-stat-card__body">
4
+ <span class="ck-stat-card__count<%= failures[:count].positive? ? ' is-danger' : ' is-clean' %>"><%= failures[:count] %></span>
5
+ </div>
6
+
7
+ <% if failures[:items].any? %>
8
+ <ul class="ck-failure-list">
9
+ <% failures[:items].each do |item| %>
10
+ <li class="ck-failure-list__item">
11
+ <span class="ck-failure-list__surface ck-failure-list__surface--<%= item[:surface] %>"><%= item[:surface] %></span>
12
+ <% if item[:run] %>
13
+ <%= link_to item[:cause], completion_kit.run_path(item[:run]), class: "ck-link ck-failure-list__cause" %>
14
+ <% else %>
15
+ <span class="ck-failure-list__cause"><%= item[:cause] %></span>
16
+ <% end %>
17
+ <%= button_to completion_kit.dashboard_dismissals_path,
18
+ params: { dashboard_dismissal: { dismissable_type: item[:record].class.name,
19
+ dismissable_id: item[:record].id } },
20
+ class: "ck-icon-btn", title: "Ignore this failure",
21
+ "aria-label": "Ignore #{item[:surface]} failure" do %><%= render "completion_kit/dashboard/eye_off_icon" %><% end %>
22
+ </li>
23
+ <% end %>
24
+ </ul>
25
+ <% end %>
26
+
27
+ <% if failures[:items].empty? || ignored_failures.any? %>
28
+ <div class="ck-stat-card__foot ck-stat-card__foot--split">
29
+ <span><% if failures[:items].empty? %>All clear — nothing failed this week.<% end %></span>
30
+ <% if ignored_failures.any? %>
31
+ <details class="ck-flyout">
32
+ <summary class="ck-flyout__toggle"><%= ignored_failures.size %> ignored</summary>
33
+ <ul class="ck-flyout__panel">
34
+ <% ignored_failures.each do |dismissal| %>
35
+ <li class="ck-flyout__item">
36
+ <span class="ck-flyout__label"><%= dismissal.dismissable_type.demodulize %> #<%= dismissal.dismissable_id %></span>
37
+ <%= button_to completion_kit.dashboard_dismissal_path(dismissal),
38
+ method: :delete, class: "ck-icon-btn", title: "Un-ignore",
39
+ "aria-label": "Un-ignore #{dismissal.dismissable_type.demodulize} #{dismissal.dismissable_id}" do %><%= render "completion_kit/dashboard/eye_icon" %><% end %>
40
+ </li>
41
+ <% end %>
42
+ </ul>
43
+ </details>
44
+ <% end %>
45
+ </div>
46
+ <% end %>
47
+ </div>
@@ -0,0 +1,54 @@
1
+ <div class="ck-card ck-stat-card ck-rise" id="ck-worst-metric-card" style="--rise-delay: 120ms;">
2
+ <p class="ck-kicker">Worst metric · last 7 days</p>
3
+ <div class="ck-stat-card__body">
4
+ <% if worst_metric %>
5
+ <span class="ck-stat-card__metric"><%= worst_metric[:name] %></span>
6
+ <span class="<%= ck_badge_classes(ck_score_kind(worst_metric[:avg])) %> ck-stat-card__score"><%= worst_metric[:avg] %></span>
7
+ <% else %>
8
+ <span class="ck-stat-card__metric ck-stat-card__metric--empty">No scored reviews</span>
9
+ <% end %>
10
+ </div>
11
+
12
+ <div class="ck-stat-card__foot ck-stat-card__foot--split">
13
+ <span>
14
+ <% if worst_metric && worst_metric[:response] %>
15
+ <%= link_to "Worst response →",
16
+ completion_kit.run_response_path(worst_metric[:response].run, worst_metric[:response]),
17
+ class: "ck-link" %>
18
+ <% elsif worst_metric %>
19
+ Lowest score this week.
20
+ <% else %>
21
+ Run a judge to populate this.
22
+ <% end %>
23
+ </span>
24
+
25
+ <% if ignored_metrics.any? %>
26
+ <details class="ck-flyout">
27
+ <summary class="ck-flyout__toggle"><%= ignored_metrics.size %> ignored</summary>
28
+ <ul class="ck-flyout__panel">
29
+ <% ignored_metrics.each do |dismissal| %>
30
+ <li class="ck-flyout__item">
31
+ <span class="ck-flyout__label">
32
+ <%= dismissal.dismissable.name %>
33
+ <% if dismissal.baseline_score %>
34
+ <span class="ck-flyout__meta">baseline <%= dismissal.baseline_score %></span>
35
+ <% end %>
36
+ </span>
37
+ <%= button_to completion_kit.dashboard_dismissal_path(dismissal),
38
+ method: :delete, class: "ck-icon-btn", title: "Un-ignore",
39
+ "aria-label": "Un-ignore #{dismissal.dismissable.name}" do %><%= render "completion_kit/dashboard/eye_icon" %><% end %>
40
+ </li>
41
+ <% end %>
42
+ </ul>
43
+ </details>
44
+ <% end %>
45
+
46
+ <% if worst_metric %>
47
+ <%= button_to completion_kit.dashboard_dismissals_path,
48
+ params: { dashboard_dismissal: { dismissable_type: "CompletionKit::Metric",
49
+ dismissable_id: worst_metric[:metric].id } },
50
+ class: "ck-icon-btn", title: "Ignore this metric",
51
+ "aria-label": "Ignore #{worst_metric[:name]}" do %><%= render "completion_kit/dashboard/eye_off_icon" %><% end %>
52
+ <% end %>
53
+ </div>
54
+ </div>
@@ -0,0 +1,8 @@
1
+ <%= turbo_stream.replace "ck-worst-metric-card" do %>
2
+ <%= render "completion_kit/dashboard/worst_metric_card",
3
+ worst_metric: @worst_metric, ignored_metrics: @ignored_metrics %>
4
+ <% end %>
5
+ <%= turbo_stream.replace "ck-failures-card" do %>
6
+ <%= render "completion_kit/dashboard/failures_card",
7
+ failures: @failures, ignored_failures: @ignored_failures %>
8
+ <% end %>
data/config/routes.rb CHANGED
@@ -15,6 +15,7 @@ CompletionKit::Engine.routes.draw do
15
15
  resources :metrics
16
16
  resources :metric_groups
17
17
  resources :tags
18
+ resources :dashboard_dismissals, only: [:create, :destroy]
18
19
 
19
20
  resources :runs do
20
21
  member do
@@ -0,0 +1,15 @@
1
+ class CreateCompletionKitDashboardDismissals < ActiveRecord::Migration[8.1]
2
+ def change
3
+ create_table :completion_kit_dashboard_dismissals do |t|
4
+ t.string :dismissable_type, null: false
5
+ t.bigint :dismissable_id, null: false
6
+ t.decimal :baseline_score, precision: 4, scale: 1
7
+ t.timestamps
8
+ end
9
+
10
+ add_index :completion_kit_dashboard_dismissals,
11
+ [:dismissable_type, :dismissable_id],
12
+ unique: true,
13
+ name: "index_ck_dashboard_dismissals_on_dismissable"
14
+ end
15
+ end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.18"
2
+ VERSION = "0.5.20"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.18
4
+ version: 0.5.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -242,6 +242,7 @@ files:
242
242
  - app/controllers/completion_kit/api/v1/tags_controller.rb
243
243
  - app/controllers/completion_kit/api_reference_controller.rb
244
244
  - app/controllers/completion_kit/application_controller.rb
245
+ - app/controllers/completion_kit/dashboard_dismissals_controller.rb
245
246
  - app/controllers/completion_kit/datasets_controller.rb
246
247
  - app/controllers/completion_kit/mcp_controller.rb
247
248
  - app/controllers/completion_kit/metric_groups_controller.rb
@@ -262,6 +263,7 @@ files:
262
263
  - app/jobs/completion_kit/run_completion_check_job.rb
263
264
  - app/mailers/completion_kit/application_mailer.rb
264
265
  - app/models/completion_kit/application_record.rb
266
+ - app/models/completion_kit/dashboard_dismissal.rb
265
267
  - app/models/completion_kit/dataset.rb
266
268
  - app/models/completion_kit/mcp_session.rb
267
269
  - app/models/completion_kit/metric.rb
@@ -281,6 +283,7 @@ files:
281
283
  - app/services/completion_kit/anthropic_client.rb
282
284
  - app/services/completion_kit/api_config.rb
283
285
  - app/services/completion_kit/csv_processor.rb
286
+ - app/services/completion_kit/dashboard_stats.rb
284
287
  - app/services/completion_kit/judge_service.rb
285
288
  - app/services/completion_kit/llm_client.rb
286
289
  - app/services/completion_kit/mcp_dispatcher.rb
@@ -308,6 +311,11 @@ files:
308
311
  - app/views/completion_kit/api_reference/_resource_card.html.erb
309
312
  - app/views/completion_kit/api_reference/_resource_list.html.erb
310
313
  - app/views/completion_kit/api_reference/index.html.erb
314
+ - app/views/completion_kit/dashboard/_eye_icon.html.erb
315
+ - app/views/completion_kit/dashboard/_eye_off_icon.html.erb
316
+ - app/views/completion_kit/dashboard/_failures_card.html.erb
317
+ - app/views/completion_kit/dashboard/_worst_metric_card.html.erb
318
+ - app/views/completion_kit/dashboard_dismissals/refresh.turbo_stream.erb
311
319
  - app/views/completion_kit/datasets/_form.html.erb
312
320
  - app/views/completion_kit/datasets/edit.html.erb
313
321
  - app/views/completion_kit/datasets/index.html.erb
@@ -382,6 +390,7 @@ files:
382
390
  - db/migrate/20260509000002_create_completion_kit_taggings.rb
383
391
  - db/migrate/20260513000001_create_completion_kit_mcp_sessions.rb
384
392
  - db/migrate/20260514000001_allow_judge_only_runs.rb
393
+ - db/migrate/20260516000001_create_completion_kit_dashboard_dismissals.rb
385
394
  - lib/completion-kit.rb
386
395
  - lib/completion_kit.rb
387
396
  - lib/completion_kit/concurrency_check.rb