completion-kit 0.5.17 → 0.5.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ce9123debaea45400d854ccca1cef4ca1c93a486bb397213f7e7998c8748ca2
4
- data.tar.gz: 4fc585bef631af5ee454a8e86bfd7cbdf95303d0e100e5405a644b1ca85fae2b
3
+ metadata.gz: fe8be5a5838f6a270f5ed934e250f0dd55d673b51ba15ed29579429dadcaaa03
4
+ data.tar.gz: 6044e3c6805e697c1e8e80b28760ab5f749e2ba14bd976b67225f0d3cffb6eff
5
5
  SHA512:
6
- metadata.gz: 37321a08941fbdc87e43698a18db78ef3adb100831449e128a5487004e93714ea494361b07a2aec4b55b8b723aae39e0a8c38f5f7b82f377e365269eeb3ffa0c
7
- data.tar.gz: 8744ac203aeb30e7dceda1fad9cd70b5f1b31c8f4eed156ed66b7ed8e33a8be92955d4d025fbfd313d0c8389c117ca0ac44f551dd7251da4148c00430da75ec4
6
+ metadata.gz: efc0e7b9a1b23eeec3b3ba91ab65d94d2454bec0554cdd8e36ab6d1286b9c208de2e1ea24fec1c19f73d01dc044097df6e2ac004a0de2565bc6e6b4965af1d85
7
+ data.tar.gz: e9fca34118e8a1b10107807e1243f946ad6148f7af8e92d23c3c9bfc20f10f2d91c6db0c4b285996f8267b6511593417a11a7f6c1002b9d8dd2a9f32f5793860
@@ -345,6 +345,176 @@ form.button_to {
345
345
  grid-template-columns: repeat(2, minmax(0, 1fr));
346
346
  }
347
347
 
348
+ .ck-grid--cards-3 {
349
+ grid-template-columns: repeat(3, minmax(0, 1fr));
350
+ }
351
+
352
+ /* ── Dashboard: workspace stat ribbon ─────────────────────────────────
353
+ Replaces the old oversized count cards. One thin instrument strip,
354
+ four navigable segments split by hairline dividers. */
355
+ .ck-statbar {
356
+ display: grid;
357
+ grid-template-columns: repeat(4, minmax(0, 1fr));
358
+ margin-top: 1.5rem;
359
+ border: 1px solid var(--ck-line);
360
+ border-radius: var(--ck-radius-lg);
361
+ background: var(--ck-surface);
362
+ overflow: hidden;
363
+ }
364
+ .ck-statbar__item {
365
+ display: flex;
366
+ flex-direction: column;
367
+ gap: 0.25rem;
368
+ padding: 0.95rem 1.3rem;
369
+ text-decoration: none;
370
+ border-left: 1px solid var(--ck-line);
371
+ transition: background 0.15s ease;
372
+ }
373
+ .ck-statbar__item:first-child { border-left: 0; }
374
+ .ck-statbar__item:hover { background: var(--ck-surface-hover); }
375
+ .ck-statbar__label {
376
+ font-family: var(--ck-mono);
377
+ font-size: 0.68rem;
378
+ letter-spacing: 0.14em;
379
+ text-transform: uppercase;
380
+ color: var(--ck-dim);
381
+ }
382
+ .ck-statbar__value {
383
+ font-family: var(--ck-mono);
384
+ font-size: 1.65rem;
385
+ line-height: 1;
386
+ color: var(--ck-text);
387
+ transition: color 0.15s ease;
388
+ }
389
+ .ck-statbar__item:hover .ck-statbar__value { color: var(--ck-accent); }
390
+
391
+ /* ── Dashboard: pulse cards (activity / worst metric / failed reviews) ── */
392
+ .ck-pulse-grid { margin-top: 0.75rem; }
393
+
394
+ .ck-stat-card {
395
+ display: flex;
396
+ flex-direction: column;
397
+ min-height: 10.5rem;
398
+ }
399
+ .ck-stat-card .ck-kicker { margin-bottom: auto; }
400
+ .ck-stat-card__foot {
401
+ margin-top: auto;
402
+ padding-top: 0.85rem;
403
+ font-family: var(--ck-mono);
404
+ font-size: 0.78rem;
405
+ color: var(--ck-muted);
406
+ }
407
+ .ck-stat-card__figure { color: var(--ck-text); }
408
+
409
+ .ck-stat-card__body {
410
+ display: flex;
411
+ align-items: baseline;
412
+ gap: 0.6rem;
413
+ margin: 0.9rem 0 0.3rem;
414
+ }
415
+ .ck-stat-card__metric {
416
+ font-size: 1.2rem;
417
+ line-height: 1.25;
418
+ color: var(--ck-text);
419
+ }
420
+ .ck-stat-card__metric--empty { color: var(--ck-dim); }
421
+ .ck-stat-card__score { align-self: center; }
422
+ .ck-stat-card__count {
423
+ font-family: var(--ck-mono);
424
+ font-size: 2.6rem;
425
+ line-height: 1;
426
+ }
427
+ .ck-stat-card__count.is-clean { color: var(--ck-success, #4ade80); }
428
+ .ck-stat-card__count.is-danger { color: var(--ck-danger, #f87171); }
429
+
430
+ /* Dashboard activity sparkline — fixed-height row of bars, height set
431
+ inline per day. Bars hug the bottom; quiet days collapse to a sliver.
432
+ The busiest day(s) get the bright accent so the peak reads instantly. */
433
+ .ck-sparkline {
434
+ display: flex;
435
+ align-items: flex-end;
436
+ gap: 3px;
437
+ height: 3.5rem;
438
+ margin: 0.9rem 0 0.3rem;
439
+ border-bottom: 1px solid var(--ck-line);
440
+ padding-bottom: 1px;
441
+ }
442
+ .ck-sparkline__bar {
443
+ flex: 1;
444
+ min-height: 2px;
445
+ background: var(--ck-line-strong);
446
+ border-radius: 2px 2px 0 0;
447
+ transition: background 0.15s ease;
448
+ }
449
+ .ck-sparkline__bar.is-peak { background: var(--ck-accent); }
450
+ .ck-sparkline__bar:hover { background: var(--ck-accent-hover); }
451
+
452
+ /* One orchestrated page-load reveal — the dashboard "boots up". */
453
+ @keyframes ck-rise {
454
+ from { opacity: 0; transform: translateY(8px); }
455
+ to { opacity: 1; transform: translateY(0); }
456
+ }
457
+ .ck-rise {
458
+ animation: ck-rise 0.32s ease both;
459
+ animation-delay: var(--rise-delay, 0ms);
460
+ }
461
+ @media (prefers-reduced-motion: reduce) {
462
+ .ck-rise { animation: none; }
463
+ }
464
+
465
+ /* ── Dashboard: prompt improvements list ──────────────────────────────
466
+ One row per family that improved version-over-version. Name takes the
467
+ slack; version transition, score transition, and delta sit to the right. */
468
+ .ck-improvements {
469
+ list-style: none;
470
+ margin: 0.85rem 0 0;
471
+ padding: 0;
472
+ }
473
+ .ck-improvement {
474
+ display: flex;
475
+ align-items: baseline;
476
+ gap: 1.1rem;
477
+ padding: 0.7rem 0;
478
+ border-top: 1px solid var(--ck-line);
479
+ font-family: var(--ck-mono);
480
+ font-size: 0.85rem;
481
+ }
482
+ .ck-improvement:first-child { border-top: 0; }
483
+ .ck-improvement__name {
484
+ flex: 1;
485
+ min-width: 0;
486
+ overflow: hidden;
487
+ text-overflow: ellipsis;
488
+ white-space: nowrap;
489
+ }
490
+ .ck-improvement__versions {
491
+ color: var(--ck-dim);
492
+ font-size: 0.78rem;
493
+ white-space: nowrap;
494
+ }
495
+ .ck-improvement__scores {
496
+ display: inline-flex;
497
+ align-items: baseline;
498
+ gap: 0.4rem;
499
+ color: var(--ck-muted);
500
+ white-space: nowrap;
501
+ }
502
+ .ck-improvement__arrow { color: var(--ck-dim); }
503
+ .ck-improvement__to { color: var(--ck-text); }
504
+ .ck-improvement__delta {
505
+ min-width: 5rem;
506
+ text-align: right;
507
+ }
508
+ .ck-improvement__delta.is-gain { color: var(--ck-success); }
509
+ .ck-improvement__delta.is-loss { color: var(--ck-danger); }
510
+ .ck-improvements__empty {
511
+ margin: 0.85rem 0 0;
512
+ max-width: 46rem;
513
+ color: var(--ck-muted);
514
+ font-size: 0.85rem;
515
+ line-height: 1.6;
516
+ }
517
+
348
518
  .ck-grid--spaced,
349
519
  .ck-card--spaced,
350
520
  .ck-empty--spaced {
@@ -608,15 +778,18 @@ tr:hover .ck-chip--publish {
608
778
  max-width: 380px;
609
779
  }
610
780
 
781
+ /* Login brand lockup — the standard puzzle logo + two-tone wordmark,
782
+ stacked and centred above the sign-in form. */
611
783
  .ck-login__brand {
612
- font-family: var(--ck-mono);
613
- font-size: 1.1rem;
614
- font-weight: 700;
615
- color: var(--ck-accent);
616
- text-transform: uppercase;
617
- letter-spacing: 0.04em;
618
- text-align: center;
619
- margin: 0 0 2rem;
784
+ display: flex;
785
+ flex-direction: column;
786
+ align-items: center;
787
+ gap: 0.45rem;
788
+ margin: 0 auto 2rem;
789
+ }
790
+ .ck-login__brand .ck-brand__name {
791
+ padding-top: 0;
792
+ font-size: 1.4rem;
620
793
  }
621
794
 
622
795
  .ck-login__form {
@@ -180,15 +180,23 @@ module CompletionKit
180
180
  end
181
181
 
182
182
  def ck_run_path(run)
183
- CompletionKit::Engine.routes.url_helpers.run_path(run, **url_options.except(:host, :protocol, :script_name))
183
+ CompletionKit::Engine.routes.url_helpers.run_path(run, **ck_engine_path_options)
184
184
  end
185
185
 
186
186
  def ck_prompt_path(prompt)
187
- CompletionKit::Engine.routes.url_helpers.prompt_path(prompt, **url_options.except(:host, :protocol, :script_name))
187
+ CompletionKit::Engine.routes.url_helpers.prompt_path(prompt, **ck_engine_path_options)
188
188
  end
189
189
 
190
190
  def ck_dataset_path(dataset)
191
- CompletionKit::Engine.routes.url_helpers.dataset_path(dataset, **url_options.except(:host, :protocol, :script_name))
191
+ CompletionKit::Engine.routes.url_helpers.dataset_path(dataset, **ck_engine_path_options)
192
+ end
193
+
194
+ # Dynamic route segments owned by the host's mount scope (e.g. an
195
+ # `org_slug` from `scope "/orgs/:org_slug"`) live in `url_options[:_recall]`.
196
+ # The engine's url_helpers won't read them out of the nested recall hash to
197
+ # satisfy a required segment — they have to arrive as explicit kwargs.
198
# Keyword options forwarded to the engine's `*_path` helpers so that dynamic
# segments recalled from the current request (`url_options[:_recall]`) reach
# the helper as explicit kwargs.
#
# Besides :controller and :action, the current request's own :id and :format
# are dropped: callers pass the target record positionally, and a recalled
# `id:` kwarg would take the :id slot for itself, pushing the positional
# record into the next free segment instead of identifying the record.
#
# NOTE(review): any other engine-level dynamic segment present in the recall
# (e.g. a nested `run_id`) is still forwarded — confirm whether such routes
# exist and whether they should be filtered as well.
#
# @return [Hash] recalled path parameters minus the request-specific keys;
#   an empty hash when nothing was recalled
def ck_engine_path_options
  recalled = url_options[:_recall] || {}
  recalled.except(:controller, :action, :id, :format)
end
193
201
 
194
202
  def ck_format_maybe_json(text)
@@ -0,0 +1,99 @@
1
module CompletionKit
  # Read-only aggregate queries powering the standalone dashboard cards.
  # Each method is a small, scoped query — nothing here writes or caches.
  class DashboardStats
    # Runs per calendar day for the trailing +days+ window, oldest first.
    #
    # Always returns one entry per day (count 0 for quiet days) so callers
    # can render a fixed-width sparkline.
    #
    # NOTE(review): the window start comes from `days.ago` while the buckets
    # come from SQL `DATE(created_at)`. If the application time zone differs
    # from the zone timestamps are stored in, rows close to midnight may be
    # counted under a neighbouring day — confirm for the target deployment.
    #
    # @param days [Integer] number of trailing calendar days, today included
    # @return [Array<Hash>] `{ date:, count: }` rows, oldest first
    def self.activity(days: 14)
      since = (days - 1).days.ago.to_date
      counts = Run.where("created_at >= ?", since.beginning_of_day)
                  .group("DATE(created_at)")
                  .count
      (0...days).map do |offset|
        date = since + offset
        # The grouped key is looked up both as a Date and as its string form.
        { date: date, count: counts[date] || counts[date.to_s] || 0 }
      end
    end

    # The metric with the lowest average judge score across succeeded reviews
    # in the window — the prompt-engineering target.
    #
    # @param since [Object] lower bound compared against the reviews' created_at
    # @return [Hash, nil] `{ name:, avg:, response:, score: }`, where
    #   `response` and `score` belong to the single worst-scoring review for
    #   that metric (for a deep link); nil when there are no scored reviews
    def self.worst_metric(since:)
      averages = scored_reviews_since(since).group(:metric_name).average(:ai_score)
      return nil if averages.empty?

      name, avg = averages.min_by { |_, value| value }
      # Ties on ai_score are broken by primary key so the deep link stays
      # stable between page loads.
      worst = scored_reviews_since(since)
              .where(metric_name: name)
              .order(:ai_score, :id)
              .first
      # This lookup is a separate query from the averages above; if the
      # matching reviews disappeared in between, report "nothing scored"
      # instead of raising on nil.
      return nil unless worst

      {
        name: name,
        avg: avg.to_f.round(2),
        response: worst.response,
        score: worst.ai_score.to_f
      }
    end

    # Reviews that terminally failed in the window — parse failures, judge
    # truncations, provider errors. Invisible on the dashboard otherwise.
    #
    # @param since [Object] lower bound compared against created_at
    # @return [Integer] number of reviews with status "failed"
    def self.failed_review_count(since:)
      Review.where(status: "failed").where("created_at >= ?", since).count
    end

    # The most recent measurable change per prompt family — gains and
    # regressions both. For each family the comparison is:
    #   * latest scored version vs the published version, when a draft sits
    #     ahead of what's live ("is my work-in-progress better?")
    #   * published vs the previous scored version, when the latest version
    #     IS the published one ("did my last publish help?")
    # Families with fewer than two scored versions, or whose rounded delta is
    # zero, are left out. Biggest movement first.
    #
    # @param limit [Integer] maximum number of rows returned
    # @return [Array<Hash>] rows of `{ prompt:, from_version:, to_version:,
    #   from_score:, to_score:, delta: }`, ordered by absolute delta descending
    def self.prompt_changes(limit: 5)
      # Average succeeded-review score keyed by the prompt id of the owning run.
      scores = Review.joins(response: :run)
                     .where(status: "succeeded")
                     .where.not(ai_score: nil)
                     .group("completion_kit_runs.prompt_id")
                     .average(:ai_score)
      return [] if scores.empty?

      rows = Prompt.where(id: scores.keys).group_by(&:family_key).filter_map do |_key, versions|
        scored = versions.select { |v| scores[v.id] }.sort_by(&:version_number)
        next if scored.size < 2

        candidate = scored.last
        published = versions.find(&:current?)
        baseline =
          if published && published != candidate && scores[published.id]
            published
          else
            scored[-2]
          end

        delta = (scores[candidate.id] - scores[baseline.id]).to_f.round(2)
        next if delta.zero?

        {
          prompt: candidate,
          from_version: baseline.version_number,
          to_version: candidate.version_number,
          from_score: scores[baseline.id].to_f.round(2),
          to_score: scores[candidate.id].to_f.round(2),
          delta: delta
        }
      end

      rows.sort_by { |row| -row[:delta].abs }.first(limit)
    end

    # Succeeded reviews created at or after +since+ that carry a judge score.
    # The join on :response limits the set to reviews that still have one.
    def self.scored_reviews_since(since)
      Review.joins(:response)
            .where(status: "succeeded")
            .where("completion_kit_reviews.created_at >= ?", since)
            .where.not(ai_score: nil)
    end
    private_class_method :scored_reviews_since
  end
end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.17"
2
+ VERSION = "0.5.19"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.17
4
+ version: 0.5.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -281,6 +281,7 @@ files:
281
281
  - app/services/completion_kit/anthropic_client.rb
282
282
  - app/services/completion_kit/api_config.rb
283
283
  - app/services/completion_kit/csv_processor.rb
284
+ - app/services/completion_kit/dashboard_stats.rb
284
285
  - app/services/completion_kit/judge_service.rb
285
286
  - app/services/completion_kit/llm_client.rb
286
287
  - app/services/completion_kit/mcp_dispatcher.rb