completion-kit 0.5.41 → 0.5.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c92ab72bfe3b2fe9fa296a21e8430f7b8c3d9f949a933f7675f4ffd8059dd8ae
4
- data.tar.gz: ec53ee3e3b29a4d283db95d90e7a1dd8afef3572647ae243c9458500600714c9
3
+ metadata.gz: cb4e02cd47c184c237c34642f1ece8c44bef6a2e79cfeeb3da913a536940216e
4
+ data.tar.gz: 516720138ec5dd2eeecc2d28cd9e7830c8c2ab2b4a183733afe44a939ca4d9af
5
5
  SHA512:
6
- metadata.gz: 8b783ec1b478a365f9e8a04da486fc7a87251d53dd6c63720c7d88795a524546bf71da8a07eca79337863c6787f52c776e51c75940b854a6e477f8074bf1d016
7
- data.tar.gz: 7444db06e0adf5e29c7d68426496e643c743187d9cc8d974367cfdf025c993c0e061ad2b4b3f9d6a70f1aca3d70d124744b2a7ae416d5827efab9b6339b496f7
6
+ metadata.gz: 0f3403ecf234b58f659f49406e0264384a5ccbda7206e3673c3b4d96704658b1efe5c2e74943fd6110477a8acffeca3600af655980e76a139319e6dc4376779f
7
+ data.tar.gz: da16ea0344f7a00f8366faabe4cd9d53ef487fc67a06f415b7772ff3631fcb3edca6f30be9a59dd3e4cc9e40795cdd4977028c5fd880ab9d2d49d3563babe5aa
@@ -5538,3 +5538,71 @@ a.tag-mark {
5538
5538
  margin: 0;
5539
5539
  font-size: 0.78rem;
5540
5540
  }
5541
+
5542
+ .ck-starter-row {
5543
+ margin-top: 2rem;
5544
+ padding-top: 1.5rem;
5545
+ border-top: 1px solid var(--ck-line);
5546
+ }
5547
+ .ck-starter-row--empty-state {
5548
+ margin-top: 0;
5549
+ padding-top: 0;
5550
+ border-top: 0;
5551
+ }
5552
+ .ck-starter-row .ck-kicker,
5553
+ .ck-starter-row .ck-title {
5554
+ margin-bottom: 0.5rem;
5555
+ }
5556
+ .ck-starter-row .ck-lead,
5557
+ .ck-starter-row .ck-meta-copy {
5558
+ margin-bottom: 1.25rem;
5559
+ }
5560
+ .ck-starter-grid {
5561
+ display: grid;
5562
+ grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
5563
+ gap: 12px;
5564
+ }
5565
+ .ck-starter-card {
5566
+ display: flex;
5567
+ flex-direction: column;
5568
+ gap: 8px;
5569
+ padding: 14px;
5570
+ background: var(--ck-surface);
5571
+ border: 1px solid var(--ck-line);
5572
+ border-radius: var(--ck-radius-lg);
5573
+ transition: border-color 0.12s, background 0.12s;
5574
+ }
5575
+ .ck-starter-card:hover {
5576
+ border-color: rgba(6, 182, 212, 0.35);
5577
+ background: var(--ck-surface-hover);
5578
+ }
5579
+ .ck-starter-card__name {
5580
+ margin: 0;
5581
+ font-size: 1rem;
5582
+ font-weight: 600;
5583
+ }
5584
+ .ck-starter-card__desc {
5585
+ margin: 0;
5586
+ font-size: 0.85rem;
5587
+ color: var(--ck-muted);
5588
+ line-height: 1.45;
5589
+ flex: 1;
5590
+ }
5591
+ .ck-starter-card__actions {
5592
+ display: flex;
5593
+ align-items: center;
5594
+ justify-content: space-between;
5595
+ gap: 10px;
5596
+ margin-top: 4px;
5597
+ }
5598
+ .ck-starter-card__dismiss {
5599
+ font-size: 0.75rem;
5600
+ color: var(--ck-dim);
5601
+ }
5602
+ .ck-starter-card__dismiss:hover,
5603
+ .ck-starter-card__dismiss:focus-visible {
5604
+ color: var(--ck-text);
5605
+ }
5606
+ .ck-actions--right {
5607
+ justify-content: flex-end;
5608
+ }
@@ -5,6 +5,33 @@ module CompletionKit
5
5
 
6
6
  def index
7
7
  @metrics = apply_tag_filter(Metric.includes(:metric_groups, :tags).order(:name))
8
+ @available_starters = StarterMetrics.available
9
+ end
10
+
11
# Looks up the starter named by params[:key] for the preview page. When no
# starter matches, redirects to the metrics index with an alert instead.
def starter_preview
  @starter = StarterMetrics.find(params[:key])
  return if @starter

  redirect_to(metrics_path, alert: "Unknown starter metric.")
end
15
+
16
# Creates a Metric from the starter identified by params[:key].
#
# Redirects to the metrics index with an alert when the key is unknown, when a
# metric with the starter's name already exists, or when the metric cannot be
# saved. On success, redirects to the new metric with a notice.
def adopt_starter
  starter = StarterMetrics.find(params[:key])
  return redirect_to(metrics_path, alert: "Unknown starter metric.") unless starter

  duplicate_alert = "A metric named \"#{starter.name}\" already exists."
  return redirect_to(metrics_path, alert: duplicate_alert) if Metric.exists?(name: starter.name)

  metric = Metric.create!(
    name: starter.name,
    instruction: starter.instruction,
    # Give the record its own band hashes so nothing done to the record can
    # mutate the shared starter definition.
    rubric_bands: starter.rubric_bands.map(&:dup)
  )
  redirect_to metric_path(metric), notice: "Added the \"#{starter.name}\" starter. Tweak any band before you run a judge against it."
rescue ActiveRecord::RecordNotUnique
  # The exists? check above is not atomic with the insert: a concurrent request
  # can create the same metric in between (only surfaces here if the database
  # enforces uniqueness on the name — TODO confirm the index exists).
  redirect_to metrics_path, alert: duplicate_alert
rescue ActiveRecord::RecordInvalid => e
  # create! raises on any validation failure; report it instead of a 500.
  redirect_to metrics_path, alert: "Could not add the \"#{starter.name}\" starter: #{e.record.errors.full_messages.to_sentence}"
end
29
+
30
# Records a dismissal for the starter identified by params[:key] and returns to
# the metrics index with a notice. An unknown key redirects there with an alert.
def dismiss_starter
  starter = StarterMetrics.find(params[:key])

  if starter
    StarterMetricDismissal.find_or_create_by(starter_key: starter.key)
    redirect_to metrics_path, notice: "Dismissed \"#{starter.name}\". It won't appear here again."
  else
    redirect_to(metrics_path, alert: "Unknown starter metric.")
  end
end
9
36
 
10
37
  def show
@@ -0,0 +1,5 @@
1
module CompletionKit
  # Record holding a starter key. The key is required and may be stored only
  # once (both enforced by the validations below).
  class StarterMetricDismissal < ApplicationRecord
    validates :starter_key, presence: true
    validates :starter_key, uniqueness: true
  end
end
@@ -0,0 +1,94 @@
1
module CompletionKit
  # Catalogue of pre-written "starter" metrics plus helpers to look them up and
  # to work out which ones are still worth offering.
  module StarterMetrics
    # One starter definition.
    #
    # key          - String identifier used for lookup (see .find).
    # name         - display name; also compared against Metric names.
    # description  - short summary of the dimension.
    # catches      - text describing the failures this dimension surfaces.
    # instruction  - instruction text stored on the metric when adopted.
    # rubric_bands - Array of Hashes with "stars" and "description" keys.
    Starter = Struct.new(:key, :name, :description, :catches, :instruction, :rubric_bands, keyword_init: true)

    # Every starter, in display order.
    ALL = [
      Starter.new(
        key: "correctness",
        name: "Correctness",
        description: "Is the output factually right and free of made-up information?",
        catches: "Hallucinations, wrong facts, subtle distortions. The most universally-asked question about an LLM's output.",
        instruction: "Is the output factually right and free of made-up information? Penalise hallucinations and subtle factual distortions; reward outputs whose every claim checks out.",
        rubric_bands: [
          { "stars" => 5, "description" => "Every fact in the output checks out." },
          { "stars" => 4, "description" => "Right in substance; minor imprecision or omission." },
          { "stars" => 3, "description" => "Mostly right, one or two facts are off." },
          { "stars" => 2, "description" => "Mostly wrong with a few right details." },
          { "stars" => 1, "description" => "Wrong, misleading, or contains fabricated facts." }
        ]
      ),
      Starter.new(
        key: "instruction_following",
        name: "Instruction following",
        description: "Did the model do everything that was asked?",
        catches: "The response is factually right but ignores \"answer in two sentences\", \"use bullet points\", \"do not include X\". Distinct from Correctness — a response can be right and still fail this.",
        instruction: "Did the model do every concrete thing the prompt asked for? Score against the explicit requirements in the prompt (format constraints, count limits, exclusions, audience cues). Factual accuracy is a different dimension — score that elsewhere.",
        rubric_bands: [
          { "stars" => 5, "description" => "Followed every requirement in the prompt exactly." },
          { "stars" => 4, "description" => "Followed every requirement with a small slip." },
          { "stars" => 3, "description" => "Did the main thing, missed at least one explicit requirement." },
          { "stars" => 2, "description" => "Did some of what was asked, missed the main requirement." },
          { "stars" => 1, "description" => "Ignored the instructions or did something different." }
        ]
      ),
      Starter.new(
        key: "format_compliance",
        name: "Format compliance",
        description: "Does the output follow the required structure?",
        catches: "Invalid JSON, missing schema fields, extra prose around a structured response, wrong casing on keys. Critical for any LLM wired into an API.",
        instruction: "Does the output match the format the prompt asked for — JSON shape, schema, keys, casing, no stray prose? Score on whether a downstream parser would accept it without massaging.",
        rubric_bands: [
          { "stars" => 5, "description" => "Exact spec, ready to consume programmatically." },
          { "stars" => 4, "description" => "Spec-compliant with one cosmetic issue." },
          { "stars" => 3, "description" => "Right shape, minor deviations (extra commentary, casing, ordering)." },
          { "stars" => 2, "description" => "Right format with substantive deviations (missing required fields, wrong types)." },
          { "stars" => 1, "description" => "Wrong format or unparseable." }
        ]
      ),
      Starter.new(
        key: "tone",
        name: "Tone",
        description: "Does the voice fit the audience the prompt asked for?",
        catches: "Rude, robotic, off-brand, too casual, too formal. The dimension hardest to eyeball at scale and the one most user-facing surfaces care about.",
        instruction: "Does the voice match the audience and brand the prompt called for? Reward outputs that sound like the persona the prompt asked for. Penalise rude, robotic, off-brand, or wrong-register replies.",
        rubric_bands: [
          { "stars" => 5, "description" => "Sounds like the brand or persona the prompt asked for." },
          { "stars" => 4, "description" => "Right tone with a slip or two." },
          { "stars" => 3, "description" => "Acceptable, generic, no personality." },
          { "stars" => 2, "description" => "Mismatched tone; sounds like a different audience." },
          { "stars" => 1, "description" => "Off-tone in a way a user would notice (rude, condescending, jarring)." }
        ]
      ),
      Starter.new(
        key: "conciseness",
        name: "Conciseness",
        description: "Is it the right length — no padding, no missing detail?",
        catches: "Rambling responses, repetitive caveats, over-hedging. LLMs default to verbose. Conciseness is the dimension where users most often see scores move after tuning.",
        instruction: "Is the output the right length for the task — no padding, no missing detail, no hedging filler? Penalise rambling, repetition, over-caveating, and unnecessary preamble. Penalise too-short outputs that drop information.",
        rubric_bands: [
          { "stars" => 5, "description" => "Exactly as long as the task needs, no more, no less." },
          { "stars" => 4, "description" => "Right length with a small redundancy." },
          { "stars" => 3, "description" => "Acceptable; trims could happen or detail could be added." },
          { "stars" => 2, "description" => "Noticeable filler or visible gaps." },
          { "stars" => 1, "description" => "Padded, repetitive, or so short it loses information." }
        ]
      )
    ].freeze

    module_function

    # Returns the starter whose key matches, or nil when none does.
    #
    # key - String or Symbol (anything responding to to_s); nil never matches.
    def find(key)
      # Coerce once so Symbol keys resolve the same as their String form.
      wanted = key.to_s
      ALL.find { |starter| starter.key == wanted }
    end

    # Returns the starters that can still be offered: those with no Metric of
    # the same name and no StarterMetricDismissal row for their key.
    def available
      adopted_names = Metric.where(name: ALL.map(&:name)).pluck(:name).to_set
      dismissed_keys = StarterMetricDismissal.pluck(:starter_key).to_set
      ALL.reject { |starter| adopted_names.include?(starter.name) || dismissed_keys.include?(starter.key) }
    end

    # True when a Metric with the starter's name already exists.
    def adopted?(starter)
      Metric.exists?(name: starter.name)
    end
  end
end
@@ -0,0 +1,11 @@
1
+ <article class="ck-starter-card">
2
+ <h3 class="ck-starter-card__name"><%= link_to starter.name, starter_preview_metrics_path(key: starter.key), class: "ck-link" %></h3>
3
+ <p class="ck-starter-card__desc"><%= starter.description %></p>
4
+ <div class="ck-starter-card__actions">
5
+ <%= link_to "Preview", starter_preview_metrics_path(key: starter.key), class: ck_button_classes(:dark) + " ck-button--sm" %>
6
+ <%= button_to "Don't show this one", dismiss_starter_metrics_path(key: starter.key),
7
+ method: :post, form_class: "inline-block",
8
+ class: "ck-link ck-starter-card__dismiss",
9
+ data: { turbo_confirm: "Hide \"#{starter.name}\" from this list?" } %>
10
+ </div>
11
+ </article>
@@ -75,12 +75,36 @@
75
75
  Use the same metrics on multiple runs? <%= link_to "Group them →", metric_groups_path, class: "ck-link" %>
76
76
  </p>
77
77
  <% end %>
78
+
79
+ <% if @available_starters.any? %>
80
+ <section class="ck-starter-row">
81
+ <p class="ck-kicker">Add a starter metric</p>
82
+ <p class="ck-meta-copy">Pre-written rubrics for the dimensions most teams score against. Click a card to preview before it's created.</p>
83
+ <div class="ck-starter-grid">
84
+ <% @available_starters.each do |starter| %>
85
+ <%= render "starter_card", starter: starter %>
86
+ <% end %>
87
+ </div>
88
+ </section>
89
+ <% end %>
78
90
  <% elsif @selected_tags.any? %>
79
91
  <div class="ck-empty">
80
92
  <p>No metrics match these tags. <%= link_to "Clear filters", metrics_path, class: "ck-link" %>.</p>
81
93
  </div>
82
94
  <% else %>
83
- <div class="ck-empty">
84
- <p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
85
- </div>
95
+ <% if @available_starters.any? %>
96
+ <section class="ck-starter-row ck-starter-row--empty-state">
97
+ <h2 class="ck-title ck-title--sm">Start with a ready-made rubric</h2>
98
+ <p class="ck-lead">Pick one of the dimensions below to drop in a pre-written 1–5 rubric. You can edit anything after adding it. Or <%= link_to "write your own from scratch", new_metric_path, class: "ck-link" %>.</p>
99
+ <div class="ck-starter-grid">
100
+ <% @available_starters.each do |starter| %>
101
+ <%= render "starter_card", starter: starter %>
102
+ <% end %>
103
+ </div>
104
+ </section>
105
+ <% else %>
106
+ <div class="ck-empty">
107
+ <p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
108
+ </div>
109
+ <% end %>
86
110
  <% end %>
@@ -0,0 +1,45 @@
1
+ <section class="ck-page-header">
2
+ <div>
3
+ <p class="ck-kicker">Starter metric</p>
4
+ <h1 class="ck-title"><%= @starter.name %></h1>
5
+ <p class="ck-lead"><%= @starter.description %></p>
6
+ </div>
7
+ <div class="ck-actions">
8
+ <%= link_to "← Back to metrics", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
9
+ </div>
10
+ </section>
11
+
12
+ <section class="ck-card ck-card--spaced">
13
+ <p class="ck-kicker">What this catches</p>
14
+ <p class="ck-copy"><%= @starter.catches %></p>
15
+ </section>
16
+
17
+ <section class="ck-card ck-card--spaced">
18
+ <p class="ck-kicker">Instruction the judge will see</p>
19
+ <p class="ck-copy"><%= @starter.instruction %></p>
20
+ </section>
21
+
22
+ <section class="ck-card ck-card--spaced">
23
+ <p class="ck-kicker">Rubric</p>
24
+ <div class="ck-rubric-display">
25
+ <% @starter.rubric_bands.sort_by { |b| -b["stars"] }.each do |band| %>
26
+ <div class="ck-rubric-row ck-rubric-row--display">
27
+ <div class="ck-rubric-row__stars">
28
+ <% 5.times do |i| %>
29
+ <svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
30
+ <% end %>
31
+ </div>
32
+ <div class="ck-rubric-row__fields">
33
+ <p class="ck-copy"><%= band["description"] %></p>
34
+ </div>
35
+ </div>
36
+ <% end %>
37
+ </div>
38
+ </section>
39
+
40
+ <div class="ck-actions ck-actions--right">
41
+ <%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
42
+ <%= button_to "Add this metric", adopt_starter_metrics_path(key: @starter.key),
43
+ method: :post, form_class: "inline-block",
44
+ class: ck_button_classes(:dark) %>
45
+ </div>
data/config/routes.rb CHANGED
@@ -13,6 +13,11 @@ CompletionKit::Engine.routes.draw do
13
13
 
14
14
  resources :datasets
15
15
  resources :metrics do
16
+ collection do
17
+ get "starters/:key", to: "metrics#starter_preview", as: :starter_preview
18
+ post "starters/:key", to: "metrics#adopt_starter", as: :adopt_starter
19
+ post "starters/:key/dismiss", to: "metrics#dismiss_starter", as: :dismiss_starter
20
+ end
16
21
  member do
17
22
  post :add_few_shot
18
23
  post :publish_draft
@@ -0,0 +1,12 @@
1
# Creates completion_kit_starter_metric_dismissals: a required starter_key
# string column, timestamps, and a unique index on starter_key.
class CreateCompletionKitStarterMetricDismissals < ActiveRecord::Migration[8.1]
  def change
    create_table :completion_kit_starter_metric_dismissals do |t|
      t.string :starter_key, null: false
      t.timestamps

      t.index :starter_key, unique: true, name: "index_ck_starter_dismissals_on_key"
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.41"
2
+ VERSION = "0.5.42"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.41
4
+ version: 0.5.42
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -284,6 +284,7 @@ files:
284
284
  - app/models/completion_kit/review.rb
285
285
  - app/models/completion_kit/run.rb
286
286
  - app/models/completion_kit/run_metric.rb
287
+ - app/models/completion_kit/starter_metric_dismissal.rb
287
288
  - app/models/completion_kit/suggestion.rb
288
289
  - app/models/completion_kit/tag.rb
289
290
  - app/models/completion_kit/tagging.rb
@@ -318,6 +319,7 @@ files:
318
319
  - app/services/completion_kit/open_router_client.rb
319
320
  - app/services/completion_kit/prompt_improvement_service.rb
320
321
  - app/services/completion_kit/provider_endpoint.rb
322
+ - app/services/completion_kit/starter_metrics.rb
321
323
  - app/services/completion_kit/worker_health.rb
322
324
  - app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb
323
325
  - app/views/completion_kit/api_reference/_authentication.html.erb
@@ -345,10 +347,12 @@ files:
345
347
  - app/views/completion_kit/metric_groups/new.html.erb
346
348
  - app/views/completion_kit/metric_groups/show.html.erb
347
349
  - app/views/completion_kit/metrics/_form.html.erb
350
+ - app/views/completion_kit/metrics/_starter_card.html.erb
348
351
  - app/views/completion_kit/metrics/edit.html.erb
349
352
  - app/views/completion_kit/metrics/index.html.erb
350
353
  - app/views/completion_kit/metrics/new.html.erb
351
354
  - app/views/completion_kit/metrics/show.html.erb
355
+ - app/views/completion_kit/metrics/starter_preview.html.erb
352
356
  - app/views/completion_kit/onboarding/_concept.html.erb
353
357
  - app/views/completion_kit/onboarding/show.html.erb
354
358
  - app/views/completion_kit/prompts/_form.html.erb
@@ -414,6 +418,7 @@ files:
414
418
  - db/migrate/20260522000002_create_completion_kit_calibrations.rb
415
419
  - db/migrate/20260523000001_add_few_shot_examples_to_completion_kit_metrics.rb
416
420
  - db/migrate/20260523000002_add_state_to_completion_kit_judge_versions.rb
421
+ - db/migrate/20260524000001_create_completion_kit_starter_metric_dismissals.rb
417
422
  - lib/completion-kit.rb
418
423
  - lib/completion_kit.rb
419
424
  - lib/completion_kit/concurrency_check.rb