RubyGems - completion-kit - Versions diffs - 0.5.41 → 0.5.42 - Mend

completion-kit 0.5.41 → 0.5.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/app/assets/stylesheets/completion_kit/application.css +68 -0
data/app/controllers/completion_kit/metrics_controller.rb +27 -0
data/app/models/completion_kit/starter_metric_dismissal.rb +5 -0
data/app/services/completion_kit/starter_metrics.rb +94 -0
data/app/views/completion_kit/metrics/_starter_card.html.erb +11 -0
data/app/views/completion_kit/metrics/index.html.erb +27 -3
data/app/views/completion_kit/metrics/starter_preview.html.erb +45 -0
data/config/routes.rb +5 -0
data/db/migrate/20260524000001_create_completion_kit_starter_metric_dismissals.rb +12 -0
data/lib/completion_kit/version.rb +1 -1
metadata +6 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c92ab72bfe3b2fe9fa296a21e8430f7b8c3d9f949a933f7675f4ffd8059dd8ae
-  data.tar.gz: ec53ee3e3b29a4d283db95d90e7a1dd8afef3572647ae243c9458500600714c9
+  metadata.gz: cb4e02cd47c184c237c34642f1ece8c44bef6a2e79cfeeb3da913a536940216e
+  data.tar.gz: 516720138ec5dd2eeecc2d28cd9e7830c8c2ab2b4a183733afe44a939ca4d9af
 SHA512:
-  metadata.gz: 8b783ec1b478a365f9e8a04da486fc7a87251d53dd6c63720c7d88795a524546bf71da8a07eca79337863c6787f52c776e51c75940b854a6e477f8074bf1d016
-  data.tar.gz: 7444db06e0adf5e29c7d68426496e643c743187d9cc8d974367cfdf025c993c0e061ad2b4b3f9d6a70f1aca3d70d124744b2a7ae416d5827efab9b6339b496f7
+  metadata.gz: 0f3403ecf234b58f659f49406e0264384a5ccbda7206e3673c3b4d96704658b1efe5c2e74943fd6110477a8acffeca3600af655980e76a139319e6dc4376779f
+  data.tar.gz: da16ea0344f7a00f8366faabe4cd9d53ef487fc67a06f415b7772ff3631fcb3edca6f30be9a59dd3e4cc9e40795cdd4977028c5fd880ab9d2d49d3563babe5aa

data/app/assets/stylesheets/completion_kit/application.css CHANGED Viewed

@@ -5538,3 +5538,71 @@ a.tag-mark {
   margin: 0;
   font-size: 0.78rem;
 }
+/* Starter metrics section on the metrics index: separated from the content above by a top rule. */
+.ck-starter-row {
+  margin-top: 2rem;
+  padding-top: 1.5rem;
+  border-top: 1px solid var(--ck-line);
+}
+/* Empty-state variant: nothing sits above the section, so drop the spacing and the rule. */
+.ck-starter-row--empty-state {
+  margin-top: 0;
+  padding-top: 0;
+  border-top: 0;
+}
+.ck-starter-row .ck-kicker,
+.ck-starter-row .ck-title {
+  margin-bottom: 0.5rem;
+}
+.ck-starter-row .ck-lead,
+.ck-starter-row .ck-meta-copy {
+  margin-bottom: 1.25rem;
+}
+/* Responsive card grid: as many columns of at least 260px as fit the row. */
+.ck-starter-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
+  gap: 12px;
+}
+/* One starter card: vertical stack of name, description and actions. */
+.ck-starter-card {
+  display: flex;
+  flex-direction: column;
+  gap: 8px;
+  padding: 14px;
+  background: var(--ck-surface);
+  border: 1px solid var(--ck-line);
+  border-radius: var(--ck-radius-lg);
+  transition: border-color 0.12s, background 0.12s;
+}
+.ck-starter-card:hover {
+  border-color: rgba(6, 182, 212, 0.35);
+  background: var(--ck-surface-hover);
+}
+.ck-starter-card__name {
+  margin: 0;
+  font-size: 1rem;
+  font-weight: 600;
+}
+/* flex: 1 lets the description absorb spare height so the actions row sits at the card bottom. */
+.ck-starter-card__desc {
+  margin: 0;
+  font-size: 0.85rem;
+  color: var(--ck-muted);
+  line-height: 1.45;
+  flex: 1;
+}
+.ck-starter-card__actions {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 10px;
+  margin-top: 4px;
+}
+/* Dismiss control is de-emphasised until hovered or keyboard-focused. */
+.ck-starter-card__dismiss {
+  font-size: 0.75rem;
+  color: var(--ck-dim);
+}
+.ck-starter-card__dismiss:hover,
+.ck-starter-card__dismiss:focus-visible {
+  color: var(--ck-text);
+}
+/* Modifier for .ck-actions rows whose buttons should align to the right edge. */
+.ck-actions--right {
+  justify-content: flex-end;
+}

data/app/controllers/completion_kit/metrics_controller.rb CHANGED Viewed

@@ -5,6 +5,33 @@ module CompletionKit
+    # Loads the metrics list (with metric groups and tags preloaded, ordered by
+    # name, passed through apply_tag_filter) and the starter metrics that
+    # StarterMetrics.available still reports, for the index view.
     def index
       @metrics = apply_tag_filter(Metric.includes(:metric_groups, :tags).order(:name))
+      @available_starters = StarterMetrics.available
+    end
+    def starter_preview
+      @starter = StarterMetrics.find(params[:key])
+      return redirect_to(metrics_path, alert: "Unknown starter metric.") unless @starter
+    end
+    def adopt_starter
+      starter = StarterMetrics.find(params[:key])
+      return redirect_to(metrics_path, alert: "Unknown starter metric.") unless starter
+      if Metric.exists?(name: starter.name)
+        return redirect_to(metrics_path, alert: "A metric named \"#{starter.name}\" already exists.")
+      end
+      metric = Metric.create!(
+        name: starter.name,
+        instruction: starter.instruction,
+        rubric_bands: starter.rubric_bands
+      )
+      redirect_to metric_path(metric), notice: "Added the \"#{starter.name}\" starter. Tweak any band before you run a judge against it."
+    end
+    def dismiss_starter
+      starter = StarterMetrics.find(params[:key])
+      return redirect_to(metrics_path, alert: "Unknown starter metric.") unless starter
+      StarterMetricDismissal.find_or_create_by(starter_key: starter.key)
+      redirect_to metrics_path, notice: "Dismissed \"#{starter.name}\". It won't appear here again."
     end
     def show

data/app/models/completion_kit/starter_metric_dismissal.rb ADDED Viewed

@@ -0,0 +1,5 @@
+module CompletionKit
+  class StarterMetricDismissal < ApplicationRecord
+    validates :starter_key, presence: true, uniqueness: true
+  end
+end

data/app/services/completion_kit/starter_metrics.rb ADDED Viewed

@@ -0,0 +1,94 @@
+module CompletionKit
+  module StarterMetrics
+    Starter = Struct.new(:key, :name, :description, :catches, :instruction, :rubric_bands, keyword_init: true)
+    ALL = [
+      Starter.new(
+        key: "correctness",
+        name: "Correctness",
+        description: "Is the output factually right and free of made-up information?",
+        catches: "Hallucinations, wrong facts, subtle distortions. The most universally-asked question about an LLM's output.",
+        instruction: "Is the output factually right and free of made-up information? Penalise hallucinations and subtle factual distortions; reward outputs whose every claim checks out.",
+        rubric_bands: [
+          { "stars" => 5, "description" => "Every fact in the output checks out." },
+          { "stars" => 4, "description" => "Right in substance; minor imprecision or omission." },
+          { "stars" => 3, "description" => "Mostly right, one or two facts are off." },
+          { "stars" => 2, "description" => "Mostly wrong with a few right details." },
+          { "stars" => 1, "description" => "Wrong, misleading, or contains fabricated facts." }
+        ]
+      ),
+      Starter.new(
+        key: "instruction_following",
+        name: "Instruction following",
+        description: "Did the model do everything that was asked?",
+        catches: "The response is factually right but ignores \"answer in two sentences\", \"use bullet points\", \"do not include X\". Distinct from Correctness — a response can be right and still fail this.",
+        instruction: "Did the model do every concrete thing the prompt asked for? Score against the explicit requirements in the prompt (format constraints, count limits, exclusions, audience cues). Factual accuracy is a different dimension — score that elsewhere.",
+        rubric_bands: [
+          { "stars" => 5, "description" => "Followed every requirement in the prompt exactly." },
+          { "stars" => 4, "description" => "Followed every requirement with a small slip." },
+          { "stars" => 3, "description" => "Did the main thing, missed at least one explicit requirement." },
+          { "stars" => 2, "description" => "Did some of what was asked, missed the main requirement." },
+          { "stars" => 1, "description" => "Ignored the instructions or did something different." }
+        ]
+      ),
+      Starter.new(
+        key: "format_compliance",
+        name: "Format compliance",
+        description: "Does the output follow the required structure?",
+        catches: "Invalid JSON, missing schema fields, extra prose around a structured response, wrong casing on keys. Critical for any LLM wired into an API.",
+        instruction: "Does the output match the format the prompt asked for — JSON shape, schema, keys, casing, no stray prose? Score on whether a downstream parser would accept it without massaging.",
+        rubric_bands: [
+          { "stars" => 5, "description" => "Exact spec, ready to consume programmatically." },
+          { "stars" => 4, "description" => "Spec-compliant with one cosmetic issue." },
+          { "stars" => 3, "description" => "Right shape, minor deviations (extra commentary, casing, ordering)." },
+          { "stars" => 2, "description" => "Right format with substantive deviations (missing required fields, wrong types)." },
+          { "stars" => 1, "description" => "Wrong format or unparseable." }
+        ]
+      ),
+      Starter.new(
+        key: "tone",
+        name: "Tone",
+        description: "Does the voice fit the audience the prompt asked for?",
+        catches: "Rude, robotic, off-brand, too casual, too formal. The dimension hardest to eyeball at scale and the one most user-facing surfaces care about.",
+        instruction: "Does the voice match the audience and brand the prompt called for? Reward outputs that sound like the persona the prompt asked for. Penalise rude, robotic, off-brand, or wrong-register replies.",
+        rubric_bands: [
+          { "stars" => 5, "description" => "Sounds like the brand or persona the prompt asked for." },
+          { "stars" => 4, "description" => "Right tone with a slip or two." },
+          { "stars" => 3, "description" => "Acceptable, generic, no personality." },
+          { "stars" => 2, "description" => "Mismatched tone; sounds like a different audience." },
+          { "stars" => 1, "description" => "Off-tone in a way a user would notice (rude, condescending, jarring)." }
+        ]
+      ),
+      Starter.new(
+        key: "conciseness",
+        name: "Conciseness",
+        description: "Is it the right length — no padding, no missing detail?",
+        catches: "Rambling responses, repetitive caveats, over-hedging. LLMs default to verbose. Conciseness is the dimension where users most often see scores move after tuning.",
+        instruction: "Is the output the right length for the task — no padding, no missing detail, no hedging filler? Penalise rambling, repetition, over-caveating, and unnecessary preamble. Penalise too-short outputs that drop information.",
+        rubric_bands: [
+          { "stars" => 5, "description" => "Exactly as long as the task needs, no more, no less." },
+          { "stars" => 4, "description" => "Right length with a small redundancy." },
+          { "stars" => 3, "description" => "Acceptable; trims could happen or detail could be added." },
+          { "stars" => 2, "description" => "Noticeable filler or visible gaps." },
+          { "stars" => 1, "description" => "Padded, repetitive, or so short it loses information." }
+        ]
+      )
+    ].freeze
+    module_function
+    def find(key)
+      ALL.find { |s| s.key == key }
+    end
+    def available
+      adopted_names = Metric.where(name: ALL.map(&:name)).pluck(:name).to_set
+      dismissed_keys = StarterMetricDismissal.pluck(:starter_key).to_set
+      ALL.reject { |s| adopted_names.include?(s.name) || dismissed_keys.include?(s.key) }
+    end
+    def adopted?(starter)
+      Metric.exists?(name: starter.name)
+    end
+  end
+end

data/app/views/completion_kit/metrics/_starter_card.html.erb ADDED Viewed

@@ -0,0 +1,11 @@
+<%# Card for one starter metric. Expects a `starter` local that responds to name, description and key. %>
+<article class="ck-starter-card">
+  <h3 class="ck-starter-card__name"><%= link_to starter.name, starter_preview_metrics_path(key: starter.key), class: "ck-link" %></h3>
+  <p class="ck-starter-card__desc"><%= starter.description %></p>
+  <div class="ck-starter-card__actions">
+    <%= link_to "Preview", starter_preview_metrics_path(key: starter.key), class: ck_button_classes(:dark) + " ck-button--sm" %>
+    <%# Dismissal is a POST behind a confirmation prompt, since the starter stops being listed afterwards. %>
+    <%= button_to "Don't show this one", dismiss_starter_metrics_path(key: starter.key),
+          method: :post, form_class: "inline-block",
+          class: "ck-link ck-starter-card__dismiss",
+          data: { turbo_confirm: "Hide \"#{starter.name}\" from this list?" } %>
+  </div>
+</article>

data/app/views/completion_kit/metrics/index.html.erb CHANGED Viewed

@@ -75,12 +75,36 @@
       Use the same metrics on multiple runs? <%= link_to "Group them →", metric_groups_path, class: "ck-link" %>
     </p>
   <% end %>
+  <% if @available_starters.any? %>
+    <section class="ck-starter-row">
+      <p class="ck-kicker">Add a starter metric</p>
+      <p class="ck-meta-copy">Pre-written rubrics for the dimensions most teams score against. Click a card to preview before it's created.</p>
+      <div class="ck-starter-grid">
+        <% @available_starters.each do |starter| %>
+          <%= render "starter_card", starter: starter %>
+        <% end %>
+      </div>
+    </section>
+  <% end %>
 <% elsif @selected_tags.any? %>
   <div class="ck-empty">
     <p>No metrics match these tags. <%= link_to "Clear filters", metrics_path, class: "ck-link" %>.</p>
   </div>
 <% else %>
-  <div class="ck-empty">
-    <p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
-  </div>
+  <% if @available_starters.any? %>
+    <section class="ck-starter-row ck-starter-row--empty-state">
+      <h2 class="ck-title ck-title--sm">Start with a ready-made rubric</h2>
+      <p class="ck-lead">Pick one of the dimensions below to drop in a pre-written 1–5 rubric. You can edit anything after adding it. Or <%= link_to "write your own from scratch", new_metric_path, class: "ck-link" %>.</p>
+      <div class="ck-starter-grid">
+        <% @available_starters.each do |starter| %>
+          <%= render "starter_card", starter: starter %>
+        <% end %>
+      </div>
+    </section>
+  <% else %>
+    <div class="ck-empty">
+      <p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
+    </div>
+  <% end %>
 <% end %>

data/app/views/completion_kit/metrics/starter_preview.html.erb ADDED Viewed

@@ -0,0 +1,45 @@
+<%# Read-only preview of a starter metric (@starter): what it catches, the judge instruction, and the rubric, followed by the button that adopts it. %>
+<section class="ck-page-header">
+  <div>
+    <p class="ck-kicker">Starter metric</p>
+    <h1 class="ck-title"><%= @starter.name %></h1>
+    <p class="ck-lead"><%= @starter.description %></p>
+  </div>
+  <div class="ck-actions">
+    <%= link_to "← Back to metrics", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
+  </div>
+</section>
+<section class="ck-card ck-card--spaced">
+  <p class="ck-kicker">What this catches</p>
+  <p class="ck-copy"><%= @starter.catches %></p>
+</section>
+<section class="ck-card ck-card--spaced">
+  <p class="ck-kicker">Instruction the judge will see</p>
+  <p class="ck-copy"><%= @starter.instruction %></p>
+</section>
+<section class="ck-card ck-card--spaced">
+  <p class="ck-kicker">Rubric</p>
+  <div class="ck-rubric-display">
+    <%# Bands are listed from the highest star count down to the lowest. %>
+    <% @starter.rubric_bands.sort_by { |b| -b["stars"] }.each do |band| %>
+      <div class="ck-rubric-row ck-rubric-row--display">
+        <div class="ck-rubric-row__stars">
+          <%# Always draw five stars; the first band["stars"] of them are filled. %>
+          <% 5.times do |i| %>
+            <svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
+          <% end %>
+        </div>
+        <div class="ck-rubric-row__fields">
+          <p class="ck-copy"><%= band["description"] %></p>
+        </div>
+      </div>
+    <% end %>
+  </div>
+</section>
+<div class="ck-actions ck-actions--right">
+  <%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
+  <%= button_to "Add this metric", adopt_starter_metrics_path(key: @starter.key),
+        method: :post, form_class: "inline-block",
+        class: ck_button_classes(:dark) %>
+</div>

data/config/routes.rb CHANGED Viewed

@@ -13,6 +13,11 @@ CompletionKit::Engine.routes.draw do
   resources :datasets
   resources :metrics do
+    collection do
+      # Starter metrics are addressed by their string key, not by a record id:
+      # GET previews a starter, POST to the same path adopts it as a metric,
+      # and POST to .../dismiss hides it from the list.
+      get  "starters/:key", to: "metrics#starter_preview",  as: :starter_preview
+      post "starters/:key", to: "metrics#adopt_starter",    as: :adopt_starter
+      post "starters/:key/dismiss", to: "metrics#dismiss_starter", as: :dismiss_starter
+    end
     member do
       post :add_few_shot
       post :publish_draft

data/db/migrate/20260524000001_create_completion_kit_starter_metric_dismissals.rb ADDED Viewed

@@ -0,0 +1,12 @@
+class CreateCompletionKitStarterMetricDismissals < ActiveRecord::Migration[8.1]
+  def change
+    create_table :completion_kit_starter_metric_dismissals do |t|
+      t.string :starter_key, null: false
+      t.timestamps
+    end
+    add_index :completion_kit_starter_metric_dismissals, :starter_key,
+              unique: true,
+              name: "index_ck_starter_dismissals_on_key"
+  end
+end

data/lib/completion_kit/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module CompletionKit
-  VERSION = "0.5.41"
+  VERSION = "0.5.42"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: completion-kit
 version: !ruby/object:Gem::Version
-  version: 0.5.41
+  version: 0.5.42
 platform: ruby
 authors:
 - Damien Bastin
@@ -284,6 +284,7 @@ files:
 - app/models/completion_kit/review.rb
 - app/models/completion_kit/run.rb
 - app/models/completion_kit/run_metric.rb
+- app/models/completion_kit/starter_metric_dismissal.rb
 - app/models/completion_kit/suggestion.rb
 - app/models/completion_kit/tag.rb
 - app/models/completion_kit/tagging.rb
@@ -318,6 +319,7 @@ files:
 - app/services/completion_kit/open_router_client.rb
 - app/services/completion_kit/prompt_improvement_service.rb
 - app/services/completion_kit/provider_endpoint.rb
+- app/services/completion_kit/starter_metrics.rb
 - app/services/completion_kit/worker_health.rb
 - app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb
 - app/views/completion_kit/api_reference/_authentication.html.erb
@@ -345,10 +347,12 @@ files:
 - app/views/completion_kit/metric_groups/new.html.erb
 - app/views/completion_kit/metric_groups/show.html.erb
 - app/views/completion_kit/metrics/_form.html.erb
+- app/views/completion_kit/metrics/_starter_card.html.erb
 - app/views/completion_kit/metrics/edit.html.erb
 - app/views/completion_kit/metrics/index.html.erb
 - app/views/completion_kit/metrics/new.html.erb
 - app/views/completion_kit/metrics/show.html.erb
+- app/views/completion_kit/metrics/starter_preview.html.erb
 - app/views/completion_kit/onboarding/_concept.html.erb
 - app/views/completion_kit/onboarding/show.html.erb
 - app/views/completion_kit/prompts/_form.html.erb
@@ -414,6 +418,7 @@ files:
 - db/migrate/20260522000002_create_completion_kit_calibrations.rb
 - db/migrate/20260523000001_add_few_shot_examples_to_completion_kit_metrics.rb
 - db/migrate/20260523000002_add_state_to_completion_kit_judge_versions.rb
+- db/migrate/20260524000001_create_completion_kit_starter_metric_dismissals.rb
 - lib/completion-kit.rb
 - lib/completion_kit.rb
 - lib/completion_kit/concurrency_check.rb