completion-kit 0.1.0.rc1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +83 -88
  3. data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +62 -0
  4. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
  5. data/app/controllers/completion_kit/metric_groups_controller.rb +67 -0
  6. data/app/controllers/completion_kit/metrics_controller.rb +2 -2
  7. data/app/controllers/completion_kit/runs_controller.rb +1 -1
  8. data/app/models/completion_kit/metric.rb +3 -5
  9. data/app/models/completion_kit/metric_group.rb +22 -0
  10. data/app/models/completion_kit/metric_group_membership.rb +20 -0
  11. data/app/models/completion_kit/run.rb +0 -1
  12. data/app/services/completion_kit/judge_service.rb +3 -7
  13. data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
  14. data/app/services/completion_kit/mcp_tools/{criteria.rb → metric_groups.rb} +28 -28
  15. data/app/services/completion_kit/mcp_tools/metrics.rb +2 -4
  16. data/app/views/completion_kit/api_reference/index.html.erb +11 -11
  17. data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
  18. data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
  19. data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
  20. data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
  21. data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
  22. data/app/views/completion_kit/metrics/_form.html.erb +2 -23
  23. data/app/views/completion_kit/metrics/index.html.erb +13 -5
  24. data/app/views/completion_kit/metrics/show.html.erb +1 -12
  25. data/app/views/completion_kit/runs/_form.html.erb +5 -5
  26. data/app/views/layouts/completion_kit/application.html.erb +4 -1
  27. data/config/routes.rb +2 -2
  28. data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
  29. data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
  30. data/lib/completion_kit/engine.rb +1 -7
  31. data/lib/completion_kit/version.rb +1 -1
  32. metadata +21 -21
  33. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
  34. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
  35. data/app/controllers/completion_kit/criteria_controller.rb +0 -67
  36. data/app/models/completion_kit/criteria.rb +0 -22
  37. data/app/models/completion_kit/criteria_membership.rb +0 -20
  38. data/app/views/completion_kit/criteria/_form.html.erb +0 -46
  39. data/app/views/completion_kit/criteria/edit.html.erb +0 -14
  40. data/app/views/completion_kit/criteria/index.html.erb +0 -37
  41. data/app/views/completion_kit/criteria/new.html.erb +0 -13
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.rc1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-16 00:00:00.000000000 Z
11
+ date: 2026-04-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -198,12 +198,11 @@ dependencies:
198
198
  - - "~>"
199
199
  - !ruby/object:Gem::Version
200
200
  version: '0.22'
201
- description: 'CompletionKit is a mountable Rails engine for rigorous prompt testing:
202
- run prompts against datasets, score outputs with an LLM-as-judge against custom
203
- metrics and rubrics, track versions, and iterate with AI-assisted improvement suggestions.
204
- Every resource is accessible through a bearer-token REST API and a built-in Model
205
- Context Protocol server, so you can drive it from a browser, from HTTP clients,
206
- or directly from agents like Claude Code.'
201
+ description: CompletionKit is a prompt testing platform that runs as a Rails engine
202
+ or a standalone app. Run prompts against real datasets, score every output with
203
+ an LLM judge against criteria you define, track prompt versions, and get AI-generated
204
+ improvement suggestions grounded in your actual results. Includes a web UI, REST
205
+ API, and a built-in MCP server with 36 tools.
207
206
  email:
208
207
  - damien@homemade.software
209
208
  executables: []
@@ -216,11 +215,10 @@ files:
216
215
  - app/assets/config/completion_kit_manifest.js
217
216
  - app/assets/config/manifest.js
218
217
  - app/assets/images/completion_kit/logo.svg
219
- - app/assets/javascripts/completion_kit/evaluation_steps_controller.js
220
218
  - app/assets/stylesheets/completion_kit/application.css
221
219
  - app/controllers/completion_kit/api/v1/base_controller.rb
222
- - app/controllers/completion_kit/api/v1/criteria_controller.rb
223
220
  - app/controllers/completion_kit/api/v1/datasets_controller.rb
221
+ - app/controllers/completion_kit/api/v1/metric_groups_controller.rb
224
222
  - app/controllers/completion_kit/api/v1/metrics_controller.rb
225
223
  - app/controllers/completion_kit/api/v1/prompts_controller.rb
226
224
  - app/controllers/completion_kit/api/v1/provider_credentials_controller.rb
@@ -228,9 +226,9 @@ files:
228
226
  - app/controllers/completion_kit/api/v1/runs_controller.rb
229
227
  - app/controllers/completion_kit/api_reference_controller.rb
230
228
  - app/controllers/completion_kit/application_controller.rb
231
- - app/controllers/completion_kit/criteria_controller.rb
232
229
  - app/controllers/completion_kit/datasets_controller.rb
233
230
  - app/controllers/completion_kit/mcp_controller.rb
231
+ - app/controllers/completion_kit/metric_groups_controller.rb
234
232
  - app/controllers/completion_kit/metrics_controller.rb
235
233
  - app/controllers/completion_kit/prompts_controller.rb
236
234
  - app/controllers/completion_kit/provider_credentials_controller.rb
@@ -243,10 +241,10 @@ files:
243
241
  - app/jobs/completion_kit/model_discovery_job.rb
244
242
  - app/mailers/completion_kit/application_mailer.rb
245
243
  - app/models/completion_kit/application_record.rb
246
- - app/models/completion_kit/criteria.rb
247
- - app/models/completion_kit/criteria_membership.rb
248
244
  - app/models/completion_kit/dataset.rb
249
245
  - app/models/completion_kit/metric.rb
246
+ - app/models/completion_kit/metric_group.rb
247
+ - app/models/completion_kit/metric_group_membership.rb
250
248
  - app/models/completion_kit/model.rb
251
249
  - app/models/completion_kit/prompt.rb
252
250
  - app/models/completion_kit/provider_credential.rb
@@ -261,8 +259,8 @@ files:
261
259
  - app/services/completion_kit/judge_service.rb
262
260
  - app/services/completion_kit/llm_client.rb
263
261
  - app/services/completion_kit/mcp_dispatcher.rb
264
- - app/services/completion_kit/mcp_tools/criteria.rb
265
262
  - app/services/completion_kit/mcp_tools/datasets.rb
263
+ - app/services/completion_kit/mcp_tools/metric_groups.rb
266
264
  - app/services/completion_kit/mcp_tools/metrics.rb
267
265
  - app/services/completion_kit/mcp_tools/prompts.rb
268
266
  - app/services/completion_kit/mcp_tools/provider_credentials.rb
@@ -275,16 +273,16 @@ files:
275
273
  - app/services/completion_kit/prompt_improvement_service.rb
276
274
  - app/views/completion_kit/api_reference/_example.html.erb
277
275
  - app/views/completion_kit/api_reference/index.html.erb
278
- - app/views/completion_kit/criteria/_form.html.erb
279
- - app/views/completion_kit/criteria/edit.html.erb
280
- - app/views/completion_kit/criteria/index.html.erb
281
- - app/views/completion_kit/criteria/new.html.erb
282
- - app/views/completion_kit/criteria/show.html.erb
283
276
  - app/views/completion_kit/datasets/_form.html.erb
284
277
  - app/views/completion_kit/datasets/edit.html.erb
285
278
  - app/views/completion_kit/datasets/index.html.erb
286
279
  - app/views/completion_kit/datasets/new.html.erb
287
280
  - app/views/completion_kit/datasets/show.html.erb
281
+ - app/views/completion_kit/metric_groups/_form.html.erb
282
+ - app/views/completion_kit/metric_groups/edit.html.erb
283
+ - app/views/completion_kit/metric_groups/index.html.erb
284
+ - app/views/completion_kit/metric_groups/new.html.erb
285
+ - app/views/completion_kit/metric_groups/show.html.erb
288
286
  - app/views/completion_kit/metrics/_form.html.erb
289
287
  - app/views/completion_kit/metrics/edit.html.erb
290
288
  - app/views/completion_kit/metrics/index.html.erb
@@ -324,6 +322,8 @@ files:
324
322
  - db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb
325
323
  - db/migrate/20260403000002_create_completion_kit_suggestions.rb
326
324
  - db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb
325
+ - db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb
326
+ - db/migrate/20260417000001_rename_criteria_to_metric_groups.rb
327
327
  - lib/completion-kit.rb
328
328
  - lib/completion_kit.rb
329
329
  - lib/completion_kit/engine.rb
@@ -356,6 +356,6 @@ requirements: []
356
356
  rubygems_version: 3.5.16
357
357
  signing_key:
358
358
  specification_version: 4
359
- summary: Rails engine for testing, scoring, and improving GenAI prompts against real
360
- data with a web UI, REST API, and built-in MCP server.
359
+ summary: Your prompts need tests too. Run them against real data, score outputs with
360
+ an LLM judge, iterate until they work.
361
361
  test_files: []
@@ -1,25 +0,0 @@
1
- document.addEventListener("DOMContentLoaded", function () {
2
- document.addEventListener("click", function (event) {
3
- var addBtn = event.target.closest("[data-action='evaluation-steps#add']");
4
- if (addBtn) {
5
- var container = addBtn.closest("[data-controller='evaluation-steps']");
6
- var list = container.querySelector("[data-evaluation-steps-target='list']");
7
- var row = document.createElement("div");
8
- row.className = "ck-step-row";
9
- row.setAttribute("data-evaluation-steps-target", "row");
10
- row.innerHTML =
11
- '<input type="text" name="metric[evaluation_steps][]" value="" class="ck-input" placeholder="Describe this evaluation step..." />' +
12
- '<button type="button" class="ck-icon-btn" data-action="evaluation-steps#remove" aria-label="Remove step">' +
13
- '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="1.75"><path d="M3 6h18"/><path d="M19 6v14c0 1-1 2-2 2H7c-1 0-2-1-2-2V6"/><path d="M8 6V4c0-1 1-2 2-2h4c1 0 2 1 2 2v2"/><line x1="10" y1="11" x2="10" y2="17"/><line x1="14" y1="11" x2="14" y2="17"/></svg>' +
14
- "</button>";
15
- list.appendChild(row);
16
- row.querySelector("input").focus();
17
- }
18
-
19
- var removeBtn = event.target.closest("[data-action='evaluation-steps#remove']");
20
- if (removeBtn) {
21
- var stepRow = removeBtn.closest("[data-evaluation-steps-target='row']");
22
- if (stepRow) stepRow.remove();
23
- }
24
- });
25
- });
@@ -1,62 +0,0 @@
1
- module CompletionKit
2
- module Api
3
- module V1
4
- class CriteriaController < BaseController
5
- before_action :set_criteria, only: [:show, :update, :destroy]
6
-
7
- def index
8
- render json: Criteria.order(created_at: :desc)
9
- end
10
-
11
- def show
12
- render json: @criteria
13
- end
14
-
15
- def create
16
- criteria = Criteria.new(criteria_params.except(:metric_ids))
17
- if criteria.save
18
- replace_metric_memberships(criteria, params[:metric_ids]) if params.key?(:metric_ids)
19
- render json: criteria.reload, status: :created
20
- else
21
- render json: {errors: criteria.errors}, status: :unprocessable_entity
22
- end
23
- end
24
-
25
- def update
26
- if @criteria.update(criteria_params.except(:metric_ids))
27
- replace_metric_memberships(@criteria, params[:metric_ids]) if params.key?(:metric_ids)
28
- render json: @criteria.reload
29
- else
30
- render json: {errors: @criteria.errors}, status: :unprocessable_entity
31
- end
32
- end
33
-
34
- def destroy
35
- @criteria.destroy!
36
- head :no_content
37
- end
38
-
39
- private
40
-
41
- def set_criteria
42
- @criteria = Criteria.find(params[:id])
43
- rescue ActiveRecord::RecordNotFound
44
- not_found
45
- end
46
-
47
- def criteria_params
48
- params.permit(:name, :description, metric_ids: [])
49
- end
50
-
51
- def replace_metric_memberships(criteria, metric_ids)
52
- return unless metric_ids
53
-
54
- criteria.criteria_memberships.delete_all
55
- Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
56
- criteria.criteria_memberships.create!(metric_id: metric_id, position: index + 1)
57
- end
58
- end
59
- end
60
- end
61
- end
62
- end
@@ -1,67 +0,0 @@
1
- module CompletionKit
2
- class CriteriaController < ApplicationController
3
- before_action :set_criteria, only: [:show, :edit, :update, :destroy]
4
-
5
- def index
6
- @criterias = Criteria.includes(:metrics).order(:name)
7
- end
8
-
9
- def show
10
- end
11
-
12
- def new
13
- @criteria = Criteria.new
14
- @metrics = Metric.order(:name)
15
- end
16
-
17
- def edit
18
- @metrics = Metric.order(:name)
19
- end
20
-
21
- def create
22
- @criteria = Criteria.new(criteria_params.except(:metric_ids))
23
- @metrics = Metric.order(:name)
24
-
25
- if @criteria.save
26
- replace_metric_memberships
27
- redirect_to criterion_path(@criteria), notice: "Criteria was successfully created."
28
- else
29
- render :new, status: :unprocessable_entity
30
- end
31
- end
32
-
33
- def update
34
- @metrics = Metric.order(:name)
35
-
36
- if @criteria.update(criteria_params.except(:metric_ids))
37
- replace_metric_memberships
38
- redirect_to criterion_path(@criteria), notice: "Criteria was successfully updated."
39
- else
40
- render :edit, status: :unprocessable_entity
41
- end
42
- end
43
-
44
- def destroy
45
- @criteria.destroy
46
- redirect_to criteria_path, notice: "Criteria was successfully destroyed."
47
- end
48
-
49
- private
50
-
51
- def set_criteria
52
- @criteria = Criteria.find(params[:id])
53
- end
54
-
55
- def criteria_params
56
- params.require(:criteria).permit(:name, :description, metric_ids: [])
57
- end
58
-
59
- def replace_metric_memberships
60
- metric_ids = Array(criteria_params[:metric_ids]).reject(&:blank?)
61
- @criteria.criteria_memberships.delete_all
62
- metric_ids.each_with_index do |metric_id, index|
63
- @criteria.criteria_memberships.create!(metric_id: metric_id, position: index + 1)
64
- end
65
- end
66
- end
67
- end
@@ -1,22 +0,0 @@
1
- module CompletionKit
2
- class Criteria < ApplicationRecord
3
- self.table_name = "completion_kit_criteria"
4
-
5
- has_many :criteria_memberships, -> { order(:position, :id) }, dependent: :destroy
6
- has_many :metrics, through: :criteria_memberships
7
-
8
- validates :name, presence: true
9
-
10
- def ordered_metrics
11
- criteria_memberships.includes(:metric).map(&:metric).compact
12
- end
13
-
14
- def as_json(options = {})
15
- {
16
- id: id, name: name, description: description,
17
- created_at: created_at, updated_at: updated_at,
18
- metric_ids: metric_ids
19
- }
20
- end
21
- end
22
- end
@@ -1,20 +0,0 @@
1
- module CompletionKit
2
- class CriteriaMembership < ApplicationRecord
3
- self.table_name = "completion_kit_criteria_memberships"
4
-
5
- belongs_to :criteria, class_name: "CompletionKit::Criteria", foreign_key: "criteria_id"
6
- belongs_to :metric
7
-
8
- validates :metric_id, uniqueness: { scope: :criteria_id }
9
-
10
- before_validation :set_default_position
11
-
12
- private
13
-
14
- def set_default_position
15
- return if position.present? || criteria.blank?
16
-
17
- self.position = criteria.criteria_memberships.maximum(:position).to_i + 1
18
- end
19
- end
20
- end
@@ -1,46 +0,0 @@
1
- <%= form_with(model: criteria, url: criteria.persisted? ? criterion_path(criteria) : criteria_path, local: true) do |form| %>
2
- <% if criteria.errors.any? %>
3
- <div class="ck-flash ck-flash--alert">
4
- <p class="ck-flash__title"><%= pluralize(criteria.errors.count, "problem") %> prevented this criteria from being saved.</p>
5
- <ul class="ck-error-list">
6
- <% criteria.errors.full_messages.each do |message| %>
7
- <li><%= message %></li>
8
- <% end %>
9
- </ul>
10
- </div>
11
- <% end %>
12
-
13
- <div class="ck-card ck-form-card">
14
- <div class="ck-field">
15
- <%= form.label :name, "Criteria name", class: "ck-label" %>
16
- <%= form.text_field :name, class: "ck-input", placeholder: "Support quality" %>
17
- </div>
18
-
19
- <div class="ck-field">
20
- <%= form.label :description, class: "ck-label" %>
21
- <%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "When this criteria should be used." %>
22
- </div>
23
-
24
- <div class="ck-field">
25
- <p class="ck-label">Metrics in this criteria</p>
26
- <p class="ck-hint">A criteria groups metrics together for judging.</p>
27
- <div class="ck-list ck-list--compact">
28
- <% @metrics.each do |metric| %>
29
- <label class="ck-item">
30
- <span>
31
- <strong><%= metric.name %></strong>
32
- <span class="ck-meta-copy"><%= metric.instruction.presence || "No instruction set." %></span>
33
- </span>
34
- <%= check_box_tag "criteria[metric_ids][]", metric.id, criteria.metrics.exists?(metric.id), class: "ck-checkbox" %>
35
- </label>
36
- <% end %>
37
- </div>
38
- <%= hidden_field_tag "criteria[metric_ids][]", "" %>
39
- </div>
40
-
41
- <div class="ck-actions">
42
- <%= link_to "Cancel", criteria_path, class: ck_button_classes(:light, variant: :outline) %>
43
- <%= form.submit(criteria.persisted? ? "Save criteria" : "Create criteria", class: ck_button_classes(:dark)) %>
44
- </div>
45
- </div>
46
- <% end %>
@@ -1,14 +0,0 @@
1
- <ol class="ck-breadcrumb">
2
- <li><%= link_to "Metrics", metrics_path %></li>
3
- <li><%= link_to "Criteria", criteria_path %></li>
4
- <li><%= link_to @criteria.name, criterion_path(@criteria) %></li>
5
- <li>Edit</li>
6
- </ol>
7
-
8
- <section class="ck-page-header">
9
- <div>
10
- <h1 class="ck-title">Edit criteria</h1>
11
- </div>
12
- </section>
13
-
14
- <%= render "form", criteria: @criteria %>
@@ -1,37 +0,0 @@
1
- <ol class="ck-breadcrumb">
2
- <li><%= link_to "Metrics", metrics_path %></li>
3
- <li>Criteria</li>
4
- </ol>
5
-
6
- <section class="ck-page-header">
7
- <div>
8
- <h1 class="ck-title">Criteria</h1>
9
- <p class="ck-lead">Named groups of metrics that can be applied to a run as a set. Use criteria to bundle related metrics together for reuse across runs.</p>
10
- </div>
11
- <div class="ck-actions">
12
- <%= link_to "New criteria", new_criterion_path, class: ck_button_classes(:dark) %>
13
- </div>
14
- </section>
15
-
16
- <% if @criterias.any? %>
17
- <table class="ck-results-table">
18
- <thead>
19
- <tr>
20
- <th>Name</th>
21
- <th>Metrics</th>
22
- <th></th>
23
- </tr>
24
- </thead>
25
- <tbody>
26
- <% @criterias.each do |criteria| %>
27
- <tr onclick="window.location='<%= criterion_path(criteria) %>'" style="cursor: pointer;">
28
- <td><strong><%= criteria.name %></strong></td>
29
- <td><%= criteria.metrics.size %></td>
30
- <td class="ck-results-table__arrow">&rarr;</td>
31
- </tr>
32
- <% end %>
33
- </tbody>
34
- </table>
35
- <% else %>
36
- <div class="ck-empty">No criteria yet.</div>
37
- <% end %>
@@ -1,13 +0,0 @@
1
- <ol class="ck-breadcrumb">
2
- <li><%= link_to "Metrics", metrics_path %></li>
3
- <li><%= link_to "Criteria", criteria_path %></li>
4
- <li>New</li>
5
- </ol>
6
-
7
- <section class="ck-page-header">
8
- <div>
9
- <h1 class="ck-title">New criteria</h1>
10
- </div>
11
- </section>
12
-
13
- <%= render "form", criteria: @criteria %>