completion-kit 0.1.0.rc1 → 0.2.0

This diff shows the changes between two publicly released versions of this package, exactly as they were published to one of the supported registries. It is provided for informational purposes only.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/README.md +97 -86
  3. data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +53 -0
  4. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
  5. data/app/controllers/completion_kit/api/v1/runs_controller.rb +2 -10
  6. data/app/controllers/completion_kit/metric_groups_controller.rb +59 -0
  7. data/app/controllers/completion_kit/metrics_controller.rb +2 -2
  8. data/app/controllers/completion_kit/runs_controller.rb +4 -11
  9. data/app/helpers/completion_kit/application_helper.rb +1 -8
  10. data/app/models/completion_kit/application_record.rb +7 -0
  11. data/app/models/completion_kit/metric.rb +4 -6
  12. data/app/models/completion_kit/metric_group.rb +30 -0
  13. data/app/models/completion_kit/metric_group_membership.rb +20 -0
  14. data/app/models/completion_kit/model.rb +1 -1
  15. data/app/models/completion_kit/provider_credential.rb +2 -1
  16. data/app/models/completion_kit/run.rb +11 -4
  17. data/app/services/completion_kit/anthropic_client.rb +4 -17
  18. data/app/services/completion_kit/judge_service.rb +3 -7
  19. data/app/services/completion_kit/llm_client.rb +15 -0
  20. data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
  21. data/app/services/completion_kit/mcp_tools/base.rb +23 -0
  22. data/app/services/completion_kit/mcp_tools/datasets.rb +2 -18
  23. data/app/services/completion_kit/mcp_tools/metric_groups.rb +82 -0
  24. data/app/services/completion_kit/mcp_tools/metrics.rb +4 -22
  25. data/app/services/completion_kit/mcp_tools/prompts.rb +2 -18
  26. data/app/services/completion_kit/mcp_tools/provider_credentials.rb +2 -18
  27. data/app/services/completion_kit/mcp_tools/responses.rb +2 -13
  28. data/app/services/completion_kit/mcp_tools/runs.rb +4 -28
  29. data/app/services/completion_kit/ollama_client.rb +2 -15
  30. data/app/services/completion_kit/open_ai_client.rb +1 -10
  31. data/app/services/completion_kit/open_router_client.rb +1 -12
  32. data/app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb +15 -0
  33. data/app/views/completion_kit/api_reference/index.html.erb +11 -11
  34. data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
  35. data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
  36. data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
  37. data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
  38. data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
  39. data/app/views/completion_kit/metrics/_form.html.erb +2 -23
  40. data/app/views/completion_kit/metrics/index.html.erb +13 -5
  41. data/app/views/completion_kit/metrics/show.html.erb +1 -12
  42. data/app/views/completion_kit/runs/_form.html.erb +5 -5
  43. data/app/views/layouts/completion_kit/application.html.erb +4 -1
  44. data/config/routes.rb +2 -2
  45. data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
  46. data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
  47. data/lib/completion_kit/engine.rb +1 -7
  48. data/lib/completion_kit/version.rb +1 -1
  49. data/lib/completion_kit.rb +5 -0
  50. metadata +23 -21
  51. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
  52. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
  53. data/app/controllers/completion_kit/criteria_controller.rb +0 -67
  54. data/app/models/completion_kit/criteria.rb +0 -22
  55. data/app/models/completion_kit/criteria_membership.rb +0 -20
  56. data/app/services/completion_kit/mcp_tools/criteria.rb +0 -106
  57. data/app/views/completion_kit/criteria/_form.html.erb +0 -46
  58. data/app/views/completion_kit/criteria/edit.html.erb +0 -14
  59. data/app/views/completion_kit/criteria/index.html.erb +0 -37
  60. data/app/views/completion_kit/criteria/new.html.erb +0 -13
@@ -63,7 +63,7 @@ end %>
63
63
  <input type="radio" name="ck-api-tab" id="ck-tab-responses" class="ck-api-tabs__radio">
64
64
  <input type="radio" name="ck-api-tab" id="ck-tab-datasets" class="ck-api-tabs__radio">
65
65
  <input type="radio" name="ck-api-tab" id="ck-tab-metrics" class="ck-api-tabs__radio">
66
- <input type="radio" name="ck-api-tab" id="ck-tab-criteria" class="ck-api-tabs__radio">
66
+ <input type="radio" name="ck-api-tab" id="ck-tab-metric-groups" class="ck-api-tabs__radio">
67
67
  <input type="radio" name="ck-api-tab" id="ck-tab-providers" class="ck-api-tabs__radio">
68
68
 
69
69
  <nav class="ck-api-tabs__nav">
@@ -73,7 +73,7 @@ end %>
73
73
  <label for="ck-tab-responses" class="ck-api-tabs__label">Responses <span class="ck-api-tabs__count">2</span></label>
74
74
  <label for="ck-tab-datasets" class="ck-api-tabs__label">Datasets <span class="ck-api-tabs__count">5</span></label>
75
75
  <label for="ck-tab-metrics" class="ck-api-tabs__label">Metrics <span class="ck-api-tabs__count">5</span></label>
76
- <label for="ck-tab-criteria" class="ck-api-tabs__label">Criteria <span class="ck-api-tabs__count">5</span></label>
76
+ <label for="ck-tab-metric-groups" class="ck-api-tabs__label">Metric Groups <span class="ck-api-tabs__count">5</span></label>
77
77
  <label for="ck-tab-providers" class="ck-api-tabs__label">Providers <span class="ck-api-tabs__count">5</span></label>
78
78
  </nav>
79
79
 
@@ -232,7 +232,7 @@ end %>
232
232
  <div class="ck-api-endpoint">
233
233
  <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metrics</p>
234
234
  <p class="ck-meta-copy">Create a metric.</p>
235
- <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>instruction</code>, <code>evaluation_steps</code> (array), <code>rubric_bands</code> (array of {stars, description})</p>
235
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>instruction</code>, <code>rubric_bands</code> (array of {stars, description})</p>
236
236
  <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/metrics \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"relevance\", \"instruction\": \"Is the response relevant?\"}'" %>
237
237
  </div>
238
238
  <div class="ck-api-endpoint">
@@ -242,20 +242,20 @@ end %>
242
242
  </div>
243
243
 
244
244
  <div class="ck-api-tabs__panel">
245
- <h2 class="ck-section-title">Criteria</h2>
246
- <p class="ck-copy">Named groups of metrics applied to runs as a set.</p>
245
+ <h2 class="ck-section-title">Metric Groups</h2>
246
+ <p class="ck-copy">Named groups of metrics you can apply to a run as a set.</p>
247
247
  <div class="ck-api-endpoint">
248
- <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/criteria</p>
249
- <p class="ck-meta-copy">List all criteria with their metric IDs.</p>
248
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/metric_groups</p>
249
+ <p class="ck-meta-copy">List all metric groups with their metric IDs.</p>
250
250
  </div>
251
251
  <div class="ck-api-endpoint">
252
- <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/criteria</p>
253
- <p class="ck-meta-copy">Create a criteria group.</p>
252
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metric_groups</p>
253
+ <p class="ck-meta-copy">Create a metric group.</p>
254
254
  <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>description</code>, <code>metric_ids</code> (array)</p>
255
255
  </div>
256
256
  <div class="ck-api-endpoint">
257
- <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/criteria/:id</p>
258
- <p class="ck-meta-copy">Get, update, or delete a criteria group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
257
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metric_groups/:id</p>
258
+ <p class="ck-meta-copy">Get, update, or delete a metric group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
259
259
  </div>
260
260
  </div>
261
261
 
@@ -0,0 +1,46 @@
1
+ <%= form_with(model: metric_group, url: metric_group.persisted? ? metric_group_path(metric_group) : metric_groups_path, local: true) do |form| %>
2
+ <% if metric_group.errors.any? %>
3
+ <div class="ck-flash ck-flash--alert">
4
+ <p class="ck-flash__title"><%= pluralize(metric_group.errors.count, "problem") %> prevented this metric group from being saved.</p>
5
+ <ul class="ck-error-list">
6
+ <% metric_group.errors.full_messages.each do |message| %>
7
+ <li><%= message %></li>
8
+ <% end %>
9
+ </ul>
10
+ </div>
11
+ <% end %>
12
+
13
+ <div class="ck-card ck-form-card">
14
+ <div class="ck-field">
15
+ <%= form.label :name, "Metric group name", class: "ck-label" %>
16
+ <%= form.text_field :name, class: "ck-input", placeholder: "Support quality" %>
17
+ </div>
18
+
19
+ <div class="ck-field">
20
+ <%= form.label :description, class: "ck-label" %>
21
+ <%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "When this metric group should be used." %>
22
+ </div>
23
+
24
+ <div class="ck-field">
25
+ <p class="ck-label">Metrics in this group</p>
26
+ <p class="ck-hint">Pick the metrics to include.</p>
27
+ <div class="ck-list ck-list--compact">
28
+ <% @metrics.each do |metric| %>
29
+ <label class="ck-item">
30
+ <%= check_box_tag "metric_group[metric_ids][]", metric.id, metric_group.metrics.exists?(metric.id), class: "ck-checkbox" %>
31
+ <span>
32
+ <strong><%= metric.name %></strong>
33
+ <span class="ck-meta-copy"><%= metric.instruction.presence || "No instruction set." %></span>
34
+ </span>
35
+ </label>
36
+ <% end %>
37
+ </div>
38
+ <%= hidden_field_tag "metric_group[metric_ids][]", "" %>
39
+ </div>
40
+
41
+ <div class="ck-actions">
42
+ <%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
43
+ <%= form.submit(metric_group.persisted? ? "Save metric group" : "Create metric group", class: ck_button_classes(:dark)) %>
44
+ </div>
45
+ </div>
46
+ <% end %>
@@ -0,0 +1,13 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li><%= link_to @metric_group.name, metric_group_path(@metric_group) %></li>
4
+ <li>Edit</li>
5
+ </ol>
6
+
7
+ <section class="ck-page-header">
8
+ <div>
9
+ <h1 class="ck-title">Edit metric group</h1>
10
+ </div>
11
+ </section>
12
+
13
+ <%= render "form", metric_group: @metric_group %>
@@ -0,0 +1,41 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li>Metric groups</li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title">Metric groups</h1>
9
+ <p class="ck-lead">Named groups of metrics. Apply a group to a run to score outputs against every metric in the group at once.</p>
10
+ </div>
11
+ <div class="ck-actions">
12
+ <%= link_to "New metric group", new_metric_group_path, class: ck_button_classes(:dark) %>
13
+ </div>
14
+ </section>
15
+
16
+ <% if @metric_groups.any? %>
17
+ <table class="ck-results-table">
18
+ <thead>
19
+ <tr>
20
+ <th>Name</th>
21
+ <th>Description</th>
22
+ <th>Metrics</th>
23
+ <th></th>
24
+ </tr>
25
+ </thead>
26
+ <tbody>
27
+ <% @metric_groups.each do |metric_group| %>
28
+ <tr onclick="window.location='<%= metric_group_path(metric_group) %>'" style="cursor: pointer;">
29
+ <td><strong><%= metric_group.name %></strong></td>
30
+ <td class="ck-meta-copy"><%= truncate(metric_group.description.to_s, length: 90).presence || "—" %></td>
31
+ <td class="ck-meta-copy"><%= metric_group.metrics.any? ? metric_group.metrics.map(&:name).join(", ") : "empty" %></td>
32
+ <td class="ck-results-table__arrow">&rarr;</td>
33
+ </tr>
34
+ <% end %>
35
+ </tbody>
36
+ </table>
37
+ <% else %>
38
+ <div class="ck-empty">
39
+ <p>No metric groups yet. <%= link_to "Create one", new_metric_group_path, class: "ck-link" %> if you want to group multiple metrics and apply them together.</p>
40
+ </div>
41
+ <% end %>
@@ -0,0 +1,12 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li>New metric group</li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title">New metric group</h1>
9
+ </div>
10
+ </section>
11
+
12
+ <%= render "form", metric_group: @metric_group %>
@@ -1,26 +1,25 @@
1
1
  <ol class="ck-breadcrumb">
2
2
  <li><%= link_to "Metrics", metrics_path %></li>
3
- <li><%= link_to "Criteria", criteria_path %></li>
4
- <li><%= @criteria.name %></li>
3
+ <li><%= @metric_group.name %></li>
5
4
  </ol>
6
5
 
7
6
  <section class="ck-page-header">
8
7
  <div>
9
- <h1 class="ck-title"><%= @criteria.name %></h1>
10
- <% if @criteria.description.present? %>
11
- <p class="ck-lead"><%= @criteria.description %></p>
8
+ <h1 class="ck-title"><%= @metric_group.name %></h1>
9
+ <% if @metric_group.description.present? %>
10
+ <p class="ck-lead"><%= @metric_group.description %></p>
12
11
  <% end %>
13
12
  </div>
14
13
  <div class="ck-actions">
15
- <%= link_to "Edit", edit_criterion_path(@criteria), class: ck_button_classes(:light, variant: :outline) %>
14
+ <%= link_to "Edit", edit_metric_group_path(@metric_group), class: ck_button_classes(:light, variant: :outline) %>
16
15
  </div>
17
16
  </section>
18
17
 
19
18
  <section class="ck-card">
20
19
  <p class="ck-kicker">Metrics</p>
21
- <% if @criteria.metrics.any? %>
20
+ <% if @metric_group.metrics.any? %>
22
21
  <div class="ck-list ck-list--compact">
23
- <% @criteria.metrics.each do |metric| %>
22
+ <% @metric_group.metrics.each do |metric| %>
24
23
  <div class="ck-item">
25
24
  <div>
26
25
  <p class="ck-item-title"><%= link_to metric.name, metric_path(metric), class: "ck-link" %></p>
@@ -32,6 +31,6 @@
32
31
  <% end %>
33
32
  </div>
34
33
  <% else %>
35
- <p class="ck-copy">No metrics in this criteria yet.</p>
34
+ <p class="ck-copy">No metrics in this group yet.</p>
36
35
  <% end %>
37
36
  </section>
@@ -17,32 +17,11 @@
17
17
  </div>
18
18
 
19
19
  <div class="ck-field ck-field--spacious">
20
- <p class="ck-section-title">Evaluation Instruction</p>
21
- <p class="ck-hint">What should the evaluator assess? This is the core instruction sent to the judge model.</p>
20
+ <p class="ck-section-title">Instruction</p>
21
+ <p class="ck-hint">What should the judge assess? This instruction is sent to the LLM judge when scoring outputs.</p>
22
22
  <%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output..." %>
23
23
  </div>
24
24
 
25
- <div class="ck-field ck-field--spacious" data-controller="evaluation-steps">
26
- <p class="ck-section-title">Evaluation Steps</p>
27
- <p class="ck-hint">Steps the evaluator follows during assessment.</p>
28
-
29
- <div data-evaluation-steps-target="list">
30
- <% Array(metric.evaluation_steps).each_with_index do |step, index| %>
31
- <div class="ck-step-row" data-evaluation-steps-target="row">
32
- <input type="text" name="metric[evaluation_steps][]" value="<%= step %>" class="ck-input" />
33
- <button type="button" class="ck-icon-btn" data-action="evaluation-steps#remove" aria-label="Remove step">
34
- <%= heroicon_tag "trash", variant: :outline, size: 16 %>
35
- </button>
36
- </div>
37
- <% end %>
38
- </div>
39
-
40
- <button type="button" class="ck-add-btn" data-action="evaluation-steps#add">
41
- <%= heroicon_tag "plus", variant: :outline, size: 14 %>
42
- Add step
43
- </button>
44
- </div>
45
-
46
25
  <div class="ck-field ck-field--spacious">
47
26
  <p class="ck-section-title">Rubric</p>
48
27
  <p class="ck-hint">What each star rating means for this metric.</p>
@@ -1,7 +1,7 @@
1
1
  <section class="ck-page-header">
2
2
  <div>
3
3
  <h1 class="ck-title">Metrics</h1>
4
- <p class="ck-lead">Scoring dimensions used by the judge model to evaluate responses. Each metric defines what to assess, the evaluation steps, and a rubric for star ratings. Group metrics into <%= link_to "Criteria", criteria_path, class: "ck-link" %> for reuse across runs.</p>
4
+ <p class="ck-lead">Scoring dimensions the judge uses to evaluate each response. Each metric defines one thing to assess and a 1-5 scale.</p>
5
5
  </div>
6
6
  <div class="ck-actions">
7
7
  <%= link_to "New metric", new_metric_path, class: ck_button_classes(:dark) %>
@@ -14,7 +14,7 @@
14
14
  <tr>
15
15
  <th>Name</th>
16
16
  <th>Instruction</th>
17
- <th>Criteria</th>
17
+ <th>In groups</th>
18
18
  <th></th>
19
19
  </tr>
20
20
  </thead>
@@ -22,13 +22,21 @@
22
22
  <% @metrics.each do |metric| %>
23
23
  <tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
24
24
  <td><strong><%= metric.name %></strong></td>
25
- <td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 80).presence || "None" %></td>
26
- <td class="ck-meta-copy"><%= metric.criterias.any? ? metric.criterias.map { |c| link_to(c.name, criterion_path(c), class: "ck-link", onclick: "event.stopPropagation()") }.join(", ").html_safe : "None" %></td>
25
+ <td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 90).presence || "" %></td>
26
+ <td class="ck-meta-copy"><%= metric.metric_groups.any? ? metric.metric_groups.map(&:name).join(", ") : "" %></td>
27
27
  <td class="ck-results-table__arrow">&rarr;</td>
28
28
  </tr>
29
29
  <% end %>
30
30
  </tbody>
31
31
  </table>
32
+
33
+ <% if @metrics.size >= 2 %>
34
+ <p class="ck-meta-copy" style="margin-top: 1.5rem; text-align: center;">
35
+ Use the same metrics on multiple runs? <%= link_to "Group them →", metric_groups_path, class: "ck-link" %>
36
+ </p>
37
+ <% end %>
32
38
  <% else %>
33
- <div class="ck-empty">No metrics yet.</div>
39
+ <div class="ck-empty">
40
+ <p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
41
+ </div>
34
42
  <% end %>
@@ -14,22 +14,11 @@
14
14
 
15
15
  <% if @metric.instruction.present? %>
16
16
  <section class="ck-card">
17
- <p class="ck-kicker">Evaluation Instruction</p>
17
+ <p class="ck-kicker">Instruction</p>
18
18
  <div class="ck-note-box"><%= simple_format(@metric.instruction) %></div>
19
19
  </section>
20
20
  <% end %>
21
21
 
22
- <% if @metric.evaluation_steps.present? && @metric.evaluation_steps.any? %>
23
- <section class="ck-card">
24
- <p class="ck-kicker">Evaluation Steps</p>
25
- <ol class="ck-step-list">
26
- <% @metric.evaluation_steps.each do |step| %>
27
- <li><%= step %></li>
28
- <% end %>
29
- </ol>
30
- </section>
31
- <% end %>
32
-
33
22
  <section class="ck-card ck-card--spaced">
34
23
  <p class="ck-kicker">Rubric</p>
35
24
  <div class="ck-rubric-display">
@@ -70,11 +70,11 @@
70
70
  <% if @all_metrics.empty? %>
71
71
  <p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet.&ensp;<%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
72
72
  <% else %>
73
- <% if @criterias.any? %>
73
+ <% if @metric_groups.any? %>
74
74
  <p class="ck-meta-copy" style="margin-bottom: 0.5rem;">
75
75
  Quick add:&ensp;
76
- <% @criterias.each do |c| %>
77
- <span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddCriteria(<%= c.metric_ids.to_json %>)"><%= c.name %></span>&ensp;
76
+ <% @metric_groups.each do |g| %>
77
+ <span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddMetricGroup(<%= g.metric_ids.to_json %>)"><%= g.name %></span>&ensp;
78
78
  <% end %>
79
79
  </p>
80
80
  <% end %>
@@ -112,7 +112,7 @@ function updateRunForm() {
112
112
  } else if (judge && metrics.length === 0) {
113
113
  if (judgeField) judgeField.className = 'ck-field ck-field--info';
114
114
  if (metricsField) metricsField.className = 'ck-field ck-field--info';
115
- if (metricsHint) metricsHint.textContent = 'Select at least one metric or criteria to enable judging.';
115
+ if (metricsHint) metricsHint.textContent = 'Select at least one metric or group to enable judging.';
116
116
  } else if (!judge && metrics.length > 0) {
117
117
  if (judgeField) judgeField.className = 'ck-field ck-field--info';
118
118
  if (judgeHint) judgeHint.textContent = 'Select a judge model to enable judging.';
@@ -131,7 +131,7 @@ function updateRunForm() {
131
131
  if (submitBtn) submitBtn.disabled = !valid;
132
132
  }
133
133
 
134
- function ckQuickAddCriteria(metricIds) {
134
+ function ckQuickAddMetricGroup(metricIds) {
135
135
  metricIds.forEach(function(id) {
136
136
  var cb = document.getElementById('run_metric_' + id);
137
137
  if (cb) cb.checked = true;
@@ -19,11 +19,14 @@
19
19
  <nav class="ck-nav">
20
20
  <% active = ->(path) { request.path.start_with?(path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) } %>
21
21
  <%= link_to "Prompts", prompts_path, class: active.(prompts_path) %>
22
- <%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(criteria_path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
22
+ <%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(metric_groups_path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
23
23
  <%= link_to "Datasets", datasets_path, class: active.(datasets_path) %>
24
24
  <%= link_to "Runs", runs_path, class: active.(runs_path) %>
25
25
  <%= link_to "Settings", provider_credentials_path, class: active.(provider_credentials_path) %>
26
26
  <%= link_to "API", api_reference_path, class: active.(api_reference_path) %>
27
+ <% if main_app.respond_to?(:logout_path) %>
28
+ <%= button_to "Log out", main_app.logout_path, method: :delete, class: ck_button_classes(:light, variant: :outline) %>
29
+ <% end %>
27
30
  </nav>
28
31
  </div>
29
32
  </header>
data/config/routes.rb CHANGED
@@ -9,7 +9,7 @@ CompletionKit::Engine.routes.draw do
9
9
 
10
10
  resources :datasets
11
11
  resources :metrics
12
- resources :criteria, controller: "criteria"
12
+ resources :metric_groups
13
13
 
14
14
  resources :runs do
15
15
  member do
@@ -45,7 +45,7 @@ CompletionKit::Engine.routes.draw do
45
45
  end
46
46
  resources :datasets
47
47
  resources :metrics
48
- resources :criteria, controller: "criteria"
48
+ resources :metric_groups
49
49
  resources :provider_credentials
50
50
  end
51
51
  end
@@ -0,0 +1,5 @@
1
+ class RemoveEvaluationStepsFromMetrics < ActiveRecord::Migration[7.0]
2
+ def change
3
+ remove_column :completion_kit_metrics, :evaluation_steps, :text
4
+ end
5
+ end
@@ -0,0 +1,13 @@
1
+ class RenameCriteriaToMetricGroups < ActiveRecord::Migration[8.1]
2
+ def change
3
+ rename_table :completion_kit_criteria, :completion_kit_metric_groups
4
+ rename_table :completion_kit_criteria_memberships, :completion_kit_metric_group_memberships
5
+ rename_column :completion_kit_metric_group_memberships, :criteria_id, :metric_group_id
6
+
7
+ if index_name_exists?(:completion_kit_metric_group_memberships, "index_completion_kit_criteria_memberships_on_criteria_id")
8
+ rename_index :completion_kit_metric_group_memberships,
9
+ "index_completion_kit_criteria_memberships_on_criteria_id",
10
+ "index_completion_kit_metric_group_memberships_on_metric_group_id"
11
+ end
12
+ end
13
+ end
@@ -5,16 +5,10 @@ module CompletionKit
5
5
  class Engine < ::Rails::Engine
6
6
  isolate_namespace CompletionKit
7
7
 
8
- initializer("completion_kit.inflections", before: :load_config_initializers) do
9
- ActiveSupport::Inflector.inflections(:en) do |inflect|
10
- inflect.irregular "criterion", "criteria"
11
- end
12
- end
13
-
14
8
  paths.add "app/services", eager_load: true
15
9
 
16
10
  def self.register_assets(app)
17
- app.config.assets.precompile += %w( completion_kit/application.css completion_kit/evaluation_steps_controller.js completion_kit/logo.svg )
11
+ app.config.assets.precompile += %w( completion_kit/application.css completion_kit/logo.svg )
18
12
  end
19
13
 
20
14
  initializer("completion_kit.assets") { |app| Engine.register_assets(app) }
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.1.0.rc1"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -8,6 +8,7 @@ module CompletionKit
8
8
  attr_accessor :openai_api_key, :anthropic_api_key, :ollama_api_key, :ollama_api_endpoint
9
9
  attr_accessor :judge_model, :high_quality_threshold, :medium_quality_threshold
10
10
  attr_accessor :username, :password, :auth_strategy, :api_token
11
+ attr_accessor :tenant_scope, :tenant_scope_columns
11
12
 
12
13
  def initialize
13
14
  @openai_api_key = ENV['OPENAI_API_KEY']
@@ -19,6 +20,10 @@ module CompletionKit
19
20
  @high_quality_threshold = 4
20
21
  @medium_quality_threshold = 3
21
22
  end
23
+
24
+ def tenant_scope_columns
25
+ @tenant_scope_columns ||= []
26
+ end
22
27
  end
23
28
 
24
29
  class << self
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0.rc1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-16 00:00:00.000000000 Z
11
+ date: 2026-04-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -198,12 +198,11 @@ dependencies:
198
198
  - - "~>"
199
199
  - !ruby/object:Gem::Version
200
200
  version: '0.22'
201
- description: 'CompletionKit is a mountable Rails engine for rigorous prompt testing:
202
- run prompts against datasets, score outputs with an LLM-as-judge against custom
203
- metrics and rubrics, track versions, and iterate with AI-assisted improvement suggestions.
204
- Every resource is accessible through a bearer-token REST API and a built-in Model
205
- Context Protocol server, so you can drive it from a browser, from HTTP clients,
206
- or directly from agents like Claude Code.'
201
+ description: CompletionKit is a prompt testing platform that runs as a Rails engine
202
+ or a standalone app. Run prompts against real datasets, score every output with
203
+ an LLM judge against criteria you define, track prompt versions, and get AI-generated
204
+ improvement suggestions grounded in your actual results. Includes a web UI, REST
205
+ API, and a built-in MCP server with 36 tools.
207
206
  email:
208
207
  - damien@homemade.software
209
208
  executables: []
@@ -216,11 +215,10 @@ files:
216
215
  - app/assets/config/completion_kit_manifest.js
217
216
  - app/assets/config/manifest.js
218
217
  - app/assets/images/completion_kit/logo.svg
219
- - app/assets/javascripts/completion_kit/evaluation_steps_controller.js
220
218
  - app/assets/stylesheets/completion_kit/application.css
221
219
  - app/controllers/completion_kit/api/v1/base_controller.rb
222
- - app/controllers/completion_kit/api/v1/criteria_controller.rb
223
220
  - app/controllers/completion_kit/api/v1/datasets_controller.rb
221
+ - app/controllers/completion_kit/api/v1/metric_groups_controller.rb
224
222
  - app/controllers/completion_kit/api/v1/metrics_controller.rb
225
223
  - app/controllers/completion_kit/api/v1/prompts_controller.rb
226
224
  - app/controllers/completion_kit/api/v1/provider_credentials_controller.rb
@@ -228,9 +226,9 @@ files:
228
226
  - app/controllers/completion_kit/api/v1/runs_controller.rb
229
227
  - app/controllers/completion_kit/api_reference_controller.rb
230
228
  - app/controllers/completion_kit/application_controller.rb
231
- - app/controllers/completion_kit/criteria_controller.rb
232
229
  - app/controllers/completion_kit/datasets_controller.rb
233
230
  - app/controllers/completion_kit/mcp_controller.rb
231
+ - app/controllers/completion_kit/metric_groups_controller.rb
234
232
  - app/controllers/completion_kit/metrics_controller.rb
235
233
  - app/controllers/completion_kit/prompts_controller.rb
236
234
  - app/controllers/completion_kit/provider_credentials_controller.rb
@@ -243,10 +241,10 @@ files:
243
241
  - app/jobs/completion_kit/model_discovery_job.rb
244
242
  - app/mailers/completion_kit/application_mailer.rb
245
243
  - app/models/completion_kit/application_record.rb
246
- - app/models/completion_kit/criteria.rb
247
- - app/models/completion_kit/criteria_membership.rb
248
244
  - app/models/completion_kit/dataset.rb
249
245
  - app/models/completion_kit/metric.rb
246
+ - app/models/completion_kit/metric_group.rb
247
+ - app/models/completion_kit/metric_group_membership.rb
250
248
  - app/models/completion_kit/model.rb
251
249
  - app/models/completion_kit/prompt.rb
252
250
  - app/models/completion_kit/provider_credential.rb
@@ -261,8 +259,9 @@ files:
261
259
  - app/services/completion_kit/judge_service.rb
262
260
  - app/services/completion_kit/llm_client.rb
263
261
  - app/services/completion_kit/mcp_dispatcher.rb
264
- - app/services/completion_kit/mcp_tools/criteria.rb
262
+ - app/services/completion_kit/mcp_tools/base.rb
265
263
  - app/services/completion_kit/mcp_tools/datasets.rb
264
+ - app/services/completion_kit/mcp_tools/metric_groups.rb
266
265
  - app/services/completion_kit/mcp_tools/metrics.rb
267
266
  - app/services/completion_kit/mcp_tools/prompts.rb
268
267
  - app/services/completion_kit/mcp_tools/provider_credentials.rb
@@ -273,18 +272,19 @@ files:
273
272
  - app/services/completion_kit/open_ai_client.rb
274
273
  - app/services/completion_kit/open_router_client.rb
275
274
  - app/services/completion_kit/prompt_improvement_service.rb
275
+ - app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb
276
276
  - app/views/completion_kit/api_reference/_example.html.erb
277
277
  - app/views/completion_kit/api_reference/index.html.erb
278
- - app/views/completion_kit/criteria/_form.html.erb
279
- - app/views/completion_kit/criteria/edit.html.erb
280
- - app/views/completion_kit/criteria/index.html.erb
281
- - app/views/completion_kit/criteria/new.html.erb
282
- - app/views/completion_kit/criteria/show.html.erb
283
278
  - app/views/completion_kit/datasets/_form.html.erb
284
279
  - app/views/completion_kit/datasets/edit.html.erb
285
280
  - app/views/completion_kit/datasets/index.html.erb
286
281
  - app/views/completion_kit/datasets/new.html.erb
287
282
  - app/views/completion_kit/datasets/show.html.erb
283
+ - app/views/completion_kit/metric_groups/_form.html.erb
284
+ - app/views/completion_kit/metric_groups/edit.html.erb
285
+ - app/views/completion_kit/metric_groups/index.html.erb
286
+ - app/views/completion_kit/metric_groups/new.html.erb
287
+ - app/views/completion_kit/metric_groups/show.html.erb
288
288
  - app/views/completion_kit/metrics/_form.html.erb
289
289
  - app/views/completion_kit/metrics/edit.html.erb
290
290
  - app/views/completion_kit/metrics/index.html.erb
@@ -324,6 +324,8 @@ files:
324
324
  - db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb
325
325
  - db/migrate/20260403000002_create_completion_kit_suggestions.rb
326
326
  - db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb
327
+ - db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb
328
+ - db/migrate/20260417000001_rename_criteria_to_metric_groups.rb
327
329
  - lib/completion-kit.rb
328
330
  - lib/completion_kit.rb
329
331
  - lib/completion_kit/engine.rb
@@ -356,6 +358,6 @@ requirements: []
356
358
  rubygems_version: 3.5.16
357
359
  signing_key:
358
360
  specification_version: 4
359
- summary: Rails engine for testing, scoring, and improving GenAI prompts against real
360
- data with a web UI, REST API, and built-in MCP server.
361
+ summary: Your prompts need tests too. Run them against real data, score outputs with
362
+ an LLM judge, iterate until they work.
361
363
  test_files: []
@@ -1,25 +0,0 @@
1
- document.addEventListener("DOMContentLoaded", function () {
2
- document.addEventListener("click", function (event) {
3
- var addBtn = event.target.closest("[data-action='evaluation-steps#add']");
4
- if (addBtn) {
5
- var container = addBtn.closest("[data-controller='evaluation-steps']");
6
- var list = container.querySelector("[data-evaluation-steps-target='list']");
7
- var row = document.createElement("div");
8
- row.className = "ck-step-row";
9
- row.setAttribute("data-evaluation-steps-target", "row");
10
- row.innerHTML =
11
- '<input type="text" name="metric[evaluation_steps][]" value="" class="ck-input" placeholder="Describe this evaluation step..." />' +
12
- '<button type="button" class="ck-icon-btn" data-action="evaluation-steps#remove" aria-label="Remove step">' +
13
- '<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="1.75"><path d="M3 6h18"/><path d="M19 6v14c0 1-1 2-2 2H7c-1 0-2-1-2-2V6"/><path d="M8 6V4c0-1 1-2 2-2h4c1 0 2 1 2 2v2"/><line x1="10" y1="11" x2="10" y2="17"/><line x1="14" y1="11" x2="14" y2="17"/></svg>' +
14
- "</button>";
15
- list.appendChild(row);
16
- row.querySelector("input").focus();
17
- }
18
-
19
- var removeBtn = event.target.closest("[data-action='evaluation-steps#remove']");
20
- if (removeBtn) {
21
- var stepRow = removeBtn.closest("[data-evaluation-steps-target='row']");
22
- if (stepRow) stepRow.remove();
23
- }
24
- });
25
- });