completion-kit 0.1.0.rc1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +97 -86
- data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +53 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +2 -10
- data/app/controllers/completion_kit/metric_groups_controller.rb +59 -0
- data/app/controllers/completion_kit/metrics_controller.rb +2 -2
- data/app/controllers/completion_kit/runs_controller.rb +4 -11
- data/app/helpers/completion_kit/application_helper.rb +1 -8
- data/app/models/completion_kit/application_record.rb +7 -0
- data/app/models/completion_kit/metric.rb +4 -6
- data/app/models/completion_kit/metric_group.rb +30 -0
- data/app/models/completion_kit/metric_group_membership.rb +20 -0
- data/app/models/completion_kit/model.rb +1 -1
- data/app/models/completion_kit/provider_credential.rb +2 -1
- data/app/models/completion_kit/run.rb +11 -4
- data/app/services/completion_kit/anthropic_client.rb +4 -17
- data/app/services/completion_kit/judge_service.rb +3 -7
- data/app/services/completion_kit/llm_client.rb +15 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
- data/app/services/completion_kit/mcp_tools/base.rb +23 -0
- data/app/services/completion_kit/mcp_tools/datasets.rb +2 -18
- data/app/services/completion_kit/mcp_tools/metric_groups.rb +82 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +4 -22
- data/app/services/completion_kit/mcp_tools/prompts.rb +2 -18
- data/app/services/completion_kit/mcp_tools/provider_credentials.rb +2 -18
- data/app/services/completion_kit/mcp_tools/responses.rb +2 -13
- data/app/services/completion_kit/mcp_tools/runs.rb +4 -28
- data/app/services/completion_kit/ollama_client.rb +2 -15
- data/app/services/completion_kit/open_ai_client.rb +1 -10
- data/app/services/completion_kit/open_router_client.rb +1 -12
- data/app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb +15 -0
- data/app/views/completion_kit/api_reference/index.html.erb +11 -11
- data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
- data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
- data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
- data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
- data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
- data/app/views/completion_kit/metrics/_form.html.erb +2 -23
- data/app/views/completion_kit/metrics/index.html.erb +13 -5
- data/app/views/completion_kit/metrics/show.html.erb +1 -12
- data/app/views/completion_kit/runs/_form.html.erb +5 -5
- data/app/views/layouts/completion_kit/application.html.erb +4 -1
- data/config/routes.rb +2 -2
- data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
- data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
- data/lib/completion_kit/engine.rb +1 -7
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +5 -0
- metadata +23 -21
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
- data/app/controllers/completion_kit/criteria_controller.rb +0 -67
- data/app/models/completion_kit/criteria.rb +0 -22
- data/app/models/completion_kit/criteria_membership.rb +0 -20
- data/app/services/completion_kit/mcp_tools/criteria.rb +0 -106
- data/app/views/completion_kit/criteria/_form.html.erb +0 -46
- data/app/views/completion_kit/criteria/edit.html.erb +0 -14
- data/app/views/completion_kit/criteria/index.html.erb +0 -37
- data/app/views/completion_kit/criteria/new.html.erb +0 -13
|
@@ -63,7 +63,7 @@ end %>
|
|
|
63
63
|
<input type="radio" name="ck-api-tab" id="ck-tab-responses" class="ck-api-tabs__radio">
|
|
64
64
|
<input type="radio" name="ck-api-tab" id="ck-tab-datasets" class="ck-api-tabs__radio">
|
|
65
65
|
<input type="radio" name="ck-api-tab" id="ck-tab-metrics" class="ck-api-tabs__radio">
|
|
66
|
-
<input type="radio" name="ck-api-tab" id="ck-tab-
|
|
66
|
+
<input type="radio" name="ck-api-tab" id="ck-tab-metric-groups" class="ck-api-tabs__radio">
|
|
67
67
|
<input type="radio" name="ck-api-tab" id="ck-tab-providers" class="ck-api-tabs__radio">
|
|
68
68
|
|
|
69
69
|
<nav class="ck-api-tabs__nav">
|
|
@@ -73,7 +73,7 @@ end %>
|
|
|
73
73
|
<label for="ck-tab-responses" class="ck-api-tabs__label">Responses <span class="ck-api-tabs__count">2</span></label>
|
|
74
74
|
<label for="ck-tab-datasets" class="ck-api-tabs__label">Datasets <span class="ck-api-tabs__count">5</span></label>
|
|
75
75
|
<label for="ck-tab-metrics" class="ck-api-tabs__label">Metrics <span class="ck-api-tabs__count">5</span></label>
|
|
76
|
-
<label for="ck-tab-
|
|
76
|
+
<label for="ck-tab-metric-groups" class="ck-api-tabs__label">Metric Groups <span class="ck-api-tabs__count">5</span></label>
|
|
77
77
|
<label for="ck-tab-providers" class="ck-api-tabs__label">Providers <span class="ck-api-tabs__count">5</span></label>
|
|
78
78
|
</nav>
|
|
79
79
|
|
|
@@ -232,7 +232,7 @@ end %>
|
|
|
232
232
|
<div class="ck-api-endpoint">
|
|
233
233
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metrics</p>
|
|
234
234
|
<p class="ck-meta-copy">Create a metric.</p>
|
|
235
|
-
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>instruction</code>, <code>
|
|
235
|
+
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>instruction</code>, <code>rubric_bands</code> (array of {stars, description})</p>
|
|
236
236
|
<%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/metrics \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"relevance\", \"instruction\": \"Is the response relevant?\"}'" %>
|
|
237
237
|
</div>
|
|
238
238
|
<div class="ck-api-endpoint">
|
|
@@ -242,20 +242,20 @@ end %>
|
|
|
242
242
|
</div>
|
|
243
243
|
|
|
244
244
|
<div class="ck-api-tabs__panel">
|
|
245
|
-
<h2 class="ck-section-title">
|
|
246
|
-
<p class="ck-copy">Named groups of metrics
|
|
245
|
+
<h2 class="ck-section-title">Metric Groups</h2>
|
|
246
|
+
<p class="ck-copy">Named groups of metrics you can apply to a run as a set.</p>
|
|
247
247
|
<div class="ck-api-endpoint">
|
|
248
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/criteria</p>
|
|
249
|
-
<p class="ck-meta-copy">List all
|
|
248
|
+
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/metric_groups</p>
|
|
249
|
+
<p class="ck-meta-copy">List all metric groups with their metric IDs.</p>
|
|
250
250
|
</div>
|
|
251
251
|
<div class="ck-api-endpoint">
|
|
252
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/criteria</p>
|
|
253
|
-
<p class="ck-meta-copy">Create a
|
|
252
|
+
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metric_groups</p>
|
|
253
|
+
<p class="ck-meta-copy">Create a metric group.</p>
|
|
254
254
|
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>description</code>, <code>metric_ids</code> (array)</p>
|
|
255
255
|
</div>
|
|
256
256
|
<div class="ck-api-endpoint">
|
|
257
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/criteria/:id</p>
|
|
258
|
-
<p class="ck-meta-copy">Get, update, or delete a
|
|
257
|
+
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metric_groups/:id</p>
|
|
258
|
+
<p class="ck-meta-copy">Get, update, or delete a metric group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
|
|
259
259
|
</div>
|
|
260
260
|
</div>
|
|
261
261
|
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
<%= form_with(model: metric_group, url: metric_group.persisted? ? metric_group_path(metric_group) : metric_groups_path, local: true) do |form| %>
|
|
2
|
+
<% if metric_group.errors.any? %>
|
|
3
|
+
<div class="ck-flash ck-flash--alert">
|
|
4
|
+
<p class="ck-flash__title"><%= pluralize(metric_group.errors.count, "problem") %> prevented this metric group from being saved.</p>
|
|
5
|
+
<ul class="ck-error-list">
|
|
6
|
+
<% metric_group.errors.full_messages.each do |message| %>
|
|
7
|
+
<li><%= message %></li>
|
|
8
|
+
<% end %>
|
|
9
|
+
</ul>
|
|
10
|
+
</div>
|
|
11
|
+
<% end %>
|
|
12
|
+
|
|
13
|
+
<div class="ck-card ck-form-card">
|
|
14
|
+
<div class="ck-field">
|
|
15
|
+
<%= form.label :name, "Metric group name", class: "ck-label" %>
|
|
16
|
+
<%= form.text_field :name, class: "ck-input", placeholder: "Support quality" %>
|
|
17
|
+
</div>
|
|
18
|
+
|
|
19
|
+
<div class="ck-field">
|
|
20
|
+
<%= form.label :description, class: "ck-label" %>
|
|
21
|
+
<%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "When this metric group should be used." %>
|
|
22
|
+
</div>
|
|
23
|
+
|
|
24
|
+
<div class="ck-field">
|
|
25
|
+
<p class="ck-label">Metrics in this group</p>
|
|
26
|
+
<p class="ck-hint">Pick the metrics to include.</p>
|
|
27
|
+
<div class="ck-list ck-list--compact">
|
|
28
|
+
<% @metrics.each do |metric| %>
|
|
29
|
+
<label class="ck-item">
|
|
30
|
+
<%= check_box_tag "metric_group[metric_ids][]", metric.id, metric_group.metrics.exists?(metric.id), class: "ck-checkbox" %>
|
|
31
|
+
<span>
|
|
32
|
+
<strong><%= metric.name %></strong>
|
|
33
|
+
<span class="ck-meta-copy"><%= metric.instruction.presence || "No instruction set." %></span>
|
|
34
|
+
</span>
|
|
35
|
+
</label>
|
|
36
|
+
<% end %>
|
|
37
|
+
</div>
|
|
38
|
+
<%= hidden_field_tag "metric_group[metric_ids][]", "" %>
|
|
39
|
+
</div>
|
|
40
|
+
|
|
41
|
+
<div class="ck-actions">
|
|
42
|
+
<%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
|
|
43
|
+
<%= form.submit(metric_group.persisted? ? "Save metric group" : "Create metric group", class: ck_button_classes(:dark)) %>
|
|
44
|
+
</div>
|
|
45
|
+
</div>
|
|
46
|
+
<% end %>
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li><%= link_to @metric_group.name, metric_group_path(@metric_group) %></li>
|
|
4
|
+
<li>Edit</li>
|
|
5
|
+
</ol>
|
|
6
|
+
|
|
7
|
+
<section class="ck-page-header">
|
|
8
|
+
<div>
|
|
9
|
+
<h1 class="ck-title">Edit metric group</h1>
|
|
10
|
+
</div>
|
|
11
|
+
</section>
|
|
12
|
+
|
|
13
|
+
<%= render "form", metric_group: @metric_group %>
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li>Metric groups</li>
|
|
4
|
+
</ol>
|
|
5
|
+
|
|
6
|
+
<section class="ck-page-header">
|
|
7
|
+
<div>
|
|
8
|
+
<h1 class="ck-title">Metric groups</h1>
|
|
9
|
+
<p class="ck-lead">Named groups of metrics. Apply a group to a run to score outputs against every metric in the group at once.</p>
|
|
10
|
+
</div>
|
|
11
|
+
<div class="ck-actions">
|
|
12
|
+
<%= link_to "New metric group", new_metric_group_path, class: ck_button_classes(:dark) %>
|
|
13
|
+
</div>
|
|
14
|
+
</section>
|
|
15
|
+
|
|
16
|
+
<% if @metric_groups.any? %>
|
|
17
|
+
<table class="ck-results-table">
|
|
18
|
+
<thead>
|
|
19
|
+
<tr>
|
|
20
|
+
<th>Name</th>
|
|
21
|
+
<th>Description</th>
|
|
22
|
+
<th>Metrics</th>
|
|
23
|
+
<th></th>
|
|
24
|
+
</tr>
|
|
25
|
+
</thead>
|
|
26
|
+
<tbody>
|
|
27
|
+
<% @metric_groups.each do |metric_group| %>
|
|
28
|
+
<tr onclick="window.location='<%= metric_group_path(metric_group) %>'" style="cursor: pointer;">
|
|
29
|
+
<td><strong><%= metric_group.name %></strong></td>
|
|
30
|
+
<td class="ck-meta-copy"><%= truncate(metric_group.description.to_s, length: 90).presence || "—" %></td>
|
|
31
|
+
<td class="ck-meta-copy"><%= metric_group.metrics.any? ? metric_group.metrics.map(&:name).join(", ") : "empty" %></td>
|
|
32
|
+
<td class="ck-results-table__arrow">→</td>
|
|
33
|
+
</tr>
|
|
34
|
+
<% end %>
|
|
35
|
+
</tbody>
|
|
36
|
+
</table>
|
|
37
|
+
<% else %>
|
|
38
|
+
<div class="ck-empty">
|
|
39
|
+
<p>No metric groups yet. <%= link_to "Create one", new_metric_group_path, class: "ck-link" %> if you want to group multiple metrics and apply them together.</p>
|
|
40
|
+
</div>
|
|
41
|
+
<% end %>
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li>New metric group</li>
|
|
4
|
+
</ol>
|
|
5
|
+
|
|
6
|
+
<section class="ck-page-header">
|
|
7
|
+
<div>
|
|
8
|
+
<h1 class="ck-title">New metric group</h1>
|
|
9
|
+
</div>
|
|
10
|
+
</section>
|
|
11
|
+
|
|
12
|
+
<%= render "form", metric_group: @metric_group %>
|
|
@@ -1,26 +1,25 @@
|
|
|
1
1
|
<ol class="ck-breadcrumb">
|
|
2
2
|
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
-
<li><%=
|
|
4
|
-
<li><%= @criteria.name %></li>
|
|
3
|
+
<li><%= @metric_group.name %></li>
|
|
5
4
|
</ol>
|
|
6
5
|
|
|
7
6
|
<section class="ck-page-header">
|
|
8
7
|
<div>
|
|
9
|
-
<h1 class="ck-title"><%= @
|
|
10
|
-
<% if @
|
|
11
|
-
<p class="ck-lead"><%= @
|
|
8
|
+
<h1 class="ck-title"><%= @metric_group.name %></h1>
|
|
9
|
+
<% if @metric_group.description.present? %>
|
|
10
|
+
<p class="ck-lead"><%= @metric_group.description %></p>
|
|
12
11
|
<% end %>
|
|
13
12
|
</div>
|
|
14
13
|
<div class="ck-actions">
|
|
15
|
-
<%= link_to "Edit",
|
|
14
|
+
<%= link_to "Edit", edit_metric_group_path(@metric_group), class: ck_button_classes(:light, variant: :outline) %>
|
|
16
15
|
</div>
|
|
17
16
|
</section>
|
|
18
17
|
|
|
19
18
|
<section class="ck-card">
|
|
20
19
|
<p class="ck-kicker">Metrics</p>
|
|
21
|
-
<% if @
|
|
20
|
+
<% if @metric_group.metrics.any? %>
|
|
22
21
|
<div class="ck-list ck-list--compact">
|
|
23
|
-
<% @
|
|
22
|
+
<% @metric_group.metrics.each do |metric| %>
|
|
24
23
|
<div class="ck-item">
|
|
25
24
|
<div>
|
|
26
25
|
<p class="ck-item-title"><%= link_to metric.name, metric_path(metric), class: "ck-link" %></p>
|
|
@@ -32,6 +31,6 @@
|
|
|
32
31
|
<% end %>
|
|
33
32
|
</div>
|
|
34
33
|
<% else %>
|
|
35
|
-
<p class="ck-copy">No metrics in this
|
|
34
|
+
<p class="ck-copy">No metrics in this group yet.</p>
|
|
36
35
|
<% end %>
|
|
37
36
|
</section>
|
|
@@ -17,32 +17,11 @@
|
|
|
17
17
|
</div>
|
|
18
18
|
|
|
19
19
|
<div class="ck-field ck-field--spacious">
|
|
20
|
-
<p class="ck-section-title">
|
|
21
|
-
<p class="ck-hint">What should the
|
|
20
|
+
<p class="ck-section-title">Instruction</p>
|
|
21
|
+
<p class="ck-hint">What should the judge assess? This instruction is sent to the LLM judge when scoring outputs.</p>
|
|
22
22
|
<%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output..." %>
|
|
23
23
|
</div>
|
|
24
24
|
|
|
25
|
-
<div class="ck-field ck-field--spacious" data-controller="evaluation-steps">
|
|
26
|
-
<p class="ck-section-title">Evaluation Steps</p>
|
|
27
|
-
<p class="ck-hint">Steps the evaluator follows during assessment.</p>
|
|
28
|
-
|
|
29
|
-
<div data-evaluation-steps-target="list">
|
|
30
|
-
<% Array(metric.evaluation_steps).each_with_index do |step, index| %>
|
|
31
|
-
<div class="ck-step-row" data-evaluation-steps-target="row">
|
|
32
|
-
<input type="text" name="metric[evaluation_steps][]" value="<%= step %>" class="ck-input" />
|
|
33
|
-
<button type="button" class="ck-icon-btn" data-action="evaluation-steps#remove" aria-label="Remove step">
|
|
34
|
-
<%= heroicon_tag "trash", variant: :outline, size: 16 %>
|
|
35
|
-
</button>
|
|
36
|
-
</div>
|
|
37
|
-
<% end %>
|
|
38
|
-
</div>
|
|
39
|
-
|
|
40
|
-
<button type="button" class="ck-add-btn" data-action="evaluation-steps#add">
|
|
41
|
-
<%= heroicon_tag "plus", variant: :outline, size: 14 %>
|
|
42
|
-
Add step
|
|
43
|
-
</button>
|
|
44
|
-
</div>
|
|
45
|
-
|
|
46
25
|
<div class="ck-field ck-field--spacious">
|
|
47
26
|
<p class="ck-section-title">Rubric</p>
|
|
48
27
|
<p class="ck-hint">What each star rating means for this metric.</p>
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
<section class="ck-page-header">
|
|
2
2
|
<div>
|
|
3
3
|
<h1 class="ck-title">Metrics</h1>
|
|
4
|
-
<p class="ck-lead">Scoring dimensions
|
|
4
|
+
<p class="ck-lead">Scoring dimensions the judge uses to evaluate each response. Each metric defines one thing to assess and a 1-5 scale.</p>
|
|
5
5
|
</div>
|
|
6
6
|
<div class="ck-actions">
|
|
7
7
|
<%= link_to "New metric", new_metric_path, class: ck_button_classes(:dark) %>
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
<tr>
|
|
15
15
|
<th>Name</th>
|
|
16
16
|
<th>Instruction</th>
|
|
17
|
-
<th>
|
|
17
|
+
<th>In groups</th>
|
|
18
18
|
<th></th>
|
|
19
19
|
</tr>
|
|
20
20
|
</thead>
|
|
@@ -22,13 +22,21 @@
|
|
|
22
22
|
<% @metrics.each do |metric| %>
|
|
23
23
|
<tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
|
|
24
24
|
<td><strong><%= metric.name %></strong></td>
|
|
25
|
-
<td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length:
|
|
26
|
-
<td class="ck-meta-copy"><%= metric.
|
|
25
|
+
<td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 90).presence || "—" %></td>
|
|
26
|
+
<td class="ck-meta-copy"><%= metric.metric_groups.any? ? metric.metric_groups.map(&:name).join(", ") : "—" %></td>
|
|
27
27
|
<td class="ck-results-table__arrow">→</td>
|
|
28
28
|
</tr>
|
|
29
29
|
<% end %>
|
|
30
30
|
</tbody>
|
|
31
31
|
</table>
|
|
32
|
+
|
|
33
|
+
<% if @metrics.size >= 2 %>
|
|
34
|
+
<p class="ck-meta-copy" style="margin-top: 1.5rem; text-align: center;">
|
|
35
|
+
Use the same metrics on multiple runs? <%= link_to "Group them →", metric_groups_path, class: "ck-link" %>
|
|
36
|
+
</p>
|
|
37
|
+
<% end %>
|
|
32
38
|
<% else %>
|
|
33
|
-
<div class="ck-empty">
|
|
39
|
+
<div class="ck-empty">
|
|
40
|
+
<p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
|
|
41
|
+
</div>
|
|
34
42
|
<% end %>
|
|
@@ -14,22 +14,11 @@
|
|
|
14
14
|
|
|
15
15
|
<% if @metric.instruction.present? %>
|
|
16
16
|
<section class="ck-card">
|
|
17
|
-
<p class="ck-kicker">
|
|
17
|
+
<p class="ck-kicker">Instruction</p>
|
|
18
18
|
<div class="ck-note-box"><%= simple_format(@metric.instruction) %></div>
|
|
19
19
|
</section>
|
|
20
20
|
<% end %>
|
|
21
21
|
|
|
22
|
-
<% if @metric.evaluation_steps.present? && @metric.evaluation_steps.any? %>
|
|
23
|
-
<section class="ck-card">
|
|
24
|
-
<p class="ck-kicker">Evaluation Steps</p>
|
|
25
|
-
<ol class="ck-step-list">
|
|
26
|
-
<% @metric.evaluation_steps.each do |step| %>
|
|
27
|
-
<li><%= step %></li>
|
|
28
|
-
<% end %>
|
|
29
|
-
</ol>
|
|
30
|
-
</section>
|
|
31
|
-
<% end %>
|
|
32
|
-
|
|
33
22
|
<section class="ck-card ck-card--spaced">
|
|
34
23
|
<p class="ck-kicker">Rubric</p>
|
|
35
24
|
<div class="ck-rubric-display">
|
|
@@ -70,11 +70,11 @@
|
|
|
70
70
|
<% if @all_metrics.empty? %>
|
|
71
71
|
<p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet. <%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
|
|
72
72
|
<% else %>
|
|
73
|
-
<% if @
|
|
73
|
+
<% if @metric_groups.any? %>
|
|
74
74
|
<p class="ck-meta-copy" style="margin-bottom: 0.5rem;">
|
|
75
75
|
Quick add: 
|
|
76
|
-
<% @
|
|
77
|
-
<span class="ck-chip" style="cursor: pointer;" onclick="
|
|
76
|
+
<% @metric_groups.each do |g| %>
|
|
77
|
+
<span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddMetricGroup(<%= g.metric_ids.to_json %>)"><%= g.name %></span> 
|
|
78
78
|
<% end %>
|
|
79
79
|
</p>
|
|
80
80
|
<% end %>
|
|
@@ -112,7 +112,7 @@ function updateRunForm() {
|
|
|
112
112
|
} else if (judge && metrics.length === 0) {
|
|
113
113
|
if (judgeField) judgeField.className = 'ck-field ck-field--info';
|
|
114
114
|
if (metricsField) metricsField.className = 'ck-field ck-field--info';
|
|
115
|
-
if (metricsHint) metricsHint.textContent = 'Select at least one metric or
|
|
115
|
+
if (metricsHint) metricsHint.textContent = 'Select at least one metric or group to enable judging.';
|
|
116
116
|
} else if (!judge && metrics.length > 0) {
|
|
117
117
|
if (judgeField) judgeField.className = 'ck-field ck-field--info';
|
|
118
118
|
if (judgeHint) judgeHint.textContent = 'Select a judge model to enable judging.';
|
|
@@ -131,7 +131,7 @@ function updateRunForm() {
|
|
|
131
131
|
if (submitBtn) submitBtn.disabled = !valid;
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
-
function
|
|
134
|
+
function ckQuickAddMetricGroup(metricIds) {
|
|
135
135
|
metricIds.forEach(function(id) {
|
|
136
136
|
var cb = document.getElementById('run_metric_' + id);
|
|
137
137
|
if (cb) cb.checked = true;
|
|
@@ -19,11 +19,14 @@
|
|
|
19
19
|
<nav class="ck-nav">
|
|
20
20
|
<% active = ->(path) { request.path.start_with?(path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) } %>
|
|
21
21
|
<%= link_to "Prompts", prompts_path, class: active.(prompts_path) %>
|
|
22
|
-
<%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(
|
|
22
|
+
<%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(metric_groups_path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
|
|
23
23
|
<%= link_to "Datasets", datasets_path, class: active.(datasets_path) %>
|
|
24
24
|
<%= link_to "Runs", runs_path, class: active.(runs_path) %>
|
|
25
25
|
<%= link_to "Settings", provider_credentials_path, class: active.(provider_credentials_path) %>
|
|
26
26
|
<%= link_to "API", api_reference_path, class: active.(api_reference_path) %>
|
|
27
|
+
<% if main_app.respond_to?(:logout_path) %>
|
|
28
|
+
<%= button_to "Log out", main_app.logout_path, method: :delete, class: ck_button_classes(:light, variant: :outline) %>
|
|
29
|
+
<% end %>
|
|
27
30
|
</nav>
|
|
28
31
|
</div>
|
|
29
32
|
</header>
|
data/config/routes.rb
CHANGED
|
@@ -9,7 +9,7 @@ CompletionKit::Engine.routes.draw do
|
|
|
9
9
|
|
|
10
10
|
resources :datasets
|
|
11
11
|
resources :metrics
|
|
12
|
-
resources :criteria
|
|
12
|
+
resources :metric_groups
|
|
13
13
|
|
|
14
14
|
resources :runs do
|
|
15
15
|
member do
|
|
@@ -45,7 +45,7 @@ CompletionKit::Engine.routes.draw do
|
|
|
45
45
|
end
|
|
46
46
|
resources :datasets
|
|
47
47
|
resources :metrics
|
|
48
|
-
resources :criteria
|
|
48
|
+
resources :metric_groups
|
|
49
49
|
resources :provider_credentials
|
|
50
50
|
end
|
|
51
51
|
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
class RenameCriteriaToMetricGroups < ActiveRecord::Migration[8.1]
|
|
2
|
+
def change
|
|
3
|
+
rename_table :completion_kit_criteria, :completion_kit_metric_groups
|
|
4
|
+
rename_table :completion_kit_criteria_memberships, :completion_kit_metric_group_memberships
|
|
5
|
+
rename_column :completion_kit_metric_group_memberships, :criteria_id, :metric_group_id
|
|
6
|
+
|
|
7
|
+
if index_name_exists?(:completion_kit_metric_group_memberships, "index_completion_kit_criteria_memberships_on_criteria_id")
|
|
8
|
+
rename_index :completion_kit_metric_group_memberships,
|
|
9
|
+
"index_completion_kit_criteria_memberships_on_criteria_id",
|
|
10
|
+
"index_completion_kit_metric_group_memberships_on_metric_group_id"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -5,16 +5,10 @@ module CompletionKit
|
|
|
5
5
|
class Engine < ::Rails::Engine
|
|
6
6
|
isolate_namespace CompletionKit
|
|
7
7
|
|
|
8
|
-
initializer("completion_kit.inflections", before: :load_config_initializers) do
|
|
9
|
-
ActiveSupport::Inflector.inflections(:en) do |inflect|
|
|
10
|
-
inflect.irregular "criterion", "criteria"
|
|
11
|
-
end
|
|
12
|
-
end
|
|
13
|
-
|
|
14
8
|
paths.add "app/services", eager_load: true
|
|
15
9
|
|
|
16
10
|
def self.register_assets(app)
|
|
17
|
-
app.config.assets.precompile += %w( completion_kit/application.css completion_kit/
|
|
11
|
+
app.config.assets.precompile += %w( completion_kit/application.css completion_kit/logo.svg )
|
|
18
12
|
end
|
|
19
13
|
|
|
20
14
|
initializer("completion_kit.assets") { |app| Engine.register_assets(app) }
|
data/lib/completion_kit.rb
CHANGED
|
@@ -8,6 +8,7 @@ module CompletionKit
|
|
|
8
8
|
attr_accessor :openai_api_key, :anthropic_api_key, :ollama_api_key, :ollama_api_endpoint
|
|
9
9
|
attr_accessor :judge_model, :high_quality_threshold, :medium_quality_threshold
|
|
10
10
|
attr_accessor :username, :password, :auth_strategy, :api_token
|
|
11
|
+
attr_accessor :tenant_scope, :tenant_scope_columns
|
|
11
12
|
|
|
12
13
|
def initialize
|
|
13
14
|
@openai_api_key = ENV['OPENAI_API_KEY']
|
|
@@ -19,6 +20,10 @@ module CompletionKit
|
|
|
19
20
|
@high_quality_threshold = 4
|
|
20
21
|
@medium_quality_threshold = 3
|
|
21
22
|
end
|
|
23
|
+
|
|
24
|
+
def tenant_scope_columns
|
|
25
|
+
@tenant_scope_columns ||= []
|
|
26
|
+
end
|
|
22
27
|
end
|
|
23
28
|
|
|
24
29
|
class << self
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: completion-kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0.rc1
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Damien Bastin
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rails
|
|
@@ -198,12 +198,11 @@ dependencies:
|
|
|
198
198
|
- - "~>"
|
|
199
199
|
- !ruby/object:Gem::Version
|
|
200
200
|
version: '0.22'
|
|
201
|
-
description:
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
or directly from agents like Claude Code.'
|
|
201
|
+
description: CompletionKit is a prompt testing platform that runs as a Rails engine
|
|
202
|
+
or a standalone app. Run prompts against real datasets, score every output with
|
|
203
|
+
an LLM judge against criteria you define, track prompt versions, and get AI-generated
|
|
204
|
+
improvement suggestions grounded in your actual results. Includes a web UI, REST
|
|
205
|
+
API, and a built-in MCP server with 36 tools.
|
|
207
206
|
email:
|
|
208
207
|
- damien@homemade.software
|
|
209
208
|
executables: []
|
|
@@ -216,11 +215,10 @@ files:
|
|
|
216
215
|
- app/assets/config/completion_kit_manifest.js
|
|
217
216
|
- app/assets/config/manifest.js
|
|
218
217
|
- app/assets/images/completion_kit/logo.svg
|
|
219
|
-
- app/assets/javascripts/completion_kit/evaluation_steps_controller.js
|
|
220
218
|
- app/assets/stylesheets/completion_kit/application.css
|
|
221
219
|
- app/controllers/completion_kit/api/v1/base_controller.rb
|
|
222
|
-
- app/controllers/completion_kit/api/v1/criteria_controller.rb
|
|
223
220
|
- app/controllers/completion_kit/api/v1/datasets_controller.rb
|
|
221
|
+
- app/controllers/completion_kit/api/v1/metric_groups_controller.rb
|
|
224
222
|
- app/controllers/completion_kit/api/v1/metrics_controller.rb
|
|
225
223
|
- app/controllers/completion_kit/api/v1/prompts_controller.rb
|
|
226
224
|
- app/controllers/completion_kit/api/v1/provider_credentials_controller.rb
|
|
@@ -228,9 +226,9 @@ files:
|
|
|
228
226
|
- app/controllers/completion_kit/api/v1/runs_controller.rb
|
|
229
227
|
- app/controllers/completion_kit/api_reference_controller.rb
|
|
230
228
|
- app/controllers/completion_kit/application_controller.rb
|
|
231
|
-
- app/controllers/completion_kit/criteria_controller.rb
|
|
232
229
|
- app/controllers/completion_kit/datasets_controller.rb
|
|
233
230
|
- app/controllers/completion_kit/mcp_controller.rb
|
|
231
|
+
- app/controllers/completion_kit/metric_groups_controller.rb
|
|
234
232
|
- app/controllers/completion_kit/metrics_controller.rb
|
|
235
233
|
- app/controllers/completion_kit/prompts_controller.rb
|
|
236
234
|
- app/controllers/completion_kit/provider_credentials_controller.rb
|
|
@@ -243,10 +241,10 @@ files:
|
|
|
243
241
|
- app/jobs/completion_kit/model_discovery_job.rb
|
|
244
242
|
- app/mailers/completion_kit/application_mailer.rb
|
|
245
243
|
- app/models/completion_kit/application_record.rb
|
|
246
|
-
- app/models/completion_kit/criteria.rb
|
|
247
|
-
- app/models/completion_kit/criteria_membership.rb
|
|
248
244
|
- app/models/completion_kit/dataset.rb
|
|
249
245
|
- app/models/completion_kit/metric.rb
|
|
246
|
+
- app/models/completion_kit/metric_group.rb
|
|
247
|
+
- app/models/completion_kit/metric_group_membership.rb
|
|
250
248
|
- app/models/completion_kit/model.rb
|
|
251
249
|
- app/models/completion_kit/prompt.rb
|
|
252
250
|
- app/models/completion_kit/provider_credential.rb
|
|
@@ -261,8 +259,9 @@ files:
|
|
|
261
259
|
- app/services/completion_kit/judge_service.rb
|
|
262
260
|
- app/services/completion_kit/llm_client.rb
|
|
263
261
|
- app/services/completion_kit/mcp_dispatcher.rb
|
|
264
|
-
- app/services/completion_kit/mcp_tools/
|
|
262
|
+
- app/services/completion_kit/mcp_tools/base.rb
|
|
265
263
|
- app/services/completion_kit/mcp_tools/datasets.rb
|
|
264
|
+
- app/services/completion_kit/mcp_tools/metric_groups.rb
|
|
266
265
|
- app/services/completion_kit/mcp_tools/metrics.rb
|
|
267
266
|
- app/services/completion_kit/mcp_tools/prompts.rb
|
|
268
267
|
- app/services/completion_kit/mcp_tools/provider_credentials.rb
|
|
@@ -273,18 +272,19 @@ files:
|
|
|
273
272
|
- app/services/completion_kit/open_ai_client.rb
|
|
274
273
|
- app/services/completion_kit/open_router_client.rb
|
|
275
274
|
- app/services/completion_kit/prompt_improvement_service.rb
|
|
275
|
+
- app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb
|
|
276
276
|
- app/views/completion_kit/api_reference/_example.html.erb
|
|
277
277
|
- app/views/completion_kit/api_reference/index.html.erb
|
|
278
|
-
- app/views/completion_kit/criteria/_form.html.erb
|
|
279
|
-
- app/views/completion_kit/criteria/edit.html.erb
|
|
280
|
-
- app/views/completion_kit/criteria/index.html.erb
|
|
281
|
-
- app/views/completion_kit/criteria/new.html.erb
|
|
282
|
-
- app/views/completion_kit/criteria/show.html.erb
|
|
283
278
|
- app/views/completion_kit/datasets/_form.html.erb
|
|
284
279
|
- app/views/completion_kit/datasets/edit.html.erb
|
|
285
280
|
- app/views/completion_kit/datasets/index.html.erb
|
|
286
281
|
- app/views/completion_kit/datasets/new.html.erb
|
|
287
282
|
- app/views/completion_kit/datasets/show.html.erb
|
|
283
|
+
- app/views/completion_kit/metric_groups/_form.html.erb
|
|
284
|
+
- app/views/completion_kit/metric_groups/edit.html.erb
|
|
285
|
+
- app/views/completion_kit/metric_groups/index.html.erb
|
|
286
|
+
- app/views/completion_kit/metric_groups/new.html.erb
|
|
287
|
+
- app/views/completion_kit/metric_groups/show.html.erb
|
|
288
288
|
- app/views/completion_kit/metrics/_form.html.erb
|
|
289
289
|
- app/views/completion_kit/metrics/edit.html.erb
|
|
290
290
|
- app/views/completion_kit/metrics/index.html.erb
|
|
@@ -324,6 +324,8 @@ files:
|
|
|
324
324
|
- db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb
|
|
325
325
|
- db/migrate/20260403000002_create_completion_kit_suggestions.rb
|
|
326
326
|
- db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb
|
|
327
|
+
- db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb
|
|
328
|
+
- db/migrate/20260417000001_rename_criteria_to_metric_groups.rb
|
|
327
329
|
- lib/completion-kit.rb
|
|
328
330
|
- lib/completion_kit.rb
|
|
329
331
|
- lib/completion_kit/engine.rb
|
|
@@ -356,6 +358,6 @@ requirements: []
|
|
|
356
358
|
rubygems_version: 3.5.16
|
|
357
359
|
signing_key:
|
|
358
360
|
specification_version: 4
|
|
359
|
-
summary:
|
|
360
|
-
|
|
361
|
+
summary: Your prompts need tests too. Run them against real data, score outputs with
|
|
362
|
+
an LLM judge, iterate until they work.
|
|
361
363
|
test_files: []
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
document.addEventListener("DOMContentLoaded", function () {
|
|
2
|
-
document.addEventListener("click", function (event) {
|
|
3
|
-
var addBtn = event.target.closest("[data-action='evaluation-steps#add']");
|
|
4
|
-
if (addBtn) {
|
|
5
|
-
var container = addBtn.closest("[data-controller='evaluation-steps']");
|
|
6
|
-
var list = container.querySelector("[data-evaluation-steps-target='list']");
|
|
7
|
-
var row = document.createElement("div");
|
|
8
|
-
row.className = "ck-step-row";
|
|
9
|
-
row.setAttribute("data-evaluation-steps-target", "row");
|
|
10
|
-
row.innerHTML =
|
|
11
|
-
'<input type="text" name="metric[evaluation_steps][]" value="" class="ck-input" placeholder="Describe this evaluation step..." />' +
|
|
12
|
-
'<button type="button" class="ck-icon-btn" data-action="evaluation-steps#remove" aria-label="Remove step">' +
|
|
13
|
-
'<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor" stroke-width="1.75"><path d="M3 6h18"/><path d="M19 6v14c0 1-1 2-2 2H7c-1 0-2-1-2-2V6"/><path d="M8 6V4c0-1 1-2 2-2h4c1 0 2 1 2 2v2"/><line x1="10" y1="11" x2="10" y2="17"/><line x1="14" y1="11" x2="14" y2="17"/></svg>' +
|
|
14
|
-
"</button>";
|
|
15
|
-
list.appendChild(row);
|
|
16
|
-
row.querySelector("input").focus();
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
var removeBtn = event.target.closest("[data-action='evaluation-steps#remove']");
|
|
20
|
-
if (removeBtn) {
|
|
21
|
-
var stepRow = removeBtn.closest("[data-evaluation-steps-target='row']");
|
|
22
|
-
if (stepRow) stepRow.remove();
|
|
23
|
-
}
|
|
24
|
-
});
|
|
25
|
-
});
|