completion-kit 0.1.0.rc1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +83 -88
- data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +62 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
- data/app/controllers/completion_kit/metric_groups_controller.rb +67 -0
- data/app/controllers/completion_kit/metrics_controller.rb +2 -2
- data/app/controllers/completion_kit/runs_controller.rb +1 -1
- data/app/models/completion_kit/metric.rb +3 -5
- data/app/models/completion_kit/metric_group.rb +22 -0
- data/app/models/completion_kit/metric_group_membership.rb +20 -0
- data/app/models/completion_kit/run.rb +0 -1
- data/app/services/completion_kit/judge_service.rb +3 -7
- data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
- data/app/services/completion_kit/mcp_tools/{criteria.rb → metric_groups.rb} +28 -28
- data/app/services/completion_kit/mcp_tools/metrics.rb +2 -4
- data/app/views/completion_kit/api_reference/index.html.erb +11 -11
- data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
- data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
- data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
- data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
- data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
- data/app/views/completion_kit/metrics/_form.html.erb +2 -23
- data/app/views/completion_kit/metrics/index.html.erb +13 -5
- data/app/views/completion_kit/metrics/show.html.erb +1 -12
- data/app/views/completion_kit/runs/_form.html.erb +5 -5
- data/app/views/layouts/completion_kit/application.html.erb +4 -1
- data/config/routes.rb +2 -2
- data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
- data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
- data/lib/completion_kit/engine.rb +1 -7
- data/lib/completion_kit/version.rb +1 -1
- metadata +21 -21
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
- data/app/controllers/completion_kit/criteria_controller.rb +0 -67
- data/app/models/completion_kit/criteria.rb +0 -22
- data/app/models/completion_kit/criteria_membership.rb +0 -20
- data/app/views/completion_kit/criteria/_form.html.erb +0 -46
- data/app/views/completion_kit/criteria/edit.html.erb +0 -14
- data/app/views/completion_kit/criteria/index.html.erb +0 -37
- data/app/views/completion_kit/criteria/new.html.erb +0 -13
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module McpTools
|
|
3
|
-
module
|
|
3
|
+
module MetricGroups
|
|
4
4
|
TOOLS = {
|
|
5
|
-
"
|
|
6
|
-
description: "List all
|
|
5
|
+
"metric_groups_list" => {
|
|
6
|
+
description: "List all metric groups",
|
|
7
7
|
inputSchema: {type: "object", properties: {}, required: []},
|
|
8
8
|
handler: :list
|
|
9
9
|
},
|
|
10
|
-
"
|
|
11
|
-
description: "Get a
|
|
10
|
+
"metric_groups_get" => {
|
|
11
|
+
description: "Get a metric group by ID",
|
|
12
12
|
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
13
13
|
handler: :get
|
|
14
14
|
},
|
|
15
|
-
"
|
|
16
|
-
description: "Create a
|
|
15
|
+
"metric_groups_create" => {
|
|
16
|
+
description: "Create a metric group",
|
|
17
17
|
inputSchema: {
|
|
18
18
|
type: "object",
|
|
19
19
|
properties: {
|
|
@@ -24,8 +24,8 @@ module CompletionKit
|
|
|
24
24
|
},
|
|
25
25
|
handler: :create
|
|
26
26
|
},
|
|
27
|
-
"
|
|
28
|
-
description: "Update a
|
|
27
|
+
"metric_groups_update" => {
|
|
28
|
+
description: "Update a metric group",
|
|
29
29
|
inputSchema: {
|
|
30
30
|
type: "object",
|
|
31
31
|
properties: {
|
|
@@ -36,8 +36,8 @@ module CompletionKit
|
|
|
36
36
|
},
|
|
37
37
|
handler: :update
|
|
38
38
|
},
|
|
39
|
-
"
|
|
40
|
-
description: "Delete a
|
|
39
|
+
"metric_groups_delete" => {
|
|
40
|
+
description: "Delete a metric group",
|
|
41
41
|
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
42
42
|
handler: :delete
|
|
43
43
|
}
|
|
@@ -53,36 +53,36 @@ module CompletionKit
|
|
|
53
53
|
end
|
|
54
54
|
|
|
55
55
|
def self.list(_args)
|
|
56
|
-
text_result(CompletionKit::
|
|
56
|
+
text_result(CompletionKit::MetricGroup.order(created_at: :desc).map(&:as_json))
|
|
57
57
|
end
|
|
58
58
|
|
|
59
59
|
def self.get(args)
|
|
60
|
-
text_result(CompletionKit::
|
|
60
|
+
text_result(CompletionKit::MetricGroup.find(args["id"]).as_json)
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
def self.create(args)
|
|
64
|
-
|
|
65
|
-
if
|
|
66
|
-
replace_metric_memberships(
|
|
67
|
-
text_result(
|
|
64
|
+
metric_group = CompletionKit::MetricGroup.new(args.slice("name", "description"))
|
|
65
|
+
if metric_group.save
|
|
66
|
+
replace_metric_memberships(metric_group, args["metric_ids"])
|
|
67
|
+
text_result(metric_group.reload.as_json)
|
|
68
68
|
else
|
|
69
|
-
error_result(
|
|
69
|
+
error_result(metric_group.errors.full_messages.join(", "))
|
|
70
70
|
end
|
|
71
71
|
end
|
|
72
72
|
|
|
73
73
|
def self.update(args)
|
|
74
|
-
|
|
75
|
-
if
|
|
76
|
-
replace_metric_memberships(
|
|
77
|
-
text_result(
|
|
74
|
+
metric_group = CompletionKit::MetricGroup.find(args["id"])
|
|
75
|
+
if metric_group.update(args.except("id", "metric_ids").slice("name", "description"))
|
|
76
|
+
replace_metric_memberships(metric_group, args["metric_ids"]) if args.key?("metric_ids")
|
|
77
|
+
text_result(metric_group.reload.as_json)
|
|
78
78
|
else
|
|
79
|
-
error_result(
|
|
79
|
+
error_result(metric_group.errors.full_messages.join(", "))
|
|
80
80
|
end
|
|
81
81
|
end
|
|
82
82
|
|
|
83
83
|
def self.delete(args)
|
|
84
|
-
CompletionKit::
|
|
85
|
-
text_result("
|
|
84
|
+
CompletionKit::MetricGroup.find(args["id"]).destroy!
|
|
85
|
+
text_result("Metric group #{args["id"]} deleted")
|
|
86
86
|
end
|
|
87
87
|
|
|
88
88
|
def self.text_result(data)
|
|
@@ -94,11 +94,11 @@ module CompletionKit
|
|
|
94
94
|
{content: [{type: "text", text: message}], isError: true}
|
|
95
95
|
end
|
|
96
96
|
|
|
97
|
-
def self.replace_metric_memberships(
|
|
97
|
+
def self.replace_metric_memberships(metric_group, metric_ids)
|
|
98
98
|
return unless metric_ids
|
|
99
|
-
|
|
99
|
+
metric_group.metric_group_memberships.delete_all
|
|
100
100
|
Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
|
|
101
|
-
|
|
101
|
+
metric_group.metric_group_memberships.create!(metric_id: metric_id, position: index + 1)
|
|
102
102
|
end
|
|
103
103
|
end
|
|
104
104
|
end
|
|
@@ -18,7 +18,6 @@ module CompletionKit
|
|
|
18
18
|
type: "object",
|
|
19
19
|
properties: {
|
|
20
20
|
name: {type: "string"}, instruction: {type: "string"},
|
|
21
|
-
evaluation_steps: {type: "array", items: {type: "string"}},
|
|
22
21
|
rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}}
|
|
23
22
|
},
|
|
24
23
|
required: ["name"]
|
|
@@ -31,7 +30,6 @@ module CompletionKit
|
|
|
31
30
|
type: "object",
|
|
32
31
|
properties: {
|
|
33
32
|
id: {type: "integer"}, name: {type: "string"}, instruction: {type: "string"},
|
|
34
|
-
evaluation_steps: {type: "array", items: {type: "string"}},
|
|
35
33
|
rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}}
|
|
36
34
|
},
|
|
37
35
|
required: ["id"]
|
|
@@ -63,7 +61,7 @@ module CompletionKit
|
|
|
63
61
|
end
|
|
64
62
|
|
|
65
63
|
def self.create(args)
|
|
66
|
-
metric = Metric.new(args.slice("name", "instruction", "
|
|
64
|
+
metric = Metric.new(args.slice("name", "instruction", "rubric_bands"))
|
|
67
65
|
if metric.save
|
|
68
66
|
text_result(metric.as_json)
|
|
69
67
|
else
|
|
@@ -73,7 +71,7 @@ module CompletionKit
|
|
|
73
71
|
|
|
74
72
|
def self.update(args)
|
|
75
73
|
metric = Metric.find(args["id"])
|
|
76
|
-
if metric.update(args.except("id").slice("name", "instruction", "
|
|
74
|
+
if metric.update(args.except("id").slice("name", "instruction", "rubric_bands"))
|
|
77
75
|
text_result(metric.as_json)
|
|
78
76
|
else
|
|
79
77
|
error_result(metric.errors.full_messages.join(", "))
|
|
@@ -63,7 +63,7 @@ end %>
|
|
|
63
63
|
<input type="radio" name="ck-api-tab" id="ck-tab-responses" class="ck-api-tabs__radio">
|
|
64
64
|
<input type="radio" name="ck-api-tab" id="ck-tab-datasets" class="ck-api-tabs__radio">
|
|
65
65
|
<input type="radio" name="ck-api-tab" id="ck-tab-metrics" class="ck-api-tabs__radio">
|
|
66
|
-
<input type="radio" name="ck-api-tab" id="ck-tab-
|
|
66
|
+
<input type="radio" name="ck-api-tab" id="ck-tab-metric-groups" class="ck-api-tabs__radio">
|
|
67
67
|
<input type="radio" name="ck-api-tab" id="ck-tab-providers" class="ck-api-tabs__radio">
|
|
68
68
|
|
|
69
69
|
<nav class="ck-api-tabs__nav">
|
|
@@ -73,7 +73,7 @@ end %>
|
|
|
73
73
|
<label for="ck-tab-responses" class="ck-api-tabs__label">Responses <span class="ck-api-tabs__count">2</span></label>
|
|
74
74
|
<label for="ck-tab-datasets" class="ck-api-tabs__label">Datasets <span class="ck-api-tabs__count">5</span></label>
|
|
75
75
|
<label for="ck-tab-metrics" class="ck-api-tabs__label">Metrics <span class="ck-api-tabs__count">5</span></label>
|
|
76
|
-
<label for="ck-tab-
|
|
76
|
+
<label for="ck-tab-metric-groups" class="ck-api-tabs__label">Metric Groups <span class="ck-api-tabs__count">5</span></label>
|
|
77
77
|
<label for="ck-tab-providers" class="ck-api-tabs__label">Providers <span class="ck-api-tabs__count">5</span></label>
|
|
78
78
|
</nav>
|
|
79
79
|
|
|
@@ -232,7 +232,7 @@ end %>
|
|
|
232
232
|
<div class="ck-api-endpoint">
|
|
233
233
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metrics</p>
|
|
234
234
|
<p class="ck-meta-copy">Create a metric.</p>
|
|
235
|
-
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>instruction</code>, <code>
|
|
235
|
+
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>instruction</code>, <code>rubric_bands</code> (array of {stars, description})</p>
|
|
236
236
|
<%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/metrics \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"relevance\", \"instruction\": \"Is the response relevant?\"}'" %>
|
|
237
237
|
</div>
|
|
238
238
|
<div class="ck-api-endpoint">
|
|
@@ -242,20 +242,20 @@ end %>
|
|
|
242
242
|
</div>
|
|
243
243
|
|
|
244
244
|
<div class="ck-api-tabs__panel">
|
|
245
|
-
<h2 class="ck-section-title">
|
|
246
|
-
<p class="ck-copy">Named groups of metrics
|
|
245
|
+
<h2 class="ck-section-title">Metric Groups</h2>
|
|
246
|
+
<p class="ck-copy">Named groups of metrics you can apply to a run as a set.</p>
|
|
247
247
|
<div class="ck-api-endpoint">
|
|
248
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/
|
|
249
|
-
<p class="ck-meta-copy">List all
|
|
248
|
+
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/metric_groups</p>
|
|
249
|
+
<p class="ck-meta-copy">List all metric groups with their metric IDs.</p>
|
|
250
250
|
</div>
|
|
251
251
|
<div class="ck-api-endpoint">
|
|
252
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/
|
|
253
|
-
<p class="ck-meta-copy">Create a
|
|
252
|
+
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metric_groups</p>
|
|
253
|
+
<p class="ck-meta-copy">Create a metric group.</p>
|
|
254
254
|
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>description</code>, <code>metric_ids</code> (array)</p>
|
|
255
255
|
</div>
|
|
256
256
|
<div class="ck-api-endpoint">
|
|
257
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/
|
|
258
|
-
<p class="ck-meta-copy">Get, update, or delete a
|
|
257
|
+
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metric_groups/:id</p>
|
|
258
|
+
<p class="ck-meta-copy">Get, update, or delete a metric group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
|
|
259
259
|
</div>
|
|
260
260
|
</div>
|
|
261
261
|
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
<%= form_with(model: metric_group, url: metric_group.persisted? ? metric_group_path(metric_group) : metric_groups_path, local: true) do |form| %>
|
|
2
|
+
<% if metric_group.errors.any? %>
|
|
3
|
+
<div class="ck-flash ck-flash--alert">
|
|
4
|
+
<p class="ck-flash__title"><%= pluralize(metric_group.errors.count, "problem") %> prevented this metric group from being saved.</p>
|
|
5
|
+
<ul class="ck-error-list">
|
|
6
|
+
<% metric_group.errors.full_messages.each do |message| %>
|
|
7
|
+
<li><%= message %></li>
|
|
8
|
+
<% end %>
|
|
9
|
+
</ul>
|
|
10
|
+
</div>
|
|
11
|
+
<% end %>
|
|
12
|
+
|
|
13
|
+
<div class="ck-card ck-form-card">
|
|
14
|
+
<div class="ck-field">
|
|
15
|
+
<%= form.label :name, "Metric group name", class: "ck-label" %>
|
|
16
|
+
<%= form.text_field :name, class: "ck-input", placeholder: "Support quality" %>
|
|
17
|
+
</div>
|
|
18
|
+
|
|
19
|
+
<div class="ck-field">
|
|
20
|
+
<%= form.label :description, class: "ck-label" %>
|
|
21
|
+
<%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "When this metric group should be used." %>
|
|
22
|
+
</div>
|
|
23
|
+
|
|
24
|
+
<div class="ck-field">
|
|
25
|
+
<p class="ck-label">Metrics in this group</p>
|
|
26
|
+
<p class="ck-hint">Pick the metrics to include.</p>
|
|
27
|
+
<div class="ck-list ck-list--compact">
|
|
28
|
+
<% @metrics.each do |metric| %>
|
|
29
|
+
<label class="ck-item">
|
|
30
|
+
<%= check_box_tag "metric_group[metric_ids][]", metric.id, metric_group.metrics.exists?(metric.id), class: "ck-checkbox" %>
|
|
31
|
+
<span>
|
|
32
|
+
<strong><%= metric.name %></strong>
|
|
33
|
+
<span class="ck-meta-copy"><%= metric.instruction.presence || "No instruction set." %></span>
|
|
34
|
+
</span>
|
|
35
|
+
</label>
|
|
36
|
+
<% end %>
|
|
37
|
+
</div>
|
|
38
|
+
<%= hidden_field_tag "metric_group[metric_ids][]", "" %>
|
|
39
|
+
</div>
|
|
40
|
+
|
|
41
|
+
<div class="ck-actions">
|
|
42
|
+
<%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
|
|
43
|
+
<%= form.submit(metric_group.persisted? ? "Save metric group" : "Create metric group", class: ck_button_classes(:dark)) %>
|
|
44
|
+
</div>
|
|
45
|
+
</div>
|
|
46
|
+
<% end %>
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li><%= link_to @metric_group.name, metric_group_path(@metric_group) %></li>
|
|
4
|
+
<li>Edit</li>
|
|
5
|
+
</ol>
|
|
6
|
+
|
|
7
|
+
<section class="ck-page-header">
|
|
8
|
+
<div>
|
|
9
|
+
<h1 class="ck-title">Edit metric group</h1>
|
|
10
|
+
</div>
|
|
11
|
+
</section>
|
|
12
|
+
|
|
13
|
+
<%= render "form", metric_group: @metric_group %>
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li>Metric groups</li>
|
|
4
|
+
</ol>
|
|
5
|
+
|
|
6
|
+
<section class="ck-page-header">
|
|
7
|
+
<div>
|
|
8
|
+
<h1 class="ck-title">Metric groups</h1>
|
|
9
|
+
<p class="ck-lead">Named groups of metrics. Apply a group to a run to score outputs against every metric in the group at once.</p>
|
|
10
|
+
</div>
|
|
11
|
+
<div class="ck-actions">
|
|
12
|
+
<%= link_to "New metric group", new_metric_group_path, class: ck_button_classes(:dark) %>
|
|
13
|
+
</div>
|
|
14
|
+
</section>
|
|
15
|
+
|
|
16
|
+
<% if @metric_groups.any? %>
|
|
17
|
+
<table class="ck-results-table">
|
|
18
|
+
<thead>
|
|
19
|
+
<tr>
|
|
20
|
+
<th>Name</th>
|
|
21
|
+
<th>Description</th>
|
|
22
|
+
<th>Metrics</th>
|
|
23
|
+
<th></th>
|
|
24
|
+
</tr>
|
|
25
|
+
</thead>
|
|
26
|
+
<tbody>
|
|
27
|
+
<% @metric_groups.each do |metric_group| %>
|
|
28
|
+
<tr onclick="window.location='<%= metric_group_path(metric_group) %>'" style="cursor: pointer;">
|
|
29
|
+
<td><strong><%= metric_group.name %></strong></td>
|
|
30
|
+
<td class="ck-meta-copy"><%= truncate(metric_group.description.to_s, length: 90).presence || "—" %></td>
|
|
31
|
+
<td class="ck-meta-copy"><%= metric_group.metrics.any? ? metric_group.metrics.map(&:name).join(", ") : "empty" %></td>
|
|
32
|
+
<td class="ck-results-table__arrow">→</td>
|
|
33
|
+
</tr>
|
|
34
|
+
<% end %>
|
|
35
|
+
</tbody>
|
|
36
|
+
</table>
|
|
37
|
+
<% else %>
|
|
38
|
+
<div class="ck-empty">
|
|
39
|
+
<p>No metric groups yet. <%= link_to "Create one", new_metric_group_path, class: "ck-link" %> if you want to group multiple metrics and apply them together.</p>
|
|
40
|
+
</div>
|
|
41
|
+
<% end %>
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
+
<li>New metric group</li>
|
|
4
|
+
</ol>
|
|
5
|
+
|
|
6
|
+
<section class="ck-page-header">
|
|
7
|
+
<div>
|
|
8
|
+
<h1 class="ck-title">New metric group</h1>
|
|
9
|
+
</div>
|
|
10
|
+
</section>
|
|
11
|
+
|
|
12
|
+
<%= render "form", metric_group: @metric_group %>
|
|
@@ -1,26 +1,25 @@
|
|
|
1
1
|
<ol class="ck-breadcrumb">
|
|
2
2
|
<li><%= link_to "Metrics", metrics_path %></li>
|
|
3
|
-
<li><%=
|
|
4
|
-
<li><%= @criteria.name %></li>
|
|
3
|
+
<li><%= @metric_group.name %></li>
|
|
5
4
|
</ol>
|
|
6
5
|
|
|
7
6
|
<section class="ck-page-header">
|
|
8
7
|
<div>
|
|
9
|
-
<h1 class="ck-title"><%= @
|
|
10
|
-
<% if @
|
|
11
|
-
<p class="ck-lead"><%= @
|
|
8
|
+
<h1 class="ck-title"><%= @metric_group.name %></h1>
|
|
9
|
+
<% if @metric_group.description.present? %>
|
|
10
|
+
<p class="ck-lead"><%= @metric_group.description %></p>
|
|
12
11
|
<% end %>
|
|
13
12
|
</div>
|
|
14
13
|
<div class="ck-actions">
|
|
15
|
-
<%= link_to "Edit",
|
|
14
|
+
<%= link_to "Edit", edit_metric_group_path(@metric_group), class: ck_button_classes(:light, variant: :outline) %>
|
|
16
15
|
</div>
|
|
17
16
|
</section>
|
|
18
17
|
|
|
19
18
|
<section class="ck-card">
|
|
20
19
|
<p class="ck-kicker">Metrics</p>
|
|
21
|
-
<% if @
|
|
20
|
+
<% if @metric_group.metrics.any? %>
|
|
22
21
|
<div class="ck-list ck-list--compact">
|
|
23
|
-
<% @
|
|
22
|
+
<% @metric_group.metrics.each do |metric| %>
|
|
24
23
|
<div class="ck-item">
|
|
25
24
|
<div>
|
|
26
25
|
<p class="ck-item-title"><%= link_to metric.name, metric_path(metric), class: "ck-link" %></p>
|
|
@@ -32,6 +31,6 @@
|
|
|
32
31
|
<% end %>
|
|
33
32
|
</div>
|
|
34
33
|
<% else %>
|
|
35
|
-
<p class="ck-copy">No metrics in this
|
|
34
|
+
<p class="ck-copy">No metrics in this group yet.</p>
|
|
36
35
|
<% end %>
|
|
37
36
|
</section>
|
|
@@ -17,32 +17,11 @@
|
|
|
17
17
|
</div>
|
|
18
18
|
|
|
19
19
|
<div class="ck-field ck-field--spacious">
|
|
20
|
-
<p class="ck-section-title">
|
|
21
|
-
<p class="ck-hint">What should the
|
|
20
|
+
<p class="ck-section-title">Instruction</p>
|
|
21
|
+
<p class="ck-hint">What should the judge assess? This instruction is sent to the LLM judge when scoring outputs.</p>
|
|
22
22
|
<%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output..." %>
|
|
23
23
|
</div>
|
|
24
24
|
|
|
25
|
-
<div class="ck-field ck-field--spacious" data-controller="evaluation-steps">
|
|
26
|
-
<p class="ck-section-title">Evaluation Steps</p>
|
|
27
|
-
<p class="ck-hint">Steps the evaluator follows during assessment.</p>
|
|
28
|
-
|
|
29
|
-
<div data-evaluation-steps-target="list">
|
|
30
|
-
<% Array(metric.evaluation_steps).each_with_index do |step, index| %>
|
|
31
|
-
<div class="ck-step-row" data-evaluation-steps-target="row">
|
|
32
|
-
<input type="text" name="metric[evaluation_steps][]" value="<%= step %>" class="ck-input" />
|
|
33
|
-
<button type="button" class="ck-icon-btn" data-action="evaluation-steps#remove" aria-label="Remove step">
|
|
34
|
-
<%= heroicon_tag "trash", variant: :outline, size: 16 %>
|
|
35
|
-
</button>
|
|
36
|
-
</div>
|
|
37
|
-
<% end %>
|
|
38
|
-
</div>
|
|
39
|
-
|
|
40
|
-
<button type="button" class="ck-add-btn" data-action="evaluation-steps#add">
|
|
41
|
-
<%= heroicon_tag "plus", variant: :outline, size: 14 %>
|
|
42
|
-
Add step
|
|
43
|
-
</button>
|
|
44
|
-
</div>
|
|
45
|
-
|
|
46
25
|
<div class="ck-field ck-field--spacious">
|
|
47
26
|
<p class="ck-section-title">Rubric</p>
|
|
48
27
|
<p class="ck-hint">What each star rating means for this metric.</p>
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
<section class="ck-page-header">
|
|
2
2
|
<div>
|
|
3
3
|
<h1 class="ck-title">Metrics</h1>
|
|
4
|
-
<p class="ck-lead">Scoring dimensions
|
|
4
|
+
<p class="ck-lead">Scoring dimensions the judge uses to evaluate each response. Each metric defines one thing to assess and a 1-5 scale.</p>
|
|
5
5
|
</div>
|
|
6
6
|
<div class="ck-actions">
|
|
7
7
|
<%= link_to "New metric", new_metric_path, class: ck_button_classes(:dark) %>
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
<tr>
|
|
15
15
|
<th>Name</th>
|
|
16
16
|
<th>Instruction</th>
|
|
17
|
-
<th>
|
|
17
|
+
<th>In groups</th>
|
|
18
18
|
<th></th>
|
|
19
19
|
</tr>
|
|
20
20
|
</thead>
|
|
@@ -22,13 +22,21 @@
|
|
|
22
22
|
<% @metrics.each do |metric| %>
|
|
23
23
|
<tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
|
|
24
24
|
<td><strong><%= metric.name %></strong></td>
|
|
25
|
-
<td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length:
|
|
26
|
-
<td class="ck-meta-copy"><%= metric.
|
|
25
|
+
<td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 90).presence || "—" %></td>
|
|
26
|
+
<td class="ck-meta-copy"><%= metric.metric_groups.any? ? metric.metric_groups.map(&:name).join(", ") : "—" %></td>
|
|
27
27
|
<td class="ck-results-table__arrow">→</td>
|
|
28
28
|
</tr>
|
|
29
29
|
<% end %>
|
|
30
30
|
</tbody>
|
|
31
31
|
</table>
|
|
32
|
+
|
|
33
|
+
<% if @metrics.size >= 2 %>
|
|
34
|
+
<p class="ck-meta-copy" style="margin-top: 1.5rem; text-align: center;">
|
|
35
|
+
Use the same metrics on multiple runs? <%= link_to "Group them →", metric_groups_path, class: "ck-link" %>
|
|
36
|
+
</p>
|
|
37
|
+
<% end %>
|
|
32
38
|
<% else %>
|
|
33
|
-
<div class="ck-empty">
|
|
39
|
+
<div class="ck-empty">
|
|
40
|
+
<p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
|
|
41
|
+
</div>
|
|
34
42
|
<% end %>
|
|
@@ -14,22 +14,11 @@
|
|
|
14
14
|
|
|
15
15
|
<% if @metric.instruction.present? %>
|
|
16
16
|
<section class="ck-card">
|
|
17
|
-
<p class="ck-kicker">
|
|
17
|
+
<p class="ck-kicker">Instruction</p>
|
|
18
18
|
<div class="ck-note-box"><%= simple_format(@metric.instruction) %></div>
|
|
19
19
|
</section>
|
|
20
20
|
<% end %>
|
|
21
21
|
|
|
22
|
-
<% if @metric.evaluation_steps.present? && @metric.evaluation_steps.any? %>
|
|
23
|
-
<section class="ck-card">
|
|
24
|
-
<p class="ck-kicker">Evaluation Steps</p>
|
|
25
|
-
<ol class="ck-step-list">
|
|
26
|
-
<% @metric.evaluation_steps.each do |step| %>
|
|
27
|
-
<li><%= step %></li>
|
|
28
|
-
<% end %>
|
|
29
|
-
</ol>
|
|
30
|
-
</section>
|
|
31
|
-
<% end %>
|
|
32
|
-
|
|
33
22
|
<section class="ck-card ck-card--spaced">
|
|
34
23
|
<p class="ck-kicker">Rubric</p>
|
|
35
24
|
<div class="ck-rubric-display">
|
|
@@ -70,11 +70,11 @@
|
|
|
70
70
|
<% if @all_metrics.empty? %>
|
|
71
71
|
<p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet. <%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
|
|
72
72
|
<% else %>
|
|
73
|
-
<% if @
|
|
73
|
+
<% if @metric_groups.any? %>
|
|
74
74
|
<p class="ck-meta-copy" style="margin-bottom: 0.5rem;">
|
|
75
75
|
Quick add: 
|
|
76
|
-
<% @
|
|
77
|
-
<span class="ck-chip" style="cursor: pointer;" onclick="
|
|
76
|
+
<% @metric_groups.each do |g| %>
|
|
77
|
+
<span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddMetricGroup(<%= g.metric_ids.to_json %>)"><%= g.name %></span> 
|
|
78
78
|
<% end %>
|
|
79
79
|
</p>
|
|
80
80
|
<% end %>
|
|
@@ -112,7 +112,7 @@ function updateRunForm() {
|
|
|
112
112
|
} else if (judge && metrics.length === 0) {
|
|
113
113
|
if (judgeField) judgeField.className = 'ck-field ck-field--info';
|
|
114
114
|
if (metricsField) metricsField.className = 'ck-field ck-field--info';
|
|
115
|
-
if (metricsHint) metricsHint.textContent = 'Select at least one metric or
|
|
115
|
+
if (metricsHint) metricsHint.textContent = 'Select at least one metric or group to enable judging.';
|
|
116
116
|
} else if (!judge && metrics.length > 0) {
|
|
117
117
|
if (judgeField) judgeField.className = 'ck-field ck-field--info';
|
|
118
118
|
if (judgeHint) judgeHint.textContent = 'Select a judge model to enable judging.';
|
|
@@ -131,7 +131,7 @@ function updateRunForm() {
|
|
|
131
131
|
if (submitBtn) submitBtn.disabled = !valid;
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
-
function
|
|
134
|
+
function ckQuickAddMetricGroup(metricIds) {
|
|
135
135
|
metricIds.forEach(function(id) {
|
|
136
136
|
var cb = document.getElementById('run_metric_' + id);
|
|
137
137
|
if (cb) cb.checked = true;
|
|
@@ -19,11 +19,14 @@
|
|
|
19
19
|
<nav class="ck-nav">
|
|
20
20
|
<% active = ->(path) { request.path.start_with?(path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) } %>
|
|
21
21
|
<%= link_to "Prompts", prompts_path, class: active.(prompts_path) %>
|
|
22
|
-
<%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(
|
|
22
|
+
<%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(metric_groups_path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
|
|
23
23
|
<%= link_to "Datasets", datasets_path, class: active.(datasets_path) %>
|
|
24
24
|
<%= link_to "Runs", runs_path, class: active.(runs_path) %>
|
|
25
25
|
<%= link_to "Settings", provider_credentials_path, class: active.(provider_credentials_path) %>
|
|
26
26
|
<%= link_to "API", api_reference_path, class: active.(api_reference_path) %>
|
|
27
|
+
<% if main_app.respond_to?(:logout_path) %>
|
|
28
|
+
<%= button_to "Log out", main_app.logout_path, method: :delete, class: ck_button_classes(:light, variant: :outline) %>
|
|
29
|
+
<% end %>
|
|
27
30
|
</nav>
|
|
28
31
|
</div>
|
|
29
32
|
</header>
|
data/config/routes.rb
CHANGED
|
@@ -9,7 +9,7 @@ CompletionKit::Engine.routes.draw do
|
|
|
9
9
|
|
|
10
10
|
resources :datasets
|
|
11
11
|
resources :metrics
|
|
12
|
-
resources :
|
|
12
|
+
resources :metric_groups
|
|
13
13
|
|
|
14
14
|
resources :runs do
|
|
15
15
|
member do
|
|
@@ -45,7 +45,7 @@ CompletionKit::Engine.routes.draw do
|
|
|
45
45
|
end
|
|
46
46
|
resources :datasets
|
|
47
47
|
resources :metrics
|
|
48
|
-
resources :
|
|
48
|
+
resources :metric_groups
|
|
49
49
|
resources :provider_credentials
|
|
50
50
|
end
|
|
51
51
|
end
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
class RenameCriteriaToMetricGroups < ActiveRecord::Migration[8.1]
|
|
2
|
+
def change
|
|
3
|
+
rename_table :completion_kit_criteria, :completion_kit_metric_groups
|
|
4
|
+
rename_table :completion_kit_criteria_memberships, :completion_kit_metric_group_memberships
|
|
5
|
+
rename_column :completion_kit_metric_group_memberships, :criteria_id, :metric_group_id
|
|
6
|
+
|
|
7
|
+
if index_name_exists?(:completion_kit_metric_group_memberships, "index_completion_kit_criteria_memberships_on_criteria_id")
|
|
8
|
+
rename_index :completion_kit_metric_group_memberships,
|
|
9
|
+
"index_completion_kit_criteria_memberships_on_criteria_id",
|
|
10
|
+
"index_completion_kit_metric_group_memberships_on_metric_group_id"
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
end
|
|
@@ -5,16 +5,10 @@ module CompletionKit
|
|
|
5
5
|
class Engine < ::Rails::Engine
|
|
6
6
|
isolate_namespace CompletionKit
|
|
7
7
|
|
|
8
|
-
initializer("completion_kit.inflections", before: :load_config_initializers) do
|
|
9
|
-
ActiveSupport::Inflector.inflections(:en) do |inflect|
|
|
10
|
-
inflect.irregular "criterion", "criteria"
|
|
11
|
-
end
|
|
12
|
-
end
|
|
13
|
-
|
|
14
8
|
paths.add "app/services", eager_load: true
|
|
15
9
|
|
|
16
10
|
def self.register_assets(app)
|
|
17
|
-
app.config.assets.precompile += %w( completion_kit/application.css completion_kit/
|
|
11
|
+
app.config.assets.precompile += %w( completion_kit/application.css completion_kit/logo.svg )
|
|
18
12
|
end
|
|
19
13
|
|
|
20
14
|
initializer("completion_kit.assets") { |app| Engine.register_assets(app) }
|