completion-kit 0.1.0.rc1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +83 -88
  3. data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +62 -0
  4. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
  5. data/app/controllers/completion_kit/metric_groups_controller.rb +67 -0
  6. data/app/controllers/completion_kit/metrics_controller.rb +2 -2
  7. data/app/controllers/completion_kit/runs_controller.rb +1 -1
  8. data/app/models/completion_kit/metric.rb +3 -5
  9. data/app/models/completion_kit/metric_group.rb +22 -0
  10. data/app/models/completion_kit/metric_group_membership.rb +20 -0
  11. data/app/models/completion_kit/run.rb +0 -1
  12. data/app/services/completion_kit/judge_service.rb +3 -7
  13. data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
  14. data/app/services/completion_kit/mcp_tools/{criteria.rb → metric_groups.rb} +28 -28
  15. data/app/services/completion_kit/mcp_tools/metrics.rb +2 -4
  16. data/app/views/completion_kit/api_reference/index.html.erb +11 -11
  17. data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
  18. data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
  19. data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
  20. data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
  21. data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
  22. data/app/views/completion_kit/metrics/_form.html.erb +2 -23
  23. data/app/views/completion_kit/metrics/index.html.erb +13 -5
  24. data/app/views/completion_kit/metrics/show.html.erb +1 -12
  25. data/app/views/completion_kit/runs/_form.html.erb +5 -5
  26. data/app/views/layouts/completion_kit/application.html.erb +4 -1
  27. data/config/routes.rb +2 -2
  28. data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
  29. data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
  30. data/lib/completion_kit/engine.rb +1 -7
  31. data/lib/completion_kit/version.rb +1 -1
  32. metadata +21 -21
  33. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
  34. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
  35. data/app/controllers/completion_kit/criteria_controller.rb +0 -67
  36. data/app/models/completion_kit/criteria.rb +0 -22
  37. data/app/models/completion_kit/criteria_membership.rb +0 -20
  38. data/app/views/completion_kit/criteria/_form.html.erb +0 -46
  39. data/app/views/completion_kit/criteria/edit.html.erb +0 -14
  40. data/app/views/completion_kit/criteria/index.html.erb +0 -37
  41. data/app/views/completion_kit/criteria/new.html.erb +0 -13
@@ -1,19 +1,19 @@
1
1
  module CompletionKit
2
2
  module McpTools
3
- module Criteria
3
+ module MetricGroups
4
4
  TOOLS = {
5
- "criteria_list" => {
6
- description: "List all criteria",
5
+ "metric_groups_list" => {
6
+ description: "List all metric groups",
7
7
  inputSchema: {type: "object", properties: {}, required: []},
8
8
  handler: :list
9
9
  },
10
- "criteria_get" => {
11
- description: "Get a criteria by ID",
10
+ "metric_groups_get" => {
11
+ description: "Get a metric group by ID",
12
12
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
13
13
  handler: :get
14
14
  },
15
- "criteria_create" => {
16
- description: "Create a criteria grouping metrics",
15
+ "metric_groups_create" => {
16
+ description: "Create a metric group",
17
17
  inputSchema: {
18
18
  type: "object",
19
19
  properties: {
@@ -24,8 +24,8 @@ module CompletionKit
24
24
  },
25
25
  handler: :create
26
26
  },
27
- "criteria_update" => {
28
- description: "Update a criteria",
27
+ "metric_groups_update" => {
28
+ description: "Update a metric group",
29
29
  inputSchema: {
30
30
  type: "object",
31
31
  properties: {
@@ -36,8 +36,8 @@ module CompletionKit
36
36
  },
37
37
  handler: :update
38
38
  },
39
- "criteria_delete" => {
40
- description: "Delete a criteria",
39
+ "metric_groups_delete" => {
40
+ description: "Delete a metric group",
41
41
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
42
42
  handler: :delete
43
43
  }
@@ -53,36 +53,36 @@ module CompletionKit
53
53
  end
54
54
 
55
55
  def self.list(_args)
56
- text_result(CompletionKit::Criteria.order(created_at: :desc).map(&:as_json))
56
+ text_result(CompletionKit::MetricGroup.order(created_at: :desc).map(&:as_json))
57
57
  end
58
58
 
59
59
  def self.get(args)
60
- text_result(CompletionKit::Criteria.find(args["id"]).as_json)
60
+ text_result(CompletionKit::MetricGroup.find(args["id"]).as_json)
61
61
  end
62
62
 
63
63
  def self.create(args)
64
- criteria = CompletionKit::Criteria.new(args.slice("name", "description"))
65
- if criteria.save
66
- replace_metric_memberships(criteria, args["metric_ids"])
67
- text_result(criteria.reload.as_json)
64
+ metric_group = CompletionKit::MetricGroup.new(args.slice("name", "description"))
65
+ if metric_group.save
66
+ replace_metric_memberships(metric_group, args["metric_ids"])
67
+ text_result(metric_group.reload.as_json)
68
68
  else
69
- error_result(criteria.errors.full_messages.join(", "))
69
+ error_result(metric_group.errors.full_messages.join(", "))
70
70
  end
71
71
  end
72
72
 
73
73
  def self.update(args)
74
- criteria = CompletionKit::Criteria.find(args["id"])
75
- if criteria.update(args.except("id", "metric_ids").slice("name", "description"))
76
- replace_metric_memberships(criteria, args["metric_ids"]) if args.key?("metric_ids")
77
- text_result(criteria.reload.as_json)
74
+ metric_group = CompletionKit::MetricGroup.find(args["id"])
75
+ if metric_group.update(args.except("id", "metric_ids").slice("name", "description"))
76
+ replace_metric_memberships(metric_group, args["metric_ids"]) if args.key?("metric_ids")
77
+ text_result(metric_group.reload.as_json)
78
78
  else
79
- error_result(criteria.errors.full_messages.join(", "))
79
+ error_result(metric_group.errors.full_messages.join(", "))
80
80
  end
81
81
  end
82
82
 
83
83
  def self.delete(args)
84
- CompletionKit::Criteria.find(args["id"]).destroy!
85
- text_result("Criteria #{args["id"]} deleted")
84
+ CompletionKit::MetricGroup.find(args["id"]).destroy!
85
+ text_result("Metric group #{args["id"]} deleted")
86
86
  end
87
87
 
88
88
  def self.text_result(data)
@@ -94,11 +94,11 @@ module CompletionKit
94
94
  {content: [{type: "text", text: message}], isError: true}
95
95
  end
96
96
 
97
- def self.replace_metric_memberships(criteria, metric_ids)
97
+ def self.replace_metric_memberships(metric_group, metric_ids)
98
98
  return unless metric_ids
99
- criteria.criteria_memberships.delete_all
99
+ metric_group.metric_group_memberships.delete_all
100
100
  Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
101
- criteria.criteria_memberships.create!(metric_id: metric_id, position: index + 1)
101
+ metric_group.metric_group_memberships.create!(metric_id: metric_id, position: index + 1)
102
102
  end
103
103
  end
104
104
  end
@@ -18,7 +18,6 @@ module CompletionKit
18
18
  type: "object",
19
19
  properties: {
20
20
  name: {type: "string"}, instruction: {type: "string"},
21
- evaluation_steps: {type: "array", items: {type: "string"}},
22
21
  rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}}
23
22
  },
24
23
  required: ["name"]
@@ -31,7 +30,6 @@ module CompletionKit
31
30
  type: "object",
32
31
  properties: {
33
32
  id: {type: "integer"}, name: {type: "string"}, instruction: {type: "string"},
34
- evaluation_steps: {type: "array", items: {type: "string"}},
35
33
  rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}}
36
34
  },
37
35
  required: ["id"]
@@ -63,7 +61,7 @@ module CompletionKit
63
61
  end
64
62
 
65
63
  def self.create(args)
66
- metric = Metric.new(args.slice("name", "instruction", "evaluation_steps", "rubric_bands"))
64
+ metric = Metric.new(args.slice("name", "instruction", "rubric_bands"))
67
65
  if metric.save
68
66
  text_result(metric.as_json)
69
67
  else
@@ -73,7 +71,7 @@ module CompletionKit
73
71
 
74
72
  def self.update(args)
75
73
  metric = Metric.find(args["id"])
76
- if metric.update(args.except("id").slice("name", "instruction", "evaluation_steps", "rubric_bands"))
74
+ if metric.update(args.except("id").slice("name", "instruction", "rubric_bands"))
77
75
  text_result(metric.as_json)
78
76
  else
79
77
  error_result(metric.errors.full_messages.join(", "))
@@ -63,7 +63,7 @@ end %>
63
63
  <input type="radio" name="ck-api-tab" id="ck-tab-responses" class="ck-api-tabs__radio">
64
64
  <input type="radio" name="ck-api-tab" id="ck-tab-datasets" class="ck-api-tabs__radio">
65
65
  <input type="radio" name="ck-api-tab" id="ck-tab-metrics" class="ck-api-tabs__radio">
66
- <input type="radio" name="ck-api-tab" id="ck-tab-criteria" class="ck-api-tabs__radio">
66
+ <input type="radio" name="ck-api-tab" id="ck-tab-metric-groups" class="ck-api-tabs__radio">
67
67
  <input type="radio" name="ck-api-tab" id="ck-tab-providers" class="ck-api-tabs__radio">
68
68
 
69
69
  <nav class="ck-api-tabs__nav">
@@ -73,7 +73,7 @@ end %>
73
73
  <label for="ck-tab-responses" class="ck-api-tabs__label">Responses <span class="ck-api-tabs__count">2</span></label>
74
74
  <label for="ck-tab-datasets" class="ck-api-tabs__label">Datasets <span class="ck-api-tabs__count">5</span></label>
75
75
  <label for="ck-tab-metrics" class="ck-api-tabs__label">Metrics <span class="ck-api-tabs__count">5</span></label>
76
- <label for="ck-tab-criteria" class="ck-api-tabs__label">Criteria <span class="ck-api-tabs__count">5</span></label>
76
+ <label for="ck-tab-metric-groups" class="ck-api-tabs__label">Metric Groups <span class="ck-api-tabs__count">5</span></label>
77
77
  <label for="ck-tab-providers" class="ck-api-tabs__label">Providers <span class="ck-api-tabs__count">5</span></label>
78
78
  </nav>
79
79
 
@@ -232,7 +232,7 @@ end %>
232
232
  <div class="ck-api-endpoint">
233
233
  <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metrics</p>
234
234
  <p class="ck-meta-copy">Create a metric.</p>
235
- <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>instruction</code>, <code>evaluation_steps</code> (array), <code>rubric_bands</code> (array of {stars, description})</p>
235
+ <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>instruction</code>, <code>rubric_bands</code> (array of {stars, description})</p>
236
236
  <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/metrics \\\n -H \"Authorization: Bearer #{token_display}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"relevance\", \"instruction\": \"Is the response relevant?\"}'" %>
237
237
  </div>
238
238
  <div class="ck-api-endpoint">
@@ -242,20 +242,20 @@ end %>
242
242
  </div>
243
243
 
244
244
  <div class="ck-api-tabs__panel">
245
- <h2 class="ck-section-title">Criteria</h2>
246
- <p class="ck-copy">Named groups of metrics applied to runs as a set.</p>
245
+ <h2 class="ck-section-title">Metric Groups</h2>
246
+ <p class="ck-copy">Named groups of metrics you can apply to a run as a set.</p>
247
247
  <div class="ck-api-endpoint">
248
- <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/criteria</p>
249
- <p class="ck-meta-copy">List all criteria with their metric IDs.</p>
248
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/metric_groups</p>
249
+ <p class="ck-meta-copy">List all metric groups with their metric IDs.</p>
250
250
  </div>
251
251
  <div class="ck-api-endpoint">
252
- <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/criteria</p>
253
- <p class="ck-meta-copy">Create a criteria group.</p>
252
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metric_groups</p>
253
+ <p class="ck-meta-copy">Create a metric group.</p>
254
254
  <p class="ck-api-params"><strong>Required:</strong>&ensp;<code>name</code>&emsp;<strong>Optional:</strong>&ensp;<code>description</code>, <code>metric_ids</code> (array)</p>
255
255
  </div>
256
256
  <div class="ck-api-endpoint">
257
- <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/criteria/:id</p>
258
- <p class="ck-meta-copy">Get, update, or delete a criteria group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
257
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span>&ensp;<span class="ck-chip ck-chip--soft">PATCH</span>&ensp;<span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metric_groups/:id</p>
258
+ <p class="ck-meta-copy">Get, update, or delete a metric group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
259
259
  </div>
260
260
  </div>
261
261
 
@@ -0,0 +1,46 @@
1
+ <%= form_with(model: metric_group, url: metric_group.persisted? ? metric_group_path(metric_group) : metric_groups_path, local: true) do |form| %>
2
+ <% if metric_group.errors.any? %>
3
+ <div class="ck-flash ck-flash--alert">
4
+ <p class="ck-flash__title"><%= pluralize(metric_group.errors.count, "problem") %> prevented this metric group from being saved.</p>
5
+ <ul class="ck-error-list">
6
+ <% metric_group.errors.full_messages.each do |message| %>
7
+ <li><%= message %></li>
8
+ <% end %>
9
+ </ul>
10
+ </div>
11
+ <% end %>
12
+
13
+ <div class="ck-card ck-form-card">
14
+ <div class="ck-field">
15
+ <%= form.label :name, "Metric group name", class: "ck-label" %>
16
+ <%= form.text_field :name, class: "ck-input", placeholder: "Support quality" %>
17
+ </div>
18
+
19
+ <div class="ck-field">
20
+ <%= form.label :description, class: "ck-label" %>
21
+ <%= form.text_area :description, rows: 3, class: "ck-input ck-input--area", placeholder: "When this metric group should be used." %>
22
+ </div>
23
+
24
+ <div class="ck-field">
25
+ <p class="ck-label">Metrics in this group</p>
26
+ <p class="ck-hint">Pick the metrics to include.</p>
27
+ <div class="ck-list ck-list--compact">
28
+ <% @metrics.each do |metric| %>
29
+ <label class="ck-item">
30
+ <%= check_box_tag "metric_group[metric_ids][]", metric.id, metric_group.metrics.exists?(metric.id), class: "ck-checkbox" %>
31
+ <span>
32
+ <strong><%= metric.name %></strong>
33
+ <span class="ck-meta-copy"><%= metric.instruction.presence || "No instruction set." %></span>
34
+ </span>
35
+ </label>
36
+ <% end %>
37
+ </div>
38
+ <%= hidden_field_tag "metric_group[metric_ids][]", "" %>
39
+ </div>
40
+
41
+ <div class="ck-actions">
42
+ <%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
43
+ <%= form.submit(metric_group.persisted? ? "Save metric group" : "Create metric group", class: ck_button_classes(:dark)) %>
44
+ </div>
45
+ </div>
46
+ <% end %>
@@ -0,0 +1,13 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li><%= link_to @metric_group.name, metric_group_path(@metric_group) %></li>
4
+ <li>Edit</li>
5
+ </ol>
6
+
7
+ <section class="ck-page-header">
8
+ <div>
9
+ <h1 class="ck-title">Edit metric group</h1>
10
+ </div>
11
+ </section>
12
+
13
+ <%= render "form", metric_group: @metric_group %>
@@ -0,0 +1,41 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li>Metric groups</li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title">Metric groups</h1>
9
+ <p class="ck-lead">Named groups of metrics. Apply a group to a run to score outputs against every metric in the group at once.</p>
10
+ </div>
11
+ <div class="ck-actions">
12
+ <%= link_to "New metric group", new_metric_group_path, class: ck_button_classes(:dark) %>
13
+ </div>
14
+ </section>
15
+
16
+ <% if @metric_groups.any? %>
17
+ <table class="ck-results-table">
18
+ <thead>
19
+ <tr>
20
+ <th>Name</th>
21
+ <th>Description</th>
22
+ <th>Metrics</th>
23
+ <th></th>
24
+ </tr>
25
+ </thead>
26
+ <tbody>
27
+ <% @metric_groups.each do |metric_group| %>
28
+ <tr onclick="window.location='<%= metric_group_path(metric_group) %>'" style="cursor: pointer;">
29
+ <td><strong><%= metric_group.name %></strong></td>
30
+ <td class="ck-meta-copy"><%= truncate(metric_group.description.to_s, length: 90).presence || "—" %></td>
31
+ <td class="ck-meta-copy"><%= metric_group.metrics.any? ? metric_group.metrics.map(&:name).join(", ") : "empty" %></td>
32
+ <td class="ck-results-table__arrow">&rarr;</td>
33
+ </tr>
34
+ <% end %>
35
+ </tbody>
36
+ </table>
37
+ <% else %>
38
+ <div class="ck-empty">
39
+ <p>No metric groups yet. <%= link_to "Create one", new_metric_group_path, class: "ck-link" %> if you want to group multiple metrics and apply them together.</p>
40
+ </div>
41
+ <% end %>
@@ -0,0 +1,12 @@
1
+ <ol class="ck-breadcrumb">
2
+ <li><%= link_to "Metrics", metrics_path %></li>
3
+ <li>New metric group</li>
4
+ </ol>
5
+
6
+ <section class="ck-page-header">
7
+ <div>
8
+ <h1 class="ck-title">New metric group</h1>
9
+ </div>
10
+ </section>
11
+
12
+ <%= render "form", metric_group: @metric_group %>
@@ -1,26 +1,25 @@
1
1
  <ol class="ck-breadcrumb">
2
2
  <li><%= link_to "Metrics", metrics_path %></li>
3
- <li><%= link_to "Criteria", criteria_path %></li>
4
- <li><%= @criteria.name %></li>
3
+ <li><%= @metric_group.name %></li>
5
4
  </ol>
6
5
 
7
6
  <section class="ck-page-header">
8
7
  <div>
9
- <h1 class="ck-title"><%= @criteria.name %></h1>
10
- <% if @criteria.description.present? %>
11
- <p class="ck-lead"><%= @criteria.description %></p>
8
+ <h1 class="ck-title"><%= @metric_group.name %></h1>
9
+ <% if @metric_group.description.present? %>
10
+ <p class="ck-lead"><%= @metric_group.description %></p>
12
11
  <% end %>
13
12
  </div>
14
13
  <div class="ck-actions">
15
- <%= link_to "Edit", edit_criterion_path(@criteria), class: ck_button_classes(:light, variant: :outline) %>
14
+ <%= link_to "Edit", edit_metric_group_path(@metric_group), class: ck_button_classes(:light, variant: :outline) %>
16
15
  </div>
17
16
  </section>
18
17
 
19
18
  <section class="ck-card">
20
19
  <p class="ck-kicker">Metrics</p>
21
- <% if @criteria.metrics.any? %>
20
+ <% if @metric_group.metrics.any? %>
22
21
  <div class="ck-list ck-list--compact">
23
- <% @criteria.metrics.each do |metric| %>
22
+ <% @metric_group.metrics.each do |metric| %>
24
23
  <div class="ck-item">
25
24
  <div>
26
25
  <p class="ck-item-title"><%= link_to metric.name, metric_path(metric), class: "ck-link" %></p>
@@ -32,6 +31,6 @@
32
31
  <% end %>
33
32
  </div>
34
33
  <% else %>
35
- <p class="ck-copy">No metrics in this criteria yet.</p>
34
+ <p class="ck-copy">No metrics in this group yet.</p>
36
35
  <% end %>
37
36
  </section>
@@ -17,32 +17,11 @@
17
17
  </div>
18
18
 
19
19
  <div class="ck-field ck-field--spacious">
20
- <p class="ck-section-title">Evaluation Instruction</p>
21
- <p class="ck-hint">What should the evaluator assess? This is the core instruction sent to the judge model.</p>
20
+ <p class="ck-section-title">Instruction</p>
21
+ <p class="ck-hint">What should the judge assess? This instruction is sent to the LLM judge when scoring outputs.</p>
22
22
  <%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output..." %>
23
23
  </div>
24
24
 
25
- <div class="ck-field ck-field--spacious" data-controller="evaluation-steps">
26
- <p class="ck-section-title">Evaluation Steps</p>
27
- <p class="ck-hint">Steps the evaluator follows during assessment.</p>
28
-
29
- <div data-evaluation-steps-target="list">
30
- <% Array(metric.evaluation_steps).each_with_index do |step, index| %>
31
- <div class="ck-step-row" data-evaluation-steps-target="row">
32
- <input type="text" name="metric[evaluation_steps][]" value="<%= step %>" class="ck-input" />
33
- <button type="button" class="ck-icon-btn" data-action="evaluation-steps#remove" aria-label="Remove step">
34
- <%= heroicon_tag "trash", variant: :outline, size: 16 %>
35
- </button>
36
- </div>
37
- <% end %>
38
- </div>
39
-
40
- <button type="button" class="ck-add-btn" data-action="evaluation-steps#add">
41
- <%= heroicon_tag "plus", variant: :outline, size: 14 %>
42
- Add step
43
- </button>
44
- </div>
45
-
46
25
  <div class="ck-field ck-field--spacious">
47
26
  <p class="ck-section-title">Rubric</p>
48
27
  <p class="ck-hint">What each star rating means for this metric.</p>
@@ -1,7 +1,7 @@
1
1
  <section class="ck-page-header">
2
2
  <div>
3
3
  <h1 class="ck-title">Metrics</h1>
4
- <p class="ck-lead">Scoring dimensions used by the judge model to evaluate responses. Each metric defines what to assess, the evaluation steps, and a rubric for star ratings. Group metrics into <%= link_to "Criteria", criteria_path, class: "ck-link" %> for reuse across runs.</p>
4
+ <p class="ck-lead">Scoring dimensions the judge uses to evaluate each response. Each metric defines one thing to assess and a 1-5 scale.</p>
5
5
  </div>
6
6
  <div class="ck-actions">
7
7
  <%= link_to "New metric", new_metric_path, class: ck_button_classes(:dark) %>
@@ -14,7 +14,7 @@
14
14
  <tr>
15
15
  <th>Name</th>
16
16
  <th>Instruction</th>
17
- <th>Criteria</th>
17
+ <th>In groups</th>
18
18
  <th></th>
19
19
  </tr>
20
20
  </thead>
@@ -22,13 +22,21 @@
22
22
  <% @metrics.each do |metric| %>
23
23
  <tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
24
24
  <td><strong><%= metric.name %></strong></td>
25
- <td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 80).presence || "None" %></td>
26
- <td class="ck-meta-copy"><%= metric.criterias.any? ? metric.criterias.map { |c| link_to(c.name, criterion_path(c), class: "ck-link", onclick: "event.stopPropagation()") }.join(", ").html_safe : "None" %></td>
25
+ <td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 90).presence || "" %></td>
26
+ <td class="ck-meta-copy"><%= metric.metric_groups.any? ? metric.metric_groups.map(&:name).join(", ") : "" %></td>
27
27
  <td class="ck-results-table__arrow">&rarr;</td>
28
28
  </tr>
29
29
  <% end %>
30
30
  </tbody>
31
31
  </table>
32
+
33
+ <% if @metrics.size >= 2 %>
34
+ <p class="ck-meta-copy" style="margin-top: 1.5rem; text-align: center;">
35
+ Use the same metrics on multiple runs? <%= link_to "Group them →", metric_groups_path, class: "ck-link" %>
36
+ </p>
37
+ <% end %>
32
38
  <% else %>
33
- <div class="ck-empty">No metrics yet.</div>
39
+ <div class="ck-empty">
40
+ <p>No metrics yet. <%= link_to "Create your first metric", new_metric_path, class: "ck-link" %> to start scoring prompt outputs.</p>
41
+ </div>
34
42
  <% end %>
@@ -14,22 +14,11 @@
14
14
 
15
15
  <% if @metric.instruction.present? %>
16
16
  <section class="ck-card">
17
- <p class="ck-kicker">Evaluation Instruction</p>
17
+ <p class="ck-kicker">Instruction</p>
18
18
  <div class="ck-note-box"><%= simple_format(@metric.instruction) %></div>
19
19
  </section>
20
20
  <% end %>
21
21
 
22
- <% if @metric.evaluation_steps.present? && @metric.evaluation_steps.any? %>
23
- <section class="ck-card">
24
- <p class="ck-kicker">Evaluation Steps</p>
25
- <ol class="ck-step-list">
26
- <% @metric.evaluation_steps.each do |step| %>
27
- <li><%= step %></li>
28
- <% end %>
29
- </ol>
30
- </section>
31
- <% end %>
32
-
33
22
  <section class="ck-card ck-card--spaced">
34
23
  <p class="ck-kicker">Rubric</p>
35
24
  <div class="ck-rubric-display">
@@ -70,11 +70,11 @@
70
70
  <% if @all_metrics.empty? %>
71
71
  <p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet.&ensp;<%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
72
72
  <% else %>
73
- <% if @criterias.any? %>
73
+ <% if @metric_groups.any? %>
74
74
  <p class="ck-meta-copy" style="margin-bottom: 0.5rem;">
75
75
  Quick add:&ensp;
76
- <% @criterias.each do |c| %>
77
- <span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddCriteria(<%= c.metric_ids.to_json %>)"><%= c.name %></span>&ensp;
76
+ <% @metric_groups.each do |g| %>
77
+ <span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddMetricGroup(<%= g.metric_ids.to_json %>)"><%= g.name %></span>&ensp;
78
78
  <% end %>
79
79
  </p>
80
80
  <% end %>
@@ -112,7 +112,7 @@ function updateRunForm() {
112
112
  } else if (judge && metrics.length === 0) {
113
113
  if (judgeField) judgeField.className = 'ck-field ck-field--info';
114
114
  if (metricsField) metricsField.className = 'ck-field ck-field--info';
115
- if (metricsHint) metricsHint.textContent = 'Select at least one metric or criteria to enable judging.';
115
+ if (metricsHint) metricsHint.textContent = 'Select at least one metric or group to enable judging.';
116
116
  } else if (!judge && metrics.length > 0) {
117
117
  if (judgeField) judgeField.className = 'ck-field ck-field--info';
118
118
  if (judgeHint) judgeHint.textContent = 'Select a judge model to enable judging.';
@@ -131,7 +131,7 @@ function updateRunForm() {
131
131
  if (submitBtn) submitBtn.disabled = !valid;
132
132
  }
133
133
 
134
- function ckQuickAddCriteria(metricIds) {
134
+ function ckQuickAddMetricGroup(metricIds) {
135
135
  metricIds.forEach(function(id) {
136
136
  var cb = document.getElementById('run_metric_' + id);
137
137
  if (cb) cb.checked = true;
@@ -19,11 +19,14 @@
19
19
  <nav class="ck-nav">
20
20
  <% active = ->(path) { request.path.start_with?(path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) } %>
21
21
  <%= link_to "Prompts", prompts_path, class: active.(prompts_path) %>
22
- <%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(criteria_path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
22
+ <%= link_to "Metrics", metrics_path, class: request.path.start_with?(metrics_path) || request.path.start_with?(metric_groups_path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) %>
23
23
  <%= link_to "Datasets", datasets_path, class: active.(datasets_path) %>
24
24
  <%= link_to "Runs", runs_path, class: active.(runs_path) %>
25
25
  <%= link_to "Settings", provider_credentials_path, class: active.(provider_credentials_path) %>
26
26
  <%= link_to "API", api_reference_path, class: active.(api_reference_path) %>
27
+ <% if main_app.respond_to?(:logout_path) %>
28
+ <%= button_to "Log out", main_app.logout_path, method: :delete, class: ck_button_classes(:light, variant: :outline) %>
29
+ <% end %>
27
30
  </nav>
28
31
  </div>
29
32
  </header>
data/config/routes.rb CHANGED
@@ -9,7 +9,7 @@ CompletionKit::Engine.routes.draw do
9
9
 
10
10
  resources :datasets
11
11
  resources :metrics
12
- resources :criteria, controller: "criteria"
12
+ resources :metric_groups
13
13
 
14
14
  resources :runs do
15
15
  member do
@@ -45,7 +45,7 @@ CompletionKit::Engine.routes.draw do
45
45
  end
46
46
  resources :datasets
47
47
  resources :metrics
48
- resources :criteria, controller: "criteria"
48
+ resources :metric_groups
49
49
  resources :provider_credentials
50
50
  end
51
51
  end
@@ -0,0 +1,5 @@
1
+ class RemoveEvaluationStepsFromMetrics < ActiveRecord::Migration[7.0]
2
+ def change
3
+ remove_column :completion_kit_metrics, :evaluation_steps, :text
4
+ end
5
+ end
@@ -0,0 +1,13 @@
1
+ class RenameCriteriaToMetricGroups < ActiveRecord::Migration[8.1]
2
+ def change
3
+ rename_table :completion_kit_criteria, :completion_kit_metric_groups
4
+ rename_table :completion_kit_criteria_memberships, :completion_kit_metric_group_memberships
5
+ rename_column :completion_kit_metric_group_memberships, :criteria_id, :metric_group_id
6
+
7
+ if index_name_exists?(:completion_kit_metric_group_memberships, "index_completion_kit_criteria_memberships_on_criteria_id")
8
+ rename_index :completion_kit_metric_group_memberships,
9
+ "index_completion_kit_criteria_memberships_on_criteria_id",
10
+ "index_completion_kit_metric_group_memberships_on_metric_group_id"
11
+ end
12
+ end
13
+ end
@@ -5,16 +5,10 @@ module CompletionKit
5
5
  class Engine < ::Rails::Engine
6
6
  isolate_namespace CompletionKit
7
7
 
8
- initializer("completion_kit.inflections", before: :load_config_initializers) do
9
- ActiveSupport::Inflector.inflections(:en) do |inflect|
10
- inflect.irregular "criterion", "criteria"
11
- end
12
- end
13
-
14
8
  paths.add "app/services", eager_load: true
15
9
 
16
10
  def self.register_assets(app)
17
- app.config.assets.precompile += %w( completion_kit/application.css completion_kit/evaluation_steps_controller.js completion_kit/logo.svg )
11
+ app.config.assets.precompile += %w( completion_kit/application.css completion_kit/logo.svg )
18
12
  end
19
13
 
20
14
  initializer("completion_kit.assets") { |app| Engine.register_assets(app) }
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.1.0.rc1"
2
+ VERSION = "0.1.0"
3
3
  end