completion-kit 0.5.9 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -15
- data/app/assets/images/completion_kit/favicon.ico +0 -0
- data/app/assets/images/completion_kit/logo.png +0 -0
- data/app/assets/stylesheets/completion_kit/application.css +38 -7
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +1 -1
- data/app/controllers/completion_kit/api_reference_controller.rb +6 -0
- data/app/controllers/completion_kit/datasets_controller.rb +10 -0
- data/app/controllers/completion_kit/mcp_controller.rb +2 -2
- data/app/controllers/completion_kit/runs_controller.rb +8 -2
- data/app/jobs/completion_kit/judge_review_job.rb +1 -1
- data/app/models/completion_kit/mcp_session.rb +29 -0
- data/app/models/completion_kit/run.rb +55 -10
- data/app/services/completion_kit/mcp_dispatcher.rb +1 -3
- data/app/services/completion_kit/mcp_tools/runs.rb +6 -4
- data/app/views/completion_kit/api_reference/_body.html.erb +47 -23
- data/app/views/completion_kit/api_reference/_resource_card.html.erb +24 -0
- data/app/views/completion_kit/api_reference/_resource_list.html.erb +10 -0
- data/app/views/completion_kit/api_reference/index.html.erb +7 -1
- data/app/views/completion_kit/datasets/show.html.erb +2 -18
- data/app/views/completion_kit/prompts/show.html.erb +8 -26
- data/app/views/completion_kit/responses/show.html.erb +26 -11
- data/app/views/completion_kit/runs/_form.html.erb +51 -4
- data/app/views/completion_kit/runs/_row.html.erb +6 -2
- data/app/views/completion_kit/runs/_status_header.html.erb +5 -1
- data/app/views/completion_kit/runs/_table.html.erb +19 -0
- data/app/views/completion_kit/runs/edit.html.erb +6 -2
- data/app/views/completion_kit/runs/index.html.erb +1 -17
- data/app/views/completion_kit/runs/show.html.erb +24 -15
- data/app/views/layouts/completion_kit/application.html.erb +2 -2
- data/db/migrate/20260513000001_create_completion_kit_mcp_sessions.rb +12 -0
- data/db/migrate/20260514000001_allow_judge_only_runs.rb +6 -0
- data/lib/completion_kit/engine.rb +2 -1
- data/lib/completion_kit/version.rb +1 -1
- metadata +9 -2
- data/app/assets/images/completion_kit/logo.svg +0 -6
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
token = local_assigns.fetch(:token, "YOUR_TOKEN")
|
|
3
3
|
real_token = local_assigns.fetch(:real_token, nil)
|
|
4
4
|
published_prompts = local_assigns.fetch(:published_prompts, [])
|
|
5
|
+
recent_runs = local_assigns.fetch(:recent_runs, [])
|
|
6
|
+
datasets = local_assigns.fetch(:datasets, [])
|
|
7
|
+
metrics = local_assigns.fetch(:metrics, [])
|
|
8
|
+
metric_groups = local_assigns.fetch(:metric_groups, [])
|
|
9
|
+
tags = local_assigns.fetch(:tags, [])
|
|
10
|
+
provider_credentials = local_assigns.fetch(:provider_credentials, [])
|
|
5
11
|
%>
|
|
6
12
|
<div class="ck-api-tabs">
|
|
7
13
|
<input type="radio" name="ck-api-tab" id="ck-tab-mcp" class="ck-api-tabs__radio" checked>
|
|
@@ -86,28 +92,11 @@
|
|
|
86
92
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/prompts/:id</p>
|
|
87
93
|
<p class="ck-meta-copy">Get a single prompt by ID.</p>
|
|
88
94
|
</div>
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
<div class="ck-api-prompt-card__top">
|
|
95
|
-
<div>
|
|
96
|
-
<strong class="ck-api-prompt-card__name"><%= p.name %></strong>
|
|
97
|
-
<% if p.description.present? %>
|
|
98
|
-
<p class="ck-api-prompt-card__desc"><%= p.description %></p>
|
|
99
|
-
<% end %>
|
|
100
|
-
</div>
|
|
101
|
-
<span class="ck-chip" style="text-transform: none; flex-shrink: 0;"><%= p.llm_model %></span>
|
|
102
|
-
</div>
|
|
103
|
-
<div class="ck-api-prompt-card__url">
|
|
104
|
-
<code class="ck-endpoint__url" id="prompt_ep_<%= p.id %>"><%= base_url %>/api/v1/prompts/<%= p.slug %></code>
|
|
105
|
-
<button type="button" class="ck-icon-btn" title="Copy endpoint" aria-label="Copy endpoint URL" onclick="navigator.clipboard.writeText(document.getElementById('prompt_ep_<%= p.id %>').textContent)"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="14" height="14" aria-hidden="true"><path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"/><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"/></svg></button>
|
|
106
|
-
</div>
|
|
107
|
-
</div>
|
|
108
|
-
<% end %>
|
|
109
|
-
</div>
|
|
110
|
-
<% end %>
|
|
95
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your published prompts",
|
|
96
|
+
items: published_prompts.map { |p|
|
|
97
|
+
{ name: p.name, subtitle: p.description, meta: p.llm_model,
|
|
98
|
+
url: "#{base_url}/api/v1/prompts/#{p.slug}", dom_id: "prompt_ep_#{p.id}" }
|
|
99
|
+
} %>
|
|
111
100
|
<div class="ck-api-endpoint">
|
|
112
101
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">PATCH</span> /api/v1/prompts/:id</p>
|
|
113
102
|
<p class="ck-meta-copy">Update a prompt. Accepts same params as create.</p>
|
|
@@ -132,7 +121,7 @@
|
|
|
132
121
|
<div class="ck-api-endpoint">
|
|
133
122
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs</p>
|
|
134
123
|
<p class="ck-meta-copy">Create a new run.</p>
|
|
135
|
-
<p class="ck-api-params"><strong>
|
|
124
|
+
<p class="ck-api-params"><strong>Optional:</strong> <code>name</code>, <code>prompt_id</code>, <code>dataset_id</code>, <code>metric_ids</code>, <code>judge_model</code>, <code>output_column</code> (judge-only: omit <code>prompt_id</code> and grade a dataset column instead, default <code>actual_output</code>)</p>
|
|
136
125
|
<%= render "completion_kit/api_reference/example", base_url: base_url, token: token, real_token: real_token, cmd: "curl -X POST #{base_url}/api/v1/runs \\\n -H \"Authorization: Bearer #{token}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"prompt_id\": 1, \"dataset_id\": 1, \"metric_ids\": [1, 2]}'" %>
|
|
137
126
|
</div>
|
|
138
127
|
<div class="ck-api-endpoint">
|
|
@@ -140,6 +129,11 @@
|
|
|
140
129
|
<p class="ck-meta-copy">Get a run with status, progress, response count, and average score.</p>
|
|
141
130
|
<%= render "completion_kit/api_reference/example", base_url: base_url, token: token, real_token: real_token, cmd: "curl #{base_url}/api/v1/runs/1 \\\n -H \"Authorization: Bearer #{token}\"" %>
|
|
142
131
|
</div>
|
|
132
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your recent runs",
|
|
133
|
+
items: recent_runs.map { |r|
|
|
134
|
+
{ name: r.name, meta: r.status.to_s.titleize,
|
|
135
|
+
url: "#{base_url}/api/v1/runs/#{r.id}", dom_id: "run_ep_#{r.id}" }
|
|
136
|
+
} %>
|
|
143
137
|
<div class="ck-api-endpoint">
|
|
144
138
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs/:id/generate</p>
|
|
145
139
|
<p class="ck-meta-copy">Start generating responses. Returns 202 Accepted. Poll the run to check progress.</p>
|
|
@@ -167,6 +161,11 @@
|
|
|
167
161
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/runs/:run_id/responses/:id</p>
|
|
168
162
|
<p class="ck-meta-copy">Get a single response with its review scores and feedback.</p>
|
|
169
163
|
</div>
|
|
164
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your responses",
|
|
165
|
+
items: recent_runs.first(1).map { |r|
|
|
166
|
+
{ name: "#{r.name} — responses",
|
|
167
|
+
url: "#{base_url}/api/v1/runs/#{r.id}/responses", dom_id: "responses_ep_#{r.id}" }
|
|
168
|
+
} %>
|
|
170
169
|
</div>
|
|
171
170
|
|
|
172
171
|
<div class="ck-api-tabs__panel">
|
|
@@ -186,6 +185,11 @@
|
|
|
186
185
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/datasets/:id</p>
|
|
187
186
|
<p class="ck-meta-copy">Get, update, or delete a dataset.</p>
|
|
188
187
|
</div>
|
|
188
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your datasets",
|
|
189
|
+
items: datasets.map { |d|
|
|
190
|
+
{ name: d.name, meta: pluralize([d.csv_data.to_s.lines.count - 1, 0].max, "row"),
|
|
191
|
+
url: "#{base_url}/api/v1/datasets/#{d.id}", dom_id: "dataset_ep_#{d.id}" }
|
|
192
|
+
} %>
|
|
189
193
|
</div>
|
|
190
194
|
|
|
191
195
|
<div class="ck-api-tabs__panel">
|
|
@@ -205,6 +209,11 @@
|
|
|
205
209
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metrics/:id</p>
|
|
206
210
|
<p class="ck-meta-copy">Get, update, or delete a metric.</p>
|
|
207
211
|
</div>
|
|
212
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your metrics",
|
|
213
|
+
items: metrics.map { |m|
|
|
214
|
+
{ name: m.name, subtitle: m.instruction.presence&.truncate(100),
|
|
215
|
+
url: "#{base_url}/api/v1/metrics/#{m.id}", dom_id: "metric_ep_#{m.id}" }
|
|
216
|
+
} %>
|
|
208
217
|
</div>
|
|
209
218
|
|
|
210
219
|
<div class="ck-api-tabs__panel">
|
|
@@ -223,6 +232,11 @@
|
|
|
223
232
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metric_groups/:id</p>
|
|
224
233
|
<p class="ck-meta-copy">Get, update, or delete a metric group. PATCH with <code>metric_ids</code> replaces all metric associations.</p>
|
|
225
234
|
</div>
|
|
235
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your metric groups",
|
|
236
|
+
items: metric_groups.map { |g|
|
|
237
|
+
{ name: g.name, subtitle: g.description.presence, meta: pluralize(g.metric_ids.size, "metric"),
|
|
238
|
+
url: "#{base_url}/api/v1/metric_groups/#{g.id}", dom_id: "metric_group_ep_#{g.id}" }
|
|
239
|
+
} %>
|
|
226
240
|
</div>
|
|
227
241
|
|
|
228
242
|
<div class="ck-api-tabs__panel">
|
|
@@ -243,6 +257,11 @@
|
|
|
243
257
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/tags/:id</p>
|
|
244
258
|
<p class="ck-meta-copy">Get, update, or delete a tag. PATCH accepts <code>name</code>. DELETE returns 204 No Content and removes all taggings for this tag.</p>
|
|
245
259
|
</div>
|
|
260
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your tags",
|
|
261
|
+
items: tags.map { |t|
|
|
262
|
+
{ name: t.name, meta: t.color,
|
|
263
|
+
url: "#{base_url}/api/v1/tags/#{t.id}", dom_id: "tag_ep_#{t.id}" }
|
|
264
|
+
} %>
|
|
246
265
|
<div class="ck-api-endpoint" style="padding-top: 1rem;">
|
|
247
266
|
<p class="ck-kicker" style="margin-bottom: 0.5rem;">Tagging resources</p>
|
|
248
267
|
<p class="ck-meta-copy">Metrics, prompts, runs, and datasets accept a <code>tag_names</code> array on their create and update endpoints. Passing a name that does not yet exist silently creates the tag. On PATCH, the list replaces all existing tags for that record (omit the field to leave tags unchanged).</p>
|
|
@@ -277,6 +296,11 @@
|
|
|
277
296
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> <span class="ck-chip ck-chip--soft">PATCH</span> <span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/provider_credentials/:id</p>
|
|
278
297
|
<p class="ck-meta-copy">Get, update, or delete a provider credential.</p>
|
|
279
298
|
</div>
|
|
299
|
+
<%= render "completion_kit/api_reference/resource_list", title: "Your providers",
|
|
300
|
+
items: provider_credentials.map { |pc|
|
|
301
|
+
{ name: ck_provider_label(pc.provider), meta: pluralize(pc.model_count, "model"),
|
|
302
|
+
url: "#{base_url}/api/v1/provider_credentials/#{pc.id}", dom_id: "provider_ep_#{pc.id}" }
|
|
303
|
+
} %>
|
|
280
304
|
</div>
|
|
281
305
|
|
|
282
306
|
</div>
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
<%
# Card for a single API resource: shows its name, an optional subtitle and
# meta chip, and the endpoint URL next to a copy-to-clipboard button.
# Required locals (fetched without a default, so a missing one raises):
#   name, url, dom_id
# Optional locals (nil when not supplied; blank values are not rendered):
#   subtitle, meta
# dom_id becomes the id of the <code> element and is also looked up by the
# button's inline onclick via getElementById, so it should be unique on the page.
# The ck-api-prompt-card CSS classes are reused here for every resource type.
|
|
2
|
+
name = local_assigns.fetch(:name)
|
|
3
|
+
url = local_assigns.fetch(:url)
|
|
4
|
+
dom_id = local_assigns.fetch(:dom_id)
|
|
5
|
+
subtitle = local_assigns[:subtitle]
|
|
6
|
+
meta = local_assigns[:meta]
|
|
7
|
+
%>
|
|
8
|
+
<div class="ck-api-prompt-card">
|
|
9
|
+
<div class="ck-api-prompt-card__top">
|
|
10
|
+
<div>
|
|
11
|
+
<strong class="ck-api-prompt-card__name"><%= name %></strong>
|
|
12
|
+
<% if subtitle.present? %>
|
|
13
|
+
<p class="ck-api-prompt-card__desc"><%= subtitle %></p>
|
|
14
|
+
<% end %>
|
|
15
|
+
</div>
|
|
16
|
+
<% if meta.present? %>
|
|
17
|
+
<span class="ck-chip" style="text-transform: none; flex-shrink: 0;"><%= meta %></span>
|
|
18
|
+
<% end %>
|
|
19
|
+
</div>
|
|
20
|
+
<div class="ck-api-prompt-card__url">
|
|
21
|
+
<code class="ck-endpoint__url" id="<%= dom_id %>"><%= url %></code>
|
|
22
|
+
<button type="button" class="ck-icon-btn" title="Copy endpoint" aria-label="Copy endpoint URL" onclick="navigator.clipboard.writeText(document.getElementById('<%= dom_id %>').textContent)"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="14" height="14" aria-hidden="true"><path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"/><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"/></svg></button>
|
|
23
|
+
</div>
|
|
24
|
+
</div>
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<%# title: heading text; items: array of hashes for _resource_card (name:, url:, dom_id:, subtitle?:, meta?:) %>
<%# Renders nothing when items is empty or nil (Array() coerces nil to []). title is only fetched once there is at least one item, so it is required only in that case. Each item hash is handed to the card partial as its locals. %>
|
|
2
|
+
<% items = Array(local_assigns.fetch(:items, [])) %>
|
|
3
|
+
<% if items.any? %>
|
|
4
|
+
<div class="ck-api-endpoint" style="padding-top: 1rem;">
|
|
5
|
+
<p class="ck-kicker" style="margin-bottom: 0.5rem;"><%= local_assigns.fetch(:title) %></p>
|
|
6
|
+
<% items.each do |item| %>
|
|
7
|
+
<%= render "completion_kit/api_reference/resource_card", item %>
|
|
8
|
+
<% end %>
|
|
9
|
+
</div>
|
|
10
|
+
<% end %>
|
|
@@ -20,4 +20,10 @@
|
|
|
20
20
|
base_url: @base_url,
|
|
21
21
|
token: ck_masked_token(@token),
|
|
22
22
|
real_token: @token,
|
|
23
|
-
published_prompts: @published_prompts
|
|
23
|
+
published_prompts: @published_prompts,
|
|
24
|
+
recent_runs: @recent_runs,
|
|
25
|
+
datasets: @datasets,
|
|
26
|
+
metrics: @metrics,
|
|
27
|
+
metric_groups: @metric_groups,
|
|
28
|
+
tags: @tags,
|
|
29
|
+
provider_credentials: @provider_credentials %>
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
<h1 class="ck-title"><%= @dataset.name %></h1>
|
|
9
9
|
</div>
|
|
10
10
|
<div class="ck-actions">
|
|
11
|
+
<%= link_to "Download CSV", dataset_path(@dataset, format: :csv), class: ck_button_classes(:light, variant: :outline) %>
|
|
11
12
|
<%= link_to "Edit", edit_dataset_path(@dataset), class: ck_button_classes(:light, variant: :outline) %>
|
|
12
13
|
</div>
|
|
13
14
|
</section>
|
|
@@ -67,23 +68,6 @@
|
|
|
67
68
|
<% if @runs.any? %>
|
|
68
69
|
<section class="ck-card--spaced">
|
|
69
70
|
<p class="ck-kicker">Runs</p>
|
|
70
|
-
|
|
71
|
-
<table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
|
|
72
|
-
<thead>
|
|
73
|
-
<tr>
|
|
74
|
-
<th>Run</th>
|
|
75
|
-
<th>Responses</th>
|
|
76
|
-
<th>Metrics</th>
|
|
77
|
-
<th>Avg score</th>
|
|
78
|
-
<th>When</th>
|
|
79
|
-
<th></th>
|
|
80
|
-
</tr>
|
|
81
|
-
</thead>
|
|
82
|
-
<tbody>
|
|
83
|
-
<% @runs.each do |run| %>
|
|
84
|
-
<%= render "completion_kit/runs/row", run: run %>
|
|
85
|
-
<% end %>
|
|
86
|
-
</tbody>
|
|
87
|
-
</table>
|
|
71
|
+
<%= render "completion_kit/runs/table", runs: @runs %>
|
|
88
72
|
</section>
|
|
89
73
|
<% end %>
|
|
@@ -17,6 +17,11 @@
|
|
|
17
17
|
<code class="ck-endpoint__url" id="prompt_endpoint"><%= request.base_url %><%= api_v1_prompt_path(@prompt.slug) %></code>
|
|
18
18
|
<button type="button" class="ck-icon-btn" title="Copy endpoint" onclick="navigator.clipboard.writeText(document.getElementById('prompt_endpoint').textContent)"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="14" height="14"><path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"/><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"/></svg></button>
|
|
19
19
|
</div>
|
|
20
|
+
<% if @prompt.tags.any? %>
|
|
21
|
+
<div class="tag-marks-row">
|
|
22
|
+
<%= render "completion_kit/tags/marks", tags: @prompt.tags %>
|
|
23
|
+
</div>
|
|
24
|
+
<% end %>
|
|
20
25
|
</div>
|
|
21
26
|
|
|
22
27
|
<div class="ck-actions">
|
|
@@ -26,13 +31,7 @@
|
|
|
26
31
|
</div>
|
|
27
32
|
</section>
|
|
28
33
|
|
|
29
|
-
|
|
30
|
-
<div class="tag-marks-row tag-marks-row--header">
|
|
31
|
-
<%= render "completion_kit/tags/marks", tags: @prompt.tags %>
|
|
32
|
-
</div>
|
|
33
|
-
<% end %>
|
|
34
|
-
|
|
35
|
-
<section>
|
|
34
|
+
<section class="ck-card--spaced">
|
|
36
35
|
<div class="ck-prompt-preview__header">
|
|
37
36
|
<p class="ck-kicker">Prompt</p>
|
|
38
37
|
<% judged_run = @runs.select { |r| r.prompt_id == @prompt.id && r.status == "completed" }.find { |r| r.responses.joins(:reviews).exists? } %>
|
|
@@ -45,7 +44,7 @@
|
|
|
45
44
|
<p class="ck-hint"><%= link_to "Run a test with judging configured", new_run_path(prompt_id: @prompt.id), class: "ck-link" %> to get AI-suggested improvements.</p>
|
|
46
45
|
<% end %>
|
|
47
46
|
</div>
|
|
48
|
-
<pre class="ck-code ck-code--dark"><%= @prompt.template %></pre>
|
|
47
|
+
<pre class="ck-code ck-code--dark ck-code--prompt"><%= @prompt.template %></pre>
|
|
49
48
|
</section>
|
|
50
49
|
|
|
51
50
|
<% versions = @prompt.family_versions.includes(runs: { responses: :reviews }).to_a %>
|
|
@@ -140,24 +139,7 @@
|
|
|
140
139
|
<% if @runs.any? %>
|
|
141
140
|
<section class="ck-card--spaced">
|
|
142
141
|
<p class="ck-kicker">Runs</p>
|
|
143
|
-
|
|
144
|
-
<table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
|
|
145
|
-
<thead>
|
|
146
|
-
<tr>
|
|
147
|
-
<th>Run</th>
|
|
148
|
-
<th>Responses</th>
|
|
149
|
-
<th>Metrics</th>
|
|
150
|
-
<th>Avg score</th>
|
|
151
|
-
<th>When</th>
|
|
152
|
-
<th></th>
|
|
153
|
-
</tr>
|
|
154
|
-
</thead>
|
|
155
|
-
<tbody>
|
|
156
|
-
<% @runs.each do |run| %>
|
|
157
|
-
<%= render "completion_kit/runs/row", run: run %>
|
|
158
|
-
<% end %>
|
|
159
|
-
</tbody>
|
|
160
|
-
</table>
|
|
142
|
+
<%= render "completion_kit/runs/table", runs: @runs %>
|
|
161
143
|
</section>
|
|
162
144
|
<% end %>
|
|
163
145
|
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
<ol class="ck-breadcrumb">
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
<% if @run.prompt %>
|
|
3
|
+
<li><%= link_to "Prompts", prompts_path %></li>
|
|
4
|
+
<li><%= link_to @run.prompt.name, prompt_path(@run.prompt) %></li>
|
|
5
|
+
<% else %>
|
|
6
|
+
<li><%= link_to "Runs", runs_path %></li>
|
|
7
|
+
<% end %>
|
|
4
8
|
<li><%= link_to @run.name, run_path(@run) %></li>
|
|
5
9
|
<li>Response #<%= @response_number %></li>
|
|
6
10
|
</ol>
|
|
@@ -30,20 +34,29 @@
|
|
|
30
34
|
<span class="ck-run-config__key">Run</span>
|
|
31
35
|
<%= link_to @run.name, run_path(@run), class: "ck-link" %>
|
|
32
36
|
</div>
|
|
33
|
-
|
|
34
|
-
<
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
<% if @run.prompt %>
|
|
38
|
+
<div class="ck-run-config__row">
|
|
39
|
+
<span class="ck-run-config__key">Prompt</span>
|
|
40
|
+
<%= link_to @run.prompt.display_name, prompt_path(@run.prompt), class: "ck-link" %>
|
|
41
|
+
</div>
|
|
42
|
+
<% else %>
|
|
43
|
+
<div class="ck-run-config__row">
|
|
44
|
+
<span class="ck-run-config__key">Output</span>
|
|
45
|
+
<span>Dataset column <code><%= @run.output_column.presence || "actual_output" %></code></span>
|
|
46
|
+
</div>
|
|
47
|
+
<% end %>
|
|
37
48
|
<% if @run.dataset %>
|
|
38
49
|
<div class="ck-run-config__row">
|
|
39
50
|
<span class="ck-run-config__key">Dataset</span>
|
|
40
51
|
<%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
|
|
41
52
|
</div>
|
|
42
53
|
<% end %>
|
|
43
|
-
|
|
44
|
-
<
|
|
45
|
-
|
|
46
|
-
|
|
54
|
+
<% if @run.prompt %>
|
|
55
|
+
<div class="ck-run-config__row">
|
|
56
|
+
<span class="ck-run-config__key">Model</span>
|
|
57
|
+
<span style="text-transform: none;"><%= @run.prompt.llm_model %></span>
|
|
58
|
+
</div>
|
|
59
|
+
<% end %>
|
|
47
60
|
<% if @run.judge_model.present? %>
|
|
48
61
|
<div class="ck-run-config__row">
|
|
49
62
|
<span class="ck-run-config__key">Judge</span>
|
|
@@ -60,7 +73,9 @@
|
|
|
60
73
|
<section class="ck-card--spaced">
|
|
61
74
|
<div class="ck-prompt-preview__header">
|
|
62
75
|
<p class="ck-kicker">Response</p>
|
|
63
|
-
|
|
76
|
+
<% if @run.prompt %>
|
|
77
|
+
<span class="ck-chip ck-chip--soft" style="text-transform: none;"><%= @run.prompt.llm_model %></span>
|
|
78
|
+
<% end %>
|
|
64
79
|
</div>
|
|
65
80
|
<pre class="ck-code"><%= @response.response_text %></pre>
|
|
66
81
|
</section>
|
|
@@ -17,6 +17,17 @@
|
|
|
17
17
|
</div>
|
|
18
18
|
|
|
19
19
|
<div class="ck-field">
|
|
20
|
+
<label class="ck-checkbox-label">
|
|
21
|
+
<%= check_box_tag "run[judge_only]", "1", run.persisted? && run.judge_only?, id: "run_judge_only", class: "ck-checkbox" %>
|
|
22
|
+
<span class="ck-checkbox-label__box" aria-hidden="true"></span>
|
|
23
|
+
<span class="ck-checkbox-label__body">
|
|
24
|
+
<span class="ck-checkbox-label__text">Judge-only run</span>
|
|
25
|
+
<span class="ck-checkbox-label__hint">Grade an existing column on the dataset instead of running a prompt. Roughly half the LLM calls per row.</span>
|
|
26
|
+
</span>
|
|
27
|
+
</label>
|
|
28
|
+
</div>
|
|
29
|
+
|
|
30
|
+
<div class="ck-field" id="prompt-field">
|
|
20
31
|
<%= form.label :prompt_id, "Prompt", class: "ck-label" %>
|
|
21
32
|
<%= form.select :prompt_id,
|
|
22
33
|
@prompts.map { |p|
|
|
@@ -43,6 +54,12 @@
|
|
|
43
54
|
</div>
|
|
44
55
|
</div>
|
|
45
56
|
|
|
57
|
+
<div class="ck-field" id="output-column-field" hidden>
|
|
58
|
+
<%= form.label :output_column, "Output column", class: "ck-label" %>
|
|
59
|
+
<%= form.text_field :output_column, value: run.output_column.presence || "actual_output", class: "ck-input", id: "run_output_column", placeholder: "actual_output" %>
|
|
60
|
+
<p class="ck-field-hint">Name of the dataset column whose value will be graded as the response. Defaults to <code>actual_output</code>.</p>
|
|
61
|
+
</div>
|
|
62
|
+
|
|
46
63
|
<div class="ck-field" id="dataset-field">
|
|
47
64
|
<%= form.label :dataset_id, "Dataset", class: "ck-label" %>
|
|
48
65
|
<% if @datasets.empty? %>
|
|
@@ -93,10 +110,10 @@
|
|
|
93
110
|
|
|
94
111
|
<div class="ck-field" id="metrics-field">
|
|
95
112
|
<label class="ck-label">Metrics</label>
|
|
96
|
-
<p class="ck-field-hint" id="metrics-hint"></p>
|
|
97
113
|
<% if @all_metrics.empty? %>
|
|
98
114
|
<p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet. <%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
|
|
99
115
|
<% else %>
|
|
116
|
+
<p class="ck-field-hint" id="metrics-hint"></p>
|
|
100
117
|
<% if @metric_groups.any? %>
|
|
101
118
|
<div class="ck-metric-groups">
|
|
102
119
|
<span class="ck-metric-groups__label">Groups</span>
|
|
@@ -157,6 +174,15 @@
|
|
|
157
174
|
function updateRunForm() {
|
|
158
175
|
var promptEl = document.getElementById('run_prompt_id');
|
|
159
176
|
var judgeEl = document.getElementById('run_judge_model');
|
|
177
|
+
var judgeOnlyEl = document.getElementById('run_judge_only');
|
|
178
|
+
var judgeOnly = !!(judgeOnlyEl && judgeOnlyEl.checked);
|
|
179
|
+
var promptField = document.getElementById('prompt-field');
|
|
180
|
+
var outputColumnField = document.getElementById('output-column-field');
|
|
181
|
+
var outputColumnEl = document.getElementById('run_output_column');
|
|
182
|
+
if (promptField) promptField.hidden = judgeOnly;
|
|
183
|
+
if (outputColumnField) outputColumnField.hidden = !judgeOnly;
|
|
184
|
+
if (judgeOnly && promptEl) promptEl.value = '';
|
|
185
|
+
|
|
160
186
|
var prompt = promptEl ? promptEl.value : '';
|
|
161
187
|
var judge = judgeEl ? judgeEl.value : '';
|
|
162
188
|
var metrics = document.querySelectorAll('input[name="run[metric_ids][]"]:checked');
|
|
@@ -222,11 +248,28 @@ function updateRunForm() {
|
|
|
222
248
|
}
|
|
223
249
|
}
|
|
224
250
|
|
|
225
|
-
var valid
|
|
251
|
+
var valid;
|
|
252
|
+
if (judgeOnly) {
|
|
253
|
+
valid = !!dataset;
|
|
254
|
+
if (dataset && datasetEl && outputColumnEl) {
|
|
255
|
+
var headersJudge = (datasetEl.options[datasetEl.selectedIndex] && datasetEl.options[datasetEl.selectedIndex].dataset.headers ? datasetEl.options[datasetEl.selectedIndex].dataset.headers.split(/,\s*/) : []).filter(Boolean);
|
|
256
|
+
var col = (outputColumnEl.value || 'actual_output').trim();
|
|
257
|
+
if (col === '' || headersJudge.indexOf(col) === -1) {
|
|
258
|
+
valid = false;
|
|
259
|
+
if (datasetField) datasetField.className = 'ck-field ck-field--error';
|
|
260
|
+
if (datasetHint) datasetHint.textContent = "Dataset has no \"" + col + "\" column — pick a different output column or dataset.";
|
|
261
|
+
}
|
|
262
|
+
} else if (!dataset) {
|
|
263
|
+
if (datasetField) datasetField.className = 'ck-field ck-field--info';
|
|
264
|
+
if (datasetHint) datasetHint.textContent = 'Judge-only runs need a dataset that supplies the output column.';
|
|
265
|
+
}
|
|
266
|
+
} else {
|
|
267
|
+
valid = prompt !== '';
|
|
268
|
+
if (hasVars && !dataset) valid = false;
|
|
269
|
+
if (missingVars.length > 0) valid = false;
|
|
270
|
+
}
|
|
226
271
|
if (judge && metrics.length === 0) valid = false;
|
|
227
272
|
if (!judge && metrics.length > 0) valid = false;
|
|
228
|
-
if (hasVars && !dataset) valid = false;
|
|
229
|
-
if (missingVars.length > 0) valid = false;
|
|
230
273
|
if (submitBtn) submitBtn.disabled = !valid;
|
|
231
274
|
|
|
232
275
|
ckUpdateMetricGroupsState();
|
|
@@ -260,9 +303,13 @@ function ckUpdateMetricGroupsState() {
|
|
|
260
303
|
var judgeEl = document.getElementById('run_judge_model');
|
|
261
304
|
var promptEl = document.getElementById('run_prompt_id');
|
|
262
305
|
var datasetEl = document.getElementById('run_dataset_id');
|
|
306
|
+
var judgeOnlyEl = document.getElementById('run_judge_only');
|
|
307
|
+
var outputColumnEl = document.getElementById('run_output_column');
|
|
263
308
|
if (judgeEl) judgeEl.addEventListener('change', updateRunForm);
|
|
264
309
|
if (promptEl) promptEl.addEventListener('change', updateRunForm);
|
|
265
310
|
if (datasetEl) datasetEl.addEventListener('change', updateRunForm);
|
|
311
|
+
if (judgeOnlyEl) judgeOnlyEl.addEventListener('change', updateRunForm);
|
|
312
|
+
if (outputColumnEl) outputColumnEl.addEventListener('input', updateRunForm);
|
|
266
313
|
document.querySelectorAll('input[name="run[metric_ids][]"]').forEach(function(cb) {
|
|
267
314
|
cb.addEventListener('change', updateRunForm);
|
|
268
315
|
});
|
|
@@ -6,8 +6,12 @@
|
|
|
6
6
|
<strong><%= run.name %></strong>
|
|
7
7
|
</span>
|
|
8
8
|
<div class="ck-runs-table__config">
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
<% if run.prompt %>
|
|
10
|
+
<%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
|
|
11
|
+
<span class="ck-runs-table__version">v<%= run.prompt.version_number %></span>
|
|
12
|
+
<% else %>
|
|
13
|
+
<span class="ck-runs-table__version">Judge-only</span>
|
|
14
|
+
<% end %>
|
|
11
15
|
<% if run.dataset %>
|
|
12
16
|
<span class="ck-runs-table__sep">·</span>
|
|
13
17
|
<%= link_to run.dataset.name, dataset_path(run.dataset), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
|
|
@@ -19,7 +19,11 @@
|
|
|
19
19
|
<span class="ck-status-badge__label"><%= run.status.upcase %></span>
|
|
20
20
|
</span>
|
|
21
21
|
<h1 class="ck-title"><%= run.name %></h1>
|
|
22
|
-
|
|
22
|
+
<% if run.prompt %>
|
|
23
|
+
<p class="ck-meta-copy"><%= link_to run.prompt.display_name, prompt_path(run.prompt), class: "ck-link" %> <span class="ck-chip" style="text-transform: none;"><%= run.prompt.llm_model %></span></p>
|
|
24
|
+
<% else %>
|
|
25
|
+
<p class="ck-meta-copy">Judge-only run — grading column <code><%= run.output_column.presence || "actual_output" %></code><% if run.dataset %> on <%= link_to run.dataset.name, dataset_path(run.dataset), class: "ck-link" %><% end %></p>
|
|
26
|
+
<% end %>
|
|
23
27
|
</div>
|
|
24
28
|
<%= render "completion_kit/runs/actions", run: run %>
|
|
25
29
|
</section>
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
<%# Renders a runs table. Locals: runs: enumerable of CompletionKit::Run. %>
<%# runs is required (fetched without a default). The header row is emitted even when runs is empty, so callers are expected to check for an empty collection themselves — TODO confirm all call sites do. Each body row comes from the completion_kit/runs/row partial. %>
|
|
2
|
+
<% runs = local_assigns.fetch(:runs) %>
|
|
3
|
+
<table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
|
|
4
|
+
<thead>
|
|
5
|
+
<tr>
|
|
6
|
+
<th>Run</th>
|
|
7
|
+
<th>Responses</th>
|
|
8
|
+
<th>Metrics</th>
|
|
9
|
+
<th>Avg score</th>
|
|
10
|
+
<th>When</th>
|
|
11
|
+
<th></th>
|
|
12
|
+
</tr>
|
|
13
|
+
</thead>
|
|
14
|
+
<tbody>
|
|
15
|
+
<% runs.each do |run| %>
|
|
16
|
+
<%= render "completion_kit/runs/row", run: run %>
|
|
17
|
+
<% end %>
|
|
18
|
+
</tbody>
|
|
19
|
+
</table>
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
<ol class="ck-breadcrumb">
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
<% if @run.prompt %>
|
|
3
|
+
<li><%= link_to "Prompts", prompts_path %></li>
|
|
4
|
+
<li><%= link_to @run.prompt.name, prompt_path(@run.prompt) %></li>
|
|
5
|
+
<% else %>
|
|
6
|
+
<li><%= link_to "Runs", runs_path %></li>
|
|
7
|
+
<% end %>
|
|
4
8
|
<li><%= link_to @run.name, run_path(@run) %></li>
|
|
5
9
|
<li>Edit</li>
|
|
6
10
|
</ol>
|
|
@@ -14,23 +14,7 @@
|
|
|
14
14
|
base_path: runs_path %>
|
|
15
15
|
|
|
16
16
|
<% if @runs.any? %>
|
|
17
|
-
|
|
18
|
-
<thead>
|
|
19
|
-
<tr>
|
|
20
|
-
<th>Run</th>
|
|
21
|
-
<th>Responses</th>
|
|
22
|
-
<th>Metrics</th>
|
|
23
|
-
<th>Avg score</th>
|
|
24
|
-
<th>When</th>
|
|
25
|
-
<th></th>
|
|
26
|
-
</tr>
|
|
27
|
-
</thead>
|
|
28
|
-
<tbody>
|
|
29
|
-
<% @runs.each do |run| %>
|
|
30
|
-
<%= render "row", run: run %>
|
|
31
|
-
<% end %>
|
|
32
|
-
</tbody>
|
|
33
|
-
</table>
|
|
17
|
+
<%= render "completion_kit/runs/table", runs: @runs %>
|
|
34
18
|
<% elsif @selected_tags.any? %>
|
|
35
19
|
<div class="ck-empty">
|
|
36
20
|
<p>No runs match these tags. <%= link_to "Clear filters", runs_path, class: "ck-link" %>.</p>
|
|
@@ -59,24 +59,33 @@
|
|
|
59
59
|
</div>
|
|
60
60
|
</div>
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
<div class="ck-prompt-
|
|
64
|
-
<
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
<%=
|
|
71
|
-
|
|
62
|
+
<% if @run.prompt %>
|
|
63
|
+
<div class="ck-prompt-preview">
|
|
64
|
+
<div class="ck-prompt-preview__header">
|
|
65
|
+
<p class="ck-kicker">Prompt</p>
|
|
66
|
+
<% latest_suggestion = @run.suggestions.order(created_at: :desc).first %>
|
|
67
|
+
<% if latest_suggestion %>
|
|
68
|
+
<%= link_to "View suggestion", suggestion_path(latest_suggestion, from: "run"), class: ck_button_classes(:light, variant: :outline) + " ck-button--sm" %>
|
|
69
|
+
<% elsif @run.status == "completed" && @run.responses.joins(:reviews).exists? %>
|
|
70
|
+
<%= button_to suggest_run_path(@run), method: :post, class: ck_button_classes(:light, variant: :outline) + " ck-button--sm", form_class: "inline-block" do %>
|
|
71
|
+
<%= heroicon_tag "sparkles", variant: :outline, class: "ck-magic-icon", "aria-hidden": "true" %>
|
|
72
|
+
Suggest improvements
|
|
73
|
+
<% end %>
|
|
72
74
|
<% end %>
|
|
75
|
+
</div>
|
|
76
|
+
<p class="ck-prompt-preview__text" id="prompt_text"><%= @run.prompt.template %></p>
|
|
77
|
+
<% if @run.prompt.template.length > 200 %>
|
|
78
|
+
<button type="button" class="ck-disclosure-toggle" id="prompt_toggle" aria-expanded="false" aria-controls="prompt_text" onclick="var t=document.getElementById('prompt_text');var l=this;var expanded=t.classList.toggle('ck-prompt-preview__text--expanded');l.firstChild.textContent=expanded?'Show less':'Show more';l.setAttribute('aria-expanded',expanded?'true':'false')"><span>Show more</span></button>
|
|
73
79
|
<% end %>
|
|
74
80
|
</div>
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
<
|
|
78
|
-
|
|
79
|
-
</div>
|
|
81
|
+
<% else %>
|
|
82
|
+
<div class="ck-prompt-preview">
|
|
83
|
+
<div class="ck-prompt-preview__header">
|
|
84
|
+
<p class="ck-kicker">Output source</p>
|
|
85
|
+
</div>
|
|
86
|
+
<p class="ck-prompt-preview__text">Dataset column <code><%= @run.output_column.presence || "actual_output" %></code> — no prompt generated these outputs.</p>
|
|
87
|
+
</div>
|
|
88
|
+
<% end %>
|
|
80
89
|
|
|
81
90
|
<% if @run.dataset %>
|
|
82
91
|
<dialog id="dataset-preview-<%= @run.id %>" class="ck-modal" onclick="if(event.target===this)this.close()">
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
<%= csrf_meta_tags %>
|
|
7
7
|
<%= csp_meta_tag %>
|
|
8
8
|
|
|
9
|
-
<%= favicon_link_tag "completion_kit/
|
|
9
|
+
<%= favicon_link_tag "completion_kit/favicon.ico" %>
|
|
10
10
|
<%= stylesheet_link_tag "completion_kit/application", media: "all" %>
|
|
11
11
|
<%= javascript_include_tag "turbo", type: "module" %>
|
|
12
12
|
<%= javascript_include_tag "completion_kit/application", defer: true %>
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
<body class="ck-app">
|
|
16
16
|
<header class="ck-topbar">
|
|
17
17
|
<div class="ck-wrap ck-topbar__inner">
|
|
18
|
-
<%= link_to (main_app.respond_to?(:root_path) ? main_app.root_path : prompts_path), class: "ck-brand" do %><%= image_tag "completion_kit/logo.
|
|
18
|
+
<%= link_to (main_app.respond_to?(:root_path) ? main_app.root_path : prompts_path), class: "ck-brand" do %><%= image_tag "completion_kit/logo.png", alt: "CompletionKit", style: "height: 64px; width: auto;" %><span class="ck-brand__name">Completion<span class="ck-brand__kit">Kit</span></span><% end %>
|
|
19
19
|
|
|
20
20
|
<nav class="ck-nav">
|
|
21
21
|
<% active = ->(path) { request.path.start_with?(path) ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline) } %>
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Creates the completion_kit_mcp_sessions table: a required string session_id,
# a required datetime expires_at, and the created_at/updated_at pair added by
# t.timestamps.
# NOTE(review): Migration[8.1] pins the migration API version to Rails 8.1;
# host apps on an older Rails may be unable to load this migration — confirm
# this matches the gem's declared Rails dependency.
class CreateCompletionKitMcpSessions < ActiveRecord::Migration[8.1]
|
|
2
|
+
# Written as `change`, so rollback relies on Rails reversing create_table
# and add_index automatically.
def change
|
|
3
|
+
create_table :completion_kit_mcp_sessions do |t|
|
|
4
|
+
t.string :session_id, null: false
|
|
5
|
+
t.datetime :expires_at, null: false
|
|
6
|
+
t.timestamps
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
# Unique index: the database allows at most one row per session_id.
add_index :completion_kit_mcp_sessions, :session_id, unique: true
|
|
10
|
+
# Presumably supports looking up or purging expired sessions — confirm
# against the McpSession model.
add_index :completion_kit_mcp_sessions, :expires_at
|
|
11
|
+
end
|
|
12
|
+
end
|