completion-kit 0.20.3 → 0.20.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 71e8645d7a790e2afc35e4fb7c8ca4ec5a6d08f51f3f84a2c289a2b6c76a9051
4
- data.tar.gz: f5143766d938735ae5f2072516bc520c7d826894c02dc6a584bf62e40dd4b583
3
+ metadata.gz: e49a3e185722be44be75a0e236862942841720af9f8b6bdcfe233778968d26ab
4
+ data.tar.gz: 1545324b88bc8eef05f507d71a80e06804d4a9d3c65f99010b76610657ffa021
5
5
  SHA512:
6
- metadata.gz: 0a6c71340315065125e6bc520f80453132de07d858acd4e5327d2cc35d3b3420c920a2f2ccf148c696c20afdad15371f6323d27d85dd7f3d9baa8dd03d3f5386
7
- data.tar.gz: 65c2978028e0897cb00e350f3a9e6af98f11e8d6fc6327da245acdbcc6de41bd783b8bbfe63b1cb76fc54e65debbd4488d71103f35d4222478c9611220d84a95
6
+ metadata.gz: 21f1b48c9ed2ba23b111eb1ff733ac62f180050ce3e1edd9ea31719ee25cf557025af86e00f13be27b6e04a79afb677ced23fa7e7e5a9580715249533559338c
7
+ data.tar.gz: a7799c294e109c42a585f9fc7aa01e94d389291dd3fc1eec6821123ea4cb066e5cb805b803dfcf935f771b494bf1324fd7f5787f781746cfd0290d6c97b93641
@@ -210,6 +210,76 @@ document.addEventListener("click", function(e) {
210
210
  });
211
211
  });
212
212
 
213
+ var CK_CHECK_FIELDS = {
214
+ contains: ["value", "case_sensitive", "trim"],
215
+ not_contains: ["value", "case_sensitive", "trim"],
216
+ equals: ["value", "case_sensitive", "trim"],
217
+ regex: ["pattern", "case_sensitive", "multiline"],
218
+ valid_json: [],
219
+ json_path_equals: ["json_path", "expected"],
220
+ length_bounds: ["min", "max"],
221
+ no_refusal: []
222
+ };
223
+
224
+ function ckApplyCheckFields(scope) {
225
+ if (!scope) return;
226
+ var kindSelect = scope.querySelector('[name="metric[check_config][check_kind]"]');
227
+ if (!kindSelect) return;
228
+ var visible = CK_CHECK_FIELDS[kindSelect.value];
229
+ var targetSelect = scope.querySelector('[name="metric[check_config][target]"]');
230
+ var targetIsJsonPath = !!(targetSelect && targetSelect.value === "json_path");
231
+ scope.querySelectorAll("[data-ck-check-field]").forEach(function(field) {
232
+ var key = field.getAttribute("data-ck-check-field");
233
+ var show;
234
+ if (key === "target_path") {
235
+ show = targetIsJsonPath;
236
+ } else if (!visible) {
237
+ show = true;
238
+ } else {
239
+ show = visible.indexOf(key) !== -1;
240
+ }
241
+ field.hidden = !show;
242
+ });
243
+ }
244
+
245
+ function ckApplyMetricType(group) {
246
+ var checked = group.querySelector('input[type="radio"]:checked');
247
+ if (!checked) return;
248
+ var value = checked.value;
249
+ var scope = group.closest("form") || document;
250
+ scope.querySelectorAll("[data-ck-metric-editor]").forEach(function(editor) {
251
+ var active = editor.getAttribute("data-ck-metric-editor") === value;
252
+ editor.hidden = !active;
253
+ editor.querySelectorAll("input, select, textarea").forEach(function(field) {
254
+ field.disabled = !active;
255
+ });
256
+ });
257
+ ckApplyCheckFields(scope);
258
+ }
259
+
260
+ document.addEventListener("turbo:load", function() {
261
+ document.querySelectorAll("[data-ck-metric-type]").forEach(function(group) {
262
+ ckApplyMetricType(group);
263
+ });
264
+ document.querySelectorAll('[data-ck-metric-editor="check"]').forEach(function(editor) {
265
+ ckApplyCheckFields(editor);
266
+ });
267
+ });
268
+
269
+ document.addEventListener("change", function(e) {
270
+ var target = e.target;
271
+ if (!target || !target.closest) return;
272
+ var group = target.closest("[data-ck-metric-type]");
273
+ if (group && target.type === "radio") {
274
+ ckApplyMetricType(group);
275
+ return;
276
+ }
277
+ if (target.name === "metric[check_config][check_kind]" || target.name === "metric[check_config][target]") {
278
+ var scope = target.closest('[data-ck-metric-editor="check"]') || target.closest("form");
279
+ ckApplyCheckFields(scope);
280
+ }
281
+ });
282
+
213
283
  document.addEventListener("click", function(e) {
214
284
  var btn = e.target.closest("[data-ck-apply]");
215
285
  if (!btn) return;
@@ -1922,6 +1922,13 @@ label.ck-checkbox input {
1922
1922
  cursor: pointer;
1923
1923
  }
1924
1924
 
1925
+ .ck-radio-info {
1926
+ width: 16px;
1927
+ height: 16px;
1928
+ color: var(--ck-muted);
1929
+ cursor: help;
1930
+ }
1931
+
1925
1932
  .ck-field-row {
1926
1933
  display: flex;
1927
1934
  gap: 1rem;
@@ -3238,7 +3245,8 @@ select.ck-input {
3238
3245
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
3239
3246
  #ck-tab-agreements:checked ~ .ck-api-tabs__nav label[for="ck-tab-agreements"],
3240
3247
  #ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
3241
- #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
3248
+ #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"],
3249
+ #ck-tab-imports:checked ~ .ck-api-tabs__nav label[for="ck-tab-imports"] {
3242
3250
  color: var(--ck-accent);
3243
3251
  background: var(--ck-surface-soft);
3244
3252
  border-left-color: var(--ck-accent);
@@ -3253,7 +3261,8 @@ select.ck-input {
3253
3261
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(7),
3254
3262
  #ck-tab-agreements:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(8),
3255
3263
  #ck-tab-tags:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(9),
3256
- #ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(10) {
3264
+ #ck-tab-providers:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(10),
3265
+ #ck-tab-imports:checked ~ .ck-api-tabs__panels .ck-api-tabs__panel:nth-child(11) {
3257
3266
  display: block;
3258
3267
  }
3259
3268
 
@@ -3295,7 +3304,8 @@ select.ck-input {
3295
3304
  #ck-tab-metric-groups:checked ~ .ck-api-tabs__nav label[for="ck-tab-metric-groups"],
3296
3305
  #ck-tab-agreements:checked ~ .ck-api-tabs__nav label[for="ck-tab-agreements"],
3297
3306
  #ck-tab-tags:checked ~ .ck-api-tabs__nav label[for="ck-tab-tags"],
3298
- #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"] {
3307
+ #ck-tab-providers:checked ~ .ck-api-tabs__nav label[for="ck-tab-providers"],
3308
+ #ck-tab-imports:checked ~ .ck-api-tabs__nav label[for="ck-tab-imports"] {
3299
3309
  border-left-color: transparent;
3300
3310
  border-bottom-color: var(--ck-accent);
3301
3311
  }
@@ -68,7 +68,7 @@ module CompletionKit
68
68
  end
69
69
 
70
70
  def create
71
- @metric = Metric.new(metric_params)
71
+ @metric = Metric.new(create_metric_params)
72
72
 
73
73
  if @metric.save
74
74
  redirect_to metric_path(@metric), notice: "Metric was successfully created."
@@ -235,6 +235,15 @@ module CompletionKit
235
235
  @metric = Metric.find(params[:id])
236
236
  end
237
237
 
238
+ def create_metric_params
239
+ attrs = metric_params
240
+ if attrs[:metric_type] == "check"
241
+ attrs.except(:instruction, :rubric_bands)
242
+ else
243
+ attrs.except(:check_config)
244
+ end
245
+ end
246
+
238
247
  def metric_params
239
248
  permitted = params.require(:metric).permit(:name, :instruction, :metric_type,
240
249
  rubric_bands: [:stars, :description],
@@ -122,7 +122,7 @@ module CompletionKit
122
122
 
123
123
  def suggest
124
124
  if @run.prompt.nil?
125
- redirect_to run_path(@run), alert: "Judge-only runs don't have a prompt to improve."
125
+ redirect_to run_path(@run), alert: "A run that only scores existing outputs has no prompt to improve."
126
126
  return
127
127
  end
128
128
 
@@ -30,7 +30,7 @@ module CompletionKit
30
30
  display_scoped.select(:id)
31
31
  end
32
32
 
33
- # A judge-only run grades a pre-existing column on the dataset instead of
33
+ # A scoring-only run grades a pre-existing column on the dataset instead of
34
34
  # generating new outputs. No prompt is attached; the response text is read
35
35
  # from row[output_column]; no LLM generation happens.
36
36
  def judge_only?
@@ -442,7 +442,7 @@ module CompletionKit
442
442
  self.name = "#{prompt.name} — v#{prompt.version_number} ##{count}"
443
443
  elsif dataset.present?
444
444
  count = Run.where(prompt_id: nil, dataset_id: dataset.id).count + 1
445
- self.name = "#{dataset.name} judge-only ##{count}"
445
+ self.name = "#{dataset.name} scoring ##{count}"
446
446
  end
447
447
  end
448
448
 
@@ -461,7 +461,7 @@ module CompletionKit
461
461
  return if prompt.present?
462
462
 
463
463
  if dataset.nil?
464
- errors.add(:dataset_id, "is required for a judge-only run (no prompt)")
464
+ errors.add(:dataset_id, "is required when scoring existing outputs (no prompt)")
465
465
  return
466
466
  end
467
467
 
@@ -5,7 +5,7 @@ module CompletionKit
5
5
 
6
6
  TOOLS = {
7
7
  "judges_replay" => {
8
- description: "Run the current judge against a dataset (judge-only run). Wraps runs_create with prompt_id omitted and output_column supplied. Re-judges existing dataset outputs so you can compare against human verdicts.",
8
+ description: "Run the current judge against a dataset (scores existing outputs). Wraps runs_create with prompt_id omitted and output_column supplied. Re-judges existing dataset outputs so you can compare against human verdicts.",
9
9
  inputSchema: {
10
10
  type: "object",
11
11
  properties: {
@@ -51,7 +51,7 @@ module CompletionKit
51
51
  handler: :publish
52
52
  },
53
53
  "prompts_suggest_improvement" => {
54
- description: "Suggest an improved version of a prompt, grounded in a run's test results and judge feedback. Analyzes the run's responses, scores, and reviews, then returns reasoning plus a rewritten template (preserving {{variables}}) and persists it as a Suggestion. Requires a run that has a prompt (not a judge-only run).",
54
+ description: "Suggest an improved version of a prompt, grounded in a run's test results and judge feedback. Analyzes the run's responses, scores, and reviews, then returns reasoning plus a rewritten template (preserving {{variables}}) and persists it as a Suggestion. Requires a run that has a prompt (not a scoring-only run).",
55
55
  inputSchema: {
56
56
  type: "object",
57
57
  properties: {run_id: {type: "integer", description: "The run whose results ground the improvement."}},
@@ -107,7 +107,7 @@ module CompletionKit
107
107
 
108
108
  def self.suggest_improvement(args)
109
109
  run = Run.find(args["run_id"])
110
- return error_result("Judge-only runs don't have a prompt to improve.") if run.prompt.nil?
110
+ return error_result("A run that only scores existing outputs has no prompt to improve.") if run.prompt.nil?
111
111
 
112
112
  result = PromptImprovementService.new(run).suggest
113
113
  return error_result("The model didn't return a usable rewrite.") if result["suggested_template"].blank?
@@ -15,7 +15,7 @@ module CompletionKit
15
15
  handler: :get
16
16
  },
17
17
  "runs_create" => {
18
- description: "Create a run. Omit prompt_id and provide output_column for a judge-only run that grades a pre-existing dataset column instead of generating new outputs.",
18
+ description: "Create a run. Omit prompt_id and provide output_column to score existing outputs by grading a pre-existing dataset column instead of generating new ones.",
19
19
  inputSchema: {
20
20
  type: "object",
21
21
  properties: {
@@ -20,6 +20,7 @@
20
20
  <input type="radio" name="ck-api-tab" id="ck-tab-agreements" class="ck-api-tabs__radio">
21
21
  <input type="radio" name="ck-api-tab" id="ck-tab-tags" class="ck-api-tabs__radio">
22
22
  <input type="radio" name="ck-api-tab" id="ck-tab-providers" class="ck-api-tabs__radio">
23
+ <input type="radio" name="ck-api-tab" id="ck-tab-imports" class="ck-api-tabs__radio">
23
24
 
24
25
  <nav class="ck-api-tabs__nav">
25
26
  <label for="ck-tab-mcp" class="ck-api-tabs__label">MCP <span class="ck-api-tabs__count"><%= CompletionKit::McpDispatcher.tool_definitions.size %></span></label>
@@ -32,6 +33,7 @@
32
33
  <label for="ck-tab-agreements" class="ck-api-tabs__label">Agreements <span class="ck-api-tabs__count">3</span></label>
33
34
  <label for="ck-tab-tags" class="ck-api-tabs__label">Tags <span class="ck-api-tabs__count">5</span></label>
34
35
  <label for="ck-tab-providers" class="ck-api-tabs__label">Providers <span class="ck-api-tabs__count">5</span></label>
36
+ <label for="ck-tab-imports" class="ck-api-tabs__label">Imports <span class="ck-api-tabs__count">1</span></label>
35
37
  </nav>
36
38
 
37
39
  <div class="ck-api-tabs__panels">
@@ -124,7 +126,7 @@
124
126
  <div class="ck-api-endpoint">
125
127
  <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs</p>
126
128
  <p class="ck-meta-copy">Create a new run.</p>
127
- <p class="ck-api-params"><strong>Optional:</strong>&ensp;<code>name</code>, <code>prompt_id</code>, <code>dataset_id</code>, <code>metric_ids</code>, <code>judge_model</code>, <code>output_column</code> (judge-only: omit <code>prompt_id</code> and grade a dataset column instead, default <code>actual_output</code>)</p>
129
+ <p class="ck-api-params"><strong>Optional:</strong>&ensp;<code>name</code>, <code>prompt_id</code>, <code>dataset_id</code>, <code>metric_ids</code>, <code>judge_model</code>, <code>output_column</code> (score existing outputs: omit <code>prompt_id</code> and grade a dataset column instead, default <code>actual_output</code>)</p>
128
130
  <%= render "completion_kit/api_reference/example", base_url: base_url, token: token, real_token: real_token, cmd: "curl -X POST #{base_url}/api/v1/runs \\\n -H \"Authorization: Bearer #{token}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"prompt_id\": 1, \"dataset_id\": 1, \"metric_ids\": [1, 2]}'" %>
129
131
  </div>
130
132
  <div class="ck-api-endpoint">
@@ -379,6 +381,18 @@
379
381
  } %>
380
382
  </div>
381
383
 
384
+ <div class="ck-api-tabs__panel">
385
+ <h2 class="ck-section-title">Imports</h2>
386
+ <p class="ck-copy">Bring an existing <a href="https://www.promptfoo.dev" class="ck-link">promptfoo</a> config into CompletionKit in one call. Prompts, the test dataset, assert-based metrics, and providers are created where they map cleanly and skipped with a reason where they don't.</p>
387
+ <div class="ck-api-endpoint">
388
+ <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/imports/promptfoo</p>
389
+ <p class="ck-meta-copy">Import a promptfooconfig.yaml. Send the YAML as a <code>config</code> param, or POST the raw YAML as the request body. Returns 201 with a mapping summary, or 422 if the YAML cannot be parsed.</p>
390
+ <p class="ck-api-params"><strong>Request:</strong>&ensp;<code>config</code> (the YAML text) or a raw YAML request body</p>
391
+ <p class="ck-api-params"><strong>Response 201:</strong>&ensp;<code>prompts</code>, <code>dataset</code>, <code>metrics</code>, and <code>providers</code>, each listing what was <code>created</code> and what was <code>skipped</code> (with a reason)</p>
392
+ <%= render "completion_kit/api_reference/example", base_url: base_url, token: token, real_token: real_token, cmd: "curl -X POST #{base_url}/api/v1/imports/promptfoo \\\n -H \"Authorization: Bearer #{token}\" \\\n -H \"Content-Type: application/x-yaml\" \\\n --data-binary @promptfooconfig.yaml" %>
393
+ </div>
394
+ </div>
395
+
382
396
  </div>
383
397
  </div>
384
398
 
@@ -75,20 +75,22 @@
75
75
  <% else %>
76
76
  <div class="ck-field" data-ck-metric-type>
77
77
  <p class="ck-section-title">Metric type</p>
78
- <p class="ck-hint">An LLM judge scores each output on a 1-5 rubric. A deterministic check passes or fails with no model call.</p>
78
+ <p class="ck-hint">The judge gives each response 1 to 5 stars against your rubric. A check just passes or fails, with no AI.</p>
79
79
  <label class="ck-radio">
80
80
  <%= form.radio_button :metric_type, "llm_judge", checked: !metric.check? %>
81
81
  <span>LLM judge (1-5)</span>
82
+ <%= heroicon_tag "information-circle", variant: :outline, size: 16, class: "ck-radio-info", "aria-hidden": "true", title: "An AI reads each response and rates it 1 to 5 stars against your rubric, with a written reason. Best for subjective quality: tone, helpfulness, accuracy." %>
82
83
  </label>
83
84
  <label class="ck-radio">
84
85
  <%= form.radio_button :metric_type, "check", checked: metric.check? %>
85
86
  <span>Deterministic check</span>
87
+ <%= heroicon_tag "information-circle", variant: :outline, size: 16, class: "ck-radio-info", "aria-hidden": "true", title: "A rule that passes or fails instantly with no AI and no cost. Best for exact things: valid JSON, contains a phrase, no refusal." %>
86
88
  </label>
87
89
  </div>
88
90
  <% end %>
89
91
 
90
92
  <% if show_judge %>
91
- <div class="ck-field ck-field--spacious" data-ck-metric-editor="llm_judge">
93
+ <div class="ck-field ck-field--spacious" data-ck-metric-editor="llm_judge" <%= "hidden" if metric.check? %>>
92
94
  <p class="ck-section-title">Instruction</p>
93
95
  <p class="ck-hint">What should the judge assess? This instruction is sent to the LLM judge when scoring outputs.</p>
94
96
  <%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output...", **ck_field_aria(form, :instruction) %>
@@ -112,7 +114,7 @@
112
114
  <% end %>
113
115
  </div>
114
116
 
115
- <div class="ck-field ck-field--spacious">
117
+ <div class="ck-field ck-field--spacious" data-ck-metric-editor="llm_judge" <%= "hidden" if metric.check? %>>
116
118
  <p class="ck-section-title">Rubric<%= render "completion_kit/metrics/rubric_hint" %></p>
117
119
  <p class="ck-hint">What each star rating means for this metric.</p>
118
120
 
@@ -155,7 +157,7 @@
155
157
 
156
158
  <% if show_check %>
157
159
  <% check = metric.check_config || {} %>
158
- <div class="ck-field ck-field--spacious" data-ck-metric-editor="check">
160
+ <div class="ck-field ck-field--spacious" data-ck-metric-editor="check" <%= "hidden" unless metric.check? %>>
159
161
  <p class="ck-section-title">Check</p>
160
162
  <p class="ck-hint">A deterministic pass/fail rule. Fill only the fields the chosen kind needs.</p>
161
163
 
@@ -177,56 +179,56 @@
177
179
  </select>
178
180
  </div>
179
181
 
180
- <div class="ck-field">
182
+ <div class="ck-field" data-ck-check-field="target_path">
181
183
  <label class="ck-label" for="metric_check_target_path">Target path</label>
182
184
  <p class="ck-hint">Used when target is json_path, e.g. data.items.0.name.</p>
183
185
  <input type="text" name="metric[check_config][target_path]" id="metric_check_target_path" class="ck-input" value="<%= check["target_path"] %>">
184
186
  </div>
185
187
 
186
- <div class="ck-field">
188
+ <div class="ck-field" data-ck-check-field="value">
187
189
  <label class="ck-label" for="metric_check_value">Value</label>
188
190
  <p class="ck-hint">The substring or exact string for contains, not_contains, or equals.</p>
189
191
  <input type="text" name="metric[check_config][value]" id="metric_check_value" class="ck-input" value="<%= check["value"] %>">
190
192
  </div>
191
193
 
192
- <div class="ck-field">
194
+ <div class="ck-field" data-ck-check-field="pattern">
193
195
  <label class="ck-label" for="metric_check_pattern">Pattern</label>
194
196
  <p class="ck-hint">A regular expression for the regex kind.</p>
195
197
  <input type="text" name="metric[check_config][pattern]" id="metric_check_pattern" class="ck-input" value="<%= check["pattern"] %>">
196
198
  </div>
197
199
 
198
- <div class="ck-field">
200
+ <div class="ck-field" data-ck-check-field="json_path">
199
201
  <label class="ck-label" for="metric_check_json_path">JSON path</label>
200
202
  <p class="ck-hint">Dotted path into parsed JSON for json_path_equals.</p>
201
203
  <input type="text" name="metric[check_config][json_path]" id="metric_check_json_path" class="ck-input" value="<%= check["json_path"] %>">
202
204
  </div>
203
205
 
204
- <div class="ck-field">
206
+ <div class="ck-field" data-ck-check-field="expected">
205
207
  <label class="ck-label" for="metric_check_expected">Expected</label>
206
208
  <p class="ck-hint">The value the JSON path must equal.</p>
207
209
  <input type="text" name="metric[check_config][expected]" id="metric_check_expected" class="ck-input" value="<%= check["expected"] %>">
208
210
  </div>
209
211
 
210
212
  <div class="ck-field-row">
211
- <div class="ck-field">
213
+ <div class="ck-field" data-ck-check-field="min">
212
214
  <label class="ck-label" for="metric_check_min">Min length</label>
213
215
  <input type="number" name="metric[check_config][min]" id="metric_check_min" class="ck-input" value="<%= check["min"] %>">
214
216
  </div>
215
- <div class="ck-field">
217
+ <div class="ck-field" data-ck-check-field="max">
216
218
  <label class="ck-label" for="metric_check_max">Max length</label>
217
219
  <input type="number" name="metric[check_config][max]" id="metric_check_max" class="ck-input" value="<%= check["max"] %>">
218
220
  </div>
219
221
  </div>
220
222
 
221
- <label class="ck-checkbox">
223
+ <label class="ck-checkbox" data-ck-check-field="case_sensitive">
222
224
  <input type="checkbox" name="metric[check_config][case_sensitive]" value="true"<%= " checked" if check["case_sensitive"] %>>
223
225
  <span>Case sensitive</span>
224
226
  </label>
225
- <label class="ck-checkbox">
227
+ <label class="ck-checkbox" data-ck-check-field="multiline">
226
228
  <input type="checkbox" name="metric[check_config][multiline]" value="true"<%= " checked" if check["multiline"] %>>
227
229
  <span>Multiline</span>
228
230
  </label>
229
- <label class="ck-checkbox">
231
+ <label class="ck-checkbox" data-ck-check-field="trim">
230
232
  <input type="checkbox" name="metric[check_config][trim]" value="true"<%= " checked" if check["trim"] %>>
231
233
  <span>Trim whitespace</span>
232
234
  </label>
@@ -22,7 +22,7 @@
22
22
  <%= check_box_tag "run[judge_only]", "1", run.persisted? && run.judge_only?, id: "run_judge_only", class: "ck-checkbox" %>
23
23
  <span class="ck-checkbox-label__box" aria-hidden="true"></span>
24
24
  <span class="ck-checkbox-label__body">
25
- <span class="ck-checkbox-label__text">Judge-only run</span>
25
+ <span class="ck-checkbox-label__text">Score existing outputs</span>
26
26
  <span class="ck-checkbox-label__hint">Grade an existing column on the dataset instead of running a prompt. Roughly half the LLM calls per row.</span>
27
27
  </span>
28
28
  </label>
@@ -263,7 +263,7 @@ function updateRunForm() {
263
263
  }
264
264
  } else if (!dataset) {
265
265
  if (datasetField) datasetField.className = 'ck-field ck-field--info';
266
- if (datasetHint) datasetHint.textContent = 'Judge-only runs need a dataset that supplies the output column.';
266
+ if (datasetHint) datasetHint.textContent = 'Skip generation and score responses you already have from a dataset column. Works with rubric metrics or deterministic checks.';
267
267
  }
268
268
  } else {
269
269
  valid = prompt !== '';
@@ -10,7 +10,7 @@
10
10
  <%= link_to run.prompt.name, ck_prompt_path(run.prompt), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
11
11
  <span class="ck-runs-table__version">v<%= run.prompt.version_number %></span>
12
12
  <% else %>
13
- <span class="ck-runs-table__version">Judge-only</span>
13
+ <span class="ck-runs-table__version">Scoring only</span>
14
14
  <% end %>
15
15
  <% if run.dataset %>
16
16
  <span class="ck-runs-table__sep">·</span>
@@ -22,7 +22,7 @@
22
22
  <% if run.prompt %>
23
23
  <p class="ck-meta-copy"><%= link_to run.prompt.display_name, prompt_path(run.prompt), class: "ck-link" %>&ensp;<span class="ck-chip" style="text-transform: none;"><%= run.prompt.llm_model %></span></p>
24
24
  <% else %>
25
- <p class="ck-meta-copy">Judge-only run grading column <code><%= run.output_column.presence || "actual_output" %></code><% if run.dataset %> on <%= link_to run.dataset.name, dataset_path(run.dataset), class: "ck-link" %><% end %></p>
25
+ <p class="ck-meta-copy">Scoring existing outputs, grading column <code><%= run.output_column.presence || "actual_output" %></code><% if run.dataset %> on <%= link_to run.dataset.name, dataset_path(run.dataset), class: "ck-link" %><% end %></p>
26
26
  <% end %>
27
27
  </div>
28
28
  <%= render "completion_kit/runs/actions", run: run %>
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.20.3"
2
+ VERSION = "0.20.4"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.20.3
4
+ version: 0.20.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin