completion-kit 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of completion-kit might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/app/controllers/completion_kit/agreements_controller.rb +5 -0
- data/app/controllers/completion_kit/api/v1/agreements_controller.rb +5 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +9 -2
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +1 -1
- data/app/controllers/completion_kit/metrics_controller.rb +97 -36
- data/app/controllers/completion_kit/runs_controller.rb +1 -1
- data/app/jobs/completion_kit/check_review_job.rb +66 -0
- data/app/jobs/completion_kit/generate_row_job.rb +5 -2
- data/app/jobs/completion_kit/metric_suggestion_job.rb +1 -0
- data/app/models/completion_kit/metric.rb +91 -5
- data/app/models/completion_kit/metric_version.rb +34 -7
- data/app/models/completion_kit/response.rb +18 -2
- data/app/models/completion_kit/review.rb +5 -1
- data/app/models/completion_kit/run.rb +70 -14
- data/app/services/completion_kit/checks/contains.rb +21 -0
- data/app/services/completion_kit/checks/equals.rb +26 -0
- data/app/services/completion_kit/checks/json_path_equals.rb +32 -0
- data/app/services/completion_kit/checks/length_bounds.rb +19 -0
- data/app/services/completion_kit/checks/no_refusal.rb +23 -0
- data/app/services/completion_kit/checks/not_contains.rb +21 -0
- data/app/services/completion_kit/checks/regex.rb +20 -0
- data/app/services/completion_kit/checks/registry.rb +41 -0
- data/app/services/completion_kit/checks/result.rb +5 -0
- data/app/services/completion_kit/checks/target_resolver.rb +31 -0
- data/app/services/completion_kit/checks/valid_json.rb +12 -0
- data/app/services/completion_kit/mcp_tools/agreements.rb +2 -0
- data/app/services/completion_kit/mcp_tools/judges.rb +2 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +32 -4
- data/app/services/completion_kit/metric_agreement_examples.rb +2 -0
- data/app/services/completion_kit/metric_improvement_validator.rb +2 -0
- data/app/services/completion_kit/metric_variant_generator.rb +1 -0
- data/app/services/completion_kit/onboarding/concepts.rb +1 -1
- data/app/services/completion_kit/prompt_improvement_service.rb +8 -4
- data/app/services/completion_kit/prompt_improvement_validator.rb +1 -1
- data/app/services/completion_kit/starter_metrics.rb +25 -1
- data/app/views/completion_kit/api_reference/_body.html.erb +4 -4
- data/app/views/completion_kit/metrics/_check_spec.html.erb +17 -0
- data/app/views/completion_kit/metrics/_form.html.erb +104 -1
- data/app/views/completion_kit/metrics/index.html.erb +4 -3
- data/app/views/completion_kit/metrics/show.html.erb +26 -14
- data/app/views/completion_kit/metrics/starter_preview.html.erb +8 -0
- data/app/views/completion_kit/responses/show.html.erb +1 -1
- data/db/migrate/20260629000001_add_check_type_to_completion_kit_metrics.rb +6 -0
- data/db/migrate/20260629000002_add_check_type_to_completion_kit_metric_versions.rb +6 -0
- data/db/migrate/20260629000003_add_passed_to_completion_kit_reviews.rb +5 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +17 -1
|
@@ -224,8 +224,8 @@
|
|
|
224
224
|
</div>
|
|
225
225
|
<div class="ck-api-endpoint">
|
|
226
226
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metrics</p>
|
|
227
|
-
<p class="ck-meta-copy">Create a metric.</p>
|
|
228
|
-
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>instruction</code
|
|
227
|
+
<p class="ck-meta-copy">Create a metric. Defaults to an LLM judge; pass <code>metric_type</code> <code>check</code> with a <code>check_config</code> for a deterministic pass/fail check.</p>
|
|
228
|
+
<p class="ck-api-params"><strong>Required:</strong> <code>name</code> <strong>Optional:</strong> <code>metric_type</code> (llm_judge default, or check), <code>instruction</code> and <code>rubric_bands</code> (array of {stars, description}) for judges, <code>check_config</code> (<code>check_kind</code>, <code>target</code>, plus per-kind keys) for checks</p>
|
|
229
229
|
<%= render "completion_kit/api_reference/example", base_url: base_url, token: token, real_token: real_token, cmd: "curl -X POST #{base_url}/api/v1/metrics \\\n -H \"Authorization: Bearer #{token}\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\"name\": \"relevance\", \"instruction\": \"Is the response relevant?\"}'" %>
|
|
230
230
|
</div>
|
|
231
231
|
<div class="ck-api-endpoint">
|
|
@@ -259,11 +259,11 @@
|
|
|
259
259
|
</div>
|
|
260
260
|
<div class="ck-api-endpoint">
|
|
261
261
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">GET</span> /api/v1/metrics/:metric_id/metric_versions/:id</p>
|
|
262
|
-
<p class="ck-meta-copy">Get a single version with its instruction
|
|
262
|
+
<p class="ck-meta-copy">Get a single version with its instruction and rubric bands (or check_config for checks), state, and source.</p>
|
|
263
263
|
</div>
|
|
264
264
|
<div class="ck-api-endpoint">
|
|
265
265
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/metrics/:metric_id/metric_versions/:id/publish</p>
|
|
266
|
-
<p class="ck-meta-copy">Publish the version as current. Works for a draft (promote) or a superseded published version (revert). Copies the version's instruction and rubric back onto the metric.</p>
|
|
266
|
+
<p class="ck-meta-copy">Publish the version as current. Works for a draft (promote) or a superseded published version (revert). Copies the version's instruction and rubric (or check_config for checks) back onto the metric.</p>
|
|
267
267
|
</div>
|
|
268
268
|
<div class="ck-api-endpoint">
|
|
269
269
|
<p class="ck-api-method"><span class="ck-chip" style="color: var(--ck-danger);">DELETE</span> /api/v1/metrics/:metric_id/metric_versions/:id</p>
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
<% config = local_assigns.fetch(:config) %>
|
|
2
|
+
<dl class="ck-check-spec">
|
|
3
|
+
<div class="ck-check-spec__row">
|
|
4
|
+
<dt class="ck-check-spec__term">Kind</dt>
|
|
5
|
+
<dd class="ck-check-spec__val"><code class="ck-check-spec__code"><%= config["check_kind"] %></code></dd>
|
|
6
|
+
</div>
|
|
7
|
+
<div class="ck-check-spec__row">
|
|
8
|
+
<dt class="ck-check-spec__term">Target</dt>
|
|
9
|
+
<dd class="ck-check-spec__val"><code class="ck-check-spec__code"><%= config["target"] %></code></dd>
|
|
10
|
+
</div>
|
|
11
|
+
<% config.except("check_kind", "target").each do |key, value| %>
|
|
12
|
+
<div class="ck-check-spec__row">
|
|
13
|
+
<dt class="ck-check-spec__term"><%= key.humanize %></dt>
|
|
14
|
+
<dd class="ck-check-spec__val"><code class="ck-check-spec__code"><%= value %></code></dd>
|
|
15
|
+
</div>
|
|
16
|
+
<% end %>
|
|
17
|
+
</dl>
|
|
@@ -66,7 +66,29 @@
|
|
|
66
66
|
<%= ck_field_error(form, :name) %>
|
|
67
67
|
</div>
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
<% editing = metric.persisted? %>
|
|
70
|
+
<% show_judge = !editing || metric.llm_judge? %>
|
|
71
|
+
<% show_check = !editing || metric.check? %>
|
|
72
|
+
|
|
73
|
+
<% if editing %>
|
|
74
|
+
<%= form.hidden_field :metric_type %>
|
|
75
|
+
<% else %>
|
|
76
|
+
<div class="ck-field" data-ck-metric-type>
|
|
77
|
+
<p class="ck-section-title">Metric type</p>
|
|
78
|
+
<p class="ck-hint">An LLM judge scores each output on a 1-5 rubric. A deterministic check passes or fails with no model call.</p>
|
|
79
|
+
<label class="ck-radio">
|
|
80
|
+
<%= form.radio_button :metric_type, "llm_judge", checked: !metric.check? %>
|
|
81
|
+
<span>LLM judge (1-5)</span>
|
|
82
|
+
</label>
|
|
83
|
+
<label class="ck-radio">
|
|
84
|
+
<%= form.radio_button :metric_type, "check", checked: metric.check? %>
|
|
85
|
+
<span>Deterministic check</span>
|
|
86
|
+
</label>
|
|
87
|
+
</div>
|
|
88
|
+
<% end %>
|
|
89
|
+
|
|
90
|
+
<% if show_judge %>
|
|
91
|
+
<div class="ck-field ck-field--spacious" data-ck-metric-editor="llm_judge">
|
|
70
92
|
<p class="ck-section-title">Instruction</p>
|
|
71
93
|
<p class="ck-hint">What should the judge assess? This instruction is sent to the LLM judge when scoring outputs.</p>
|
|
72
94
|
<%= form.text_area :instruction, rows: 8, class: "ck-input ck-input--area", placeholder: "Evaluate whether the output...", **ck_field_aria(form, :instruction) %>
|
|
@@ -129,6 +151,87 @@
|
|
|
129
151
|
<% end %>
|
|
130
152
|
</div>
|
|
131
153
|
</div>
|
|
154
|
+
<% end %>
|
|
155
|
+
|
|
156
|
+
<% if show_check %>
|
|
157
|
+
<% check = metric.check_config || {} %>
|
|
158
|
+
<div class="ck-field ck-field--spacious" data-ck-metric-editor="check">
|
|
159
|
+
<p class="ck-section-title">Check</p>
|
|
160
|
+
<p class="ck-hint">A deterministic pass/fail rule. Fill only the fields the chosen kind needs.</p>
|
|
161
|
+
|
|
162
|
+
<div class="ck-field">
|
|
163
|
+
<label class="ck-label" for="metric_check_kind">Check kind</label>
|
|
164
|
+
<select name="metric[check_config][check_kind]" id="metric_check_kind" class="ck-input">
|
|
165
|
+
<% CompletionKit::Checks::Registry.kinds.each do |kind| %>
|
|
166
|
+
<option value="<%= kind %>"<%= " selected" if check["check_kind"] == kind %>><%= kind %></option>
|
|
167
|
+
<% end %>
|
|
168
|
+
</select>
|
|
169
|
+
</div>
|
|
170
|
+
|
|
171
|
+
<div class="ck-field">
|
|
172
|
+
<label class="ck-label" for="metric_check_target">Target</label>
|
|
173
|
+
<select name="metric[check_config][target]" id="metric_check_target" class="ck-input">
|
|
174
|
+
<% CompletionKit::Checks::TargetResolver::TARGETS.each do |target| %>
|
|
175
|
+
<option value="<%= target %>"<%= " selected" if check["target"] == target %>><%= target %></option>
|
|
176
|
+
<% end %>
|
|
177
|
+
</select>
|
|
178
|
+
</div>
|
|
179
|
+
|
|
180
|
+
<div class="ck-field">
|
|
181
|
+
<label class="ck-label" for="metric_check_target_path">Target path</label>
|
|
182
|
+
<p class="ck-hint">Used when target is json_path, e.g. data.items.0.name.</p>
|
|
183
|
+
<input type="text" name="metric[check_config][target_path]" id="metric_check_target_path" class="ck-input" value="<%= check["target_path"] %>">
|
|
184
|
+
</div>
|
|
185
|
+
|
|
186
|
+
<div class="ck-field">
|
|
187
|
+
<label class="ck-label" for="metric_check_value">Value</label>
|
|
188
|
+
<p class="ck-hint">The substring or exact string for contains, not_contains, or equals.</p>
|
|
189
|
+
<input type="text" name="metric[check_config][value]" id="metric_check_value" class="ck-input" value="<%= check["value"] %>">
|
|
190
|
+
</div>
|
|
191
|
+
|
|
192
|
+
<div class="ck-field">
|
|
193
|
+
<label class="ck-label" for="metric_check_pattern">Pattern</label>
|
|
194
|
+
<p class="ck-hint">A regular expression for the regex kind.</p>
|
|
195
|
+
<input type="text" name="metric[check_config][pattern]" id="metric_check_pattern" class="ck-input" value="<%= check["pattern"] %>">
|
|
196
|
+
</div>
|
|
197
|
+
|
|
198
|
+
<div class="ck-field">
|
|
199
|
+
<label class="ck-label" for="metric_check_json_path">JSON path</label>
|
|
200
|
+
<p class="ck-hint">Dotted path into parsed JSON for json_path_equals.</p>
|
|
201
|
+
<input type="text" name="metric[check_config][json_path]" id="metric_check_json_path" class="ck-input" value="<%= check["json_path"] %>">
|
|
202
|
+
</div>
|
|
203
|
+
|
|
204
|
+
<div class="ck-field">
|
|
205
|
+
<label class="ck-label" for="metric_check_expected">Expected</label>
|
|
206
|
+
<p class="ck-hint">The value the JSON path must equal.</p>
|
|
207
|
+
<input type="text" name="metric[check_config][expected]" id="metric_check_expected" class="ck-input" value="<%= check["expected"] %>">
|
|
208
|
+
</div>
|
|
209
|
+
|
|
210
|
+
<div class="ck-field-row">
|
|
211
|
+
<div class="ck-field">
|
|
212
|
+
<label class="ck-label" for="metric_check_min">Min length</label>
|
|
213
|
+
<input type="number" name="metric[check_config][min]" id="metric_check_min" class="ck-input" value="<%= check["min"] %>">
|
|
214
|
+
</div>
|
|
215
|
+
<div class="ck-field">
|
|
216
|
+
<label class="ck-label" for="metric_check_max">Max length</label>
|
|
217
|
+
<input type="number" name="metric[check_config][max]" id="metric_check_max" class="ck-input" value="<%= check["max"] %>">
|
|
218
|
+
</div>
|
|
219
|
+
</div>
|
|
220
|
+
|
|
221
|
+
<label class="ck-checkbox">
|
|
222
|
+
<input type="checkbox" name="metric[check_config][case_sensitive]" value="true"<%= " checked" if check["case_sensitive"] %>>
|
|
223
|
+
<span>Case sensitive</span>
|
|
224
|
+
</label>
|
|
225
|
+
<label class="ck-checkbox">
|
|
226
|
+
<input type="checkbox" name="metric[check_config][multiline]" value="true"<%= " checked" if check["multiline"] %>>
|
|
227
|
+
<span>Multiline</span>
|
|
228
|
+
</label>
|
|
229
|
+
<label class="ck-checkbox">
|
|
230
|
+
<input type="checkbox" name="metric[check_config][trim]" value="true"<%= " checked" if check["trim"] %>>
|
|
231
|
+
<span>Trim whitespace</span>
|
|
232
|
+
</label>
|
|
233
|
+
</div>
|
|
234
|
+
<% end %>
|
|
132
235
|
|
|
133
236
|
<%= render "completion_kit/tags/picker", record: metric, param_namespace: :metric %>
|
|
134
237
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
<section class="ck-page-header">
|
|
2
2
|
<div>
|
|
3
3
|
<h1 class="ck-title">Metrics</h1>
|
|
4
|
-
<p class="ck-lead">Scoring dimensions
|
|
4
|
+
<p class="ck-lead">Scoring dimensions used to evaluate each response. Each defines one thing to assess, by an LLM judge on a 1-5 scale or a deterministic check.</p>
|
|
5
5
|
</div>
|
|
6
6
|
<div class="ck-actions">
|
|
7
7
|
<%= link_to "New metric", new_metric_path, class: ck_button_classes(:dark) %>
|
|
@@ -29,6 +29,7 @@
|
|
|
29
29
|
<tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
|
|
30
30
|
<td>
|
|
31
31
|
<%= link_to metric_path(metric), class: "ck-record-name" do %><strong><%= metric.name %></strong><% end %>
|
|
32
|
+
<span class="ck-chip ck-chip--soft ck-metric-type-chip"><%= metric.check? ? "Check" : "Judge" %></span>
|
|
32
33
|
<% if metric.tags.any? %>
|
|
33
34
|
<div class="tag-marks-row">
|
|
34
35
|
<%= render "completion_kit/tags/marks", tags: metric.tags %>
|
|
@@ -69,7 +70,7 @@
|
|
|
69
70
|
<% if @available_starters.any? %>
|
|
70
71
|
<section class="ck-starter-row">
|
|
71
72
|
<p class="ck-kicker">Skip the blank page</p>
|
|
72
|
-
<p class="ck-meta-copy">Pre-written
|
|
73
|
+
<p class="ck-meta-copy">Pre-written metrics for the dimensions most teams score against, both LLM judges and deterministic checks. Click a card to preview before it's created.</p>
|
|
73
74
|
<div class="ck-starter-grid">
|
|
74
75
|
<% @available_starters.each do |starter| %>
|
|
75
76
|
<%= render "starter_card", starter: starter %>
|
|
@@ -85,7 +86,7 @@
|
|
|
85
86
|
<% if @available_starters.any? %>
|
|
86
87
|
<section class="ck-starter-row ck-starter-row--empty-state">
|
|
87
88
|
<h2 class="ck-title ck-title--sm">Skip the blank page</h2>
|
|
88
|
-
<p class="ck-lead">
|
|
89
|
+
<p class="ck-lead">Starter metrics we've worked through for common evaluation dimensions, both LLM judges and deterministic checks. Adopt one to drop in a pre-written metric, edit anything after. Or <%= link_to "write your own from scratch", new_metric_path, class: "ck-link" %>.</p>
|
|
89
90
|
<div class="ck-starter-grid">
|
|
90
91
|
<% @available_starters.each do |starter| %>
|
|
91
92
|
<%= render "starter_card", starter: starter %>
|
|
@@ -24,6 +24,13 @@
|
|
|
24
24
|
</section>
|
|
25
25
|
<% end %>
|
|
26
26
|
|
|
27
|
+
<% if @metric.check? %>
|
|
28
|
+
<section class="ck-card ck-card--spaced">
|
|
29
|
+
<p class="ck-kicker">Check</p>
|
|
30
|
+
<p class="ck-meta-copy">A deterministic pass/fail rule. No judge call.</p>
|
|
31
|
+
<%= render "completion_kit/metrics/check_spec", config: @metric.check_config %>
|
|
32
|
+
</section>
|
|
33
|
+
<% else %>
|
|
27
34
|
<section class="ck-card ck-card--spaced">
|
|
28
35
|
<p class="ck-kicker">Rubric<%= render "completion_kit/metrics/rubric_hint" %></p>
|
|
29
36
|
<div class="ck-rubric-display">
|
|
@@ -41,6 +48,7 @@
|
|
|
41
48
|
<% end %>
|
|
42
49
|
</div>
|
|
43
50
|
</section>
|
|
51
|
+
<% end %>
|
|
44
52
|
|
|
45
53
|
<% if CompletionKit.config.judge_agreement_enabled && @versions.any? %>
|
|
46
54
|
<% predecessor_of = @versions.index_with { |v| @versions.detect { |o| o.version_number < v.version_number } } %>
|
|
@@ -184,7 +192,7 @@
|
|
|
184
192
|
<section class="ck-card ck-card--spaced">
|
|
185
193
|
<div class="ck-prompt-preview__header">
|
|
186
194
|
<p class="ck-kicker">Agreement</p>
|
|
187
|
-
<% if draft.nil? && @improve_disagreement_count.positive? %>
|
|
195
|
+
<% if !@metric.check? && draft.nil? && @improve_disagreement_count.positive? %>
|
|
188
196
|
<%= button_to suggest_variants_metric_path(@metric),
|
|
189
197
|
method: :post, form_class: "inline-block",
|
|
190
198
|
class: ck_button_classes(:light, variant: :outline) + " ck-button--sm",
|
|
@@ -194,19 +202,23 @@
|
|
|
194
202
|
<% end %>
|
|
195
203
|
<% end %>
|
|
196
204
|
</div>
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
205
|
+
<% if @metric.check? %>
|
|
206
|
+
<p class="ck-meta-copy">This is a deterministic check, so there is nothing to calibrate.</p>
|
|
207
|
+
<% else %>
|
|
208
|
+
<%= turbo_stream_from "metric_#{@metric.id}_suggestion" %>
|
|
209
|
+
<div id="ck-suggestion-status-<%= @metric.id %>" class="ck-suggestion-status"></div>
|
|
210
|
+
<p class="ck-meta-copy">How often the judge lands on the same score you would. Review its scores to build that signal, and improve the metric to raise it.</p>
|
|
211
|
+
<%= render "completion_kit/agreements/trust_panel",
|
|
212
|
+
stats: CompletionKit::MetricAgreementStats.for(@metric),
|
|
213
|
+
metric: @metric %>
|
|
214
|
+
<% if CompletionKit.config.judge_examples_from_reviews %>
|
|
215
|
+
<%= render "completion_kit/metrics/guiding_examples", metric: @metric, examples: @guiding_examples %>
|
|
216
|
+
<% end %>
|
|
217
|
+
<% if draft %>
|
|
218
|
+
<div class="ck-cal-foot">
|
|
219
|
+
<span class="ck-cal-foot__note">A draft improvement (<%= draft.version_label %>) is waiting in the Versions table above. Open its change to compare, then Publish to use it.</span>
|
|
220
|
+
</div>
|
|
221
|
+
<% end %>
|
|
210
222
|
<% end %>
|
|
211
223
|
</section>
|
|
212
224
|
<% end %>
|
|
@@ -14,6 +14,13 @@
|
|
|
14
14
|
<p class="ck-copy"><%= @starter.catches %></p>
|
|
15
15
|
</section>
|
|
16
16
|
|
|
17
|
+
<% if @starter.metric_type == "check" %>
|
|
18
|
+
<section class="ck-card ck-card--spaced">
|
|
19
|
+
<p class="ck-kicker">Check</p>
|
|
20
|
+
<p class="ck-meta-copy">A deterministic pass/fail rule. No judge call.</p>
|
|
21
|
+
<%= render "completion_kit/metrics/check_spec", config: @starter.check_config %>
|
|
22
|
+
</section>
|
|
23
|
+
<% else %>
|
|
17
24
|
<section class="ck-card ck-card--spaced">
|
|
18
25
|
<p class="ck-kicker">Judge instruction</p>
|
|
19
26
|
<p class="ck-copy"><%= @starter.instruction %></p>
|
|
@@ -36,6 +43,7 @@
|
|
|
36
43
|
<% end %>
|
|
37
44
|
</div>
|
|
38
45
|
</section>
|
|
46
|
+
<% end %>
|
|
39
47
|
|
|
40
48
|
<div class="ck-starter-actions">
|
|
41
49
|
<%= link_to "Cancel", metrics_path, class: ck_button_classes(:light, variant: :outline) %>
|
|
@@ -124,7 +124,7 @@
|
|
|
124
124
|
<% if review.ai_feedback.present? %>
|
|
125
125
|
<p class="ck-review-card__feedback"><%= review.ai_feedback %></p>
|
|
126
126
|
<% end %>
|
|
127
|
-
<% if CompletionKit.config.judge_agreement_enabled && review.metric && review.ai_score %>
|
|
127
|
+
<% if CompletionKit.config.judge_agreement_enabled && review.metric && review.ai_score && review.metric_version&.llm_judge? %>
|
|
128
128
|
<% existing = CompletionKit::Agreement.find_by(
|
|
129
129
|
response_id: @response.id, metric_id: review.metric_id,
|
|
130
130
|
created_by: CompletionKit.config.username.presence || "operator"
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
class AddCheckTypeToCompletionKitMetricVersions < ActiveRecord::Migration[8.1]
|
|
2
|
+
def change
|
|
3
|
+
add_column :completion_kit_metric_versions, :metric_type, :string, null: false, default: "llm_judge"
|
|
4
|
+
add_column :completion_kit_metric_versions, :check_config, :text
|
|
5
|
+
end
|
|
6
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: completion-kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.17.1
|
|
4
|
+
version: 0.18.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Damien Bastin
|
|
@@ -264,6 +264,7 @@ files:
|
|
|
264
264
|
- app/controllers/concerns/completion_kit/tag_filtering.rb
|
|
265
265
|
- app/helpers/completion_kit/application_helper.rb
|
|
266
266
|
- app/jobs/completion_kit/application_job.rb
|
|
267
|
+
- app/jobs/completion_kit/check_review_job.rb
|
|
267
268
|
- app/jobs/completion_kit/generate_row_job.rb
|
|
268
269
|
- app/jobs/completion_kit/judge_review_job.rb
|
|
269
270
|
- app/jobs/completion_kit/metric_suggestion_job.rb
|
|
@@ -296,6 +297,17 @@ files:
|
|
|
296
297
|
- app/services/completion_kit/agreement_math.rb
|
|
297
298
|
- app/services/completion_kit/anthropic_client.rb
|
|
298
299
|
- app/services/completion_kit/api_config.rb
|
|
300
|
+
- app/services/completion_kit/checks/contains.rb
|
|
301
|
+
- app/services/completion_kit/checks/equals.rb
|
|
302
|
+
- app/services/completion_kit/checks/json_path_equals.rb
|
|
303
|
+
- app/services/completion_kit/checks/length_bounds.rb
|
|
304
|
+
- app/services/completion_kit/checks/no_refusal.rb
|
|
305
|
+
- app/services/completion_kit/checks/not_contains.rb
|
|
306
|
+
- app/services/completion_kit/checks/regex.rb
|
|
307
|
+
- app/services/completion_kit/checks/registry.rb
|
|
308
|
+
- app/services/completion_kit/checks/result.rb
|
|
309
|
+
- app/services/completion_kit/checks/target_resolver.rb
|
|
310
|
+
- app/services/completion_kit/checks/valid_json.rb
|
|
299
311
|
- app/services/completion_kit/csv_processor.rb
|
|
300
312
|
- app/services/completion_kit/dashboard_stats.rb
|
|
301
313
|
- app/services/completion_kit/judge_service.rb
|
|
@@ -354,6 +366,7 @@ files:
|
|
|
354
366
|
- app/views/completion_kit/metric_groups/index.html.erb
|
|
355
367
|
- app/views/completion_kit/metric_groups/new.html.erb
|
|
356
368
|
- app/views/completion_kit/metric_groups/show.html.erb
|
|
369
|
+
- app/views/completion_kit/metrics/_check_spec.html.erb
|
|
357
370
|
- app/views/completion_kit/metrics/_form.html.erb
|
|
358
371
|
- app/views/completion_kit/metrics/_guiding_examples.html.erb
|
|
359
372
|
- app/views/completion_kit/metrics/_rubric_diff.html.erb
|
|
@@ -449,6 +462,9 @@ files:
|
|
|
449
462
|
- db/migrate/20260531000003_add_metric_version_fk_to_reviews.rb
|
|
450
463
|
- db/migrate/20260531000004_rename_calibrations_to_agreements.rb
|
|
451
464
|
- db/migrate/20260611000001_add_validation_to_completion_kit_suggestions.rb
|
|
465
|
+
- db/migrate/20260629000001_add_check_type_to_completion_kit_metrics.rb
|
|
466
|
+
- db/migrate/20260629000002_add_check_type_to_completion_kit_metric_versions.rb
|
|
467
|
+
- db/migrate/20260629000003_add_passed_to_completion_kit_reviews.rb
|
|
452
468
|
- lib/completion-kit.rb
|
|
453
469
|
- lib/completion_kit.rb
|
|
454
470
|
- lib/completion_kit/concurrency_check.rb
|