completion-kit 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb708ad1732ab18fac5f415ad90f53374832bd8e629ab2f1530e860a2eca6c03
4
- data.tar.gz: 3e159d8c80601f791a8b5e9fcabea66c1755dfb1635d268efa104a0a17bd9eb1
3
+ metadata.gz: bc297d90963b3ef6543ef3b2f14a94808c9a5a945047bd9ebb4b820fed1c9c9d
4
+ data.tar.gz: dc730069a2f27764f384068d2fab146e3cb402e24f3dfdeea33d700d50118d3a
5
5
  SHA512:
6
- metadata.gz: 2d6bb436f79222b66d6f12bb0cb82642eb9e50c48704bbea76fb6b47c3c3b65f665363a905b4d8e57062445ad31bee44ba65c901de2a07d716d231fdef59e7df
7
- data.tar.gz: ba8c62309efc2f19d343c6744b47e1db25c2a64e3597f19f957bdfd971dac6ee5118fd7f2017c573799f04a5423ac030627ff91ec9785d77822bc22a9ba7d27b
6
+ metadata.gz: c0fe08870fd298ca0ba0e5b850dc9992dbe8fc7cfebb81abb352d1543220c2925f195abd66113204f28ebfa2e48433d9ad389fc94f100ab1f8c9689edfdc6785
7
+ data.tar.gz: 1fc05c857f889ea0e461196471eca63d630c82a84d5447f870db9cd6bdfec9f539125d6189de27e3ee6cfc6969089105212f73e8a48485fecb8282ca886305a5
data/README.md CHANGED
@@ -142,7 +142,7 @@ Only one mode can be active.
142
142
 
143
143
  ## Concepts
144
144
 
145
- - **Prompt.** A versioned template with `{{variable}}` placeholders. Publishing freezes the template; editing a published prompt creates a new version.
145
+ - **Prompt.** A versioned template with `{{variable}}` placeholders. Editing a prompt that's already been run creates a new version, so earlier results stay reproducible.
146
146
  - **Dataset.** A CSV of real inputs. Each row becomes one test case.
147
147
  - **Run.** One execution of a prompt against a dataset. Captures every input (model, temperature, metrics) and stores all outputs and scores.
148
148
  - **Response.** The model's output for one dataset row, with reviews attached.
@@ -105,11 +105,16 @@ document.addEventListener("mouseout", function(e) {
105
105
  });
106
106
 
107
107
  var ckRefreshing = false;
108
+ function ckSetRefreshButtonsBusy(busy) {
109
+ document.querySelectorAll('.ck-icon-btn[title="Refresh models"]').forEach(function(btn) {
110
+ btn.classList.toggle('ck-icon-btn--spinning', busy);
111
+ btn.disabled = busy;
112
+ });
113
+ }
108
114
  function ckRefreshModels() {
109
115
  if (ckRefreshing) return;
110
116
  ckRefreshing = true;
111
- var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
112
- if (btn) btn.classList.add('ck-icon-btn--spinning');
117
+ ckSetRefreshButtonsBusy(true);
113
118
  ckUpdateRefreshProgress();
114
119
  var csrfToken = document.querySelector('meta[name="csrf-token"]').getAttribute("content");
115
120
  fetch("/completion_kit/refresh_models", {
@@ -149,8 +154,7 @@ document.addEventListener("turbo:before-stream-render", function(event) {
149
154
  }
150
155
  if (target === "prompt_llm_model" || target === "run_judge_model") {
151
156
  ckRefreshing = false;
152
- var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
153
- if (btn) btn.classList.remove('ck-icon-btn--spinning');
157
+ ckSetRefreshButtonsBusy(false);
154
158
  var status = document.getElementById('refresh-status');
155
159
  if (status) { status.textContent = 'Models updated.'; setTimeout(function() { status.textContent = ' '; }, 3000); }
156
160
  }
@@ -1148,6 +1148,33 @@ tr:hover .ck-chip--publish {
1148
1148
  font-size: 0.85rem;
1149
1149
  }
1150
1150
 
1151
+ .ck-model-table__unknown {
1152
+ display: inline-flex;
1153
+ align-items: center;
1154
+ justify-content: center;
1155
+ width: 1.25rem;
1156
+ height: 1.25rem;
1157
+ border-radius: 50%;
1158
+ background: rgba(148, 163, 184, 0.12);
1159
+ color: var(--ck-muted);
1160
+ font-family: var(--ck-mono);
1161
+ font-size: 0.78rem;
1162
+ font-weight: 700;
1163
+ line-height: 1;
1164
+ cursor: help;
1165
+ }
1166
+
1167
+ .ck-model-table tr.ck-model-table__section td {
1168
+ padding: 0.85rem 0 0.3rem;
1169
+ font-family: var(--ck-mono);
1170
+ font-size: 0.66rem;
1171
+ font-weight: 600;
1172
+ letter-spacing: 0.16em;
1173
+ text-transform: uppercase;
1174
+ color: var(--ck-dim);
1175
+ }
1176
+ .ck-model-table tr.ck-model-table__section:first-child td { padding-top: 0; }
1177
+
1151
1178
  .ck-model-list__summary {
1152
1179
  display: flex;
1153
1180
  align-items: center;
@@ -1262,6 +1289,102 @@ tr:hover .ck-chip--publish {
1262
1289
  max-width: 36rem;
1263
1290
  }
1264
1291
 
1292
+ .ck-version-note {
1293
+ margin: 0 0 1rem;
1294
+ max-width: 36rem;
1295
+ font-family: var(--ck-sans);
1296
+ font-size: 0.85rem;
1297
+ line-height: 1.5;
1298
+ color: var(--ck-muted);
1299
+ }
1300
+
1301
+ .ck-version-change {
1302
+ margin: 0.5rem 0 0;
1303
+ display: flex;
1304
+ align-items: center;
1305
+ gap: 0.5rem;
1306
+ flex-wrap: wrap;
1307
+ font-family: var(--ck-mono);
1308
+ font-size: 0.8rem;
1309
+ }
1310
+ .ck-version-change__label {
1311
+ font-size: 0.66rem;
1312
+ font-weight: 600;
1313
+ letter-spacing: 0.12em;
1314
+ text-transform: uppercase;
1315
+ color: var(--ck-dim);
1316
+ }
1317
+ .ck-version-change__old {
1318
+ padding: 0.15em 0.5em;
1319
+ border-radius: var(--ck-radius);
1320
+ background: var(--ck-danger-soft);
1321
+ color: var(--ck-danger);
1322
+ text-decoration: line-through;
1323
+ }
1324
+ .ck-version-change__arrow { color: var(--ck-dim); }
1325
+ .ck-version-change__new {
1326
+ padding: 0.15em 0.5em;
1327
+ border-radius: var(--ck-radius);
1328
+ background: var(--ck-success-soft);
1329
+ color: var(--ck-success);
1330
+ }
1331
+
1332
+ .ck-modal__body .ck-version-change + .ck-suggest-diff { margin-top: 0.9rem; }
1333
+
1334
+ .ck-cell-link {
1335
+ background: none;
1336
+ border: none;
1337
+ padding: 0;
1338
+ font-family: var(--ck-mono);
1339
+ font-size: 0.8rem;
1340
+ color: var(--ck-accent);
1341
+ cursor: pointer;
1342
+ text-decoration: underline;
1343
+ text-underline-offset: 2px;
1344
+ }
1345
+ .ck-cell-link:hover { color: var(--ck-accent-hover); }
1346
+ .ck-cell-link--delta {
1347
+ font-size: 1rem;
1348
+ font-weight: 700;
1349
+ text-decoration: none;
1350
+ color: var(--ck-dim);
1351
+ }
1352
+ .ck-cell-link--delta:hover { color: var(--ck-accent); }
1353
+
1354
+ /* Version-history table: cell that pairs the version label + publish control (left) with the diff trigger (right) */
1355
+ .ck-version-cell {
1356
+ display: flex;
1357
+ align-items: center;
1358
+ justify-content: space-between;
1359
+ gap: 0.75rem;
1360
+ }
1361
+ .ck-version-cell__label { display: flex; align-items: center; gap: 0.5rem; }
1362
+
1363
+ /* the version currently being viewed: faint accent wash (like the model chips)
1364
+ plus a bright accent border drawn on the cells (Safari can't box-shadow a <tr>) */
1365
+ .ck-results-table tbody tr.ck-results-table__row--active,
1366
+ .ck-results-table tbody tr.ck-results-table__row--active:hover {
1367
+ background: var(--ck-accent-soft);
1368
+ }
1369
+ .ck-results-table tbody tr.ck-results-table__row--active td {
1370
+ box-shadow: inset 0 2px 0 var(--ck-accent), inset 0 -2px 0 var(--ck-accent);
1371
+ }
1372
+ .ck-results-table tbody tr.ck-results-table__row--active td:first-child {
1373
+ box-shadow: inset 0 2px 0 var(--ck-accent), inset 0 -2px 0 var(--ck-accent), inset 2px 0 0 var(--ck-accent);
1374
+ }
1375
+ .ck-results-table tbody tr.ck-results-table__row--active td:last-child {
1376
+ box-shadow: inset 0 2px 0 var(--ck-accent), inset 0 -2px 0 var(--ck-accent), inset -2px 0 0 var(--ck-accent);
1377
+ }
1378
+ /* when it's the last row, round its bottom corners so the border isn't clipped
1379
+ by the table's rounded container */
1380
+ .ck-results-table tbody tr.ck-results-table__row--active:last-child td:first-child {
1381
+ border-bottom-left-radius: calc(var(--ck-radius-lg) - 1px);
1382
+ }
1383
+ .ck-results-table tbody tr.ck-results-table__row--active:last-child td:last-child {
1384
+ border-bottom-right-radius: calc(var(--ck-radius-lg) - 1px);
1385
+ }
1386
+ .ck-results-table tbody tr.ck-results-table__row--active strong { color: var(--ck-accent-hover); }
1387
+
1265
1388
  .ck-field-hint {
1266
1389
  font-family: var(--ck-sans);
1267
1390
  font-size: 0.8rem;
@@ -1643,30 +1766,54 @@ tr:hover .ck-chip--publish {
1643
1766
  color: var(--ck-text);
1644
1767
  }
1645
1768
 
1769
+ @keyframes ck-flash-in {
1770
+ from { opacity: 0; transform: translateY(-4px); }
1771
+ to { opacity: 1; transform: translateY(0); }
1772
+ }
1773
+
1646
1774
  .ck-flash {
1647
- margin-bottom: 1rem;
1648
- padding: 0.75rem 1rem;
1649
- border-radius: var(--ck-radius);
1775
+ margin-bottom: 1.25rem;
1776
+ padding: 0.8rem 1rem;
1650
1777
  border: 1px solid transparent;
1778
+ border-radius: var(--ck-radius);
1651
1779
  font-family: var(--ck-sans);
1652
- font-size: 0.95rem;
1780
+ font-size: 0.9rem;
1781
+ line-height: 1.6;
1782
+ animation: ck-flash-in 0.22s ease both;
1653
1783
  }
1784
+ .ck-flash--notice { background: rgba(45, 212, 168, 0.08); border-color: rgba(45, 212, 168, 0.32); }
1785
+ .ck-flash--alert { background: rgba(248, 113, 113, 0.08); border-color: rgba(248, 113, 113, 0.32); }
1654
1786
 
1655
- .ck-flash--notice {
1656
- border-color: rgba(34, 197, 94, 0.3);
1657
- background: var(--ck-success-soft);
1658
- color: var(--ck-success);
1787
+ /* layout flash: a mono status tag (the engine's kicker treatment) inline before the message */
1788
+ .ck-flash__label {
1789
+ margin-right: 0.6rem;
1790
+ font-family: var(--ck-mono);
1791
+ font-size: 0.7rem;
1792
+ font-weight: 600;
1793
+ letter-spacing: 0.14em;
1794
+ text-transform: uppercase;
1795
+ white-space: nowrap;
1659
1796
  }
1660
-
1661
- .ck-flash--alert {
1662
- border-color: rgba(239, 68, 68, 0.3);
1663
- background: var(--ck-danger-soft);
1664
- color: var(--ck-danger);
1797
+ .ck-flash__label::before {
1798
+ content: "";
1799
+ display: inline-block;
1800
+ width: 6px;
1801
+ height: 6px;
1802
+ margin-right: 0.45rem;
1803
+ border-radius: 50%;
1804
+ background: currentColor;
1805
+ box-shadow: 0 0 7px currentColor;
1806
+ vertical-align: 0.12em;
1665
1807
  }
1808
+ .ck-flash--notice .ck-flash__label { color: var(--ck-success); }
1809
+ .ck-flash--alert .ck-flash__label { color: var(--ck-danger); }
1810
+ .ck-flash__body { color: var(--ck-text); }
1666
1811
 
1667
- .ck-flash__title {
1668
- margin: 0;
1669
- font-weight: 700;
1812
+ /* form-error summary inside .ck-flash--alert: title sentence + ck-error-list */
1813
+ .ck-flash__title { margin: 0; font-weight: 600; color: var(--ck-danger); }
1814
+
1815
+ @media (prefers-reduced-motion: reduce) {
1816
+ .ck-flash { animation: none; }
1670
1817
  }
1671
1818
 
1672
1819
  .ck-banner {
@@ -1996,6 +2143,10 @@ select.ck-input {
1996
2143
  box-shadow: 0 30px 80px rgba(0, 0, 0, 0.55);
1997
2144
  overflow: hidden;
1998
2145
  }
2146
+ /* the panel is the dialog's autofocus target (so the close button isn't auto-focused);
2147
+ it's a container, not a control, so don't show a ring on it */
2148
+ .ck-modal__panel:focus,
2149
+ .ck-modal__panel:focus-visible { outline: none; }
1999
2150
 
2000
2151
  .ck-modal__header {
2001
2152
  display: flex;
@@ -2032,9 +2183,8 @@ select.ck-input {
2032
2183
 
2033
2184
  .ck-modal__close {
2034
2185
  appearance: none;
2186
+ position: relative;
2035
2187
  display: inline-flex;
2036
- align-items: center;
2037
- justify-content: center;
2038
2188
  width: 28px;
2039
2189
  height: 28px;
2040
2190
  padding: 0;
@@ -2043,12 +2193,25 @@ select.ck-input {
2043
2193
  outline: none;
2044
2194
  background: transparent;
2045
2195
  color: var(--ck-dim);
2046
- font-family: var(--ck-sans);
2047
- font-size: 1.5rem;
2048
- line-height: 1;
2196
+ font-size: 0;
2049
2197
  cursor: pointer;
2050
2198
  transition: color 0.15s, background 0.15s;
2051
2199
  }
2200
+ /* the "×" is drawn as two crossed bars centred on the button, so it (and the
2201
+ focus ring around the button) sit dead centre regardless of font metrics */
2202
+ .ck-modal__close::before,
2203
+ .ck-modal__close::after {
2204
+ content: "";
2205
+ position: absolute;
2206
+ top: 50%;
2207
+ left: 50%;
2208
+ width: 13px;
2209
+ height: 1.6px;
2210
+ border-radius: 1px;
2211
+ background: currentColor;
2212
+ }
2213
+ .ck-modal__close::before { transform: translate(-50%, -50%) rotate(45deg); }
2214
+ .ck-modal__close::after { transform: translate(-50%, -50%) rotate(-45deg); }
2052
2215
 
2053
2216
  .ck-modal__close:hover {
2054
2217
  color: var(--ck-text);
@@ -2067,6 +2230,8 @@ select.ck-input {
2067
2230
  overflow: auto;
2068
2231
  padding: 0 1.5rem;
2069
2232
  }
2233
+ /* no footer? then the body needs its own bottom padding */
2234
+ .ck-modal__body:last-child { padding-bottom: 1.5rem; }
2070
2235
 
2071
2236
  .ck-modal__footer {
2072
2237
  display: flex;
@@ -2862,6 +3027,29 @@ select.ck-input {
2862
3027
  color: var(--ck-dim);
2863
3028
  }
2864
3029
 
3030
+ .ck-prompts-table__desc {
3031
+ margin: 0.3rem 0 0;
3032
+ font-family: var(--ck-sans);
3033
+ font-size: 0.82rem;
3034
+ font-weight: 400;
3035
+ line-height: 1.45;
3036
+ color: var(--ck-muted);
3037
+ white-space: normal;
3038
+ max-width: 42ch;
3039
+ }
3040
+
3041
+ .ck-endpoint--compact {
3042
+ margin-top: 0.45rem;
3043
+ max-width: 26rem;
3044
+ }
3045
+ .ck-endpoint--compact .ck-endpoint__url {
3046
+ min-width: 0;
3047
+ white-space: nowrap;
3048
+ overflow: hidden;
3049
+ text-overflow: ellipsis;
3050
+ }
3051
+ .ck-endpoint--compact .ck-icon-btn { flex-shrink: 0; }
3052
+
2865
3053
  .ck-clamp-2 {
2866
3054
  display: -webkit-box;
2867
3055
  -webkit-line-clamp: 2;
@@ -4008,6 +4196,48 @@ a.tag-mark {
4008
4196
  }
4009
4197
  .ck-launch__step-cta:hover { filter: brightness(1.1); transform: translateY(-1px); }
4010
4198
 
4199
+ /* Opt-in starter data — quiet, secondary (bright cyan is reserved for the next-step CTA) */
4200
+ .ck-launch__sample {
4201
+ margin-top: 1.5rem;
4202
+ padding: 1rem 1.1rem;
4203
+ display: flex;
4204
+ flex-wrap: wrap;
4205
+ align-items: center;
4206
+ justify-content: space-between;
4207
+ gap: 0.85rem 1.25rem;
4208
+ border: 1px dashed var(--ck-line);
4209
+ border-radius: var(--ck-radius);
4210
+ background: var(--ck-bg-strong);
4211
+ }
4212
+ .ck-launch__sample-copy {
4213
+ margin: 0;
4214
+ flex: 1 1 18rem;
4215
+ font-size: 0.85rem;
4216
+ line-height: 1.55;
4217
+ color: var(--ck-muted);
4218
+ }
4219
+ .ck-launch__sample-copy code { font-family: var(--ck-mono); font-size: 0.85em; color: var(--ck-text); }
4220
+ .ck-launch__sample-cta {
4221
+ display: inline-flex;
4222
+ align-items: center;
4223
+ gap: 0.4rem;
4224
+ flex-shrink: 0;
4225
+ margin: 0;
4226
+ padding: 0.5rem 0.95rem;
4227
+ background: transparent;
4228
+ border: 1px solid var(--ck-line);
4229
+ color: var(--ck-text);
4230
+ border-radius: var(--ck-radius);
4231
+ font-family: var(--ck-mono);
4232
+ font-size: 0.72rem;
4233
+ letter-spacing: 0.06em;
4234
+ text-transform: uppercase;
4235
+ font-weight: 600;
4236
+ cursor: pointer;
4237
+ transition: border-color 0.15s, color 0.15s, background 0.15s;
4238
+ }
4239
+ .ck-launch__sample-cta:hover { border-color: var(--ck-accent); color: var(--ck-accent); background: var(--ck-accent-soft); }
4240
+
4011
4241
  .ck-launch__panel-footer {
4012
4242
  margin-top: 1.75rem;
4013
4243
  padding-top: 1.25rem;
@@ -14,5 +14,10 @@ module CompletionKit
14
14
  cookies[DISMISS_COOKIE] = { value: "1", expires: 1.year.from_now, httponly: true }
15
15
  redirect_to prompts_path, notice: "Setup skipped. Pick it back up from Settings → Getting started any time."
16
16
  end
17
+
18
+ def sample_data
19
+ Onboarding::SampleData.install!
20
+ redirect_to onboarding_path, notice: "Loaded a sample dataset and prompt — edit or delete them whenever."
21
+ end
17
22
  end
18
23
  end
@@ -50,7 +50,7 @@ module CompletionKit
50
50
 
51
51
  def publish
52
52
  @prompt.publish!
53
- redirect_to prompt_path(@prompt), notice: "#{@prompt.display_name} is now the current version."
53
+ redirect_to prompt_path(@prompt), notice: "#{@prompt.display_name} is now the published version."
54
54
  end
55
55
 
56
56
  private
@@ -72,6 +72,46 @@ module CompletionKit
72
72
  CompletionKit::ProviderCredential::PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
73
73
  end
74
74
 
75
+ OPENAI_MODEL_FAMILY_ORDER = ["GPT-5", "GPT-4", "o-series", "GPT-3.5", "GPT-OSS", "Other"].freeze
76
+
77
+ def ck_openai_model_family(model_id)
78
+ id = model_id.to_s
79
+ return "GPT-5" if id.match?(/\Agpt-5/i)
80
+ return "GPT-4" if id.match?(/\Agpt-4/i)
81
+ return "GPT-3.5" if id.match?(/\Agpt-3/i)
82
+ return "GPT-OSS" if id.match?(/\Agpt-oss/i)
83
+ return "o-series" if id.match?(/\Ao\d/i)
84
+ "Other"
85
+ end
86
+
87
+ # Groups a provider's models for the models-card table, mirroring how the
88
+ # dropdown sub-groups: OpenRouter clusters by upstream vendor (the part
89
+ # before "/"); OpenAI clusters by family; everyone else stays flat. Returns
90
+ # [[section_label_or_nil, [models]], ...]. A single section collapses to a
91
+ # nil label so we don't render a redundant header.
92
+ def ck_model_table_sections(models)
93
+ models = models.to_a
94
+ sections =
95
+ case models.first&.provider
96
+ when "openrouter"
97
+ models.group_by { |m| m.model_id.to_s.split("/", 2).first.delete_prefix("~") }
98
+ .sort_by { |label, _| label }
99
+ when "openai"
100
+ grouped = models.group_by { |m| ck_openai_model_family(m.model_id) }
101
+ ordered = OPENAI_MODEL_FAMILY_ORDER.filter_map { |label| [label, grouped[label]] if grouped[label] }
102
+ extras = (grouped.keys - OPENAI_MODEL_FAMILY_ORDER).sort.map { |label| [label, grouped[label]] }
103
+ ordered + extras
104
+ else
105
+ [[nil, models]]
106
+ end
107
+ sections.size <= 1 ? [[nil, models]] : sections
108
+ end
109
+
110
+ def ck_model_option_label(model)
111
+ return "#{model[:name]} (?)" if model.key?(:judging_confirmed) && !model[:judging_confirmed]
112
+ model[:name]
113
+ end
114
+
75
115
  def ck_grouped_models(models, selected = nil)
76
116
  if selected.present? && models.none? { |m| m[:id] == selected }
77
117
  retired = CompletionKit::Model.find_by(model_id: selected)
@@ -80,18 +120,31 @@ module CompletionKit
80
120
  end
81
121
  end
82
122
 
83
- groups = models.group_by do |m|
84
- if m[:provider] == "openrouter"
85
- upstream = m[:id].to_s.split("/", 2).first
86
- "OpenRouter — #{upstream}"
87
- else
88
- ck_provider_label(m[:provider])
89
- end
123
+ groups = models.group_by { |m| ck_model_optgroup_label(m) }
124
+ ordered_keys = groups.keys.sort_by { |label| ck_model_optgroup_sort_key(label) }
125
+ grouped = ordered_keys.map { |label| [label, groups[label].map { |m| [ck_model_option_label(m), m[:id]] }] }
126
+ grouped_options_for_select(grouped, selected)
127
+ end
128
+
129
+ # Optgroup label for the model select — mirrors the provider models table:
130
+ # OpenRouter splits by upstream vendor, OpenAI splits by family, everyone
131
+ # else is a single group.
132
+ def ck_model_optgroup_label(model)
133
+ case model[:provider]
134
+ when "openrouter" then "OpenRouter — #{model[:id].to_s.split("/", 2).first.delete_prefix("~")}"
135
+ when "openai" then "OpenAI — #{ck_openai_model_family(model[:id])}"
136
+ else ck_provider_label(model[:provider])
90
137
  end
138
+ end
91
139
 
92
- ordered_keys = groups.keys.sort_by { |label| [label.start_with?("OpenRouter") ? 1 : 0, label] }
93
- grouped = ordered_keys.map { |label| [label, groups[label].map { |m| [m[:name], m[:id]] }] }
94
- grouped_options_for_select(grouped, selected)
140
+ def ck_model_optgroup_sort_key(label)
141
+ if label.start_with?("OpenAI ")
142
+ [0, OPENAI_MODEL_FAMILY_ORDER.index(label.delete_prefix("OpenAI — ")), label]
143
+ elsif label.start_with?("OpenRouter")
144
+ [2, 0, label]
145
+ else
146
+ [1, 0, label]
147
+ end
95
148
  end
96
149
 
97
150
  def ck_model_options_html(scope)
@@ -71,6 +71,7 @@ module CompletionKit
71
71
  )
72
72
  review.save!
73
73
 
74
+ confirm_judging_capability(run.judge_model)
74
75
  run.send(:broadcast_response_update, response)
75
76
  run.send(:broadcast_progress)
76
77
  enqueue_completion_check
@@ -78,6 +79,15 @@ module CompletionKit
78
79
 
79
80
  private
80
81
 
82
+ # A model with supports_judging == nil ("untested") just produced a valid
83
+ # review — promote it to confirmed. No-op once confirmed (so repeated runs
84
+ # don't churn the row), and a model already flagged as a bad judge stays so.
85
+ def confirm_judging_capability(judge_model_id)
86
+ model = Model.find_by(provider: ApiConfig.provider_for_model(judge_model_id), model_id: judge_model_id)
87
+ return unless model && model.supports_judging.nil?
88
+ model.update_columns(supports_judging: true, judging_error: nil)
89
+ end
90
+
81
91
  def record_terminal_failure!(error)
82
92
  response_id = @response_id || arguments.first
83
93
  metric_id = @metric_id || arguments.last
@@ -8,6 +8,9 @@ module CompletionKit
8
8
 
9
9
  scope :active, -> { where(status: "active") }
10
10
  scope :for_generation, -> { active.where(supports_generation: true) }
11
- scope :for_judging, -> { active.where(supports_judging: true) }
11
+ # Includes models not yet confirmed as judges (supports_judging: nil) — worth
12
+ # a try, and a successful run flips them to confirmed. Only models known to be
13
+ # bad judges (false) are excluded.
14
+ scope :for_judging, -> { active.where(supports_judging: [true, nil]) }
12
15
  end
13
16
  end
@@ -47,6 +47,14 @@ module CompletionKit
47
47
  false
48
48
  end
49
49
 
50
+ def model_count
51
+ Model.where(provider: provider).active.count
52
+ end
53
+
54
+ def self.discovery_in_progress?
55
+ where(discovery_status: "discovering").exists?
56
+ end
57
+
50
58
  def prompt_count
51
59
  model_ids = Model.where(provider: provider).pluck(:model_id)
52
60
  return 0 if model_ids.empty?
@@ -61,7 +61,9 @@ module CompletionKit
61
61
  end
62
62
  query = query.where(provider: provider) if provider.present?
63
63
  models = query.order(:provider, :display_name).map do |m|
64
- { id: m.model_id, name: m.display_name || m.model_id, provider: m.provider }
64
+ entry = { id: m.model_id, name: m.display_name || m.model_id, provider: m.provider }
65
+ entry[:judging_confirmed] = !m.supports_judging.nil? if scope == :judging
66
+ entry
65
67
  end
66
68
 
67
69
  return models if models.any?
@@ -13,8 +13,13 @@ module CompletionKit
13
13
  end
14
14
 
15
15
  def refresh!(&on_progress)
16
- models_with_names = fetch_models
17
- reconcile(models_with_names)
16
+ discovered = fetch_models
17
+ reconcile(discovered)
18
+ # OpenRouter publishes capability metadata (output modalities, etc.), so we
19
+ # derive everything from the model list and skip live probing entirely.
20
+ # Judging stays unknown ("?") until a real run proves it.
21
+ return if @provider == "openrouter"
22
+
18
23
  probe_new_models(&on_progress)
19
24
  end
20
25
 
@@ -86,10 +91,19 @@ module CompletionKit
86
91
  next nil if entry["deprecated"] == true
87
92
  context_length = entry["context_length"].to_i
88
93
  next nil if context_length < 8192
89
- { id: entry["id"], display_name: entry["name"] }
94
+ { id: entry["id"], display_name: entry["name"], supports_generation: openrouter_text_output?(entry) }
90
95
  end
91
96
  end
92
97
 
98
+ # OpenRouter exposes architecture.output_modalities (e.g. ["text"], ["image"],
99
+ # ["text", "image"]). A model can be used for generation/judging only if it
100
+ # outputs text. When the field is missing we keep the historical default of
101
+ # treating the model as text-capable.
102
+ def openrouter_text_output?(entry)
103
+ modalities = Array(entry.dig("architecture", "output_modalities")).map(&:to_s)
104
+ modalities.empty? || modalities.include?("text")
105
+ end
106
+
93
107
  def fetch_ollama_models
94
108
  raise DiscoveryError, "Ollama endpoint URL is required" if @api_endpoint.blank?
95
109
  base_url = @api_endpoint.to_s.delete_suffix("/")
@@ -100,35 +114,67 @@ module CompletionKit
100
114
  JSON.parse(response.body).fetch("data", []).map { |e| { id: e["id"], display_name: e["id"] } }
101
115
  end
102
116
 
103
- def reconcile(models_with_names)
104
- api_model_ids = models_with_names.map { |m| m[:id] }
105
- names_by_id = models_with_names.each_with_object({}) { |m, h| h[m[:id]] = m[:display_name] }
117
+ def reconcile(discovered)
118
+ api_model_ids = discovered.map { |m| m[:id] }
119
+ meta_by_id = discovered.index_by { |m| m[:id] }
106
120
  existing = Model.where(provider: @provider).index_by(&:model_id)
107
121
 
108
122
  api_model_ids.each do |model_id|
109
- if existing[model_id]
110
- attrs = { status: "active", retired_at: nil }
111
- attrs[:display_name] = names_by_id[model_id] if names_by_id[model_id].present?
112
- existing[model_id].update!(attrs) if existing[model_id].status == "retired" || names_by_id[model_id].present?
123
+ meta = meta_by_id[model_id]
124
+ if (model = existing[model_id])
125
+ reconcile_existing_model(model, meta)
113
126
  else
114
- attrs = {
115
- provider: @provider,
116
- model_id: model_id,
117
- display_name: names_by_id[model_id],
118
- status: "active",
119
- discovered_at: Time.current
120
- }
121
- if %w[openrouter ollama].include?(@provider)
122
- attrs[:supports_generation] = true
123
- attrs[:probed_at] = nil
124
- end
125
- Model.create!(attrs)
127
+ Model.create!(new_model_attrs(model_id, meta))
126
128
  end
127
129
  end
128
130
 
129
- active_not_in_api = Model.where(provider: @provider, status: "active")
130
- .where.not(model_id: api_model_ids)
131
- active_not_in_api.update_all(status: "retired", retired_at: Time.current)
131
+ Model.where(provider: @provider, status: "active")
132
+ .where.not(model_id: api_model_ids)
133
+ .update_all(status: "retired", retired_at: Time.current)
134
+ end
135
+
136
+ def new_model_attrs(model_id, meta)
137
+ attrs = {
138
+ provider: @provider,
139
+ model_id: model_id,
140
+ display_name: meta[:display_name],
141
+ status: "active",
142
+ discovered_at: Time.current
143
+ }
144
+ if @provider == "openrouter"
145
+ supports_generation = meta[:supports_generation] != false
146
+ attrs.merge!(
147
+ supports_generation: supports_generation,
148
+ supports_judging: nil,
149
+ probed_at: Time.current,
150
+ status: supports_generation ? "active" : "failed"
151
+ )
152
+ elsif @provider == "ollama"
153
+ attrs[:supports_generation] = true
154
+ attrs[:probed_at] = nil
155
+ end
156
+ attrs
157
+ end
158
+
159
+ def reconcile_existing_model(model, meta)
160
+ if @provider == "openrouter"
161
+ # Re-derive generation capability from the published metadata every refresh
162
+ # (fixes models discovered before capability metadata was used). Leave
163
+ # supports_judging alone — it's "learned" from successful runs.
164
+ supports_generation = meta[:supports_generation] != false
165
+ model.update!(
166
+ display_name: meta[:display_name].presence || model.display_name,
167
+ supports_generation: supports_generation,
168
+ generation_error: nil,
169
+ probed_at: Time.current,
170
+ status: supports_generation ? "active" : "failed",
171
+ retired_at: nil
172
+ )
173
+ else
174
+ attrs = { status: "active", retired_at: nil }
175
+ attrs[:display_name] = meta[:display_name] if meta[:display_name].present?
176
+ model.update!(attrs) if model.status == "retired" || meta[:display_name].present?
177
+ end
132
178
  end
133
179
 
134
180
  def probe_new_models(&on_progress)
@@ -223,7 +269,6 @@ module CompletionKit
223
269
  case @provider
224
270
  when "openai" then openai_probe(model_id, input, max_tokens)
225
271
  when "anthropic" then anthropic_probe(model_id, input, max_tokens)
226
- when "openrouter" then openrouter_probe(model_id, input, max_tokens)
227
272
  when "ollama" then ollama_probe(model_id, input, max_tokens)
228
273
  else raise ArgumentError, "Unsupported probe provider: #{@provider}"
229
274
  end
@@ -290,23 +335,6 @@ module CompletionKit
290
335
  end
291
336
  end
292
337
 
293
- def openrouter_probe(model_id, input, max_tokens)
294
- conn = Faraday.new(url: "https://openrouter.ai") do |f|
295
- f.options.timeout = 30
296
- f.options.open_timeout = 5
297
- f.request :retry, max: 1, interval: 0.5
298
- f.adapter Faraday.default_adapter
299
- end
300
- conn.post do |req|
301
- req.url "/api/v1/chat/completions"
302
- req.headers["Content-Type"] = "application/json"
303
- req.headers["Authorization"] = "Bearer #{@api_key}"
304
- req.headers["HTTP-Referer"] = "https://completionkit.com"
305
- req.headers["X-Title"] = "CompletionKit"
306
- req.body = { model: model_id, messages: [{ role: "user", content: input }], max_tokens: max_tokens }.to_json
307
- end
308
- end
309
-
310
338
  def ollama_probe(model_id, input, max_tokens)
311
339
  base_url = @api_endpoint.to_s.delete_suffix("/")
312
340
  conn = Faraday.new(url: base_url) do |f|
@@ -50,6 +50,13 @@ module CompletionKit
50
50
  steps.all?(&:done?)
51
51
  end
52
52
 
53
+ # Whether the "Load sample data" button should show — only while neither
54
+ # the dataset nor the prompt step is done (SampleData.install! no-ops
55
+ # otherwise, so the button would do nothing).
56
+ def sample_loadable?
57
+ steps.none? { |s| %i[dataset prompt].include?(s.key) && s.done? }
58
+ end
59
+
53
60
  def progress
54
61
  done = steps.count(&:done?)
55
62
  { done: done, total: steps.size, percent: ((done.to_f / steps.size) * 100).round }
@@ -0,0 +1,37 @@
1
+ module CompletionKit
2
+ module Onboarding
3
+ # Opt-in starter data for the onboarding page: one dataset + one prompt so a
4
+ # brand-new install has something to poke at. Idempotent — a no-op once the
5
+ # workspace already has any prompt or dataset. Deliberately does NOT create a
6
+ # provider credential (needs a real API key) or a run (user-initiated).
7
+ module SampleData
8
+ SAMPLE_CSV = <<~CSV.freeze
9
+ ticket
10
+ "My order #4827 arrived with a dented panel. I emailed photos 11 days ago and heard nothing. Today I was told the return window 'closed'. I paid $749. I want a refund or replacement, not store credit."
11
+ "Tracking says delivered to my porch Tuesday 3:47pm. I was home all day, nothing arrived, neighbours' cameras show no van. Order #5102 — a $315 mixer, wedding gift, wedding is Saturday. Can someone look today?"
12
+ "WELCOME20 says 'invalid' at checkout but the promo email says it's good through May 31. Same email I'm signed in with. Tried Chrome and Safari. Cart is $186 waiting on you."
13
+ CSV
14
+
15
+ SAMPLE_PROMPT = {
16
+ name: "Sample: Support reply",
17
+ description: "A starter prompt — drafts a warm, professional reply to a customer support ticket. Edit it or delete it; it's just here to get you going.",
18
+ template: "You are a senior customer-support specialist. Write a warm, professional reply to this ticket. Acknowledge the customer's situation, be specific about next steps, and don't be defensive.\n\nTicket:\n{{ticket}}",
19
+ llm_model: "gpt-4o-mini"
20
+ }.freeze
21
+
22
+ module_function
23
+
24
+ def install!
25
+ return if CompletionKit::Prompt.exists? || CompletionKit::Dataset.exists?
26
+
27
+ CompletionKit::Dataset.create!(name: "Sample: Customer tickets", csv_data: SAMPLE_CSV)
28
+ CompletionKit::Prompt.create!(
29
+ name: SAMPLE_PROMPT[:name],
30
+ description: SAMPLE_PROMPT[:description],
31
+ template: SAMPLE_PROMPT[:template],
32
+ llm_model: SAMPLE_PROMPT[:llm_model]
33
+ )
34
+ end
35
+ end
36
+ end
37
+ end
@@ -69,6 +69,15 @@
69
69
  <% end %>
70
70
  </ol>
71
71
 
72
+ <% if @checklist.sample_loadable? %>
73
+ <div class="ck-launch__sample">
74
+ <p class="ck-launch__sample-copy">Just exploring? Drop in a sample dataset and prompt to poke around — they're labelled <code>Sample:</code>, and you can edit or delete them whenever.</p>
75
+ <%= button_to onboarding_sample_data_path, method: :post, class: "ck-launch__sample-cta", form: { style: "display:inline" } do %>
76
+ Load sample data <span aria-hidden="true">&rarr;</span>
77
+ <% end %>
78
+ </div>
79
+ <% end %>
80
+
72
81
  <div class="ck-launch__panel-footer">
73
82
  <%= button_to dismiss_onboarding_path, method: :post, class: "ck-launch__dismiss", form: { style: "display:inline" } do %>
74
83
  Skip setup &mdash; go to the app <span aria-hidden="true">&rarr;</span>
@@ -10,6 +10,10 @@
10
10
  </div>
11
11
  <% end %>
12
12
 
13
+ <% if prompt.persisted? && prompt.runs.exists? %>
14
+ <p class="ck-version-note">Editing <%= prompt.version_label %> — it has runs, so saving creates a new version of this prompt.</p>
15
+ <% end %>
16
+
13
17
  <div class="ck-card ck-form-card">
14
18
  <div class="ck-field">
15
19
  <%= form.label :name, "Name", class: "ck-label" %>
@@ -33,7 +37,8 @@
33
37
  <% if available.any? %>
34
38
  <div class="ck-select-with-action">
35
39
  <%= form.select :llm_model, ck_grouped_models(available, prompt.llm_model), { include_blank: "— Select a model —" }, { class: "ck-input", id: "prompt_llm_model" } %>
36
- <button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
40
+ <% ck_refreshing = CompletionKit::ProviderCredential.discovery_in_progress? %>
41
+ <button type="button" class="ck-icon-btn<%= ' ck-icon-btn--spinning' if ck_refreshing %>" title="Refresh models" <%= 'disabled' if ck_refreshing %> onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
37
42
  </div>
38
43
  <% else %>
39
44
  <p class="ck-meta-copy">No models available. <%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %> or click refresh after configuring a provider.</p>
@@ -30,6 +30,13 @@
30
30
  <tr onclick="window.location='<%= prompt_path(prompt) %>'" style="cursor: pointer;">
31
31
  <td>
32
32
  <strong><%= prompt.name %></strong>
33
+ <% if prompt.description.present? %>
34
+ <p class="ck-prompts-table__desc"><%= truncate(prompt.description, length: 120) %></p>
35
+ <% end %>
36
+ <div class="ck-endpoint ck-endpoint--compact" onclick="event.stopPropagation()">
37
+ <code class="ck-endpoint__url" id="prompt_endpoint_<%= prompt.id %>"><%= api_v1_prompt_path(prompt.slug) %></code>
38
+ <button type="button" class="ck-icon-btn" title="Copy API path" onclick="event.stopPropagation();navigator.clipboard.writeText(document.getElementById('prompt_endpoint_<%= prompt.id %>').textContent)"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="13" height="13" aria-hidden="true"><path d="M0 6.75C0 5.784.784 5 1.75 5h1.5a.75.75 0 0 1 0 1.5h-1.5a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-1.5a.75.75 0 0 1 1.5 0v1.5A1.75 1.75 0 0 1 9.25 16h-7.5A1.75 1.75 0 0 1 0 14.25Z"/><path d="M5 1.75C5 .784 5.784 0 6.75 0h7.5C15.216 0 16 .784 16 1.75v7.5A1.75 1.75 0 0 1 14.25 11h-7.5A1.75 1.75 0 0 1 5 9.25Zm1.75-.25a.25.25 0 0 0-.25.25v7.5c0 .138.112.25.25.25h7.5a.25.25 0 0 0 .25-.25v-7.5a.25.25 0 0 0-.25-.25Z"/></svg></button>
39
+ </div>
33
40
  <% if prompt.tags.any? %>
34
41
  <div class="tag-marks-row">
35
42
  <%= render "completion_kit/tags/marks", tags: prompt.tags %>
@@ -7,6 +7,7 @@
7
7
  <div>
8
8
  <div class="ck-inline">
9
9
  <h1 class="ck-title"><%= @prompt.name %></h1>
10
+ <span class="ck-chip ck-chip--soft"><%= @prompt.version_label %></span>
10
11
  <span class="ck-chip"><%= @prompt.llm_model %></span>
11
12
  </div>
12
13
  <% if @prompt.description.present? %>
@@ -47,7 +48,9 @@
47
48
  <pre class="ck-code ck-code--dark"><%= @prompt.template %></pre>
48
49
  </section>
49
50
 
50
- <% versions = @prompt.family_versions %>
51
+ <% versions = @prompt.family_versions.includes(runs: { responses: :reviews }).to_a %>
52
+ <%# Predecessor = the version with the greatest version_number below v's, chosen explicitly so it does not depend on the order family_versions returns. %><% predecessor_of = versions.index_with { |v| versions.select { |o| o.version_number < v.version_number }.max_by(&:version_number) } %>
53
+ <% version_changed = ->(v, pred) { pred && (pred.template != v.template || pred.llm_model != v.llm_model) } %>
51
54
  <% if versions.size > 1 %>
52
55
  <section class="ck-card--spaced">
53
56
  <p class="ck-kicker">Versions</p>
@@ -56,28 +59,82 @@
56
59
  <tr>
57
60
  <th>Version</th>
58
61
  <th>Model</th>
62
+ <th>Best score</th>
59
63
  <th>Created</th>
60
- <th></th>
61
64
  </tr>
62
65
  </thead>
63
66
  <tbody>
64
67
  <% versions.each do |v| %>
65
- <tr>
66
- <td><strong>v<%= v.version_number %></strong></td>
68
+ <% best_score = v.runs.map(&:avg_score).compact.max %>
69
+ <% pred = predecessor_of[v] %>
70
+ <tr class="<%= "ck-results-table__row--active" if v.id == @prompt.id %>" onclick="window.location='<%= prompt_path(v) %>'" style="cursor: pointer;">
71
+ <td>
72
+ <div class="ck-version-cell">
73
+ <div class="ck-version-cell__label" onclick="event.stopPropagation()">
74
+ <strong>v<%= v.version_number %></strong>
75
+ <% if v.current? %>
76
+ <span class="ck-chip">Published</span>
77
+ <% else %>
78
+ <%= button_to "Publish", publish_prompt_path(v), method: :post, class: "ck-chip ck-chip--publish", form_class: "inline-block" %>
79
+ <% end %>
80
+ </div>
81
+ <% if version_changed.call(v, pred) %>
82
+ <button type="button" class="ck-cell-link ck-cell-link--delta" title="What changed from v<%= pred.version_number %>" onclick="event.stopPropagation();document.getElementById('ck-vdiff-<%= v.id %>').showModal()">&Delta;</button>
83
+ <% end %>
84
+ </div>
85
+ </td>
67
86
  <td><span class="ck-chip ck-chip--soft"><%= v.llm_model %></span></td>
68
- <td class="ck-meta-copy"><time datetime="<%= v.created_at.iso8601 %>" data-local-time><%= v.created_at.utc.strftime("%b %-d, %Y at %-I:%M %p UTC") %></time></td>
69
87
  <td>
70
- <% if v.current? %>
71
- <span class="ck-chip">Current</span>
88
+ <% if best_score %>
89
+ <span class="<%= ck_badge_classes(ck_score_kind(best_score)) %>"><%= best_score %></span>
72
90
  <% else %>
73
- <%= button_to "Publish", publish_prompt_path(v), method: :post, class: "ck-chip ck-chip--publish", form_class: "inline-block" %>
91
+ <span class="ck-prompts-table__dim">—</span>
74
92
  <% end %>
75
93
  </td>
94
+ <td class="ck-meta-copy"><time datetime="<%= v.created_at.iso8601 %>" data-local-time><%= v.created_at.utc.strftime("%b %-d, %Y at %-I:%M %p UTC") %></time></td>
76
95
  </tr>
77
96
  <% end %>
78
97
  </tbody>
79
98
  </table>
80
99
  </section>
100
+
101
+ <% versions.each do |v| %>
102
+ <% pred = predecessor_of[v] %>
103
+ <% next unless version_changed.call(v, pred) %>
104
+ <dialog id="ck-vdiff-<%= v.id %>" class="ck-modal" onclick="if(event.target===this)this.close()">
105
+ <article class="ck-modal__panel" tabindex="-1" autofocus onclick="event.stopPropagation()">
106
+ <header class="ck-modal__header">
107
+ <div class="ck-modal__heading">
108
+ <h2 class="ck-modal__title">v<%= pred.version_number %> &rarr; v<%= v.version_number %></h2>
109
+ <span class="ck-modal__meta">What changed in <%= v.version_label %><% if v.current? %> (published)<% end %></span>
110
+ </div>
111
+ <button type="button" class="ck-modal__close" aria-label="Close" onclick="this.closest('dialog').close()">&times;</button>
112
+ </header>
113
+ <div class="ck-modal__body">
114
+ <% if pred.llm_model != v.llm_model %>
115
+ <p class="ck-version-change">
116
+ <span class="ck-version-change__label">Model</span>
117
+ <span class="ck-version-change__old"><%= pred.llm_model %></span>
118
+ <span class="ck-version-change__arrow" aria-hidden="true">&rarr;</span>
119
+ <span class="ck-version-change__new"><%= v.llm_model %></span>
120
+ </p>
121
+ <% end %>
122
+ <% if pred.template != v.template %>
123
+ <div class="ck-suggest-diff">
124
+ <div class="ck-suggest-diff__pane">
125
+ <div class="ck-suggest-diff__header"><span class="ck-suggest-diff__label ck-suggest-diff__label--before"><%= pred.version_label %></span></div>
126
+ <pre class="ck-suggest-diff__code"><%= ck_word_diff_old(pred.template, v.template) %></pre>
127
+ </div>
128
+ <div class="ck-suggest-diff__pane">
129
+ <div class="ck-suggest-diff__header"><span class="ck-suggest-diff__label ck-suggest-diff__label--after"><%= v.version_label %></span></div>
130
+ <pre class="ck-suggest-diff__code"><%= ck_word_diff_new(pred.template, v.template) %></pre>
131
+ </div>
132
+ </div>
133
+ <% end %>
134
+ </div>
135
+ </article>
136
+ </dialog>
137
+ <% end %>
81
138
  <% end %>
82
139
 
83
140
  <% if @runs.any? %>
@@ -36,29 +36,36 @@
36
36
  Gen<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Generation models produce the responses your prompts ask for. Pick one when creating a prompt.</span>
37
37
  </th>
38
38
  <th scope="col" class="ck-model-table__cap">
39
- Judge<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
39
+ Judge<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Judge models score generated responses against your metrics. Pick one when configuring a run. A <strong>?</strong> means we haven't confirmed this model works as a judge — it's still selectable, and a successful run promotes it to ✓.</span>
40
40
  </th>
41
41
  </tr>
42
42
  </thead>
43
43
  <tbody>
44
- <% models.each do |m| %>
45
- <tr>
46
- <td class="ck-model-table__name"><%= m.display_name || m.model_id %></td>
47
- <td class="ck-model-table__cap">
48
- <% if m.supports_generation %>
49
- <span class="ck-model-table__tick" aria-label="Supports generation">✓</span>
50
- <% else %>
51
- <span class="ck-model-table__dash" aria-label="No generation support">—</span>
52
- <% end %>
53
- </td>
54
- <td class="ck-model-table__cap">
55
- <% if m.supports_judging %>
56
- <span class="ck-model-table__tick" aria-label="Supports judging">✓</span>
57
- <% else %>
58
- <span class="ck-model-table__dash" aria-label="No judging support">—</span>
59
- <% end %>
60
- </td>
61
- </tr>
44
+ <% ck_model_table_sections(models).each do |section_label, section_models| %>
45
+ <% if section_label %>
46
+ <tr class="ck-model-table__section"><td colspan="3"><%= section_label %></td></tr>
47
+ <% end %>
48
+ <% section_models.each do |m| %>
49
+ <tr>
50
+ <td class="ck-model-table__name"><%= m.display_name || m.model_id %></td>
51
+ <td class="ck-model-table__cap">
52
+ <% if m.supports_generation %>
53
+ <span class="ck-model-table__tick" aria-label="Supports generation">✓</span>
54
+ <% else %>
55
+ <span class="ck-model-table__dash" aria-label="No generation support">—</span>
56
+ <% end %>
57
+ </td>
58
+ <td class="ck-model-table__cap">
59
+ <% if m.supports_judging %>
60
+ <span class="ck-model-table__tick" aria-label="Supports judging">✓</span>
61
+ <% elsif m.supports_judging.nil? %>
62
+ <span class="ck-model-table__unknown" aria-label="Untested as judge" title="Untested as a judge — selectable; a successful run confirms it">?</span>
63
+ <% else %>
64
+ <span class="ck-model-table__dash" aria-label="Not usable as judge">—</span>
65
+ <% end %>
66
+ </td>
67
+ </tr>
68
+ <% end %>
62
69
  <% end %>
63
70
  </tbody>
64
71
  </table>
@@ -27,6 +27,7 @@
27
27
 
28
28
  <div class="ck-provider-card__meta">
29
29
  <span><%= provider_credential.api_endpoint.presence || default_endpoints[provider_credential.provider] %></span>
30
+ <span><%= provider_credential.model_count %> models</span>
30
31
  <span><%= provider_credential.prompt_count %> prompts</span>
31
32
  <span><%= provider_credential.judge_count %> judges</span>
32
33
  <span><% if provider_credential.last_used_at %>Used <time data-relative-time datetime="<%= provider_credential.last_used_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.last_used_at) %></time> ago<% else %>Never used<% end %></span>
@@ -74,7 +74,8 @@
74
74
  <% if available.any? %>
75
75
  <div class="ck-select-with-action">
76
76
  <%= form.select :judge_model, ck_grouped_models(available, run.judge_model), { include_blank: "None" }, { class: "ck-input", id: "run_judge_model" } %>
77
- <button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
77
+ <% ck_refreshing = CompletionKit::ProviderCredential.discovery_in_progress? %>
78
+ <button type="button" class="ck-icon-btn<%= ' ck-icon-btn--spinning' if ck_refreshing %>" title="Refresh models" <%= 'disabled' if ck_refreshing %> onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
78
79
  </div>
79
80
  <p class="ck-field-hint" id="judge-hint"></p>
80
81
  <div hidden data-refresh-progress-carriers>
@@ -80,7 +80,7 @@
80
80
 
81
81
  <% if @run.dataset %>
82
82
  <dialog id="dataset-preview-<%= @run.id %>" class="ck-modal" onclick="if(event.target===this)this.close()">
83
- <article class="ck-modal__panel" onclick="event.stopPropagation()">
83
+ <article class="ck-modal__panel" tabindex="-1" autofocus onclick="event.stopPropagation()">
84
84
  <header class="ck-modal__header">
85
85
  <div class="ck-modal__heading">
86
86
  <h2 class="ck-modal__title"><%= @run.dataset.name %></h2>
@@ -45,8 +45,9 @@
45
45
  <main class="ck-main">
46
46
  <div class="ck-wrap">
47
47
  <% flash.each do |type, message| %>
48
- <div class="ck-flash <%= type.to_s == "notice" ? "ck-flash--notice" : "ck-flash--alert" %>" role="<%= type.to_s == "notice" ? "status" : "alert" %>">
49
- <%= message %>
48
+ <% notice = type.to_s == "notice" %>
49
+ <div class="ck-flash <%= notice ? "ck-flash--notice" : "ck-flash--alert" %>" role="<%= notice ? "status" : "alert" %>">
50
+ <span class="ck-flash__label"><%= notice ? "Notice" : "Alert" %></span><span class="ck-flash__body"><%= message %></span>
50
51
  </div>
51
52
  <% end %>
52
53
 
data/config/routes.rb CHANGED
@@ -3,6 +3,7 @@ CompletionKit::Engine.routes.draw do
3
3
 
4
4
  get "onboarding", to: "onboarding#show", as: :onboarding
5
5
  post "onboarding/dismiss", to: "onboarding#dismiss", as: :dismiss_onboarding
6
+ post "onboarding/sample-data", to: "onboarding#sample_data", as: :onboarding_sample_data
6
7
 
7
8
  resources :prompts do
8
9
  member do
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.5.4"
2
+ VERSION = "0.5.6"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
@@ -294,6 +294,7 @@ files:
294
294
  - app/services/completion_kit/model_discovery_service.rb
295
295
  - app/services/completion_kit/ollama_client.rb
296
296
  - app/services/completion_kit/onboarding/checklist.rb
297
+ - app/services/completion_kit/onboarding/sample_data.rb
297
298
  - app/services/completion_kit/open_ai_client.rb
298
299
  - app/services/completion_kit/open_router_client.rb
299
300
  - app/services/completion_kit/prompt_improvement_service.rb