completion-kit 0.4.1 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/stylesheets/completion_kit/application.css +1882 -785
  3. data/app/controllers/completion_kit/runs_controller.rb +34 -19
  4. data/app/controllers/completion_kit/suggestions_controller.rb +24 -0
  5. data/app/jobs/completion_kit/generate_row_job.rb +7 -0
  6. data/app/jobs/completion_kit/judge_review_job.rb +2 -0
  7. data/app/jobs/completion_kit/model_discovery_job.rb +9 -4
  8. data/app/models/completion_kit/dataset.rb +9 -0
  9. data/app/models/completion_kit/provider_credential.rb +12 -1
  10. data/app/models/completion_kit/response.rb +7 -0
  11. data/app/models/completion_kit/run.rb +47 -9
  12. data/app/services/completion_kit/anthropic_client.rb +33 -14
  13. data/app/services/completion_kit/model_discovery_service.rb +133 -30
  14. data/app/services/completion_kit/ollama_client.rb +31 -10
  15. data/app/services/completion_kit/open_ai_client.rb +35 -13
  16. data/app/services/completion_kit/open_router_client.rb +34 -13
  17. data/app/services/completion_kit/worker_health.rb +4 -1
  18. data/app/views/completion_kit/datasets/index.html.erb +1 -1
  19. data/app/views/completion_kit/datasets/show.html.erb +47 -9
  20. data/app/views/completion_kit/metrics/_form.html.erb +1 -1
  21. data/app/views/completion_kit/metrics/index.html.erb +15 -2
  22. data/app/views/completion_kit/metrics/show.html.erb +1 -1
  23. data/app/views/completion_kit/prompts/index.html.erb +27 -8
  24. data/app/views/completion_kit/prompts/show.html.erb +6 -36
  25. data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +6 -4
  26. data/app/views/completion_kit/provider_credentials/_form.html.erb +1 -32
  27. data/app/views/completion_kit/provider_credentials/_models_card.html.erb +70 -0
  28. data/app/views/completion_kit/provider_credentials/index.html.erb +1 -1
  29. data/app/views/completion_kit/responses/show.html.erb +27 -6
  30. data/app/views/completion_kit/runs/_actions.html.erb +3 -0
  31. data/app/views/completion_kit/runs/_form.html.erb +114 -20
  32. data/app/views/completion_kit/runs/_response_row.html.erb +52 -22
  33. data/app/views/completion_kit/runs/_row.html.erb +50 -0
  34. data/app/views/completion_kit/runs/_sort_toolbar.html.erb +5 -4
  35. data/app/views/completion_kit/runs/_status_header.html.erb +7 -31
  36. data/app/views/completion_kit/runs/_status_panel.html.erb +80 -0
  37. data/app/views/completion_kit/runs/index.html.erb +4 -16
  38. data/app/views/completion_kit/runs/show.html.erb +111 -17
  39. data/app/views/completion_kit/suggestions/show.html.erb +65 -0
  40. data/app/views/layouts/completion_kit/application.html.erb +71 -0
  41. data/config/routes.rb +8 -2
  42. data/db/migrate/20260507000001_add_discovery_error_to_provider_credentials.rb +5 -0
  43. data/db/migrate/20260507150000_add_temperature_ignored_to_runs.rb +5 -0
  44. data/lib/completion_kit/version.rb +1 -1
  45. metadata +9 -4
  46. data/app/views/completion_kit/runs/_progress.html.erb +0 -18
  47. data/app/views/completion_kit/runs/suggestion.html.erb +0 -47
@@ -9,31 +9,50 @@
9
9
  </section>
10
10
 
11
11
  <% if @prompts.any? %>
12
- <table class="ck-results-table">
12
+ <table class="ck-results-table ck-prompts-table">
13
13
  <thead>
14
14
  <tr>
15
15
  <th>Name</th>
16
+ <th>Version</th>
16
17
  <th>Model</th>
18
+ <th>Best score</th>
17
19
  <th>Runs</th>
18
- <th>Last run</th>
19
20
  <th></th>
20
21
  </tr>
21
22
  </thead>
22
23
  <tbody>
23
24
  <% @prompts.each do |prompt| %>
24
25
  <tr onclick="window.location='<%= prompt_path(prompt) %>'" style="cursor: pointer;">
25
- <td><strong><%= prompt.name %></strong> <span class="ck-chip ck-chip--soft"><%= prompt.version_label %></span></td>
26
+ <td><strong><%= prompt.name %></strong></td>
27
+ <% latest_version = prompt.family_versions.maximum(:version_number) %>
28
+ <td>
29
+ <span class="ck-chip ck-chip--soft"><%= prompt.version_label %></span>
30
+ <% if prompt.version_number < latest_version %>
31
+ <span class="ck-meta-copy" style="margin-left: 0.4rem;">of <%= latest_version %></span>
32
+ <% end %>
33
+ </td>
26
34
  <td><span class="ck-chip"><%= prompt.llm_model %></span></td>
27
35
  <% family_runs = CompletionKit::Run.where(prompt_id: prompt.family_versions.select(:id)) %>
28
- <td><%= family_runs.count %></td>
36
+ <% current_version_runs = prompt.runs.includes(responses: :reviews) %>
37
+ <% best_score = current_version_runs.map(&:avg_score).compact.max %>
29
38
  <td>
30
- <% last_run = family_runs.order(created_at: :desc).first %>
31
- <% if last_run %>
32
- <%= time_ago_in_words(last_run.created_at) %> ago
39
+ <% if best_score %>
40
+ <span class="<%= ck_badge_classes(ck_score_kind(best_score)) %>"><%= best_score %></span>
33
41
  <% else %>
34
- Never
42
+ <span class="ck-prompts-table__dim">—</span>
35
43
  <% end %>
36
44
  </td>
45
+ <td>
46
+ <div class="ck-prompts-table__runs">
47
+ <span class="ck-prompts-table__runs-count"><%= family_runs.count %></span>
48
+ <% last_run = family_runs.order(created_at: :desc).first %>
49
+ <% if last_run %>
50
+ <span class="ck-prompts-table__runs-when">last <time data-relative-time datetime="<%= last_run.created_at.utc.iso8601 %>"><%= time_ago_in_words(last_run.created_at) %></time> ago</span>
51
+ <% else %>
52
+ <span class="ck-prompts-table__runs-when">never run</span>
53
+ <% end %>
54
+ </div>
55
+ </td>
37
56
  <td class="ck-results-table__arrow">&rarr;</td>
38
57
  </tr>
39
58
  <% end %>
@@ -56,7 +56,7 @@
56
56
  <tr>
57
57
  <td><strong>v<%= v.version_number %></strong></td>
58
58
  <td><span class="ck-chip ck-chip--soft"><%= v.llm_model %></span></td>
59
- <td class="ck-meta-copy"><%= time_ago_in_words(v.created_at) %> ago</td>
59
+ <td class="ck-meta-copy"><time datetime="<%= v.created_at.iso8601 %>" data-local-time><%= v.created_at.utc.strftime("%b %-d, %Y at %-I:%M %p UTC") %></time></td>
60
60
  <td>
61
61
  <% if v.current? %>
62
62
  <span class="ck-chip">Current</span>
@@ -75,50 +75,20 @@
75
75
  <section class="ck-card--spaced">
76
76
  <p class="ck-kicker">Runs</p>
77
77
 
78
- <table class="ck-results-table" style="margin-top: 0.5rem;">
78
+ <table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
79
79
  <thead>
80
80
  <tr>
81
81
  <th>Run</th>
82
- <th>Version</th>
83
82
  <th>Responses</th>
84
- <th>Avg score</th>
85
83
  <th>Metrics</th>
84
+ <th>Avg score</th>
86
85
  <th>When</th>
87
86
  <th></th>
88
87
  </tr>
89
88
  </thead>
90
89
  <tbody>
91
90
  <% @runs.each do |run| %>
92
- <tr onclick="window.location='<%= run_path(run) %>'" style="cursor: pointer;">
93
- <td><span class="ck-run-name"><span class="<%= ck_run_dot(run) %>"></span><strong><%= run.name %></strong></span></td>
94
- <td><span class="ck-chip ck-chip--soft">v<%= run.prompt.version_number %></span></td>
95
- <td><%= run.responses.size %></td>
96
- <td>
97
- <% avg = run.avg_score %>
98
- <% if avg %>
99
- <span class="<%= ck_badge_classes(ck_score_kind(avg)) %>"><%= avg %></span>
100
- <% else %>
101
- &mdash;
102
- <% end %>
103
- </td>
104
- <td>
105
- <% metrics = run.metric_averages %>
106
- <% if metrics.any? %>
107
- <div class="ck-metric-bar ck-metric-bar--compact">
108
- <% metrics.each do |m| %>
109
- <span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(m[:avg]) %>">
110
- <span class="ck-metric-pip__bar"></span>
111
- <span class="ck-metric-pip__label"><%= m[:name] %> <strong><%= m[:avg] %></strong></span>
112
- </span>
113
- <% end %>
114
- </div>
115
- <% else %>
116
- &mdash;
117
- <% end %>
118
- </td>
119
- <td class="ck-meta-copy"><%= time_ago_in_words(run.created_at) %> ago</td>
120
- <td class="ck-results-table__arrow">&rarr;</td>
121
- </tr>
91
+ <%= render "completion_kit/runs/row", run: run %>
122
92
  <% end %>
123
93
  </tbody>
124
94
  </table>
@@ -141,11 +111,11 @@
141
111
  </thead>
142
112
  <tbody>
143
113
  <% suggestions.each do |s| %>
144
- <tr onclick="window.location='<%= suggestion_run_path(s.run) %>'" style="cursor: pointer;">
114
+ <tr onclick="window.location='<%= suggestion_path(s, from: "prompt") %>'" style="cursor: pointer;">
145
115
  <td><strong><%= s.run.name %></strong></td>
146
116
  <td class="ck-meta-copy"><%= truncate(s.reasoning.to_s, length: 100) %></td>
147
117
  <td><%= s.applied_at? ? content_tag(:span, "Applied", class: "ck-chip", style: "background: var(--ck-success-soft); color: var(--ck-success);") : "&mdash;".html_safe %></td>
148
- <td class="ck-meta-copy"><%= time_ago_in_words(s.created_at) %> ago</td>
118
+ <td class="ck-meta-copy"><time data-relative-time datetime="<%= s.created_at.utc.iso8601 %>"><%= time_ago_in_words(s.created_at) %></time> ago</td>
149
119
  <td class="ck-results-table__arrow">&rarr;</td>
150
120
  </tr>
151
121
  <% end %>
@@ -3,9 +3,9 @@
3
3
  <div class="ck-discovery-bar">
4
4
  <div class="ck-discovery-bar__label">
5
5
  <% if provider_credential.discovery_total > 0 %>
6
- Testing models&hellip; <%= provider_credential.discovery_current %>/<%= provider_credential.discovery_total %>
6
+ Checking models&hellip; <%= provider_credential.discovery_current %>/<%= provider_credential.discovery_total %>
7
7
  <% else %>
8
- Fetching model list&hellip;
8
+ Looking up models&hellip;
9
9
  <% end %>
10
10
  </div>
11
11
  <% if provider_credential.discovery_total > 0 %>
@@ -20,11 +20,13 @@
20
20
  </div>
21
21
  <% elsif provider_credential.discovery_status == "failed" %>
22
22
  <div class="ck-discovery-bar ck-discovery-bar--failed">
23
- <div class="ck-discovery-bar__label">Model discovery failed</div>
23
+ <div class="ck-discovery-bar__label">
24
+ Model discovery failed<% if provider_credential.discovery_error.present? %>: <%= provider_credential.discovery_error %><% end %>
25
+ </div>
24
26
  </div>
25
27
  <% elsif provider_credential.discovery_status == "completed" && local_assigns.fetch(:show_completed, true) %>
26
28
  <div class="ck-discovery-bar ck-discovery-bar--completed">
27
- <div class="ck-discovery-bar__label">Available models list updated <%= time_ago_in_words(provider_credential.updated_at) %> ago</div>
29
+ <div class="ck-discovery-bar__label">Available models list updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</div>
28
30
  </div>
29
31
  <% end %>
30
32
  </div>
@@ -36,36 +36,5 @@
36
36
 
37
37
  <% if provider_credential.persisted? %>
38
38
  <%= turbo_stream_from "completion_kit_provider_#{provider_credential.id}" %>
39
-
40
- <% models = CompletionKit::Model.where(provider: provider_credential.provider).active.order(:model_id) %>
41
- <% if models.any? || provider_credential.discovery_status.present? %>
42
- <div class="ck-card ck-form-card__models">
43
- <div class="ck-form-card__footer-header">
44
- <%= render "discovery_status", provider_credential: provider_credential %>
45
- <button type="button" class="ck-icon-btn" title="Refresh models" aria-label="Refresh available models" onclick="fetch('<%= refresh_provider_credential_path(provider_credential) %>', {method:'POST',headers:{'X-CSRF-Token':document.querySelector('meta[name=csrf-token]').content}})">
46
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="14" height="14" aria-hidden="true"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg>
47
- </button>
48
- </div>
49
-
50
- <% if models.any? %>
51
- <details class="ck-model-list-details">
52
- <summary class="ck-label" style="cursor: pointer; user-select: none;">Available models (<%= models.count %>)</summary>
53
- <div class="ck-model-list">
54
- <% models.each do |m| %>
55
- <span class="ck-model-list__item">
56
- <%= m.display_name || m.model_id %>
57
- <% if m.supports_generation && m.supports_judging %>
58
- <span class="ck-model-list__badge">gen + judge</span>
59
- <% elsif m.supports_generation %>
60
- <span class="ck-model-list__badge">gen</span>
61
- <% elsif m.supports_judging %>
62
- <span class="ck-model-list__badge">judge</span>
63
- <% end %>
64
- </span>
65
- <% end %>
66
- </div>
67
- </details>
68
- <% end %>
69
- </div>
70
- <% end %>
39
+ <%= render "models_card", provider_credential: provider_credential %>
71
40
  <% end %>
@@ -0,0 +1,70 @@
1
+ <div id="provider_models_<%= provider_credential.id %>">
2
+ <% models = CompletionKit::Model.where(provider: provider_credential.provider).active.order(:model_id) %>
3
+ <% if models.any? || provider_credential.discovery_status.present? %>
4
+ <div class="ck-card ck-form-card__models">
5
+ <% if models.none? && provider_credential.discovery_status.in?(%w[discovering failed]) %>
6
+ <%= render "completion_kit/provider_credentials/discovery_status", provider_credential: provider_credential, show_completed: false %>
7
+ <% end %>
8
+
9
+ <% if models.any? %>
10
+ <% discovering = provider_credential.discovery_status == "discovering" %>
11
+ <% recently_completed = provider_credential.discovery_status == "completed" && provider_credential.updated_at > 1.minute.ago %>
12
+ <% expanded = discovering || recently_completed %>
13
+ <details class="ck-model-list-details"<%= " open" if expanded %>>
14
+ <summary class="ck-model-list__summary">
15
+ <span class="ck-model-list__summary-label">Available models <span class="ck-model-list__summary-count"><%= models.count %></span></span>
16
+ <span class="ck-model-list__summary-meta">
17
+ <% if provider_credential.discovery_status == "completed" %>
18
+ <span class="ck-model-list__summary-stamp">updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</span>
19
+ <% end %>
20
+ <button type="button" class="ck-icon-btn ck-model-list__refresh<%= ' ck-icon-btn--spinning' if discovering %>" title="Refresh models" aria-label="Refresh available models" <%= 'disabled' if discovering %> onclick="event.preventDefault();event.stopPropagation();fetch('<%= refresh_provider_credential_path(provider_credential) %>', {method:'POST',headers:{'X-CSRF-Token':document.querySelector('meta[name=csrf-token]').content}})">
21
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="13" height="13" aria-hidden="true"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg>
22
+ </button>
23
+ </span>
24
+ </summary>
25
+ <% if provider_credential.discovery_status.in?(%w[discovering failed]) %>
26
+ <div class="ck-model-list__progress">
27
+ <%= render "completion_kit/provider_credentials/discovery_status", provider_credential: provider_credential, show_completed: false %>
28
+ </div>
29
+ <% end %>
30
+ <div class="ck-model-table-wrap">
31
+ <table class="ck-model-table">
32
+ <thead>
33
+ <tr>
34
+ <th scope="col" class="ck-model-table__name">Model</th>
35
+ <th scope="col" class="ck-model-table__cap">
36
+ Gen<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Generation models produce the responses your prompts ask for. Pick one when creating a prompt.</span>
37
+ </th>
38
+ <th scope="col" class="ck-model-table__cap">
39
+ Judge<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
40
+ </th>
41
+ </tr>
42
+ </thead>
43
+ <tbody>
44
+ <% models.each do |m| %>
45
+ <tr>
46
+ <td class="ck-model-table__name"><%= m.display_name || m.model_id %></td>
47
+ <td class="ck-model-table__cap">
48
+ <% if m.supports_generation %>
49
+ <span class="ck-model-table__tick" aria-label="Supports generation">✓</span>
50
+ <% else %>
51
+ <span class="ck-model-table__dash" aria-label="No generation support">—</span>
52
+ <% end %>
53
+ </td>
54
+ <td class="ck-model-table__cap">
55
+ <% if m.supports_judging %>
56
+ <span class="ck-model-table__tick" aria-label="Supports judging">✓</span>
57
+ <% else %>
58
+ <span class="ck-model-table__dash" aria-label="No judging support">—</span>
59
+ <% end %>
60
+ </td>
61
+ </tr>
62
+ <% end %>
63
+ </tbody>
64
+ </table>
65
+ </div>
66
+ </details>
67
+ <% end %>
68
+ </div>
69
+ <% end %>
70
+ </div>
@@ -27,7 +27,7 @@
27
27
  <span><%= provider_credential.api_endpoint.presence || default_endpoints[provider_credential.provider] %></span>
28
28
  <span><%= provider_credential.prompt_count %> prompts</span>
29
29
  <span><%= provider_credential.judge_count %> judges</span>
30
- <span><%= provider_credential.last_used_at ? "Used #{time_ago_in_words(provider_credential.last_used_at)} ago" : "Never used" %></span>
30
+ <span><% if provider_credential.last_used_at %>Used <time data-relative-time datetime="<%= provider_credential.last_used_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.last_used_at) %></time> ago<% else %>Never used<% end %></span>
31
31
  </div>
32
32
 
33
33
  <%= render "discovery_status", provider_credential: provider_credential %>
@@ -14,12 +14,6 @@
14
14
  <span class="<%= ck_badge_classes(ck_score_kind(score)) %>"><%= score %></span>
15
15
  <% end %>
16
16
  </div>
17
- <p class="ck-meta-copy">
18
- <span class="ck-run-config__key">Prompt</span> <%= link_to @run.prompt.display_name, prompt_path(@run.prompt), class: "ck-link" %>
19
- <% if @run.dataset %>
20
- &ensp;&middot;&ensp;<span class="ck-run-config__key">Dataset</span> <%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
21
- <% end %>
22
- </p>
23
17
  </div>
24
18
  <div class="ck-actions">
25
19
  <% if @prev_response %>
@@ -31,6 +25,33 @@
31
25
  </div>
32
26
  </section>
33
27
 
28
+ <div class="ck-run-config">
29
+ <div class="ck-run-config__row">
30
+ <span class="ck-run-config__key">Run</span>
31
+ <%= link_to @run.name, run_path(@run), class: "ck-link" %>
32
+ </div>
33
+ <div class="ck-run-config__row">
34
+ <span class="ck-run-config__key">Prompt</span>
35
+ <%= link_to @run.prompt.display_name, prompt_path(@run.prompt), class: "ck-link" %>
36
+ </div>
37
+ <% if @run.dataset %>
38
+ <div class="ck-run-config__row">
39
+ <span class="ck-run-config__key">Dataset</span>
40
+ <%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
41
+ </div>
42
+ <% end %>
43
+ <div class="ck-run-config__row">
44
+ <span class="ck-run-config__key">Model</span>
45
+ <span style="text-transform: none;"><%= @run.prompt.llm_model %></span>
46
+ </div>
47
+ <% if @run.judge_model.present? %>
48
+ <div class="ck-run-config__row">
49
+ <span class="ck-run-config__key">Judge</span>
50
+ <span style="text-transform: none;"><%= @run.judge_model %></span>
51
+ </div>
52
+ <% end %>
53
+ </div>
54
+
34
55
  <section>
35
56
  <p class="ck-kicker">Input</p>
36
57
  <pre class="ck-code ck-code--dark"><%= begin; JSON.pretty_generate(JSON.parse(@response.input_data)); rescue; @response.input_data; end %></pre>
@@ -9,6 +9,9 @@
9
9
  <%= button_to "Start", generate_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
10
10
  <% elsif run.status == "failed" %>
11
11
  <%= button_to "Retry", generate_run_path(run), method: :post, class: ck_button_classes(:light, variant: :outline), form_class: "inline-block" %>
12
+ <%= button_to "Re-run as new", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
13
+ <% elsif run.status == "completed" %>
14
+ <%= button_to "Re-run", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
12
15
  <% end %>
13
16
  <% end %>
14
17
  </div>
@@ -18,7 +18,29 @@
18
18
 
19
19
  <div class="ck-field">
20
20
  <%= form.label :prompt_id, "Prompt", class: "ck-label" %>
21
- <%= form.select :prompt_id, @prompts.map { |p| [p.display_name, p.id, { "data-has-variables" => p.variables.any? ? "1" : "0" }] }, { include_blank: "Select a prompt" }, { class: "ck-input", id: "run_prompt_id" } %>
21
+ <%= form.select :prompt_id,
22
+ @prompts.map { |p|
23
+ vars = p.variables
24
+ label_parts = [p.display_name, p.llm_model]
25
+ label_parts << (vars.any? ? "#{vars.size} #{'var'.pluralize(vars.size)}" : "no vars")
26
+ [
27
+ label_parts.join(" · "),
28
+ p.id,
29
+ {
30
+ "data-has-variables" => vars.any? ? "1" : "0",
31
+ "data-model" => p.llm_model.to_s,
32
+ "data-variables" => vars.join(", "),
33
+ "data-description" => p.description.to_s,
34
+ "data-template-preview" => p.template.to_s.truncate(220, separator: " ")
35
+ }
36
+ ]
37
+ },
38
+ { include_blank: "Select a prompt" },
39
+ { class: "ck-input", id: "run_prompt_id" } %>
40
+ <div class="ck-prompt-summary" id="prompt-summary" hidden>
41
+ <p class="ck-prompt-summary__description" id="prompt-summary-description" hidden></p>
42
+ <p class="ck-prompt-summary__template" id="prompt-summary-template"></p>
43
+ </div>
22
44
  </div>
23
45
 
24
46
  <div class="ck-field" id="dataset-field">
@@ -26,18 +48,17 @@
26
48
  <% if @datasets.empty? %>
27
49
  <p class="ck-meta-copy">No datasets yet. <%= link_to "Create a dataset", new_dataset_path, class: "ck-link" %> first.</p>
28
50
  <% else %>
29
- <%= form.select :dataset_id, @datasets.map { |d| [d.name, d.id] }, { include_blank: "Select a dataset" }, { class: "ck-input", id: "run_dataset_id" } %>
51
+ <%= form.select :dataset_id,
52
+ @datasets.map { |d| [d.name, d.id, { "data-headers" => d.headers.join(",") }] },
53
+ { include_blank: "Select a dataset" },
54
+ { class: "ck-input", id: "run_dataset_id" } %>
30
55
  <% end %>
31
- <p class="ck-field-hint" id="dataset-hint" style="display: none; color: var(--ck-warning);">This prompt uses variables. Select a dataset to provide values.</p>
56
+ <p class="ck-field-hint" id="dataset-hint"></p>
32
57
  </div>
33
58
 
34
59
  <div class="ck-field">
35
60
  <label class="ck-label" for="run_temperature" style="position: relative;">
36
- Temperature
37
- <span class="ck-info-toggle">?</span>
38
- <div class="ck-info-popup">
39
- Controls how random the model's output is. Lower values make the model more focused and deterministic — it'll pick the most likely words. Higher values introduce more variety and creativity, but also more risk of odd phrasing. Most LLMs default to 1.0. For evaluation, try different values to see how your prompt performs under varying conditions.
40
- </div>
61
+ Temperature<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Controls how random the model's output is. Lower values are more focused and deterministic — the model picks the most likely words. Higher values are more varied and creative, with more risk of odd phrasing. Most LLMs default to 1.0; for evaluation, try a few values and see how your prompt holds up. Newer reasoning models (Claude Opus 4.7, GPT-5 family, etc.) ignore temperature entirely — CompletionKit detects this and re-sends without the parameter.</span>
41
62
  </label>
42
63
  <div class="ck-slider-row">
43
64
  <%= form.range_field :temperature, min: 0, max: 1, step: 0.1, class: "ck-slider", id: "run_temperature", oninput: "document.getElementById('temp-value').textContent = this.value" %>
@@ -46,7 +67,9 @@
46
67
  </div>
47
68
 
48
69
  <div class="ck-field" id="judge-field">
49
- <%= form.label :judge_model, "Judge model", class: "ck-label" %>
70
+ <label class="ck-label" for="run_judge_model" style="position: relative;">
71
+ Judge model<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
72
+ </label>
50
73
  <% available = CompletionKit::ApiConfig.available_models(scope: :judging) %>
51
74
  <% if available.any? %>
52
75
  <div class="ck-select-with-action">
@@ -74,18 +97,35 @@
74
97
  <p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet.&ensp;<%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
75
98
  <% else %>
76
99
  <% if @metric_groups.any? %>
77
- <p class="ck-meta-copy" style="margin-bottom: 0.5rem;">
78
- Quick add:&ensp;
79
- <% @metric_groups.each do |g| %>
80
- <span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddMetricGroup(<%= g.metric_ids.to_json %>)"><%= g.name %></span>&ensp;
81
- <% end %>
82
- </p>
100
+ <div class="ck-metric-groups">
101
+ <span class="ck-metric-groups__label">Groups</span>
102
+ <div class="ck-metric-groups__row">
103
+ <% @metric_groups.each do |g| %>
104
+ <button type="button"
105
+ class="ck-metric-group-pill"
106
+ data-metric-group
107
+ data-metric-ids="<%= g.metric_ids.join(",") %>"
108
+ onclick="ckToggleMetricGroup(this)">
109
+ <span class="ck-metric-group-pill__check" aria-hidden="true">✓</span>
110
+ <span class="ck-metric-group-pill__label"><%= g.name %></span>
111
+ <span class="ck-metric-group-pill__count"><%= g.metric_ids.size %></span>
112
+ </button>
113
+ <% end %>
114
+ </div>
115
+ </div>
116
+ <div class="ck-metric-divider"><span>or pick individually</span></div>
83
117
  <% end %>
84
118
  <div class="ck-metric-checkboxes">
85
119
  <% @all_metrics.each do |metric| %>
86
120
  <label class="ck-checkbox-label">
87
121
  <%= check_box_tag "run[metric_ids][]", metric.id, run.metric_ids.include?(metric.id), class: "ck-checkbox", id: "run_metric_#{metric.id}" %>
88
- <span><%= metric.name %></span>
122
+ <span class="ck-checkbox-label__box" aria-hidden="true"></span>
123
+ <span class="ck-checkbox-label__body">
124
+ <span class="ck-checkbox-label__text"><%= metric.name %></span>
125
+ <% if metric.instruction.present? %>
126
+ <span class="ck-checkbox-label__hint"><%= truncate(metric.instruction.to_s, length: 90) %></span>
127
+ <% end %>
128
+ </span>
89
129
  </label>
90
130
  <% end %>
91
131
  </div>
@@ -123,25 +163,79 @@ function updateRunForm() {
123
163
 
124
164
  var datasetEl = document.getElementById('run_dataset_id');
125
165
  var datasetHint = document.getElementById('dataset-hint');
166
+ var datasetField = document.getElementById('dataset-field');
126
167
  var dataset = datasetEl ? datasetEl.value : '';
127
168
  var selectedOption = promptEl ? promptEl.options[promptEl.selectedIndex] : null;
128
169
  var hasVars = selectedOption && selectedOption.dataset.hasVariables === '1';
129
- if (datasetHint) datasetHint.style.display = (hasVars && !dataset) ? '' : 'none';
170
+ var promptVars = (selectedOption && selectedOption.dataset.variables ? selectedOption.dataset.variables.split(/,\s*/) : []).filter(Boolean);
171
+
172
+ var missingVars = [];
173
+ if (hasVars && dataset && datasetEl) {
174
+ var datasetOption = datasetEl.options[datasetEl.selectedIndex];
175
+ var headers = (datasetOption && datasetOption.dataset.headers ? datasetOption.dataset.headers.split(/,\s*/) : []).filter(Boolean);
176
+ missingVars = promptVars.filter(function(v) { return headers.indexOf(v) === -1; });
177
+ }
178
+
179
+ if (datasetField) datasetField.className = 'ck-field';
180
+ if (datasetHint) datasetHint.textContent = '';
181
+ if (missingVars.length > 0) {
182
+ if (datasetField) datasetField.className = 'ck-field ck-field--error';
183
+ if (datasetHint) datasetHint.textContent = 'Dataset is missing ' + (missingVars.length === 1 ? 'column' : 'columns') + ' the prompt needs: ' + missingVars.join(', ');
184
+ } else if (hasVars && !dataset) {
185
+ if (datasetField) datasetField.className = 'ck-field ck-field--info';
186
+ if (datasetHint) datasetHint.textContent = 'This prompt uses variables. Select a dataset to provide values.';
187
+ }
188
+
189
+ var summary = document.getElementById('prompt-summary');
190
+ if (summary) {
191
+ if (selectedOption && selectedOption.value) {
192
+ var desc = selectedOption.dataset.description || '';
193
+ var tmpl = selectedOption.dataset.templatePreview || '';
194
+ var descEl = document.getElementById('prompt-summary-description');
195
+ descEl.textContent = desc;
196
+ descEl.hidden = !desc;
197
+ document.getElementById('prompt-summary-template').textContent = tmpl;
198
+ summary.hidden = false;
199
+ } else {
200
+ summary.hidden = true;
201
+ }
202
+ }
130
203
 
131
204
  var valid = prompt !== '';
132
205
  if (judge && metrics.length === 0) valid = false;
133
206
  if (!judge && metrics.length > 0) valid = false;
207
+ if (hasVars && !dataset) valid = false;
208
+ if (missingVars.length > 0) valid = false;
134
209
  if (submitBtn) submitBtn.disabled = !valid;
210
+
211
+ ckUpdateMetricGroupsState();
135
212
  }
136
213
 
137
- function ckQuickAddMetricGroup(metricIds) {
138
- metricIds.forEach(function(id) {
214
+ function ckToggleMetricGroup(button) {
215
+ var ids = (button.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
216
+ var allChecked = ids.every(function(id) {
217
+ var cb = document.getElementById('run_metric_' + id);
218
+ return cb && cb.checked;
219
+ });
220
+ ids.forEach(function(id) {
139
221
  var cb = document.getElementById('run_metric_' + id);
140
- if (cb) cb.checked = true;
222
+ if (cb) cb.checked = !allChecked;
141
223
  });
142
224
  updateRunForm();
143
225
  }
144
226
 
227
+ function ckUpdateMetricGroupsState() {
228
+ document.querySelectorAll('[data-metric-group]').forEach(function(btn) {
229
+ var ids = (btn.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
230
+ if (ids.length === 0) return;
231
+ var allChecked = ids.every(function(id) {
232
+ var cb = document.getElementById('run_metric_' + id);
233
+ return cb && cb.checked;
234
+ });
235
+ btn.classList.toggle('ck-metric-group-pill--active', allChecked);
236
+ });
237
+ }
238
+
145
239
  var judgeEl = document.getElementById('run_judge_model');
146
240
  var promptEl = document.getElementById('run_prompt_id');
147
241
  var datasetEl = document.getElementById('run_dataset_id');
@@ -1,31 +1,61 @@
1
- <%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row ck-response-row--#{response.status}", id: "response_#{response.id}" do %>
2
- <span class="ck-response-row__index">#<%= index %></span>
3
- <span class="ck-response-row__text">
4
- <% if response.succeeded? %>
5
- <%= truncate(response.response_text.to_s, length: 160) %>
6
- <% elsif response.status == "failed" %>
1
+ <% clickable = response.succeeded? %>
2
+ <tr id="response_<%= response.id %>"<% if clickable %> onclick="window.location='<%= run_response_path(run, response, sort: params[:sort]) %>'" style="cursor: pointer;"<% end %>>
3
+ <td class="ck-response-cell__index"><%= index %></td>
4
+ <td class="ck-response-cell__text">
5
+ <% if response.status == "failed" %>
7
6
  <% err = response.error_payload %>
8
- <span class="ck-response-row__error">
9
- <%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 120) %>
7
+ <span class="ck-response-cell__error"><%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 160) %></span>
8
+ <% else %>
9
+ <%= truncate(response.response_text.to_s, length: 160) %>
10
+ <% end %>
11
+ </td>
12
+ <td>
13
+ <% scored_reviews = response.reviews.select { |r| r.ai_score.present? }.sort_by { |r| r.metric_name.to_s.downcase } %>
14
+ <% if scored_reviews.any? %>
15
+ <span class="ck-metric-bar ck-metric-bar--compact">
16
+ <% scored_reviews.each do |r| %>
17
+ <span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(r.ai_score.to_f) %>">
18
+ <span class="ck-metric-pip__bar"></span>
19
+ <span class="ck-metric-pip__label"><%= r.metric_name %> <strong><%= r.ai_score %></strong></span>
20
+ </span>
21
+ <% end %>
10
22
  </span>
23
+ <% else %>
24
+ <span class="ck-response-cell__dim">—</span>
11
25
  <% end %>
12
- </span>
13
- <span class="ck-response-row__score">
26
+ </td>
27
+ <td>
28
+ <% if response.reviewed? %>
29
+ <span class="<%= ck_badge_classes(ck_score_kind(response.score.to_f)) %>"><%= response.score %></span>
30
+ <% else %>
31
+ <span class="ck-response-cell__dim">—</span>
32
+ <% end %>
33
+ </td>
34
+ <td>
14
35
  <% case response.status
15
- when "succeeded" %>
16
- <% if response.reviewed? %>
17
- <span class="ck-score"><span class="ck-score__star">★</span> <%= response.score %></span>
36
+ when "pending" %>
37
+ <span class="ck-chip">Queued</span>
38
+ <% when "retrying" %>
39
+ <% if response.attempts.to_i <= 1 %>
40
+ <span class="ck-chip">Generating</span>
41
+ <% else %>
42
+ <span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
43
+ <% end %>
44
+ <% when "succeeded" %>
45
+ <% if response.fully_reviewed? %>
46
+ <span class="ck-chip ck-chip--done">Done</span>
18
47
  <% elsif run.status == "running" %>
19
48
  <span class="ck-chip">Judging</span>
49
+ <% else %>
50
+ <span class="ck-chip">Awaiting judge</span>
20
51
  <% end %>
21
- <% when "pending" %>
22
- <span class="ck-chip">Queued</span>
23
- <% when "retrying" %>
24
- <span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
25
52
  <% when "failed" %>
26
- <%= link_to "Retry", retry_failures_run_path(run, only: response.id),
27
- data: { turbo_method: :post },
28
- class: "ck-chip ck-chip--danger ck-chip--retry" %>
53
+ <%= button_to "Retry", retry_failures_run_path(run, only: response.id),
54
+ method: :post,
55
+ class: "ck-chip ck-chip--retry",
56
+ form_class: "inline-block",
57
+ onclick: "event.stopPropagation();" %>
29
58
  <% end %>
30
- </span>
31
- <% end %>
59
+ </td>
60
+ <td class="ck-results-table__arrow"><% if clickable %>&rarr;<% end %></td>
61
+ </tr>