completion-kit 0.4.2 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/app/assets/stylesheets/completion_kit/application.css +850 -69
  3. data/app/controllers/completion_kit/runs_controller.rb +31 -18
  4. data/app/controllers/completion_kit/suggestions_controller.rb +24 -0
  5. data/app/jobs/completion_kit/generate_row_job.rb +7 -0
  6. data/app/jobs/completion_kit/judge_review_job.rb +2 -0
  7. data/app/jobs/completion_kit/model_discovery_job.rb +9 -4
  8. data/app/models/completion_kit/dataset.rb +9 -0
  9. data/app/models/completion_kit/provider_credential.rb +1 -1
  10. data/app/models/completion_kit/response.rb +7 -0
  11. data/app/models/completion_kit/run.rb +22 -1
  12. data/app/services/completion_kit/anthropic_client.rb +33 -14
  13. data/app/services/completion_kit/model_discovery_service.rb +35 -9
  14. data/app/services/completion_kit/ollama_client.rb +31 -10
  15. data/app/services/completion_kit/open_ai_client.rb +35 -13
  16. data/app/services/completion_kit/open_router_client.rb +34 -13
  17. data/app/services/completion_kit/worker_health.rb +4 -1
  18. data/app/views/completion_kit/datasets/index.html.erb +1 -1
  19. data/app/views/completion_kit/datasets/show.html.erb +47 -9
  20. data/app/views/completion_kit/metrics/_form.html.erb +1 -1
  21. data/app/views/completion_kit/metrics/index.html.erb +15 -2
  22. data/app/views/completion_kit/metrics/show.html.erb +1 -1
  23. data/app/views/completion_kit/prompts/index.html.erb +27 -8
  24. data/app/views/completion_kit/prompts/show.html.erb +6 -36
  25. data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +4 -2
  26. data/app/views/completion_kit/provider_credentials/_models_card.html.erb +1 -1
  27. data/app/views/completion_kit/provider_credentials/index.html.erb +1 -1
  28. data/app/views/completion_kit/runs/_actions.html.erb +3 -0
  29. data/app/views/completion_kit/runs/_form.html.erb +114 -20
  30. data/app/views/completion_kit/runs/_response_row.html.erb +58 -35
  31. data/app/views/completion_kit/runs/_row.html.erb +50 -0
  32. data/app/views/completion_kit/runs/_sort_toolbar.html.erb +5 -4
  33. data/app/views/completion_kit/runs/_status_header.html.erb +3 -2
  34. data/app/views/completion_kit/runs/_status_panel.html.erb +55 -21
  35. data/app/views/completion_kit/runs/index.html.erb +4 -16
  36. data/app/views/completion_kit/runs/show.html.erb +110 -16
  37. data/app/views/completion_kit/suggestions/show.html.erb +65 -0
  38. data/app/views/layouts/completion_kit/application.html.erb +71 -0
  39. data/config/routes.rb +8 -2
  40. data/db/migrate/20260507000001_add_discovery_error_to_provider_credentials.rb +5 -0
  41. data/db/migrate/20260507150000_add_temperature_ignored_to_runs.rb +5 -0
  42. data/lib/completion_kit/version.rb +1 -1
  43. metadata +7 -3
  44. data/app/views/completion_kit/runs/suggestion.html.erb +0 -47
@@ -14,30 +14,68 @@
14
14
 
15
15
  <section>
16
16
  <p class="ck-kicker">CSV preview</p>
17
- <pre class="ck-code ck-code--dark"><%= @dataset.csv_data %></pre>
17
+ <%
18
+ require "csv"
19
+ parsed_rows = []
20
+ parse_error = nil
21
+ begin
22
+ csv = ::CSV.parse(@dataset.csv_data.to_s)
23
+ parsed_rows = csv
24
+ rescue ::CSV::MalformedCSVError => e
25
+ parse_error = e.message
26
+ end
27
+ headers = parsed_rows.first || []
28
+ body_rows = parsed_rows.drop(1)
29
+ %>
30
+ <% if parse_error %>
31
+ <p class="ck-field-hint" style="color: var(--ck-warning);">Could not parse CSV: <%= parse_error %></p>
32
+ <pre class="ck-code ck-code--dark"><%= @dataset.csv_data %></pre>
33
+ <% elsif headers.empty? %>
34
+ <p class="ck-field-hint">Dataset is empty.</p>
35
+ <% else %>
36
+ <div class="ck-csv-table-wrap">
37
+ <table class="ck-csv-table">
38
+ <thead>
39
+ <tr>
40
+ <th class="ck-csv-table__rownum">#</th>
41
+ <% headers.each do |h| %>
42
+ <th><%= h %></th>
43
+ <% end %>
44
+ </tr>
45
+ </thead>
46
+ <tbody>
47
+ <% body_rows.each_with_index do |row, idx| %>
48
+ <tr>
49
+ <td class="ck-csv-table__rownum"><%= idx + 1 %></td>
50
+ <% headers.each_with_index do |_, i| %>
51
+ <td><span class="ck-csv-cell"><%= row[i] %></span></td>
52
+ <% end %>
53
+ </tr>
54
+ <% end %>
55
+ </tbody>
56
+ </table>
57
+ </div>
58
+ <% end %>
18
59
  </section>
19
60
 
20
61
  <% if @runs.any? %>
21
62
  <section class="ck-card--spaced">
22
63
  <p class="ck-kicker">Runs</p>
23
64
 
24
- <table class="ck-results-table" style="margin-top: 0.5rem;">
65
+ <table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
25
66
  <thead>
26
67
  <tr>
27
68
  <th>Run</th>
28
- <th>Prompt</th>
29
69
  <th>Responses</th>
70
+ <th>Metrics</th>
71
+ <th>Avg score</th>
72
+ <th>When</th>
30
73
  <th></th>
31
74
  </tr>
32
75
  </thead>
33
76
  <tbody>
34
77
  <% @runs.each do |run| %>
35
- <tr onclick="window.location='<%= run_path(run) %>'" style="cursor: pointer;">
36
- <td><strong><%= run.name %></strong></td>
37
- <td><%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-link" %></td>
38
- <td><%= run.responses.size %></td>
39
- <td class="ck-results-table__arrow">&rarr;</td>
40
- </tr>
78
+ <%= render "completion_kit/runs/row", run: run %>
41
79
  <% end %>
42
80
  </tbody>
43
81
  </table>
@@ -31,7 +31,7 @@
31
31
  <div class="ck-rubric-row">
32
32
  <div class="ck-rubric-row__stars">
33
33
  <% 5.times do |i| %>
34
- <svg viewBox="0 0 24 24" width="14" height="14" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
34
+ <svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
35
35
  <% end %>
36
36
  <input type="hidden" name="metric[rubric_bands][<%= index %>][stars]" value="<%= band["stars"] %>">
37
37
  </div>
@@ -9,7 +9,7 @@
9
9
  </section>
10
10
 
11
11
  <% if @metrics.any? %>
12
- <table class="ck-results-table">
12
+ <table class="ck-results-table ck-metrics-table">
13
13
  <thead>
14
14
  <tr>
15
15
  <th>Name</th>
@@ -23,7 +23,20 @@
23
23
  <tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
24
24
  <td><strong><%= metric.name %></strong></td>
25
25
  <td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 90).presence || "—" %></td>
26
- <td class="ck-meta-copy"><%= metric.metric_groups.any? ? metric.metric_groups.map(&:name).join(", ") : "—" %></td>
26
+ <td>
27
+ <% groups = metric.metric_groups %>
28
+ <% if groups.any? %>
29
+ <div class="ck-metrics-table__groups">
30
+ <% groups.each do |g| %>
31
+ <%= link_to metric_group_path(g), class: "ck-metric-group-pill ck-metric-group-pill--active", onclick: "event.stopPropagation();" do %>
32
+ <span class="ck-metric-group-pill__label"><%= g.name %></span>
33
+ <% end %>
34
+ <% end %>
35
+ </div>
36
+ <% else %>
37
+ <span class="ck-metrics-table__dim">—</span>
38
+ <% end %>
39
+ </td>
27
40
  <td class="ck-results-table__arrow">&rarr;</td>
28
41
  </tr>
29
42
  <% end %>
@@ -26,7 +26,7 @@
26
26
  <div class="ck-rubric-row ck-rubric-row--display">
27
27
  <div class="ck-rubric-row__stars">
28
28
  <% 5.times do |i| %>
29
- <svg viewBox="0 0 24 24" width="14" height="14" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
29
+ <svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
30
30
  <% end %>
31
31
  </div>
32
32
  <div class="ck-rubric-row__fields">
@@ -9,31 +9,50 @@
9
9
  </section>
10
10
 
11
11
  <% if @prompts.any? %>
12
- <table class="ck-results-table">
12
+ <table class="ck-results-table ck-prompts-table">
13
13
  <thead>
14
14
  <tr>
15
15
  <th>Name</th>
16
+ <th>Version</th>
16
17
  <th>Model</th>
18
+ <th>Best score</th>
17
19
  <th>Runs</th>
18
- <th>Last run</th>
19
20
  <th></th>
20
21
  </tr>
21
22
  </thead>
22
23
  <tbody>
23
24
  <% @prompts.each do |prompt| %>
24
25
  <tr onclick="window.location='<%= prompt_path(prompt) %>'" style="cursor: pointer;">
25
- <td><strong><%= prompt.name %></strong> <span class="ck-chip ck-chip--soft"><%= prompt.version_label %></span></td>
26
+ <td><strong><%= prompt.name %></strong></td>
27
+ <% latest_version = prompt.family_versions.maximum(:version_number) %>
28
+ <td>
29
+ <span class="ck-chip ck-chip--soft"><%= prompt.version_label %></span>
30
+ <% if prompt.version_number < latest_version %>
31
+ <span class="ck-meta-copy" style="margin-left: 0.4rem;">of <%= latest_version %></span>
32
+ <% end %>
33
+ </td>
26
34
  <td><span class="ck-chip"><%= prompt.llm_model %></span></td>
27
35
  <% family_runs = CompletionKit::Run.where(prompt_id: prompt.family_versions.select(:id)) %>
28
- <td><%= family_runs.count %></td>
36
+ <% current_version_runs = prompt.runs.includes(responses: :reviews) %>
37
+ <% best_score = current_version_runs.map(&:avg_score).compact.max %>
29
38
  <td>
30
- <% last_run = family_runs.order(created_at: :desc).first %>
31
- <% if last_run %>
32
- <%= time_ago_in_words(last_run.created_at) %> ago
39
+ <% if best_score %>
40
+ <span class="<%= ck_badge_classes(ck_score_kind(best_score)) %>"><%= best_score %></span>
33
41
  <% else %>
34
- Never
42
+ <span class="ck-prompts-table__dim">—</span>
35
43
  <% end %>
36
44
  </td>
45
+ <td>
46
+ <div class="ck-prompts-table__runs">
47
+ <span class="ck-prompts-table__runs-count"><%= family_runs.count %></span>
48
+ <% last_run = family_runs.order(created_at: :desc).first %>
49
+ <% if last_run %>
50
+ <span class="ck-prompts-table__runs-when">last <time data-relative-time datetime="<%= last_run.created_at.utc.iso8601 %>"><%= time_ago_in_words(last_run.created_at) %></time> ago</span>
51
+ <% else %>
52
+ <span class="ck-prompts-table__runs-when">never run</span>
53
+ <% end %>
54
+ </div>
55
+ </td>
37
56
  <td class="ck-results-table__arrow">&rarr;</td>
38
57
  </tr>
39
58
  <% end %>
@@ -56,7 +56,7 @@
56
56
  <tr>
57
57
  <td><strong>v<%= v.version_number %></strong></td>
58
58
  <td><span class="ck-chip ck-chip--soft"><%= v.llm_model %></span></td>
59
- <td class="ck-meta-copy"><%= time_ago_in_words(v.created_at) %> ago</td>
59
+ <td class="ck-meta-copy"><time datetime="<%= v.created_at.iso8601 %>" data-local-time><%= v.created_at.utc.strftime("%b %-d, %Y at %-I:%M %p UTC") %></time></td>
60
60
  <td>
61
61
  <% if v.current? %>
62
62
  <span class="ck-chip">Current</span>
@@ -75,50 +75,20 @@
75
75
  <section class="ck-card--spaced">
76
76
  <p class="ck-kicker">Runs</p>
77
77
 
78
- <table class="ck-results-table" style="margin-top: 0.5rem;">
78
+ <table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
79
79
  <thead>
80
80
  <tr>
81
81
  <th>Run</th>
82
- <th>Version</th>
83
82
  <th>Responses</th>
84
- <th>Avg score</th>
85
83
  <th>Metrics</th>
84
+ <th>Avg score</th>
86
85
  <th>When</th>
87
86
  <th></th>
88
87
  </tr>
89
88
  </thead>
90
89
  <tbody>
91
90
  <% @runs.each do |run| %>
92
- <tr onclick="window.location='<%= run_path(run) %>'" style="cursor: pointer;">
93
- <td><span class="ck-run-name"><span class="<%= ck_run_dot(run) %>"></span><strong><%= run.name %></strong></span></td>
94
- <td><span class="ck-chip ck-chip--soft">v<%= run.prompt.version_number %></span></td>
95
- <td><%= run.responses.size %></td>
96
- <td>
97
- <% avg = run.avg_score %>
98
- <% if avg %>
99
- <span class="<%= ck_badge_classes(ck_score_kind(avg)) %>"><%= avg %></span>
100
- <% else %>
101
- &mdash;
102
- <% end %>
103
- </td>
104
- <td>
105
- <% metrics = run.metric_averages %>
106
- <% if metrics.any? %>
107
- <div class="ck-metric-bar ck-metric-bar--compact">
108
- <% metrics.each do |m| %>
109
- <span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(m[:avg]) %>">
110
- <span class="ck-metric-pip__bar"></span>
111
- <span class="ck-metric-pip__label"><%= m[:name] %> <strong><%= m[:avg] %></strong></span>
112
- </span>
113
- <% end %>
114
- </div>
115
- <% else %>
116
- &mdash;
117
- <% end %>
118
- </td>
119
- <td class="ck-meta-copy"><%= time_ago_in_words(run.created_at) %> ago</td>
120
- <td class="ck-results-table__arrow">&rarr;</td>
121
- </tr>
91
+ <%= render "completion_kit/runs/row", run: run %>
122
92
  <% end %>
123
93
  </tbody>
124
94
  </table>
@@ -141,11 +111,11 @@
141
111
  </thead>
142
112
  <tbody>
143
113
  <% suggestions.each do |s| %>
144
- <tr onclick="window.location='<%= suggestion_run_path(s.run) %>'" style="cursor: pointer;">
114
+ <tr onclick="window.location='<%= suggestion_path(s, from: "prompt") %>'" style="cursor: pointer;">
145
115
  <td><strong><%= s.run.name %></strong></td>
146
116
  <td class="ck-meta-copy"><%= truncate(s.reasoning.to_s, length: 100) %></td>
147
117
  <td><%= s.applied_at? ? content_tag(:span, "Applied", class: "ck-chip", style: "background: var(--ck-success-soft); color: var(--ck-success);") : "&mdash;".html_safe %></td>
148
- <td class="ck-meta-copy"><%= time_ago_in_words(s.created_at) %> ago</td>
118
+ <td class="ck-meta-copy"><time data-relative-time datetime="<%= s.created_at.utc.iso8601 %>"><%= time_ago_in_words(s.created_at) %></time> ago</td>
149
119
  <td class="ck-results-table__arrow">&rarr;</td>
150
120
  </tr>
151
121
  <% end %>
@@ -20,11 +20,13 @@
20
20
  </div>
21
21
  <% elsif provider_credential.discovery_status == "failed" %>
22
22
  <div class="ck-discovery-bar ck-discovery-bar--failed">
23
- <div class="ck-discovery-bar__label">Model discovery failed</div>
23
+ <div class="ck-discovery-bar__label">
24
+ Model discovery failed<% if provider_credential.discovery_error.present? %>: <%= provider_credential.discovery_error %><% end %>
25
+ </div>
24
26
  </div>
25
27
  <% elsif provider_credential.discovery_status == "completed" && local_assigns.fetch(:show_completed, true) %>
26
28
  <div class="ck-discovery-bar ck-discovery-bar--completed">
27
- <div class="ck-discovery-bar__label">Available models list updated <%= time_ago_in_words(provider_credential.updated_at) %> ago</div>
29
+ <div class="ck-discovery-bar__label">Available models list updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</div>
28
30
  </div>
29
31
  <% end %>
30
32
  </div>
@@ -15,7 +15,7 @@
15
15
  <span class="ck-model-list__summary-label">Available models <span class="ck-model-list__summary-count"><%= models.count %></span></span>
16
16
  <span class="ck-model-list__summary-meta">
17
17
  <% if provider_credential.discovery_status == "completed" %>
18
- <span class="ck-model-list__summary-stamp">updated <%= time_ago_in_words(provider_credential.updated_at) %> ago</span>
18
+ <span class="ck-model-list__summary-stamp">updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</span>
19
19
  <% end %>
20
20
  <button type="button" class="ck-icon-btn ck-model-list__refresh<%= ' ck-icon-btn--spinning' if discovering %>" title="Refresh models" aria-label="Refresh available models" <%= 'disabled' if discovering %> onclick="event.preventDefault();event.stopPropagation();fetch('<%= refresh_provider_credential_path(provider_credential) %>', {method:'POST',headers:{'X-CSRF-Token':document.querySelector('meta[name=csrf-token]').content}})">
21
21
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="13" height="13" aria-hidden="true"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg>
@@ -27,7 +27,7 @@
27
27
  <span><%= provider_credential.api_endpoint.presence || default_endpoints[provider_credential.provider] %></span>
28
28
  <span><%= provider_credential.prompt_count %> prompts</span>
29
29
  <span><%= provider_credential.judge_count %> judges</span>
30
- <span><%= provider_credential.last_used_at ? "Used #{time_ago_in_words(provider_credential.last_used_at)} ago" : "Never used" %></span>
30
+ <span><% if provider_credential.last_used_at %>Used <time data-relative-time datetime="<%= provider_credential.last_used_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.last_used_at) %></time> ago<% else %>Never used<% end %></span>
31
31
  </div>
32
32
 
33
33
  <%= render "discovery_status", provider_credential: provider_credential %>
@@ -9,6 +9,9 @@
9
9
  <%= button_to "Start", generate_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
10
10
  <% elsif run.status == "failed" %>
11
11
  <%= button_to "Retry", generate_run_path(run), method: :post, class: ck_button_classes(:light, variant: :outline), form_class: "inline-block" %>
12
+ <%= button_to "Re-run as new", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
13
+ <% elsif run.status == "completed" %>
14
+ <%= button_to "Re-run", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
12
15
  <% end %>
13
16
  <% end %>
14
17
  </div>
@@ -18,7 +18,29 @@
18
18
 
19
19
  <div class="ck-field">
20
20
  <%= form.label :prompt_id, "Prompt", class: "ck-label" %>
21
- <%= form.select :prompt_id, @prompts.map { |p| [p.display_name, p.id, { "data-has-variables" => p.variables.any? ? "1" : "0" }] }, { include_blank: "Select a prompt" }, { class: "ck-input", id: "run_prompt_id" } %>
21
+ <%= form.select :prompt_id,
22
+ @prompts.map { |p|
23
+ vars = p.variables
24
+ label_parts = [p.display_name, p.llm_model]
25
+ label_parts << (vars.any? ? "#{vars.size} #{'var'.pluralize(vars.size)}" : "no vars")
26
+ [
27
+ label_parts.join(" · "),
28
+ p.id,
29
+ {
30
+ "data-has-variables" => vars.any? ? "1" : "0",
31
+ "data-model" => p.llm_model.to_s,
32
+ "data-variables" => vars.join(", "),
33
+ "data-description" => p.description.to_s,
34
+ "data-template-preview" => p.template.to_s.truncate(220, separator: " ")
35
+ }
36
+ ]
37
+ },
38
+ { include_blank: "Select a prompt" },
39
+ { class: "ck-input", id: "run_prompt_id" } %>
40
+ <div class="ck-prompt-summary" id="prompt-summary" hidden>
41
+ <p class="ck-prompt-summary__description" id="prompt-summary-description" hidden></p>
42
+ <p class="ck-prompt-summary__template" id="prompt-summary-template"></p>
43
+ </div>
22
44
  </div>
23
45
 
24
46
  <div class="ck-field" id="dataset-field">
@@ -26,18 +48,17 @@
26
48
  <% if @datasets.empty? %>
27
49
  <p class="ck-meta-copy">No datasets yet. <%= link_to "Create a dataset", new_dataset_path, class: "ck-link" %> first.</p>
28
50
  <% else %>
29
- <%= form.select :dataset_id, @datasets.map { |d| [d.name, d.id] }, { include_blank: "Select a dataset" }, { class: "ck-input", id: "run_dataset_id" } %>
51
+ <%= form.select :dataset_id,
52
+ @datasets.map { |d| [d.name, d.id, { "data-headers" => d.headers.join(",") }] },
53
+ { include_blank: "Select a dataset" },
54
+ { class: "ck-input", id: "run_dataset_id" } %>
30
55
  <% end %>
31
- <p class="ck-field-hint" id="dataset-hint" style="display: none; color: var(--ck-warning);">This prompt uses variables. Select a dataset to provide values.</p>
56
+ <p class="ck-field-hint" id="dataset-hint"></p>
32
57
  </div>
33
58
 
34
59
  <div class="ck-field">
35
60
  <label class="ck-label" for="run_temperature" style="position: relative;">
36
- Temperature
37
- <span class="ck-info-toggle">?</span>
38
- <div class="ck-info-popup">
39
- Controls how random the model's output is. Lower values make the model more focused and deterministic — it'll pick the most likely words. Higher values introduce more variety and creativity, but also more risk of odd phrasing. Most LLMs default to 1.0. For evaluation, try different values to see how your prompt performs under varying conditions.
40
- </div>
61
+ Temperature<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Controls how random the model's output is. Lower values are more focused and deterministic — the model picks the most likely words. Higher values are more varied and creative, with more risk of odd phrasing. Most LLMs default to 1.0; for evaluation, try a few values and see how your prompt holds up. Newer reasoning models (Claude Opus 4.7, GPT-5 family, etc.) ignore temperature entirely — CompletionKit detects this and re-sends without the parameter.</span>
41
62
  </label>
42
63
  <div class="ck-slider-row">
43
64
  <%= form.range_field :temperature, min: 0, max: 1, step: 0.1, class: "ck-slider", id: "run_temperature", oninput: "document.getElementById('temp-value').textContent = this.value" %>
@@ -46,7 +67,9 @@
46
67
  </div>
47
68
 
48
69
  <div class="ck-field" id="judge-field">
49
- <%= form.label :judge_model, "Judge model", class: "ck-label" %>
70
+ <label class="ck-label" for="run_judge_model" style="position: relative;">
71
+ Judge model<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
72
+ </label>
50
73
  <% available = CompletionKit::ApiConfig.available_models(scope: :judging) %>
51
74
  <% if available.any? %>
52
75
  <div class="ck-select-with-action">
@@ -74,18 +97,35 @@
74
97
  <p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet.&ensp;<%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
75
98
  <% else %>
76
99
  <% if @metric_groups.any? %>
77
- <p class="ck-meta-copy" style="margin-bottom: 0.5rem;">
78
- Quick add:&ensp;
79
- <% @metric_groups.each do |g| %>
80
- <span class="ck-chip" style="cursor: pointer;" onclick="ckQuickAddMetricGroup(<%= g.metric_ids.to_json %>)"><%= g.name %></span>&ensp;
81
- <% end %>
82
- </p>
100
+ <div class="ck-metric-groups">
101
+ <span class="ck-metric-groups__label">Groups</span>
102
+ <div class="ck-metric-groups__row">
103
+ <% @metric_groups.each do |g| %>
104
+ <button type="button"
105
+ class="ck-metric-group-pill"
106
+ data-metric-group
107
+ data-metric-ids="<%= g.metric_ids.join(",") %>"
108
+ onclick="ckToggleMetricGroup(this)">
109
+ <span class="ck-metric-group-pill__check" aria-hidden="true">✓</span>
110
+ <span class="ck-metric-group-pill__label"><%= g.name %></span>
111
+ <span class="ck-metric-group-pill__count"><%= g.metric_ids.size %></span>
112
+ </button>
113
+ <% end %>
114
+ </div>
115
+ </div>
116
+ <div class="ck-metric-divider"><span>or pick individually</span></div>
83
117
  <% end %>
84
118
  <div class="ck-metric-checkboxes">
85
119
  <% @all_metrics.each do |metric| %>
86
120
  <label class="ck-checkbox-label">
87
121
  <%= check_box_tag "run[metric_ids][]", metric.id, run.metric_ids.include?(metric.id), class: "ck-checkbox", id: "run_metric_#{metric.id}" %>
88
- <span><%= metric.name %></span>
122
+ <span class="ck-checkbox-label__box" aria-hidden="true"></span>
123
+ <span class="ck-checkbox-label__body">
124
+ <span class="ck-checkbox-label__text"><%= metric.name %></span>
125
+ <% if metric.instruction.present? %>
126
+ <span class="ck-checkbox-label__hint"><%= truncate(metric.instruction.to_s, length: 90) %></span>
127
+ <% end %>
128
+ </span>
89
129
  </label>
90
130
  <% end %>
91
131
  </div>
@@ -123,25 +163,79 @@ function updateRunForm() {
123
163
 
124
164
  var datasetEl = document.getElementById('run_dataset_id');
125
165
  var datasetHint = document.getElementById('dataset-hint');
166
+ var datasetField = document.getElementById('dataset-field');
126
167
  var dataset = datasetEl ? datasetEl.value : '';
127
168
  var selectedOption = promptEl ? promptEl.options[promptEl.selectedIndex] : null;
128
169
  var hasVars = selectedOption && selectedOption.dataset.hasVariables === '1';
129
- if (datasetHint) datasetHint.style.display = (hasVars && !dataset) ? '' : 'none';
170
+ var promptVars = (selectedOption && selectedOption.dataset.variables ? selectedOption.dataset.variables.split(/,\s*/) : []).filter(Boolean);
171
+
172
+ var missingVars = [];
173
+ if (hasVars && dataset && datasetEl) {
174
+ var datasetOption = datasetEl.options[datasetEl.selectedIndex];
175
+ var headers = (datasetOption && datasetOption.dataset.headers ? datasetOption.dataset.headers.split(/,\s*/) : []).filter(Boolean);
176
+ missingVars = promptVars.filter(function(v) { return headers.indexOf(v) === -1; });
177
+ }
178
+
179
+ if (datasetField) datasetField.className = 'ck-field';
180
+ if (datasetHint) datasetHint.textContent = '';
181
+ if (missingVars.length > 0) {
182
+ if (datasetField) datasetField.className = 'ck-field ck-field--error';
183
+ if (datasetHint) datasetHint.textContent = 'Dataset is missing ' + (missingVars.length === 1 ? 'column' : 'columns') + ' the prompt needs: ' + missingVars.join(', ');
184
+ } else if (hasVars && !dataset) {
185
+ if (datasetField) datasetField.className = 'ck-field ck-field--info';
186
+ if (datasetHint) datasetHint.textContent = 'This prompt uses variables. Select a dataset to provide values.';
187
+ }
188
+
189
+ var summary = document.getElementById('prompt-summary');
190
+ if (summary) {
191
+ if (selectedOption && selectedOption.value) {
192
+ var desc = selectedOption.dataset.description || '';
193
+ var tmpl = selectedOption.dataset.templatePreview || '';
194
+ var descEl = document.getElementById('prompt-summary-description');
195
+ descEl.textContent = desc;
196
+ descEl.hidden = !desc;
197
+ document.getElementById('prompt-summary-template').textContent = tmpl;
198
+ summary.hidden = false;
199
+ } else {
200
+ summary.hidden = true;
201
+ }
202
+ }
130
203
 
131
204
  var valid = prompt !== '';
132
205
  if (judge && metrics.length === 0) valid = false;
133
206
  if (!judge && metrics.length > 0) valid = false;
207
+ if (hasVars && !dataset) valid = false;
208
+ if (missingVars.length > 0) valid = false;
134
209
  if (submitBtn) submitBtn.disabled = !valid;
210
+
211
+ ckUpdateMetricGroupsState();
135
212
  }
136
213
 
137
- function ckQuickAddMetricGroup(metricIds) {
138
- metricIds.forEach(function(id) {
214
+ function ckToggleMetricGroup(button) {
215
+ var ids = (button.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
216
+ var allChecked = ids.every(function(id) {
217
+ var cb = document.getElementById('run_metric_' + id);
218
+ return cb && cb.checked;
219
+ });
220
+ ids.forEach(function(id) {
139
221
  var cb = document.getElementById('run_metric_' + id);
140
- if (cb) cb.checked = true;
222
+ if (cb) cb.checked = !allChecked;
141
223
  });
142
224
  updateRunForm();
143
225
  }
144
226
 
227
+ function ckUpdateMetricGroupsState() {
228
+ document.querySelectorAll('[data-metric-group]').forEach(function(btn) {
229
+ var ids = (btn.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
230
+ if (ids.length === 0) return;
231
+ var allChecked = ids.every(function(id) {
232
+ var cb = document.getElementById('run_metric_' + id);
233
+ return cb && cb.checked;
234
+ });
235
+ btn.classList.toggle('ck-metric-group-pill--active', allChecked);
236
+ });
237
+ }
238
+
145
239
  var judgeEl = document.getElementById('run_judge_model');
146
240
  var promptEl = document.getElementById('run_prompt_id');
147
241
  var datasetEl = document.getElementById('run_dataset_id');
@@ -1,38 +1,61 @@
1
- <% if response.succeeded? %>
2
- <%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row ck-response-row--succeeded", id: "response_#{response.id}" do %>
3
- <span class="ck-response-row__index">#<%= index %></span>
4
- <span class="ck-response-row__text"><%= truncate(response.response_text.to_s, length: 160) %></span>
5
- <span class="ck-response-row__score">
6
- <% if response.reviewed? %>
7
- <span class="ck-score"><span class="ck-score__star">★</span> <%= response.score %></span>
1
+ <% clickable = response.succeeded? %>
2
+ <tr id="response_<%= response.id %>"<% if clickable %> onclick="window.location='<%= run_response_path(run, response, sort: params[:sort]) %>'" style="cursor: pointer;"<% end %>>
3
+ <td class="ck-response-cell__index"><%= index %></td>
4
+ <td class="ck-response-cell__text">
5
+ <% if response.status == "failed" %>
6
+ <% err = response.error_payload %>
7
+ <span class="ck-response-cell__error"><%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 160) %></span>
8
+ <% else %>
9
+ <%= truncate(response.response_text.to_s, length: 160) %>
10
+ <% end %>
11
+ </td>
12
+ <td>
13
+ <% scored_reviews = response.reviews.select { |r| r.ai_score.present? }.sort_by { |r| r.metric_name.to_s.downcase } %>
14
+ <% if scored_reviews.any? %>
15
+ <span class="ck-metric-bar ck-metric-bar--compact">
16
+ <% scored_reviews.each do |r| %>
17
+ <span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(r.ai_score.to_f) %>">
18
+ <span class="ck-metric-pip__bar"></span>
19
+ <span class="ck-metric-pip__label"><%= r.metric_name %> <strong><%= r.ai_score %></strong></span>
20
+ </span>
21
+ <% end %>
22
+ </span>
23
+ <% else %>
24
+ <span class="ck-response-cell__dim">—</span>
25
+ <% end %>
26
+ </td>
27
+ <td>
28
+ <% if response.reviewed? %>
29
+ <span class="<%= ck_badge_classes(ck_score_kind(response.score.to_f)) %>"><%= response.score %></span>
30
+ <% else %>
31
+ <span class="ck-response-cell__dim">—</span>
32
+ <% end %>
33
+ </td>
34
+ <td>
35
+ <% case response.status
36
+ when "pending" %>
37
+ <span class="ck-chip">Queued</span>
38
+ <% when "retrying" %>
39
+ <% if response.attempts.to_i <= 1 %>
40
+ <span class="ck-chip">Generating</span>
41
+ <% else %>
42
+ <span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
43
+ <% end %>
44
+ <% when "succeeded" %>
45
+ <% if response.fully_reviewed? %>
46
+ <span class="ck-chip ck-chip--done">Done</span>
8
47
  <% elsif run.status == "running" %>
9
48
  <span class="ck-chip">Judging</span>
49
+ <% else %>
50
+ <span class="ck-chip">Awaiting judge</span>
10
51
  <% end %>
11
- </span>
12
- <% end %>
13
- <% else %>
14
- <div class="ck-response-row ck-response-row--<%= response.status %>" id="response_<%= response.id %>">
15
- <span class="ck-response-row__index">#<%= index %></span>
16
- <span class="ck-response-row__text">
17
- <% if response.status == "failed" %>
18
- <% err = response.error_payload %>
19
- <span class="ck-response-row__error">
20
- <%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 120) %>
21
- </span>
22
- <% end %>
23
- </span>
24
- <span class="ck-response-row__score">
25
- <% case response.status
26
- when "pending" %>
27
- <span class="ck-chip">Queued</span>
28
- <% when "retrying" %>
29
- <span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
30
- <% when "failed" %>
31
- <%= button_to "Retry", retry_failures_run_path(run, only: response.id),
32
- method: :post,
33
- class: "ck-chip ck-chip--retry",
34
- form_class: "inline-block" %>
35
- <% end %>
36
- </span>
37
- </div>
38
- <% end %>
52
+ <% when "failed" %>
53
+ <%= button_to "Retry", retry_failures_run_path(run, only: response.id),
54
+ method: :post,
55
+ class: "ck-chip ck-chip--retry",
56
+ form_class: "inline-block",
57
+ onclick: "event.stopPropagation();" %>
58
+ <% end %>
59
+ </td>
60
+ <td class="ck-results-table__arrow"><% if clickable %>&rarr;<% end %></td>
61
+ </tr>