completion-kit 0.4.2 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +850 -69
- data/app/controllers/completion_kit/runs_controller.rb +31 -18
- data/app/controllers/completion_kit/suggestions_controller.rb +24 -0
- data/app/jobs/completion_kit/generate_row_job.rb +7 -0
- data/app/jobs/completion_kit/judge_review_job.rb +2 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +9 -4
- data/app/models/completion_kit/dataset.rb +9 -0
- data/app/models/completion_kit/provider_credential.rb +1 -1
- data/app/models/completion_kit/response.rb +7 -0
- data/app/models/completion_kit/run.rb +22 -1
- data/app/services/completion_kit/anthropic_client.rb +33 -14
- data/app/services/completion_kit/model_discovery_service.rb +35 -9
- data/app/services/completion_kit/ollama_client.rb +31 -10
- data/app/services/completion_kit/open_ai_client.rb +35 -13
- data/app/services/completion_kit/open_router_client.rb +34 -13
- data/app/services/completion_kit/worker_health.rb +4 -1
- data/app/views/completion_kit/datasets/index.html.erb +1 -1
- data/app/views/completion_kit/datasets/show.html.erb +47 -9
- data/app/views/completion_kit/metrics/_form.html.erb +1 -1
- data/app/views/completion_kit/metrics/index.html.erb +15 -2
- data/app/views/completion_kit/metrics/show.html.erb +1 -1
- data/app/views/completion_kit/prompts/index.html.erb +27 -8
- data/app/views/completion_kit/prompts/show.html.erb +6 -36
- data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +4 -2
- data/app/views/completion_kit/provider_credentials/_models_card.html.erb +1 -1
- data/app/views/completion_kit/provider_credentials/index.html.erb +1 -1
- data/app/views/completion_kit/runs/_actions.html.erb +3 -0
- data/app/views/completion_kit/runs/_form.html.erb +114 -20
- data/app/views/completion_kit/runs/_response_row.html.erb +58 -35
- data/app/views/completion_kit/runs/_row.html.erb +50 -0
- data/app/views/completion_kit/runs/_sort_toolbar.html.erb +5 -4
- data/app/views/completion_kit/runs/_status_header.html.erb +3 -2
- data/app/views/completion_kit/runs/_status_panel.html.erb +55 -21
- data/app/views/completion_kit/runs/index.html.erb +4 -16
- data/app/views/completion_kit/runs/show.html.erb +110 -16
- data/app/views/completion_kit/suggestions/show.html.erb +65 -0
- data/app/views/layouts/completion_kit/application.html.erb +71 -0
- data/config/routes.rb +8 -2
- data/db/migrate/20260507000001_add_discovery_error_to_provider_credentials.rb +5 -0
- data/db/migrate/20260507150000_add_temperature_ignored_to_runs.rb +5 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +8 -7
- data/app/views/completion_kit/runs/suggestion.html.erb +0 -47
|
@@ -14,30 +14,68 @@
|
|
|
14
14
|
|
|
15
15
|
<section>
|
|
16
16
|
<p class="ck-kicker">CSV preview</p>
|
|
17
|
-
|
|
17
|
+
<%
|
|
18
|
+
require "csv"
|
|
19
|
+
parsed_rows = []
|
|
20
|
+
parse_error = nil
|
|
21
|
+
begin
|
|
22
|
+
csv = ::CSV.parse(@dataset.csv_data.to_s)
|
|
23
|
+
parsed_rows = csv
|
|
24
|
+
rescue ::CSV::MalformedCSVError => e
|
|
25
|
+
parse_error = e.message
|
|
26
|
+
end
|
|
27
|
+
headers = parsed_rows.first || []
|
|
28
|
+
body_rows = parsed_rows.drop(1)
|
|
29
|
+
%>
|
|
30
|
+
<% if parse_error %>
|
|
31
|
+
<p class="ck-field-hint" style="color: var(--ck-warning);">Could not parse CSV: <%= parse_error %></p>
|
|
32
|
+
<pre class="ck-code ck-code--dark"><%= @dataset.csv_data %></pre>
|
|
33
|
+
<% elsif headers.empty? %>
|
|
34
|
+
<p class="ck-field-hint">Dataset is empty.</p>
|
|
35
|
+
<% else %>
|
|
36
|
+
<div class="ck-csv-table-wrap">
|
|
37
|
+
<table class="ck-csv-table">
|
|
38
|
+
<thead>
|
|
39
|
+
<tr>
|
|
40
|
+
<th class="ck-csv-table__rownum">#</th>
|
|
41
|
+
<% headers.each do |h| %>
|
|
42
|
+
<th><%= h %></th>
|
|
43
|
+
<% end %>
|
|
44
|
+
</tr>
|
|
45
|
+
</thead>
|
|
46
|
+
<tbody>
|
|
47
|
+
<% body_rows.each_with_index do |row, idx| %>
|
|
48
|
+
<tr>
|
|
49
|
+
<td class="ck-csv-table__rownum"><%= idx + 1 %></td>
|
|
50
|
+
<% headers.each_with_index do |_, i| %>
|
|
51
|
+
<td><span class="ck-csv-cell"><%= row[i] %></span></td>
|
|
52
|
+
<% end %>
|
|
53
|
+
</tr>
|
|
54
|
+
<% end %>
|
|
55
|
+
</tbody>
|
|
56
|
+
</table>
|
|
57
|
+
</div>
|
|
58
|
+
<% end %>
|
|
18
59
|
</section>
|
|
19
60
|
|
|
20
61
|
<% if @runs.any? %>
|
|
21
62
|
<section class="ck-card--spaced">
|
|
22
63
|
<p class="ck-kicker">Runs</p>
|
|
23
64
|
|
|
24
|
-
<table class="ck-results-table" style="margin-top: 0.5rem;">
|
|
65
|
+
<table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
|
|
25
66
|
<thead>
|
|
26
67
|
<tr>
|
|
27
68
|
<th>Run</th>
|
|
28
|
-
<th>Prompt</th>
|
|
29
69
|
<th>Responses</th>
|
|
70
|
+
<th>Metrics</th>
|
|
71
|
+
<th>Avg score</th>
|
|
72
|
+
<th>When</th>
|
|
30
73
|
<th></th>
|
|
31
74
|
</tr>
|
|
32
75
|
</thead>
|
|
33
76
|
<tbody>
|
|
34
77
|
<% @runs.each do |run| %>
|
|
35
|
-
|
|
36
|
-
<td><strong><%= run.name %></strong></td>
|
|
37
|
-
<td><%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-link" %></td>
|
|
38
|
-
<td><%= run.responses.size %></td>
|
|
39
|
-
<td class="ck-results-table__arrow">→</td>
|
|
40
|
-
</tr>
|
|
78
|
+
<%= render "completion_kit/runs/row", run: run %>
|
|
41
79
|
<% end %>
|
|
42
80
|
</tbody>
|
|
43
81
|
</table>
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
<div class="ck-rubric-row">
|
|
32
32
|
<div class="ck-rubric-row__stars">
|
|
33
33
|
<% 5.times do |i| %>
|
|
34
|
-
<svg viewBox="0 0 24 24" width="
|
|
34
|
+
<svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
|
|
35
35
|
<% end %>
|
|
36
36
|
<input type="hidden" name="metric[rubric_bands][<%= index %>][stars]" value="<%= band["stars"] %>">
|
|
37
37
|
</div>
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
</section>
|
|
10
10
|
|
|
11
11
|
<% if @metrics.any? %>
|
|
12
|
-
<table class="ck-results-table">
|
|
12
|
+
<table class="ck-results-table ck-metrics-table">
|
|
13
13
|
<thead>
|
|
14
14
|
<tr>
|
|
15
15
|
<th>Name</th>
|
|
@@ -23,7 +23,20 @@
|
|
|
23
23
|
<tr onclick="window.location='<%= metric_path(metric) %>'" style="cursor: pointer;">
|
|
24
24
|
<td><strong><%= metric.name %></strong></td>
|
|
25
25
|
<td class="ck-meta-copy"><%= truncate(metric.instruction.to_s, length: 90).presence || "—" %></td>
|
|
26
|
-
<td
|
|
26
|
+
<td>
|
|
27
|
+
<% groups = metric.metric_groups %>
|
|
28
|
+
<% if groups.any? %>
|
|
29
|
+
<div class="ck-metrics-table__groups">
|
|
30
|
+
<% groups.each do |g| %>
|
|
31
|
+
<%= link_to metric_group_path(g), class: "ck-metric-group-pill ck-metric-group-pill--active", onclick: "event.stopPropagation();" do %>
|
|
32
|
+
<span class="ck-metric-group-pill__label"><%= g.name %></span>
|
|
33
|
+
<% end %>
|
|
34
|
+
<% end %>
|
|
35
|
+
</div>
|
|
36
|
+
<% else %>
|
|
37
|
+
<span class="ck-metrics-table__dim">—</span>
|
|
38
|
+
<% end %>
|
|
39
|
+
</td>
|
|
27
40
|
<td class="ck-results-table__arrow">→</td>
|
|
28
41
|
</tr>
|
|
29
42
|
<% end %>
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
<div class="ck-rubric-row ck-rubric-row--display">
|
|
27
27
|
<div class="ck-rubric-row__stars">
|
|
28
28
|
<% 5.times do |i| %>
|
|
29
|
-
<svg viewBox="0 0 24 24" width="
|
|
29
|
+
<svg viewBox="0 0 24 24" width="16" height="16" stroke-width="1.75" class="ck-star <%= i < band["stars"] ? "ck-star--filled" : "ck-star--empty" %>"><polygon points="12 2 15.09 8.26 22 9.27 17 14.14 18.18 21.02 12 17.77 5.82 21.02 7 14.14 2 9.27 8.91 8.26 12 2"/></svg>
|
|
30
30
|
<% end %>
|
|
31
31
|
</div>
|
|
32
32
|
<div class="ck-rubric-row__fields">
|
|
@@ -9,31 +9,50 @@
|
|
|
9
9
|
</section>
|
|
10
10
|
|
|
11
11
|
<% if @prompts.any? %>
|
|
12
|
-
<table class="ck-results-table">
|
|
12
|
+
<table class="ck-results-table ck-prompts-table">
|
|
13
13
|
<thead>
|
|
14
14
|
<tr>
|
|
15
15
|
<th>Name</th>
|
|
16
|
+
<th>Version</th>
|
|
16
17
|
<th>Model</th>
|
|
18
|
+
<th>Best score</th>
|
|
17
19
|
<th>Runs</th>
|
|
18
|
-
<th>Last run</th>
|
|
19
20
|
<th></th>
|
|
20
21
|
</tr>
|
|
21
22
|
</thead>
|
|
22
23
|
<tbody>
|
|
23
24
|
<% @prompts.each do |prompt| %>
|
|
24
25
|
<tr onclick="window.location='<%= prompt_path(prompt) %>'" style="cursor: pointer;">
|
|
25
|
-
<td><strong><%= prompt.name %></strong
|
|
26
|
+
<td><strong><%= prompt.name %></strong></td>
|
|
27
|
+
<% latest_version = prompt.family_versions.maximum(:version_number) %>
|
|
28
|
+
<td>
|
|
29
|
+
<span class="ck-chip ck-chip--soft"><%= prompt.version_label %></span>
|
|
30
|
+
<% if prompt.version_number < latest_version %>
|
|
31
|
+
<span class="ck-meta-copy" style="margin-left: 0.4rem;">of <%= latest_version %></span>
|
|
32
|
+
<% end %>
|
|
33
|
+
</td>
|
|
26
34
|
<td><span class="ck-chip"><%= prompt.llm_model %></span></td>
|
|
27
35
|
<% family_runs = CompletionKit::Run.where(prompt_id: prompt.family_versions.select(:id)) %>
|
|
28
|
-
|
|
36
|
+
<% current_version_runs = prompt.runs.includes(responses: :reviews) %>
|
|
37
|
+
<% best_score = current_version_runs.map(&:avg_score).compact.max %>
|
|
29
38
|
<td>
|
|
30
|
-
<%
|
|
31
|
-
|
|
32
|
-
<%= time_ago_in_words(last_run.created_at) %> ago
|
|
39
|
+
<% if best_score %>
|
|
40
|
+
<span class="<%= ck_badge_classes(ck_score_kind(best_score)) %>"><%= best_score %></span>
|
|
33
41
|
<% else %>
|
|
34
|
-
|
|
42
|
+
<span class="ck-prompts-table__dim">—</span>
|
|
35
43
|
<% end %>
|
|
36
44
|
</td>
|
|
45
|
+
<td>
|
|
46
|
+
<div class="ck-prompts-table__runs">
|
|
47
|
+
<span class="ck-prompts-table__runs-count"><%= family_runs.count %></span>
|
|
48
|
+
<% last_run = family_runs.order(created_at: :desc).first %>
|
|
49
|
+
<% if last_run %>
|
|
50
|
+
<span class="ck-prompts-table__runs-when">last <time data-relative-time datetime="<%= last_run.created_at.utc.iso8601 %>"><%= time_ago_in_words(last_run.created_at) %></time> ago</span>
|
|
51
|
+
<% else %>
|
|
52
|
+
<span class="ck-prompts-table__runs-when">never run</span>
|
|
53
|
+
<% end %>
|
|
54
|
+
</div>
|
|
55
|
+
</td>
|
|
37
56
|
<td class="ck-results-table__arrow">→</td>
|
|
38
57
|
</tr>
|
|
39
58
|
<% end %>
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
<tr>
|
|
57
57
|
<td><strong>v<%= v.version_number %></strong></td>
|
|
58
58
|
<td><span class="ck-chip ck-chip--soft"><%= v.llm_model %></span></td>
|
|
59
|
-
<td class="ck-meta-copy"><%=
|
|
59
|
+
<td class="ck-meta-copy"><time datetime="<%= v.created_at.iso8601 %>" data-local-time><%= v.created_at.utc.strftime("%b %-d, %Y at %-I:%M %p UTC") %></time></td>
|
|
60
60
|
<td>
|
|
61
61
|
<% if v.current? %>
|
|
62
62
|
<span class="ck-chip">Current</span>
|
|
@@ -75,50 +75,20 @@
|
|
|
75
75
|
<section class="ck-card--spaced">
|
|
76
76
|
<p class="ck-kicker">Runs</p>
|
|
77
77
|
|
|
78
|
-
<table class="ck-results-table" style="margin-top: 0.5rem;">
|
|
78
|
+
<table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
|
|
79
79
|
<thead>
|
|
80
80
|
<tr>
|
|
81
81
|
<th>Run</th>
|
|
82
|
-
<th>Version</th>
|
|
83
82
|
<th>Responses</th>
|
|
84
|
-
<th>Avg score</th>
|
|
85
83
|
<th>Metrics</th>
|
|
84
|
+
<th>Avg score</th>
|
|
86
85
|
<th>When</th>
|
|
87
86
|
<th></th>
|
|
88
87
|
</tr>
|
|
89
88
|
</thead>
|
|
90
89
|
<tbody>
|
|
91
90
|
<% @runs.each do |run| %>
|
|
92
|
-
|
|
93
|
-
<td><span class="ck-run-name"><span class="<%= ck_run_dot(run) %>"></span><strong><%= run.name %></strong></span></td>
|
|
94
|
-
<td><span class="ck-chip ck-chip--soft">v<%= run.prompt.version_number %></span></td>
|
|
95
|
-
<td><%= run.responses.size %></td>
|
|
96
|
-
<td>
|
|
97
|
-
<% avg = run.avg_score %>
|
|
98
|
-
<% if avg %>
|
|
99
|
-
<span class="<%= ck_badge_classes(ck_score_kind(avg)) %>"><%= avg %></span>
|
|
100
|
-
<% else %>
|
|
101
|
-
—
|
|
102
|
-
<% end %>
|
|
103
|
-
</td>
|
|
104
|
-
<td>
|
|
105
|
-
<% metrics = run.metric_averages %>
|
|
106
|
-
<% if metrics.any? %>
|
|
107
|
-
<div class="ck-metric-bar ck-metric-bar--compact">
|
|
108
|
-
<% metrics.each do |m| %>
|
|
109
|
-
<span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(m[:avg]) %>">
|
|
110
|
-
<span class="ck-metric-pip__bar"></span>
|
|
111
|
-
<span class="ck-metric-pip__label"><%= m[:name] %> <strong><%= m[:avg] %></strong></span>
|
|
112
|
-
</span>
|
|
113
|
-
<% end %>
|
|
114
|
-
</div>
|
|
115
|
-
<% else %>
|
|
116
|
-
—
|
|
117
|
-
<% end %>
|
|
118
|
-
</td>
|
|
119
|
-
<td class="ck-meta-copy"><%= time_ago_in_words(run.created_at) %> ago</td>
|
|
120
|
-
<td class="ck-results-table__arrow">→</td>
|
|
121
|
-
</tr>
|
|
91
|
+
<%= render "completion_kit/runs/row", run: run %>
|
|
122
92
|
<% end %>
|
|
123
93
|
</tbody>
|
|
124
94
|
</table>
|
|
@@ -141,11 +111,11 @@
|
|
|
141
111
|
</thead>
|
|
142
112
|
<tbody>
|
|
143
113
|
<% suggestions.each do |s| %>
|
|
144
|
-
<tr onclick="window.location='<%=
|
|
114
|
+
<tr onclick="window.location='<%= suggestion_path(s, from: "prompt") %>'" style="cursor: pointer;">
|
|
145
115
|
<td><strong><%= s.run.name %></strong></td>
|
|
146
116
|
<td class="ck-meta-copy"><%= truncate(s.reasoning.to_s, length: 100) %></td>
|
|
147
117
|
<td><%= s.applied_at? ? content_tag(:span, "Applied", class: "ck-chip", style: "background: var(--ck-success-soft); color: var(--ck-success);") : "—".html_safe %></td>
|
|
148
|
-
<td class="ck-meta-copy"><%= time_ago_in_words(s.created_at)
|
|
118
|
+
<td class="ck-meta-copy"><time data-relative-time datetime="<%= s.created_at.utc.iso8601 %>"><%= time_ago_in_words(s.created_at) %></time> ago</td>
|
|
149
119
|
<td class="ck-results-table__arrow">→</td>
|
|
150
120
|
</tr>
|
|
151
121
|
<% end %>
|
|
@@ -20,11 +20,13 @@
|
|
|
20
20
|
</div>
|
|
21
21
|
<% elsif provider_credential.discovery_status == "failed" %>
|
|
22
22
|
<div class="ck-discovery-bar ck-discovery-bar--failed">
|
|
23
|
-
<div class="ck-discovery-bar__label">
|
|
23
|
+
<div class="ck-discovery-bar__label">
|
|
24
|
+
Model discovery failed<% if provider_credential.discovery_error.present? %>: <%= provider_credential.discovery_error %><% end %>
|
|
25
|
+
</div>
|
|
24
26
|
</div>
|
|
25
27
|
<% elsif provider_credential.discovery_status == "completed" && local_assigns.fetch(:show_completed, true) %>
|
|
26
28
|
<div class="ck-discovery-bar ck-discovery-bar--completed">
|
|
27
|
-
<div class="ck-discovery-bar__label">Available models list updated <%= time_ago_in_words(provider_credential.updated_at)
|
|
29
|
+
<div class="ck-discovery-bar__label">Available models list updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</div>
|
|
28
30
|
</div>
|
|
29
31
|
<% end %>
|
|
30
32
|
</div>
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
<span class="ck-model-list__summary-label">Available models <span class="ck-model-list__summary-count"><%= models.count %></span></span>
|
|
16
16
|
<span class="ck-model-list__summary-meta">
|
|
17
17
|
<% if provider_credential.discovery_status == "completed" %>
|
|
18
|
-
<span class="ck-model-list__summary-stamp">updated <%= time_ago_in_words(provider_credential.updated_at)
|
|
18
|
+
<span class="ck-model-list__summary-stamp">updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</span>
|
|
19
19
|
<% end %>
|
|
20
20
|
<button type="button" class="ck-icon-btn ck-model-list__refresh<%= ' ck-icon-btn--spinning' if discovering %>" title="Refresh models" aria-label="Refresh available models" <%= 'disabled' if discovering %> onclick="event.preventDefault();event.stopPropagation();fetch('<%= refresh_provider_credential_path(provider_credential) %>', {method:'POST',headers:{'X-CSRF-Token':document.querySelector('meta[name=csrf-token]').content}})">
|
|
21
21
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="13" height="13" aria-hidden="true"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg>
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
<span><%= provider_credential.api_endpoint.presence || default_endpoints[provider_credential.provider] %></span>
|
|
28
28
|
<span><%= provider_credential.prompt_count %> prompts</span>
|
|
29
29
|
<span><%= provider_credential.judge_count %> judges</span>
|
|
30
|
-
<span
|
|
30
|
+
<span><% if provider_credential.last_used_at %>Used <time data-relative-time datetime="<%= provider_credential.last_used_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.last_used_at) %></time> ago<% else %>Never used<% end %></span>
|
|
31
31
|
</div>
|
|
32
32
|
|
|
33
33
|
<%= render "discovery_status", provider_credential: provider_credential %>
|
|
@@ -9,6 +9,9 @@
|
|
|
9
9
|
<%= button_to "Start", generate_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
|
|
10
10
|
<% elsif run.status == "failed" %>
|
|
11
11
|
<%= button_to "Retry", generate_run_path(run), method: :post, class: ck_button_classes(:light, variant: :outline), form_class: "inline-block" %>
|
|
12
|
+
<%= button_to "Re-run as new", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
|
|
13
|
+
<% elsif run.status == "completed" %>
|
|
14
|
+
<%= button_to "Re-run", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
|
|
12
15
|
<% end %>
|
|
13
16
|
<% end %>
|
|
14
17
|
</div>
|
|
@@ -18,7 +18,29 @@
|
|
|
18
18
|
|
|
19
19
|
<div class="ck-field">
|
|
20
20
|
<%= form.label :prompt_id, "Prompt", class: "ck-label" %>
|
|
21
|
-
<%= form.select :prompt_id,
|
|
21
|
+
<%= form.select :prompt_id,
|
|
22
|
+
@prompts.map { |p|
|
|
23
|
+
vars = p.variables
|
|
24
|
+
label_parts = [p.display_name, p.llm_model]
|
|
25
|
+
label_parts << (vars.any? ? "#{vars.size} #{'var'.pluralize(vars.size)}" : "no vars")
|
|
26
|
+
[
|
|
27
|
+
label_parts.join(" · "),
|
|
28
|
+
p.id,
|
|
29
|
+
{
|
|
30
|
+
"data-has-variables" => vars.any? ? "1" : "0",
|
|
31
|
+
"data-model" => p.llm_model.to_s,
|
|
32
|
+
"data-variables" => vars.join(", "),
|
|
33
|
+
"data-description" => p.description.to_s,
|
|
34
|
+
"data-template-preview" => p.template.to_s.truncate(220, separator: " ")
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{ include_blank: "Select a prompt" },
|
|
39
|
+
{ class: "ck-input", id: "run_prompt_id" } %>
|
|
40
|
+
<div class="ck-prompt-summary" id="prompt-summary" hidden>
|
|
41
|
+
<p class="ck-prompt-summary__description" id="prompt-summary-description" hidden></p>
|
|
42
|
+
<p class="ck-prompt-summary__template" id="prompt-summary-template"></p>
|
|
43
|
+
</div>
|
|
22
44
|
</div>
|
|
23
45
|
|
|
24
46
|
<div class="ck-field" id="dataset-field">
|
|
@@ -26,18 +48,17 @@
|
|
|
26
48
|
<% if @datasets.empty? %>
|
|
27
49
|
<p class="ck-meta-copy">No datasets yet. <%= link_to "Create a dataset", new_dataset_path, class: "ck-link" %> first.</p>
|
|
28
50
|
<% else %>
|
|
29
|
-
<%= form.select :dataset_id,
|
|
51
|
+
<%= form.select :dataset_id,
|
|
52
|
+
@datasets.map { |d| [d.name, d.id, { "data-headers" => d.headers.join(",") }] },
|
|
53
|
+
{ include_blank: "Select a dataset" },
|
|
54
|
+
{ class: "ck-input", id: "run_dataset_id" } %>
|
|
30
55
|
<% end %>
|
|
31
|
-
<p class="ck-field-hint" id="dataset-hint"
|
|
56
|
+
<p class="ck-field-hint" id="dataset-hint"></p>
|
|
32
57
|
</div>
|
|
33
58
|
|
|
34
59
|
<div class="ck-field">
|
|
35
60
|
<label class="ck-label" for="run_temperature" style="position: relative;">
|
|
36
|
-
Temperature
|
|
37
|
-
<span class="ck-info-toggle">?</span>
|
|
38
|
-
<div class="ck-info-popup">
|
|
39
|
-
Controls how random the model's output is. Lower values make the model more focused and deterministic — it'll pick the most likely words. Higher values introduce more variety and creativity, but also more risk of odd phrasing. Most LLMs default to 1.0. For evaluation, try different values to see how your prompt performs under varying conditions.
|
|
40
|
-
</div>
|
|
61
|
+
Temperature<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Controls how random the model's output is. Lower values are more focused and deterministic — the model picks the most likely words. Higher values are more varied and creative, with more risk of odd phrasing. Most LLMs default to 1.0; for evaluation, try a few values and see how your prompt holds up. Newer reasoning models (Claude Opus 4.7, GPT-5 family, etc.) ignore temperature entirely — CompletionKit detects this and re-sends without the parameter.</span>
|
|
41
62
|
</label>
|
|
42
63
|
<div class="ck-slider-row">
|
|
43
64
|
<%= form.range_field :temperature, min: 0, max: 1, step: 0.1, class: "ck-slider", id: "run_temperature", oninput: "document.getElementById('temp-value').textContent = this.value" %>
|
|
@@ -46,7 +67,9 @@
|
|
|
46
67
|
</div>
|
|
47
68
|
|
|
48
69
|
<div class="ck-field" id="judge-field">
|
|
49
|
-
|
|
70
|
+
<label class="ck-label" for="run_judge_model" style="position: relative;">
|
|
71
|
+
Judge model<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
|
|
72
|
+
</label>
|
|
50
73
|
<% available = CompletionKit::ApiConfig.available_models(scope: :judging) %>
|
|
51
74
|
<% if available.any? %>
|
|
52
75
|
<div class="ck-select-with-action">
|
|
@@ -74,18 +97,35 @@
|
|
|
74
97
|
<p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet. <%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
|
|
75
98
|
<% else %>
|
|
76
99
|
<% if @metric_groups.any? %>
|
|
77
|
-
<
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
100
|
+
<div class="ck-metric-groups">
|
|
101
|
+
<span class="ck-metric-groups__label">Groups</span>
|
|
102
|
+
<div class="ck-metric-groups__row">
|
|
103
|
+
<% @metric_groups.each do |g| %>
|
|
104
|
+
<button type="button"
|
|
105
|
+
class="ck-metric-group-pill"
|
|
106
|
+
data-metric-group
|
|
107
|
+
data-metric-ids="<%= g.metric_ids.join(",") %>"
|
|
108
|
+
onclick="ckToggleMetricGroup(this)">
|
|
109
|
+
<span class="ck-metric-group-pill__check" aria-hidden="true">✓</span>
|
|
110
|
+
<span class="ck-metric-group-pill__label"><%= g.name %></span>
|
|
111
|
+
<span class="ck-metric-group-pill__count"><%= g.metric_ids.size %></span>
|
|
112
|
+
</button>
|
|
113
|
+
<% end %>
|
|
114
|
+
</div>
|
|
115
|
+
</div>
|
|
116
|
+
<div class="ck-metric-divider"><span>or pick individually</span></div>
|
|
83
117
|
<% end %>
|
|
84
118
|
<div class="ck-metric-checkboxes">
|
|
85
119
|
<% @all_metrics.each do |metric| %>
|
|
86
120
|
<label class="ck-checkbox-label">
|
|
87
121
|
<%= check_box_tag "run[metric_ids][]", metric.id, run.metric_ids.include?(metric.id), class: "ck-checkbox", id: "run_metric_#{metric.id}" %>
|
|
88
|
-
<span
|
|
122
|
+
<span class="ck-checkbox-label__box" aria-hidden="true"></span>
|
|
123
|
+
<span class="ck-checkbox-label__body">
|
|
124
|
+
<span class="ck-checkbox-label__text"><%= metric.name %></span>
|
|
125
|
+
<% if metric.instruction.present? %>
|
|
126
|
+
<span class="ck-checkbox-label__hint"><%= truncate(metric.instruction.to_s, length: 90) %></span>
|
|
127
|
+
<% end %>
|
|
128
|
+
</span>
|
|
89
129
|
</label>
|
|
90
130
|
<% end %>
|
|
91
131
|
</div>
|
|
@@ -123,25 +163,79 @@ function updateRunForm() {
|
|
|
123
163
|
|
|
124
164
|
var datasetEl = document.getElementById('run_dataset_id');
|
|
125
165
|
var datasetHint = document.getElementById('dataset-hint');
|
|
166
|
+
var datasetField = document.getElementById('dataset-field');
|
|
126
167
|
var dataset = datasetEl ? datasetEl.value : '';
|
|
127
168
|
var selectedOption = promptEl ? promptEl.options[promptEl.selectedIndex] : null;
|
|
128
169
|
var hasVars = selectedOption && selectedOption.dataset.hasVariables === '1';
|
|
129
|
-
|
|
170
|
+
var promptVars = (selectedOption && selectedOption.dataset.variables ? selectedOption.dataset.variables.split(/,\s*/) : []).filter(Boolean);
|
|
171
|
+
|
|
172
|
+
var missingVars = [];
|
|
173
|
+
if (hasVars && dataset && datasetEl) {
|
|
174
|
+
var datasetOption = datasetEl.options[datasetEl.selectedIndex];
|
|
175
|
+
var headers = (datasetOption && datasetOption.dataset.headers ? datasetOption.dataset.headers.split(/,\s*/) : []).filter(Boolean);
|
|
176
|
+
missingVars = promptVars.filter(function(v) { return headers.indexOf(v) === -1; });
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
if (datasetField) datasetField.className = 'ck-field';
|
|
180
|
+
if (datasetHint) datasetHint.textContent = '';
|
|
181
|
+
if (missingVars.length > 0) {
|
|
182
|
+
if (datasetField) datasetField.className = 'ck-field ck-field--error';
|
|
183
|
+
if (datasetHint) datasetHint.textContent = 'Dataset is missing ' + (missingVars.length === 1 ? 'column' : 'columns') + ' the prompt needs: ' + missingVars.join(', ');
|
|
184
|
+
} else if (hasVars && !dataset) {
|
|
185
|
+
if (datasetField) datasetField.className = 'ck-field ck-field--info';
|
|
186
|
+
if (datasetHint) datasetHint.textContent = 'This prompt uses variables. Select a dataset to provide values.';
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
var summary = document.getElementById('prompt-summary');
|
|
190
|
+
if (summary) {
|
|
191
|
+
if (selectedOption && selectedOption.value) {
|
|
192
|
+
var desc = selectedOption.dataset.description || '';
|
|
193
|
+
var tmpl = selectedOption.dataset.templatePreview || '';
|
|
194
|
+
var descEl = document.getElementById('prompt-summary-description');
|
|
195
|
+
descEl.textContent = desc;
|
|
196
|
+
descEl.hidden = !desc;
|
|
197
|
+
document.getElementById('prompt-summary-template').textContent = tmpl;
|
|
198
|
+
summary.hidden = false;
|
|
199
|
+
} else {
|
|
200
|
+
summary.hidden = true;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
130
203
|
|
|
131
204
|
var valid = prompt !== '';
|
|
132
205
|
if (judge && metrics.length === 0) valid = false;
|
|
133
206
|
if (!judge && metrics.length > 0) valid = false;
|
|
207
|
+
if (hasVars && !dataset) valid = false;
|
|
208
|
+
if (missingVars.length > 0) valid = false;
|
|
134
209
|
if (submitBtn) submitBtn.disabled = !valid;
|
|
210
|
+
|
|
211
|
+
ckUpdateMetricGroupsState();
|
|
135
212
|
}
|
|
136
213
|
|
|
137
|
-
function
|
|
138
|
-
|
|
214
|
+
function ckToggleMetricGroup(button) {
|
|
215
|
+
var ids = (button.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
|
|
216
|
+
var allChecked = ids.every(function(id) {
|
|
217
|
+
var cb = document.getElementById('run_metric_' + id);
|
|
218
|
+
return cb && cb.checked;
|
|
219
|
+
});
|
|
220
|
+
ids.forEach(function(id) {
|
|
139
221
|
var cb = document.getElementById('run_metric_' + id);
|
|
140
|
-
if (cb) cb.checked =
|
|
222
|
+
if (cb) cb.checked = !allChecked;
|
|
141
223
|
});
|
|
142
224
|
updateRunForm();
|
|
143
225
|
}
|
|
144
226
|
|
|
227
|
+
function ckUpdateMetricGroupsState() {
|
|
228
|
+
document.querySelectorAll('[data-metric-group]').forEach(function(btn) {
|
|
229
|
+
var ids = (btn.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
|
|
230
|
+
if (ids.length === 0) return;
|
|
231
|
+
var allChecked = ids.every(function(id) {
|
|
232
|
+
var cb = document.getElementById('run_metric_' + id);
|
|
233
|
+
return cb && cb.checked;
|
|
234
|
+
});
|
|
235
|
+
btn.classList.toggle('ck-metric-group-pill--active', allChecked);
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
|
|
145
239
|
var judgeEl = document.getElementById('run_judge_model');
|
|
146
240
|
var promptEl = document.getElementById('run_prompt_id');
|
|
147
241
|
var datasetEl = document.getElementById('run_dataset_id');
|
|
@@ -1,38 +1,61 @@
|
|
|
1
|
-
<%
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
<%
|
|
7
|
-
|
|
1
|
+
<% clickable = response.succeeded? %>
|
|
2
|
+
<tr id="response_<%= response.id %>"<% if clickable %> onclick="window.location='<%= run_response_path(run, response, sort: params[:sort]) %>'" style="cursor: pointer;"<% end %>>
|
|
3
|
+
<td class="ck-response-cell__index"><%= index %></td>
|
|
4
|
+
<td class="ck-response-cell__text">
|
|
5
|
+
<% if response.status == "failed" %>
|
|
6
|
+
<% err = response.error_payload %>
|
|
7
|
+
<span class="ck-response-cell__error"><%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 160) %></span>
|
|
8
|
+
<% else %>
|
|
9
|
+
<%= truncate(response.response_text.to_s, length: 160) %>
|
|
10
|
+
<% end %>
|
|
11
|
+
</td>
|
|
12
|
+
<td>
|
|
13
|
+
<% scored_reviews = response.reviews.select { |r| r.ai_score.present? }.sort_by { |r| r.metric_name.to_s.downcase } %>
|
|
14
|
+
<% if scored_reviews.any? %>
|
|
15
|
+
<span class="ck-metric-bar ck-metric-bar--compact">
|
|
16
|
+
<% scored_reviews.each do |r| %>
|
|
17
|
+
<span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(r.ai_score.to_f) %>">
|
|
18
|
+
<span class="ck-metric-pip__bar"></span>
|
|
19
|
+
<span class="ck-metric-pip__label"><%= r.metric_name %> <strong><%= r.ai_score %></strong></span>
|
|
20
|
+
</span>
|
|
21
|
+
<% end %>
|
|
22
|
+
</span>
|
|
23
|
+
<% else %>
|
|
24
|
+
<span class="ck-response-cell__dim">—</span>
|
|
25
|
+
<% end %>
|
|
26
|
+
</td>
|
|
27
|
+
<td>
|
|
28
|
+
<% if response.reviewed? %>
|
|
29
|
+
<span class="<%= ck_badge_classes(ck_score_kind(response.score.to_f)) %>"><%= response.score %></span>
|
|
30
|
+
<% else %>
|
|
31
|
+
<span class="ck-response-cell__dim">—</span>
|
|
32
|
+
<% end %>
|
|
33
|
+
</td>
|
|
34
|
+
<td>
|
|
35
|
+
<% case response.status
|
|
36
|
+
when "pending" %>
|
|
37
|
+
<span class="ck-chip">Queued</span>
|
|
38
|
+
<% when "retrying" %>
|
|
39
|
+
<% if response.attempts.to_i <= 1 %>
|
|
40
|
+
<span class="ck-chip">Generating</span>
|
|
41
|
+
<% else %>
|
|
42
|
+
<span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
|
|
43
|
+
<% end %>
|
|
44
|
+
<% when "succeeded" %>
|
|
45
|
+
<% if response.fully_reviewed? %>
|
|
46
|
+
<span class="ck-chip ck-chip--done">Done</span>
|
|
8
47
|
<% elsif run.status == "running" %>
|
|
9
48
|
<span class="ck-chip">Judging</span>
|
|
49
|
+
<% else %>
|
|
50
|
+
<span class="ck-chip">Awaiting judge</span>
|
|
10
51
|
<% end %>
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
</span>
|
|
22
|
-
<% end %>
|
|
23
|
-
</span>
|
|
24
|
-
<span class="ck-response-row__score">
|
|
25
|
-
<% case response.status
|
|
26
|
-
when "pending" %>
|
|
27
|
-
<span class="ck-chip">Queued</span>
|
|
28
|
-
<% when "retrying" %>
|
|
29
|
-
<span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
|
|
30
|
-
<% when "failed" %>
|
|
31
|
-
<%= button_to "Retry", retry_failures_run_path(run, only: response.id),
|
|
32
|
-
method: :post,
|
|
33
|
-
class: "ck-chip ck-chip--retry",
|
|
34
|
-
form_class: "inline-block" %>
|
|
35
|
-
<% end %>
|
|
36
|
-
</span>
|
|
37
|
-
</div>
|
|
38
|
-
<% end %>
|
|
52
|
+
<% when "failed" %>
|
|
53
|
+
<%= button_to "Retry", retry_failures_run_path(run, only: response.id),
|
|
54
|
+
method: :post,
|
|
55
|
+
class: "ck-chip ck-chip--retry",
|
|
56
|
+
form_class: "inline-block",
|
|
57
|
+
onclick: "event.stopPropagation();" %>
|
|
58
|
+
<% end %>
|
|
59
|
+
</td>
|
|
60
|
+
<td class="ck-results-table__arrow"><% if clickable %>→<% end %></td>
|
|
61
|
+
</tr>
|