completion-kit 0.4.1 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +1882 -785
- data/app/controllers/completion_kit/runs_controller.rb +34 -19
- data/app/controllers/completion_kit/suggestions_controller.rb +24 -0
- data/app/jobs/completion_kit/generate_row_job.rb +7 -0
- data/app/jobs/completion_kit/judge_review_job.rb +2 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +9 -4
- data/app/models/completion_kit/dataset.rb +9 -0
- data/app/models/completion_kit/provider_credential.rb +12 -1
- data/app/models/completion_kit/response.rb +7 -0
- data/app/models/completion_kit/run.rb +47 -9
- data/app/services/completion_kit/anthropic_client.rb +33 -14
- data/app/services/completion_kit/model_discovery_service.rb +133 -30
- data/app/services/completion_kit/ollama_client.rb +31 -10
- data/app/services/completion_kit/open_ai_client.rb +35 -13
- data/app/services/completion_kit/open_router_client.rb +34 -13
- data/app/services/completion_kit/worker_health.rb +4 -1
- data/app/views/completion_kit/datasets/index.html.erb +1 -1
- data/app/views/completion_kit/datasets/show.html.erb +47 -9
- data/app/views/completion_kit/metrics/_form.html.erb +1 -1
- data/app/views/completion_kit/metrics/index.html.erb +15 -2
- data/app/views/completion_kit/metrics/show.html.erb +1 -1
- data/app/views/completion_kit/prompts/index.html.erb +27 -8
- data/app/views/completion_kit/prompts/show.html.erb +6 -36
- data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +6 -4
- data/app/views/completion_kit/provider_credentials/_form.html.erb +1 -32
- data/app/views/completion_kit/provider_credentials/_models_card.html.erb +70 -0
- data/app/views/completion_kit/provider_credentials/index.html.erb +1 -1
- data/app/views/completion_kit/responses/show.html.erb +27 -6
- data/app/views/completion_kit/runs/_actions.html.erb +3 -0
- data/app/views/completion_kit/runs/_form.html.erb +114 -20
- data/app/views/completion_kit/runs/_response_row.html.erb +52 -22
- data/app/views/completion_kit/runs/_row.html.erb +50 -0
- data/app/views/completion_kit/runs/_sort_toolbar.html.erb +5 -4
- data/app/views/completion_kit/runs/_status_header.html.erb +7 -31
- data/app/views/completion_kit/runs/_status_panel.html.erb +80 -0
- data/app/views/completion_kit/runs/index.html.erb +4 -16
- data/app/views/completion_kit/runs/show.html.erb +111 -17
- data/app/views/completion_kit/suggestions/show.html.erb +65 -0
- data/app/views/layouts/completion_kit/application.html.erb +71 -0
- data/config/routes.rb +8 -2
- data/db/migrate/20260507000001_add_discovery_error_to_provider_credentials.rb +5 -0
- data/db/migrate/20260507150000_add_temperature_ignored_to_runs.rb +5 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +9 -4
- data/app/views/completion_kit/runs/_progress.html.erb +0 -18
- data/app/views/completion_kit/runs/suggestion.html.erb +0 -47
|
@@ -9,31 +9,50 @@
|
|
|
9
9
|
</section>
|
|
10
10
|
|
|
11
11
|
<% if @prompts.any? %>
|
|
12
|
-
<table class="ck-results-table">
|
|
12
|
+
<table class="ck-results-table ck-prompts-table">
|
|
13
13
|
<thead>
|
|
14
14
|
<tr>
|
|
15
15
|
<th>Name</th>
|
|
16
|
+
<th>Version</th>
|
|
16
17
|
<th>Model</th>
|
|
18
|
+
<th>Best score</th>
|
|
17
19
|
<th>Runs</th>
|
|
18
|
-
<th>Last run</th>
|
|
19
20
|
<th></th>
|
|
20
21
|
</tr>
|
|
21
22
|
</thead>
|
|
22
23
|
<tbody>
|
|
23
24
|
<% @prompts.each do |prompt| %>
|
|
24
25
|
<tr onclick="window.location='<%= prompt_path(prompt) %>'" style="cursor: pointer;">
|
|
25
|
-
<td><strong><%= prompt.name %></strong
|
|
26
|
+
<td><strong><%= prompt.name %></strong></td>
|
|
27
|
+
<% latest_version = prompt.family_versions.maximum(:version_number) %>
|
|
28
|
+
<td>
|
|
29
|
+
<span class="ck-chip ck-chip--soft"><%= prompt.version_label %></span>
|
|
30
|
+
<% if prompt.version_number < latest_version %>
|
|
31
|
+
<span class="ck-meta-copy" style="margin-left: 0.4rem;">of <%= latest_version %></span>
|
|
32
|
+
<% end %>
|
|
33
|
+
</td>
|
|
26
34
|
<td><span class="ck-chip"><%= prompt.llm_model %></span></td>
|
|
27
35
|
<% family_runs = CompletionKit::Run.where(prompt_id: prompt.family_versions.select(:id)) %>
|
|
28
|
-
|
|
36
|
+
<% current_version_runs = prompt.runs.includes(responses: :reviews) %>
|
|
37
|
+
<% best_score = current_version_runs.map(&:avg_score).compact.max %>
|
|
29
38
|
<td>
|
|
30
|
-
<%
|
|
31
|
-
|
|
32
|
-
<%= time_ago_in_words(last_run.created_at) %> ago
|
|
39
|
+
<% if best_score %>
|
|
40
|
+
<span class="<%= ck_badge_classes(ck_score_kind(best_score)) %>"><%= best_score %></span>
|
|
33
41
|
<% else %>
|
|
34
|
-
|
|
42
|
+
<span class="ck-prompts-table__dim">—</span>
|
|
35
43
|
<% end %>
|
|
36
44
|
</td>
|
|
45
|
+
<td>
|
|
46
|
+
<div class="ck-prompts-table__runs">
|
|
47
|
+
<span class="ck-prompts-table__runs-count"><%= family_runs.count %></span>
|
|
48
|
+
<% last_run = family_runs.order(created_at: :desc).first %>
|
|
49
|
+
<% if last_run %>
|
|
50
|
+
<span class="ck-prompts-table__runs-when">last <time data-relative-time datetime="<%= last_run.created_at.utc.iso8601 %>"><%= time_ago_in_words(last_run.created_at) %></time> ago</span>
|
|
51
|
+
<% else %>
|
|
52
|
+
<span class="ck-prompts-table__runs-when">never run</span>
|
|
53
|
+
<% end %>
|
|
54
|
+
</div>
|
|
55
|
+
</td>
|
|
37
56
|
<td class="ck-results-table__arrow">→</td>
|
|
38
57
|
</tr>
|
|
39
58
|
<% end %>
|
|
@@ -56,7 +56,7 @@
|
|
|
56
56
|
<tr>
|
|
57
57
|
<td><strong>v<%= v.version_number %></strong></td>
|
|
58
58
|
<td><span class="ck-chip ck-chip--soft"><%= v.llm_model %></span></td>
|
|
59
|
-
<td class="ck-meta-copy"><%=
|
|
59
|
+
<td class="ck-meta-copy"><time datetime="<%= v.created_at.iso8601 %>" data-local-time><%= v.created_at.utc.strftime("%b %-d, %Y at %-I:%M %p UTC") %></time></td>
|
|
60
60
|
<td>
|
|
61
61
|
<% if v.current? %>
|
|
62
62
|
<span class="ck-chip">Current</span>
|
|
@@ -75,50 +75,20 @@
|
|
|
75
75
|
<section class="ck-card--spaced">
|
|
76
76
|
<p class="ck-kicker">Runs</p>
|
|
77
77
|
|
|
78
|
-
<table class="ck-results-table" style="margin-top: 0.5rem;">
|
|
78
|
+
<table class="ck-results-table ck-runs-table" style="margin-top: 0.5rem;">
|
|
79
79
|
<thead>
|
|
80
80
|
<tr>
|
|
81
81
|
<th>Run</th>
|
|
82
|
-
<th>Version</th>
|
|
83
82
|
<th>Responses</th>
|
|
84
|
-
<th>Avg score</th>
|
|
85
83
|
<th>Metrics</th>
|
|
84
|
+
<th>Avg score</th>
|
|
86
85
|
<th>When</th>
|
|
87
86
|
<th></th>
|
|
88
87
|
</tr>
|
|
89
88
|
</thead>
|
|
90
89
|
<tbody>
|
|
91
90
|
<% @runs.each do |run| %>
|
|
92
|
-
|
|
93
|
-
<td><span class="ck-run-name"><span class="<%= ck_run_dot(run) %>"></span><strong><%= run.name %></strong></span></td>
|
|
94
|
-
<td><span class="ck-chip ck-chip--soft">v<%= run.prompt.version_number %></span></td>
|
|
95
|
-
<td><%= run.responses.size %></td>
|
|
96
|
-
<td>
|
|
97
|
-
<% avg = run.avg_score %>
|
|
98
|
-
<% if avg %>
|
|
99
|
-
<span class="<%= ck_badge_classes(ck_score_kind(avg)) %>"><%= avg %></span>
|
|
100
|
-
<% else %>
|
|
101
|
-
—
|
|
102
|
-
<% end %>
|
|
103
|
-
</td>
|
|
104
|
-
<td>
|
|
105
|
-
<% metrics = run.metric_averages %>
|
|
106
|
-
<% if metrics.any? %>
|
|
107
|
-
<div class="ck-metric-bar ck-metric-bar--compact">
|
|
108
|
-
<% metrics.each do |m| %>
|
|
109
|
-
<span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(m[:avg]) %>">
|
|
110
|
-
<span class="ck-metric-pip__bar"></span>
|
|
111
|
-
<span class="ck-metric-pip__label"><%= m[:name] %> <strong><%= m[:avg] %></strong></span>
|
|
112
|
-
</span>
|
|
113
|
-
<% end %>
|
|
114
|
-
</div>
|
|
115
|
-
<% else %>
|
|
116
|
-
—
|
|
117
|
-
<% end %>
|
|
118
|
-
</td>
|
|
119
|
-
<td class="ck-meta-copy"><%= time_ago_in_words(run.created_at) %> ago</td>
|
|
120
|
-
<td class="ck-results-table__arrow">→</td>
|
|
121
|
-
</tr>
|
|
91
|
+
<%= render "completion_kit/runs/row", run: run %>
|
|
122
92
|
<% end %>
|
|
123
93
|
</tbody>
|
|
124
94
|
</table>
|
|
@@ -141,11 +111,11 @@
|
|
|
141
111
|
</thead>
|
|
142
112
|
<tbody>
|
|
143
113
|
<% suggestions.each do |s| %>
|
|
144
|
-
<tr onclick="window.location='<%=
|
|
114
|
+
<tr onclick="window.location='<%= suggestion_path(s, from: "prompt") %>'" style="cursor: pointer;">
|
|
145
115
|
<td><strong><%= s.run.name %></strong></td>
|
|
146
116
|
<td class="ck-meta-copy"><%= truncate(s.reasoning.to_s, length: 100) %></td>
|
|
147
117
|
<td><%= s.applied_at? ? content_tag(:span, "Applied", class: "ck-chip", style: "background: var(--ck-success-soft); color: var(--ck-success);") : "—".html_safe %></td>
|
|
148
|
-
<td class="ck-meta-copy"><%= time_ago_in_words(s.created_at)
|
|
118
|
+
<td class="ck-meta-copy"><time data-relative-time datetime="<%= s.created_at.utc.iso8601 %>"><%= time_ago_in_words(s.created_at) %></time> ago</td>
|
|
149
119
|
<td class="ck-results-table__arrow">→</td>
|
|
150
120
|
</tr>
|
|
151
121
|
<% end %>
|
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
<div class="ck-discovery-bar">
|
|
4
4
|
<div class="ck-discovery-bar__label">
|
|
5
5
|
<% if provider_credential.discovery_total > 0 %>
|
|
6
|
-
|
|
6
|
+
Checking models… <%= provider_credential.discovery_current %>/<%= provider_credential.discovery_total %>
|
|
7
7
|
<% else %>
|
|
8
|
-
|
|
8
|
+
Looking up models…
|
|
9
9
|
<% end %>
|
|
10
10
|
</div>
|
|
11
11
|
<% if provider_credential.discovery_total > 0 %>
|
|
@@ -20,11 +20,13 @@
|
|
|
20
20
|
</div>
|
|
21
21
|
<% elsif provider_credential.discovery_status == "failed" %>
|
|
22
22
|
<div class="ck-discovery-bar ck-discovery-bar--failed">
|
|
23
|
-
<div class="ck-discovery-bar__label">
|
|
23
|
+
<div class="ck-discovery-bar__label">
|
|
24
|
+
Model discovery failed<% if provider_credential.discovery_error.present? %>: <%= provider_credential.discovery_error %><% end %>
|
|
25
|
+
</div>
|
|
24
26
|
</div>
|
|
25
27
|
<% elsif provider_credential.discovery_status == "completed" && local_assigns.fetch(:show_completed, true) %>
|
|
26
28
|
<div class="ck-discovery-bar ck-discovery-bar--completed">
|
|
27
|
-
<div class="ck-discovery-bar__label">Available models list updated <%= time_ago_in_words(provider_credential.updated_at)
|
|
29
|
+
<div class="ck-discovery-bar__label">Available models list updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</div>
|
|
28
30
|
</div>
|
|
29
31
|
<% end %>
|
|
30
32
|
</div>
|
|
@@ -36,36 +36,5 @@
|
|
|
36
36
|
|
|
37
37
|
<% if provider_credential.persisted? %>
|
|
38
38
|
<%= turbo_stream_from "completion_kit_provider_#{provider_credential.id}" %>
|
|
39
|
-
|
|
40
|
-
<% models = CompletionKit::Model.where(provider: provider_credential.provider).active.order(:model_id) %>
|
|
41
|
-
<% if models.any? || provider_credential.discovery_status.present? %>
|
|
42
|
-
<div class="ck-card ck-form-card__models">
|
|
43
|
-
<div class="ck-form-card__footer-header">
|
|
44
|
-
<%= render "discovery_status", provider_credential: provider_credential %>
|
|
45
|
-
<button type="button" class="ck-icon-btn" title="Refresh models" aria-label="Refresh available models" onclick="fetch('<%= refresh_provider_credential_path(provider_credential) %>', {method:'POST',headers:{'X-CSRF-Token':document.querySelector('meta[name=csrf-token]').content}})">
|
|
46
|
-
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="14" height="14" aria-hidden="true"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg>
|
|
47
|
-
</button>
|
|
48
|
-
</div>
|
|
49
|
-
|
|
50
|
-
<% if models.any? %>
|
|
51
|
-
<details class="ck-model-list-details">
|
|
52
|
-
<summary class="ck-label" style="cursor: pointer; user-select: none;">Available models (<%= models.count %>)</summary>
|
|
53
|
-
<div class="ck-model-list">
|
|
54
|
-
<% models.each do |m| %>
|
|
55
|
-
<span class="ck-model-list__item">
|
|
56
|
-
<%= m.display_name || m.model_id %>
|
|
57
|
-
<% if m.supports_generation && m.supports_judging %>
|
|
58
|
-
<span class="ck-model-list__badge">gen + judge</span>
|
|
59
|
-
<% elsif m.supports_generation %>
|
|
60
|
-
<span class="ck-model-list__badge">gen</span>
|
|
61
|
-
<% elsif m.supports_judging %>
|
|
62
|
-
<span class="ck-model-list__badge">judge</span>
|
|
63
|
-
<% end %>
|
|
64
|
-
</span>
|
|
65
|
-
<% end %>
|
|
66
|
-
</div>
|
|
67
|
-
</details>
|
|
68
|
-
<% end %>
|
|
69
|
-
</div>
|
|
70
|
-
<% end %>
|
|
39
|
+
<%= render "models_card", provider_credential: provider_credential %>
|
|
71
40
|
<% end %>
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
<div id="provider_models_<%= provider_credential.id %>">
|
|
2
|
+
<% models = CompletionKit::Model.where(provider: provider_credential.provider).active.order(:model_id) %>
|
|
3
|
+
<% if models.any? || provider_credential.discovery_status.present? %>
|
|
4
|
+
<div class="ck-card ck-form-card__models">
|
|
5
|
+
<% if models.none? && provider_credential.discovery_status.in?(%w[discovering failed]) %>
|
|
6
|
+
<%= render "completion_kit/provider_credentials/discovery_status", provider_credential: provider_credential, show_completed: false %>
|
|
7
|
+
<% end %>
|
|
8
|
+
|
|
9
|
+
<% if models.any? %>
|
|
10
|
+
<% discovering = provider_credential.discovery_status == "discovering" %>
|
|
11
|
+
<% recently_completed = provider_credential.discovery_status == "completed" && provider_credential.updated_at > 1.minute.ago %>
|
|
12
|
+
<% expanded = discovering || recently_completed %>
|
|
13
|
+
<details class="ck-model-list-details"<%= " open" if expanded %>>
|
|
14
|
+
<summary class="ck-model-list__summary">
|
|
15
|
+
<span class="ck-model-list__summary-label">Available models <span class="ck-model-list__summary-count"><%= models.count %></span></span>
|
|
16
|
+
<span class="ck-model-list__summary-meta">
|
|
17
|
+
<% if provider_credential.discovery_status == "completed" %>
|
|
18
|
+
<span class="ck-model-list__summary-stamp">updated <time data-relative-time datetime="<%= provider_credential.updated_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.updated_at) %></time> ago</span>
|
|
19
|
+
<% end %>
|
|
20
|
+
<button type="button" class="ck-icon-btn ck-model-list__refresh<%= ' ck-icon-btn--spinning' if discovering %>" title="Refresh models" aria-label="Refresh available models" <%= 'disabled' if discovering %> onclick="event.preventDefault();event.stopPropagation();fetch('<%= refresh_provider_credential_path(provider_credential) %>', {method:'POST',headers:{'X-CSRF-Token':document.querySelector('meta[name=csrf-token]').content}})">
|
|
21
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="13" height="13" aria-hidden="true"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg>
|
|
22
|
+
</button>
|
|
23
|
+
</span>
|
|
24
|
+
</summary>
|
|
25
|
+
<% if provider_credential.discovery_status.in?(%w[discovering failed]) %>
|
|
26
|
+
<div class="ck-model-list__progress">
|
|
27
|
+
<%= render "completion_kit/provider_credentials/discovery_status", provider_credential: provider_credential, show_completed: false %>
|
|
28
|
+
</div>
|
|
29
|
+
<% end %>
|
|
30
|
+
<div class="ck-model-table-wrap">
|
|
31
|
+
<table class="ck-model-table">
|
|
32
|
+
<thead>
|
|
33
|
+
<tr>
|
|
34
|
+
<th scope="col" class="ck-model-table__name">Model</th>
|
|
35
|
+
<th scope="col" class="ck-model-table__cap">
|
|
36
|
+
Gen<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Generation models produce the responses your prompts ask for. Pick one when creating a prompt.</span>
|
|
37
|
+
</th>
|
|
38
|
+
<th scope="col" class="ck-model-table__cap">
|
|
39
|
+
Judge<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup ck-info-popup--right">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
|
|
40
|
+
</th>
|
|
41
|
+
</tr>
|
|
42
|
+
</thead>
|
|
43
|
+
<tbody>
|
|
44
|
+
<% models.each do |m| %>
|
|
45
|
+
<tr>
|
|
46
|
+
<td class="ck-model-table__name"><%= m.display_name || m.model_id %></td>
|
|
47
|
+
<td class="ck-model-table__cap">
|
|
48
|
+
<% if m.supports_generation %>
|
|
49
|
+
<span class="ck-model-table__tick" aria-label="Supports generation">✓</span>
|
|
50
|
+
<% else %>
|
|
51
|
+
<span class="ck-model-table__dash" aria-label="No generation support">—</span>
|
|
52
|
+
<% end %>
|
|
53
|
+
</td>
|
|
54
|
+
<td class="ck-model-table__cap">
|
|
55
|
+
<% if m.supports_judging %>
|
|
56
|
+
<span class="ck-model-table__tick" aria-label="Supports judging">✓</span>
|
|
57
|
+
<% else %>
|
|
58
|
+
<span class="ck-model-table__dash" aria-label="No judging support">—</span>
|
|
59
|
+
<% end %>
|
|
60
|
+
</td>
|
|
61
|
+
</tr>
|
|
62
|
+
<% end %>
|
|
63
|
+
</tbody>
|
|
64
|
+
</table>
|
|
65
|
+
</div>
|
|
66
|
+
</details>
|
|
67
|
+
<% end %>
|
|
68
|
+
</div>
|
|
69
|
+
<% end %>
|
|
70
|
+
</div>
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
<span><%= provider_credential.api_endpoint.presence || default_endpoints[provider_credential.provider] %></span>
|
|
28
28
|
<span><%= provider_credential.prompt_count %> prompts</span>
|
|
29
29
|
<span><%= provider_credential.judge_count %> judges</span>
|
|
30
|
-
<span
|
|
30
|
+
<span><% if provider_credential.last_used_at %>Used <time data-relative-time datetime="<%= provider_credential.last_used_at.utc.iso8601 %>"><%= time_ago_in_words(provider_credential.last_used_at) %></time> ago<% else %>Never used<% end %></span>
|
|
31
31
|
</div>
|
|
32
32
|
|
|
33
33
|
<%= render "discovery_status", provider_credential: provider_credential %>
|
|
@@ -14,12 +14,6 @@
|
|
|
14
14
|
<span class="<%= ck_badge_classes(ck_score_kind(score)) %>"><%= score %></span>
|
|
15
15
|
<% end %>
|
|
16
16
|
</div>
|
|
17
|
-
<p class="ck-meta-copy">
|
|
18
|
-
<span class="ck-run-config__key">Prompt</span> <%= link_to @run.prompt.display_name, prompt_path(@run.prompt), class: "ck-link" %>
|
|
19
|
-
<% if @run.dataset %>
|
|
20
|
-
 · <span class="ck-run-config__key">Dataset</span> <%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
|
|
21
|
-
<% end %>
|
|
22
|
-
</p>
|
|
23
17
|
</div>
|
|
24
18
|
<div class="ck-actions">
|
|
25
19
|
<% if @prev_response %>
|
|
@@ -31,6 +25,33 @@
|
|
|
31
25
|
</div>
|
|
32
26
|
</section>
|
|
33
27
|
|
|
28
|
+
<div class="ck-run-config">
|
|
29
|
+
<div class="ck-run-config__row">
|
|
30
|
+
<span class="ck-run-config__key">Run</span>
|
|
31
|
+
<%= link_to @run.name, run_path(@run), class: "ck-link" %>
|
|
32
|
+
</div>
|
|
33
|
+
<div class="ck-run-config__row">
|
|
34
|
+
<span class="ck-run-config__key">Prompt</span>
|
|
35
|
+
<%= link_to @run.prompt.display_name, prompt_path(@run.prompt), class: "ck-link" %>
|
|
36
|
+
</div>
|
|
37
|
+
<% if @run.dataset %>
|
|
38
|
+
<div class="ck-run-config__row">
|
|
39
|
+
<span class="ck-run-config__key">Dataset</span>
|
|
40
|
+
<%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
|
|
41
|
+
</div>
|
|
42
|
+
<% end %>
|
|
43
|
+
<div class="ck-run-config__row">
|
|
44
|
+
<span class="ck-run-config__key">Model</span>
|
|
45
|
+
<span style="text-transform: none;"><%= @run.prompt.llm_model %></span>
|
|
46
|
+
</div>
|
|
47
|
+
<% if @run.judge_model.present? %>
|
|
48
|
+
<div class="ck-run-config__row">
|
|
49
|
+
<span class="ck-run-config__key">Judge</span>
|
|
50
|
+
<span style="text-transform: none;"><%= @run.judge_model %></span>
|
|
51
|
+
</div>
|
|
52
|
+
<% end %>
|
|
53
|
+
</div>
|
|
54
|
+
|
|
34
55
|
<section>
|
|
35
56
|
<p class="ck-kicker">Input</p>
|
|
36
57
|
<pre class="ck-code ck-code--dark"><%= begin; JSON.pretty_generate(JSON.parse(@response.input_data)); rescue; @response.input_data; end %></pre>
|
|
@@ -9,6 +9,9 @@
|
|
|
9
9
|
<%= button_to "Start", generate_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
|
|
10
10
|
<% elsif run.status == "failed" %>
|
|
11
11
|
<%= button_to "Retry", generate_run_path(run), method: :post, class: ck_button_classes(:light, variant: :outline), form_class: "inline-block" %>
|
|
12
|
+
<%= button_to "Re-run as new", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
|
|
13
|
+
<% elsif run.status == "completed" %>
|
|
14
|
+
<%= button_to "Re-run", rerun_run_path(run), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
|
|
12
15
|
<% end %>
|
|
13
16
|
<% end %>
|
|
14
17
|
</div>
|
|
@@ -18,7 +18,29 @@
|
|
|
18
18
|
|
|
19
19
|
<div class="ck-field">
|
|
20
20
|
<%= form.label :prompt_id, "Prompt", class: "ck-label" %>
|
|
21
|
-
<%= form.select :prompt_id,
|
|
21
|
+
<%= form.select :prompt_id,
|
|
22
|
+
@prompts.map { |p|
|
|
23
|
+
vars = p.variables
|
|
24
|
+
label_parts = [p.display_name, p.llm_model]
|
|
25
|
+
label_parts << (vars.any? ? "#{vars.size} #{'var'.pluralize(vars.size)}" : "no vars")
|
|
26
|
+
[
|
|
27
|
+
label_parts.join(" · "),
|
|
28
|
+
p.id,
|
|
29
|
+
{
|
|
30
|
+
"data-has-variables" => vars.any? ? "1" : "0",
|
|
31
|
+
"data-model" => p.llm_model.to_s,
|
|
32
|
+
"data-variables" => vars.join(", "),
|
|
33
|
+
"data-description" => p.description.to_s,
|
|
34
|
+
"data-template-preview" => p.template.to_s.truncate(220, separator: " ")
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
{ include_blank: "Select a prompt" },
|
|
39
|
+
{ class: "ck-input", id: "run_prompt_id" } %>
|
|
40
|
+
<div class="ck-prompt-summary" id="prompt-summary" hidden>
|
|
41
|
+
<p class="ck-prompt-summary__description" id="prompt-summary-description" hidden></p>
|
|
42
|
+
<p class="ck-prompt-summary__template" id="prompt-summary-template"></p>
|
|
43
|
+
</div>
|
|
22
44
|
</div>
|
|
23
45
|
|
|
24
46
|
<div class="ck-field" id="dataset-field">
|
|
@@ -26,18 +48,17 @@
|
|
|
26
48
|
<% if @datasets.empty? %>
|
|
27
49
|
<p class="ck-meta-copy">No datasets yet. <%= link_to "Create a dataset", new_dataset_path, class: "ck-link" %> first.</p>
|
|
28
50
|
<% else %>
|
|
29
|
-
<%= form.select :dataset_id,
|
|
51
|
+
<%= form.select :dataset_id,
|
|
52
|
+
@datasets.map { |d| [d.name, d.id, { "data-headers" => d.headers.join(",") }] },
|
|
53
|
+
{ include_blank: "Select a dataset" },
|
|
54
|
+
{ class: "ck-input", id: "run_dataset_id" } %>
|
|
30
55
|
<% end %>
|
|
31
|
-
<p class="ck-field-hint" id="dataset-hint"
|
|
56
|
+
<p class="ck-field-hint" id="dataset-hint"></p>
|
|
32
57
|
</div>
|
|
33
58
|
|
|
34
59
|
<div class="ck-field">
|
|
35
60
|
<label class="ck-label" for="run_temperature" style="position: relative;">
|
|
36
|
-
Temperature
|
|
37
|
-
<span class="ck-info-toggle">?</span>
|
|
38
|
-
<div class="ck-info-popup">
|
|
39
|
-
Controls how random the model's output is. Lower values make the model more focused and deterministic — it'll pick the most likely words. Higher values introduce more variety and creativity, but also more risk of odd phrasing. Most LLMs default to 1.0. For evaluation, try different values to see how your prompt performs under varying conditions.
|
|
40
|
-
</div>
|
|
61
|
+
Temperature<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Controls how random the model's output is. Lower values are more focused and deterministic — the model picks the most likely words. Higher values are more varied and creative, with more risk of odd phrasing. Most LLMs default to 1.0; for evaluation, try a few values and see how your prompt holds up. Newer reasoning models (Claude Opus 4.7, GPT-5 family, etc.) ignore temperature entirely — CompletionKit detects this and re-sends without the parameter.</span>
|
|
41
62
|
</label>
|
|
42
63
|
<div class="ck-slider-row">
|
|
43
64
|
<%= form.range_field :temperature, min: 0, max: 1, step: 0.1, class: "ck-slider", id: "run_temperature", oninput: "document.getElementById('temp-value').textContent = this.value" %>
|
|
@@ -46,7 +67,9 @@
|
|
|
46
67
|
</div>
|
|
47
68
|
|
|
48
69
|
<div class="ck-field" id="judge-field">
|
|
49
|
-
|
|
70
|
+
<label class="ck-label" for="run_judge_model" style="position: relative;">
|
|
71
|
+
Judge model<span class="ck-info-toggle" tabindex="0">?</span><span class="ck-info-popup">Judge models score generated responses against your metrics. Pick one when configuring a run.</span>
|
|
72
|
+
</label>
|
|
50
73
|
<% available = CompletionKit::ApiConfig.available_models(scope: :judging) %>
|
|
51
74
|
<% if available.any? %>
|
|
52
75
|
<div class="ck-select-with-action">
|
|
@@ -74,18 +97,35 @@
|
|
|
74
97
|
<p class="ck-field-hint" style="color: var(--ck-warning);">No metrics yet. <%= link_to "Create a metric", new_metric_path, class: "ck-link" %></p>
|
|
75
98
|
<% else %>
|
|
76
99
|
<% if @metric_groups.any? %>
|
|
77
|
-
<
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
100
|
+
<div class="ck-metric-groups">
|
|
101
|
+
<span class="ck-metric-groups__label">Groups</span>
|
|
102
|
+
<div class="ck-metric-groups__row">
|
|
103
|
+
<% @metric_groups.each do |g| %>
|
|
104
|
+
<button type="button"
|
|
105
|
+
class="ck-metric-group-pill"
|
|
106
|
+
data-metric-group
|
|
107
|
+
data-metric-ids="<%= g.metric_ids.join(",") %>"
|
|
108
|
+
onclick="ckToggleMetricGroup(this)">
|
|
109
|
+
<span class="ck-metric-group-pill__check" aria-hidden="true">✓</span>
|
|
110
|
+
<span class="ck-metric-group-pill__label"><%= g.name %></span>
|
|
111
|
+
<span class="ck-metric-group-pill__count"><%= g.metric_ids.size %></span>
|
|
112
|
+
</button>
|
|
113
|
+
<% end %>
|
|
114
|
+
</div>
|
|
115
|
+
</div>
|
|
116
|
+
<div class="ck-metric-divider"><span>or pick individually</span></div>
|
|
83
117
|
<% end %>
|
|
84
118
|
<div class="ck-metric-checkboxes">
|
|
85
119
|
<% @all_metrics.each do |metric| %>
|
|
86
120
|
<label class="ck-checkbox-label">
|
|
87
121
|
<%= check_box_tag "run[metric_ids][]", metric.id, run.metric_ids.include?(metric.id), class: "ck-checkbox", id: "run_metric_#{metric.id}" %>
|
|
88
|
-
<span
|
|
122
|
+
<span class="ck-checkbox-label__box" aria-hidden="true"></span>
|
|
123
|
+
<span class="ck-checkbox-label__body">
|
|
124
|
+
<span class="ck-checkbox-label__text"><%= metric.name %></span>
|
|
125
|
+
<% if metric.instruction.present? %>
|
|
126
|
+
<span class="ck-checkbox-label__hint"><%= truncate(metric.instruction.to_s, length: 90) %></span>
|
|
127
|
+
<% end %>
|
|
128
|
+
</span>
|
|
89
129
|
</label>
|
|
90
130
|
<% end %>
|
|
91
131
|
</div>
|
|
@@ -123,25 +163,79 @@ function updateRunForm() {
|
|
|
123
163
|
|
|
124
164
|
var datasetEl = document.getElementById('run_dataset_id');
|
|
125
165
|
var datasetHint = document.getElementById('dataset-hint');
|
|
166
|
+
var datasetField = document.getElementById('dataset-field');
|
|
126
167
|
var dataset = datasetEl ? datasetEl.value : '';
|
|
127
168
|
var selectedOption = promptEl ? promptEl.options[promptEl.selectedIndex] : null;
|
|
128
169
|
var hasVars = selectedOption && selectedOption.dataset.hasVariables === '1';
|
|
129
|
-
|
|
170
|
+
var promptVars = (selectedOption && selectedOption.dataset.variables ? selectedOption.dataset.variables.split(/,\s*/) : []).filter(Boolean);
|
|
171
|
+
|
|
172
|
+
var missingVars = [];
|
|
173
|
+
if (hasVars && dataset && datasetEl) {
|
|
174
|
+
var datasetOption = datasetEl.options[datasetEl.selectedIndex];
|
|
175
|
+
var headers = (datasetOption && datasetOption.dataset.headers ? datasetOption.dataset.headers.split(/,\s*/) : []).filter(Boolean);
|
|
176
|
+
missingVars = promptVars.filter(function(v) { return headers.indexOf(v) === -1; });
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
if (datasetField) datasetField.className = 'ck-field';
|
|
180
|
+
if (datasetHint) datasetHint.textContent = '';
|
|
181
|
+
if (missingVars.length > 0) {
|
|
182
|
+
if (datasetField) datasetField.className = 'ck-field ck-field--error';
|
|
183
|
+
if (datasetHint) datasetHint.textContent = 'Dataset is missing ' + (missingVars.length === 1 ? 'column' : 'columns') + ' the prompt needs: ' + missingVars.join(', ');
|
|
184
|
+
} else if (hasVars && !dataset) {
|
|
185
|
+
if (datasetField) datasetField.className = 'ck-field ck-field--info';
|
|
186
|
+
if (datasetHint) datasetHint.textContent = 'This prompt uses variables. Select a dataset to provide values.';
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
var summary = document.getElementById('prompt-summary');
|
|
190
|
+
if (summary) {
|
|
191
|
+
if (selectedOption && selectedOption.value) {
|
|
192
|
+
var desc = selectedOption.dataset.description || '';
|
|
193
|
+
var tmpl = selectedOption.dataset.templatePreview || '';
|
|
194
|
+
var descEl = document.getElementById('prompt-summary-description');
|
|
195
|
+
descEl.textContent = desc;
|
|
196
|
+
descEl.hidden = !desc;
|
|
197
|
+
document.getElementById('prompt-summary-template').textContent = tmpl;
|
|
198
|
+
summary.hidden = false;
|
|
199
|
+
} else {
|
|
200
|
+
summary.hidden = true;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
130
203
|
|
|
131
204
|
var valid = prompt !== '';
|
|
132
205
|
if (judge && metrics.length === 0) valid = false;
|
|
133
206
|
if (!judge && metrics.length > 0) valid = false;
|
|
207
|
+
if (hasVars && !dataset) valid = false;
|
|
208
|
+
if (missingVars.length > 0) valid = false;
|
|
134
209
|
if (submitBtn) submitBtn.disabled = !valid;
|
|
210
|
+
|
|
211
|
+
ckUpdateMetricGroupsState();
|
|
135
212
|
}
|
|
136
213
|
|
|
137
|
-
function
|
|
138
|
-
|
|
214
|
+
function ckToggleMetricGroup(button) {
|
|
215
|
+
var ids = (button.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
|
|
216
|
+
var allChecked = ids.every(function(id) {
|
|
217
|
+
var cb = document.getElementById('run_metric_' + id);
|
|
218
|
+
return cb && cb.checked;
|
|
219
|
+
});
|
|
220
|
+
ids.forEach(function(id) {
|
|
139
221
|
var cb = document.getElementById('run_metric_' + id);
|
|
140
|
-
if (cb) cb.checked =
|
|
222
|
+
if (cb) cb.checked = !allChecked;
|
|
141
223
|
});
|
|
142
224
|
updateRunForm();
|
|
143
225
|
}
|
|
144
226
|
|
|
227
|
+
function ckUpdateMetricGroupsState() {
|
|
228
|
+
document.querySelectorAll('[data-metric-group]').forEach(function(btn) {
|
|
229
|
+
var ids = (btn.getAttribute('data-metric-ids') || '').split(',').filter(Boolean);
|
|
230
|
+
if (ids.length === 0) return;
|
|
231
|
+
var allChecked = ids.every(function(id) {
|
|
232
|
+
var cb = document.getElementById('run_metric_' + id);
|
|
233
|
+
return cb && cb.checked;
|
|
234
|
+
});
|
|
235
|
+
btn.classList.toggle('ck-metric-group-pill--active', allChecked);
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
|
|
145
239
|
var judgeEl = document.getElementById('run_judge_model');
|
|
146
240
|
var promptEl = document.getElementById('run_prompt_id');
|
|
147
241
|
var datasetEl = document.getElementById('run_dataset_id');
|
|
@@ -1,31 +1,61 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
<
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
<% elsif response.status == "failed" %>
|
|
1
|
+
<% clickable = response.succeeded? %>
|
|
2
|
+
<tr id="response_<%= response.id %>"<% if clickable %> onclick="window.location='<%= run_response_path(run, response, sort: params[:sort]) %>'" style="cursor: pointer;"<% end %>>
|
|
3
|
+
<td class="ck-response-cell__index"><%= index %></td>
|
|
4
|
+
<td class="ck-response-cell__text">
|
|
5
|
+
<% if response.status == "failed" %>
|
|
7
6
|
<% err = response.error_payload %>
|
|
8
|
-
<span class="ck-response-
|
|
9
|
-
|
|
7
|
+
<span class="ck-response-cell__error"><%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 160) %></span>
|
|
8
|
+
<% else %>
|
|
9
|
+
<%= truncate(response.response_text.to_s, length: 160) %>
|
|
10
|
+
<% end %>
|
|
11
|
+
</td>
|
|
12
|
+
<td>
|
|
13
|
+
<% scored_reviews = response.reviews.select { |r| r.ai_score.present? }.sort_by { |r| r.metric_name.to_s.downcase } %>
|
|
14
|
+
<% if scored_reviews.any? %>
|
|
15
|
+
<span class="ck-metric-bar ck-metric-bar--compact">
|
|
16
|
+
<% scored_reviews.each do |r| %>
|
|
17
|
+
<span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(r.ai_score.to_f) %>">
|
|
18
|
+
<span class="ck-metric-pip__bar"></span>
|
|
19
|
+
<span class="ck-metric-pip__label"><%= r.metric_name %> <strong><%= r.ai_score %></strong></span>
|
|
20
|
+
</span>
|
|
21
|
+
<% end %>
|
|
10
22
|
</span>
|
|
23
|
+
<% else %>
|
|
24
|
+
<span class="ck-response-cell__dim">—</span>
|
|
11
25
|
<% end %>
|
|
12
|
-
</
|
|
13
|
-
<
|
|
26
|
+
</td>
|
|
27
|
+
<td>
|
|
28
|
+
<% if response.reviewed? %>
|
|
29
|
+
<span class="<%= ck_badge_classes(ck_score_kind(response.score.to_f)) %>"><%= response.score %></span>
|
|
30
|
+
<% else %>
|
|
31
|
+
<span class="ck-response-cell__dim">—</span>
|
|
32
|
+
<% end %>
|
|
33
|
+
</td>
|
|
34
|
+
<td>
|
|
14
35
|
<% case response.status
|
|
15
|
-
when "
|
|
16
|
-
|
|
17
|
-
|
|
36
|
+
when "pending" %>
|
|
37
|
+
<span class="ck-chip">Queued</span>
|
|
38
|
+
<% when "retrying" %>
|
|
39
|
+
<% if response.attempts.to_i <= 1 %>
|
|
40
|
+
<span class="ck-chip">Generating</span>
|
|
41
|
+
<% else %>
|
|
42
|
+
<span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
|
|
43
|
+
<% end %>
|
|
44
|
+
<% when "succeeded" %>
|
|
45
|
+
<% if response.fully_reviewed? %>
|
|
46
|
+
<span class="ck-chip ck-chip--done">Done</span>
|
|
18
47
|
<% elsif run.status == "running" %>
|
|
19
48
|
<span class="ck-chip">Judging</span>
|
|
49
|
+
<% else %>
|
|
50
|
+
<span class="ck-chip">Awaiting judge</span>
|
|
20
51
|
<% end %>
|
|
21
|
-
<% when "pending" %>
|
|
22
|
-
<span class="ck-chip">Queued</span>
|
|
23
|
-
<% when "retrying" %>
|
|
24
|
-
<span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
|
|
25
52
|
<% when "failed" %>
|
|
26
|
-
<%=
|
|
27
|
-
|
|
28
|
-
class: "ck-chip ck-chip--
|
|
53
|
+
<%= button_to "Retry", retry_failures_run_path(run, only: response.id),
|
|
54
|
+
method: :post,
|
|
55
|
+
class: "ck-chip ck-chip--retry",
|
|
56
|
+
form_class: "inline-block",
|
|
57
|
+
onclick: "event.stopPropagation();" %>
|
|
29
58
|
<% end %>
|
|
30
|
-
</
|
|
31
|
-
|
|
59
|
+
</td>
|
|
60
|
+
<td class="ck-results-table__arrow"><% if clickable %>→<% end %></td>
|
|
61
|
+
</tr>
|