completion-kit 0.4.2 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/assets/stylesheets/completion_kit/application.css +850 -69
- data/app/controllers/completion_kit/runs_controller.rb +31 -18
- data/app/controllers/completion_kit/suggestions_controller.rb +24 -0
- data/app/jobs/completion_kit/generate_row_job.rb +7 -0
- data/app/jobs/completion_kit/judge_review_job.rb +2 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +9 -4
- data/app/models/completion_kit/dataset.rb +9 -0
- data/app/models/completion_kit/provider_credential.rb +1 -1
- data/app/models/completion_kit/response.rb +7 -0
- data/app/models/completion_kit/run.rb +22 -1
- data/app/services/completion_kit/anthropic_client.rb +33 -14
- data/app/services/completion_kit/model_discovery_service.rb +35 -9
- data/app/services/completion_kit/ollama_client.rb +31 -10
- data/app/services/completion_kit/open_ai_client.rb +35 -13
- data/app/services/completion_kit/open_router_client.rb +34 -13
- data/app/services/completion_kit/worker_health.rb +4 -1
- data/app/views/completion_kit/datasets/index.html.erb +1 -1
- data/app/views/completion_kit/datasets/show.html.erb +47 -9
- data/app/views/completion_kit/metrics/_form.html.erb +1 -1
- data/app/views/completion_kit/metrics/index.html.erb +15 -2
- data/app/views/completion_kit/metrics/show.html.erb +1 -1
- data/app/views/completion_kit/prompts/index.html.erb +27 -8
- data/app/views/completion_kit/prompts/show.html.erb +6 -36
- data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +4 -2
- data/app/views/completion_kit/provider_credentials/_models_card.html.erb +1 -1
- data/app/views/completion_kit/provider_credentials/index.html.erb +1 -1
- data/app/views/completion_kit/runs/_actions.html.erb +3 -0
- data/app/views/completion_kit/runs/_form.html.erb +114 -20
- data/app/views/completion_kit/runs/_response_row.html.erb +58 -35
- data/app/views/completion_kit/runs/_row.html.erb +50 -0
- data/app/views/completion_kit/runs/_sort_toolbar.html.erb +5 -4
- data/app/views/completion_kit/runs/_status_header.html.erb +3 -2
- data/app/views/completion_kit/runs/_status_panel.html.erb +55 -21
- data/app/views/completion_kit/runs/index.html.erb +4 -16
- data/app/views/completion_kit/runs/show.html.erb +110 -16
- data/app/views/completion_kit/suggestions/show.html.erb +65 -0
- data/app/views/layouts/completion_kit/application.html.erb +71 -0
- data/config/routes.rb +8 -2
- data/db/migrate/20260507000001_add_discovery_error_to_provider_credentials.rb +5 -0
- data/db/migrate/20260507150000_add_temperature_ignored_to_runs.rb +5 -0
- data/lib/completion_kit/version.rb +1 -1
- metadata +8 -7
- data/app/views/completion_kit/runs/suggestion.html.erb +0 -47
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
<tr onclick="window.location='<%= run_path(run) %>'" style="cursor: pointer;">
|
|
2
|
+
<td>
|
|
3
|
+
<div class="ck-runs-table__identity">
|
|
4
|
+
<span class="ck-run-name">
|
|
5
|
+
<span class="<%= ck_run_dot(run) %>"></span>
|
|
6
|
+
<strong><%= run.name %></strong>
|
|
7
|
+
</span>
|
|
8
|
+
<div class="ck-runs-table__config">
|
|
9
|
+
<%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
|
|
10
|
+
<span class="ck-runs-table__version">v<%= run.prompt.version_number %></span>
|
|
11
|
+
<% if run.dataset %>
|
|
12
|
+
<span class="ck-runs-table__sep">·</span>
|
|
13
|
+
<%= link_to run.dataset.name, dataset_path(run.dataset), class: "ck-runs-table__config-link", onclick: "event.stopPropagation();" %>
|
|
14
|
+
<% end %>
|
|
15
|
+
</div>
|
|
16
|
+
</div>
|
|
17
|
+
</td>
|
|
18
|
+
<td>
|
|
19
|
+
<span class="ck-runs-table__count">
|
|
20
|
+
<%= run.responses.size %><% if run.dataset %><span class="ck-runs-table__count-of">/<%= run.dataset.row_count %></span><% end %>
|
|
21
|
+
</span>
|
|
22
|
+
</td>
|
|
23
|
+
<td>
|
|
24
|
+
<% metrics = run.metric_averages.sort_by { |m| m[:name].to_s.downcase } %>
|
|
25
|
+
<% if metrics.any? %>
|
|
26
|
+
<div class="ck-metric-bar ck-metric-bar--compact">
|
|
27
|
+
<% metrics.each do |m| %>
|
|
28
|
+
<span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(m[:avg]) %>">
|
|
29
|
+
<span class="ck-metric-pip__bar"></span>
|
|
30
|
+
<span class="ck-metric-pip__label"><%= m[:name] %> <strong><%= m[:avg] %></strong></span>
|
|
31
|
+
</span>
|
|
32
|
+
<% end %>
|
|
33
|
+
</div>
|
|
34
|
+
<% else %>
|
|
35
|
+
<span class="ck-runs-table__dim">—</span>
|
|
36
|
+
<% end %>
|
|
37
|
+
</td>
|
|
38
|
+
<td>
|
|
39
|
+
<% avg = run.avg_score %>
|
|
40
|
+
<% if avg %>
|
|
41
|
+
<span class="<%= ck_badge_classes(ck_score_kind(avg)) %>"><%= avg %></span>
|
|
42
|
+
<% else %>
|
|
43
|
+
<span class="ck-runs-table__dim">—</span>
|
|
44
|
+
<% end %>
|
|
45
|
+
</td>
|
|
46
|
+
<td class="ck-runs-table__when">
|
|
47
|
+
<time data-relative-time datetime="<%= run.created_at.utc.iso8601 %>"><%= time_ago_in_words(run.created_at) %></time> ago
|
|
48
|
+
</td>
|
|
49
|
+
<td class="ck-results-table__arrow">→</td>
|
|
50
|
+
</tr>
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
<div id="run_sort_toolbar">
|
|
2
|
-
<% if run.
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
<%= link_to "
|
|
2
|
+
<% if run.judge_configured? %>
|
|
3
|
+
<% active = run.status == "completed" && run.responses.joins(:reviews).exists? %>
|
|
4
|
+
<div class="ck-toolbar" style="margin-top: 1.5rem;<%= ' visibility: hidden;' unless active %>" aria-hidden="<%= !active %>">
|
|
5
|
+
<%= link_to "Best first", run_path(run, sort: "score_desc"), class: params[:sort].blank? || params[:sort] == "score_desc" ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline), tabindex: active ? nil : -1 %>
|
|
6
|
+
<%= link_to "Worst first", run_path(run, sort: "score_asc"), class: params[:sort] == "score_asc" ? ck_button_classes(:dark) : ck_button_classes(:light, variant: :outline), tabindex: active ? nil : -1 %>
|
|
6
7
|
</div>
|
|
7
8
|
<% end %>
|
|
8
9
|
</div>
|
|
@@ -6,8 +6,9 @@
|
|
|
6
6
|
<% end %>
|
|
7
7
|
|
|
8
8
|
<% if run.status == "running" && !CompletionKit::WorkerHealth.healthy? %>
|
|
9
|
-
<div class="ck-
|
|
10
|
-
No worker
|
|
9
|
+
<div class="ck-banner ck-banner--warn">
|
|
10
|
+
<strong class="ck-banner__title">No worker is processing jobs</strong>
|
|
11
|
+
<p class="ck-banner__body">Jobs are queued but nothing is consuming them. Start <code class="ck-banner__code">bin/jobs</code> (or your worker service) to resume.</p>
|
|
11
12
|
</div>
|
|
12
13
|
<% end %>
|
|
13
14
|
|
|
@@ -2,36 +2,70 @@
|
|
|
2
2
|
<div id="run_status_panel">
|
|
3
3
|
<% if run.status.in?(%w[running completed]) && snap[:generated_total] > 0 %>
|
|
4
4
|
<% failed_count = snap[:generated_failed] + snap[:judged_failed] %>
|
|
5
|
+
<% has_judge = snap[:judged_total] > 0 || run.judge_configured? %>
|
|
6
|
+
<% metric_avgs = run.metric_averages.sort_by { |m| m[:name].to_s.downcase } %>
|
|
7
|
+
<% metric_lookup = metric_avgs.index_by { |m| m[:name].to_s.downcase } %>
|
|
5
8
|
<section class="ck-run-status ck-run-status--<%= run.status %>">
|
|
6
|
-
<div class="ck-run-
|
|
7
|
-
<div class="ck-run-
|
|
8
|
-
<p class="ck-run-status__metric-label">
|
|
9
|
-
|
|
10
|
-
<span class="ck-
|
|
11
|
-
|
|
12
|
-
<span class="ck-
|
|
9
|
+
<div class="ck-run-status__cells">
|
|
10
|
+
<div class="ck-run-status__cell ck-run-status__cell--grow">
|
|
11
|
+
<p class="ck-run-status__metric-label ck-run-status__label-row">
|
|
12
|
+
<span>Outcome</span>
|
|
13
|
+
<span class="ck-status-badge ck-status-badge--<%= run.status %> ck-run-status__pill">
|
|
14
|
+
<span class="ck-status-badge__dot" aria-hidden="true"></span>
|
|
15
|
+
<span class="ck-status-badge__label"><%= run.status.upcase %></span>
|
|
16
|
+
</span>
|
|
17
|
+
</p>
|
|
18
|
+
<p class="ck-run-status__cell-value ck-run-status__summary-line">
|
|
19
|
+
<span class="ck-run-status__summary-num"><%= snap[:generated_done] %></span>
|
|
20
|
+
<span class="ck-run-status__summary-text">of <%= snap[:generated_total] %> responses</span>
|
|
21
|
+
<% if has_judge %>
|
|
22
|
+
<span class="ck-run-status__summary-sep">·</span>
|
|
23
|
+
<span class="ck-run-status__summary-num"><%= snap[:judged_done] %></span>
|
|
24
|
+
<span class="ck-run-status__summary-text">of <%= snap[:judged_total] %> judged</span>
|
|
25
|
+
<% end %>
|
|
26
|
+
<% if failed_count > 0 %>
|
|
27
|
+
<span class="ck-run-status__summary-sep">·</span>
|
|
28
|
+
<span class="ck-run-status__metric-fail"><%= failed_count %> failed</span>
|
|
13
29
|
<% end %>
|
|
14
30
|
</p>
|
|
15
31
|
</div>
|
|
16
32
|
|
|
17
|
-
<% if
|
|
18
|
-
<div class="ck-run-
|
|
19
|
-
<p class="ck-run-status__metric-label">
|
|
20
|
-
<
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
33
|
+
<% if has_judge %>
|
|
34
|
+
<div class="ck-run-status__cell">
|
|
35
|
+
<p class="ck-run-status__metric-label">Metrics</p>
|
|
36
|
+
<div class="ck-run-status__cell-value">
|
|
37
|
+
<% if run.metrics.any? %>
|
|
38
|
+
<div class="ck-metric-bar ck-metric-bar--compact">
|
|
39
|
+
<% run.metrics.order(:name).each do |metric| %>
|
|
40
|
+
<% avg_for_metric = metric_lookup[metric.name.to_s.downcase] %>
|
|
41
|
+
<% if avg_for_metric %>
|
|
42
|
+
<span class="ck-metric-pip ck-metric-pip--<%= ck_score_kind(avg_for_metric[:avg]) %>">
|
|
43
|
+
<span class="ck-metric-pip__bar"></span>
|
|
44
|
+
<span class="ck-metric-pip__label"><%= metric.name %> <strong><%= avg_for_metric[:avg] %></strong></span>
|
|
45
|
+
</span>
|
|
46
|
+
<% else %>
|
|
47
|
+
<span class="ck-metric-pip ck-metric-pip--pending">
|
|
48
|
+
<span class="ck-metric-pip__bar"></span>
|
|
49
|
+
<span class="ck-metric-pip__label"><%= metric.name %> <em>pending</em></span>
|
|
50
|
+
</span>
|
|
51
|
+
<% end %>
|
|
52
|
+
<% end %>
|
|
53
|
+
</div>
|
|
54
|
+
<% else %>
|
|
55
|
+
<span class="ck-run-status__cell-empty">—</span>
|
|
24
56
|
<% end %>
|
|
25
|
-
</
|
|
57
|
+
</div>
|
|
26
58
|
</div>
|
|
27
|
-
<% end %>
|
|
28
59
|
|
|
29
|
-
|
|
30
|
-
<div class="ck-run-status__metric">
|
|
60
|
+
<div class="ck-run-status__cell">
|
|
31
61
|
<p class="ck-run-status__metric-label">Avg score</p>
|
|
32
|
-
<
|
|
33
|
-
|
|
34
|
-
|
|
62
|
+
<div class="ck-run-status__cell-value">
|
|
63
|
+
<% if run.avg_score %>
|
|
64
|
+
<span class="<%= ck_badge_classes(ck_score_kind(run.avg_score)) %> ck-badge--lg"><%= run.avg_score %></span>
|
|
65
|
+
<% else %>
|
|
66
|
+
<span class="ck-run-status__cell-empty">—</span>
|
|
67
|
+
<% end %>
|
|
68
|
+
</div>
|
|
35
69
|
</div>
|
|
36
70
|
<% end %>
|
|
37
71
|
</div>
|
|
@@ -9,32 +9,20 @@
|
|
|
9
9
|
</section>
|
|
10
10
|
|
|
11
11
|
<% if @runs.any? %>
|
|
12
|
-
<table class="ck-results-table">
|
|
12
|
+
<table class="ck-results-table ck-runs-table">
|
|
13
13
|
<thead>
|
|
14
14
|
<tr>
|
|
15
15
|
<th>Run</th>
|
|
16
|
-
<th>Prompt</th>
|
|
17
16
|
<th>Responses</th>
|
|
17
|
+
<th>Metrics</th>
|
|
18
18
|
<th>Avg score</th>
|
|
19
|
+
<th>When</th>
|
|
19
20
|
<th></th>
|
|
20
21
|
</tr>
|
|
21
22
|
</thead>
|
|
22
23
|
<tbody>
|
|
23
24
|
<% @runs.each do |run| %>
|
|
24
|
-
|
|
25
|
-
<td><span class="ck-run-name"><span class="<%= ck_run_dot(run) %>"></span><strong><%= run.name %></strong></span></td>
|
|
26
|
-
<td><%= link_to run.prompt.name, prompt_path(run.prompt), class: "ck-link" %>  <span class="ck-chip ck-chip--soft">v<%= run.prompt.version_number %></span></td>
|
|
27
|
-
<td><%= run.responses.size %></td>
|
|
28
|
-
<td>
|
|
29
|
-
<% avg = run.avg_score %>
|
|
30
|
-
<% if avg %>
|
|
31
|
-
<span class="<%= ck_badge_classes(ck_score_kind(avg)) %>"><%= avg %></span>
|
|
32
|
-
<% else %>
|
|
33
|
-
—
|
|
34
|
-
<% end %>
|
|
35
|
-
</td>
|
|
36
|
-
<td class="ck-results-table__arrow">→</td>
|
|
37
|
-
</tr>
|
|
25
|
+
<%= render "row", run: run %>
|
|
38
26
|
<% end %>
|
|
39
27
|
</tbody>
|
|
40
28
|
</table>
|
|
@@ -7,6 +7,11 @@
|
|
|
7
7
|
|
|
8
8
|
<%= render "status_header", run: @run %>
|
|
9
9
|
|
|
10
|
+
<% if @run.dataset %>
|
|
11
|
+
<% dataset_lines = @run.dataset.csv_data.to_s.lines %>
|
|
12
|
+
<% dataset_preview_lines = dataset_lines.first(50) %>
|
|
13
|
+
<% end %>
|
|
14
|
+
|
|
10
15
|
<div class="ck-run-config">
|
|
11
16
|
<div class="ck-run-config__row">
|
|
12
17
|
<span class="ck-run-config__key">Created</span>
|
|
@@ -15,7 +20,11 @@
|
|
|
15
20
|
<div class="ck-run-config__row">
|
|
16
21
|
<span class="ck-run-config__key">Dataset</span>
|
|
17
22
|
<% if @run.dataset %>
|
|
18
|
-
|
|
23
|
+
<span class="ck-run-config__dataset">
|
|
24
|
+
<%= link_to @run.dataset.name, dataset_path(@run.dataset), class: "ck-link" %>
|
|
25
|
+
<span class="ck-run-config__dataset-meta"><%= dataset_lines.count %> rows</span>
|
|
26
|
+
<button type="button" class="ck-run-config__dataset-preview" onclick="document.getElementById('dataset-preview-<%= @run.id %>').showModal()">Preview</button>
|
|
27
|
+
</span>
|
|
19
28
|
<% else %>
|
|
20
29
|
<span class="ck-run-config__none">None</span>
|
|
21
30
|
<% end %>
|
|
@@ -38,14 +47,18 @@
|
|
|
38
47
|
<div class="ck-run-config__row">
|
|
39
48
|
<span class="ck-run-config__key">Temperature</span>
|
|
40
49
|
<span><%= @run.temperature %></span>
|
|
50
|
+
<% if @run.temperature_ignored? %>
|
|
51
|
+
<span class="ck-run-config__warn" style="color: var(--ck-dim);" title="The model rejected the temperature parameter, so CompletionKit re-sent the request without it.">ignored by model</span>
|
|
52
|
+
<% end %>
|
|
41
53
|
</div>
|
|
42
54
|
</div>
|
|
43
55
|
|
|
44
56
|
<div class="ck-prompt-preview">
|
|
45
57
|
<div class="ck-prompt-preview__header">
|
|
46
58
|
<p class="ck-kicker">Prompt</p>
|
|
47
|
-
<%
|
|
48
|
-
|
|
59
|
+
<% latest_suggestion = @run.suggestions.order(created_at: :desc).first %>
|
|
60
|
+
<% if latest_suggestion %>
|
|
61
|
+
<%= link_to "View suggestion", suggestion_path(latest_suggestion, from: "run"), class: ck_button_classes(:light, variant: :outline) + " ck-button--sm" %>
|
|
49
62
|
<% elsif @run.status == "completed" && @run.responses.joins(:reviews).exists? %>
|
|
50
63
|
<%= button_to "Suggest improvements", suggest_run_path(@run), method: :post, class: ck_button_classes(:light, variant: :outline) + " ck-button--sm", form_class: "inline-block" %>
|
|
51
64
|
<% end %>
|
|
@@ -57,23 +70,104 @@
|
|
|
57
70
|
</div>
|
|
58
71
|
|
|
59
72
|
<% if @run.dataset %>
|
|
60
|
-
<
|
|
61
|
-
<
|
|
62
|
-
|
|
63
|
-
|
|
73
|
+
<dialog id="dataset-preview-<%= @run.id %>" class="ck-modal" onclick="if(event.target===this)this.close()">
|
|
74
|
+
<article class="ck-modal__panel" onclick="event.stopPropagation()">
|
|
75
|
+
<header class="ck-modal__header">
|
|
76
|
+
<div class="ck-modal__heading">
|
|
77
|
+
<h2 class="ck-modal__title"><%= @run.dataset.name %></h2>
|
|
78
|
+
<span class="ck-modal__meta"><%= dataset_lines.count %> rows</span>
|
|
79
|
+
</div>
|
|
80
|
+
<button type="button" class="ck-modal__close" aria-label="Close" onclick="this.closest('dialog').close()">×</button>
|
|
81
|
+
</header>
|
|
82
|
+
<div class="ck-modal__body">
|
|
83
|
+
<%
|
|
84
|
+
require "csv"
|
|
85
|
+
parsed_rows = []
|
|
86
|
+
parse_error = nil
|
|
87
|
+
begin
|
|
88
|
+
parsed_rows = ::CSV.parse(@run.dataset.csv_data.to_s)
|
|
89
|
+
rescue ::CSV::MalformedCSVError => e
|
|
90
|
+
parse_error = e.message
|
|
91
|
+
end
|
|
92
|
+
headers = parsed_rows.first || []
|
|
93
|
+
body_rows = parsed_rows.drop(1).first(50)
|
|
94
|
+
remaining = parsed_rows.drop(1).count - body_rows.count
|
|
95
|
+
%>
|
|
96
|
+
<% if parse_error %>
|
|
97
|
+
<p class="ck-field-hint" style="color: var(--ck-warning); padding: 1rem;">Could not parse CSV: <%= parse_error %></p>
|
|
98
|
+
<% elsif headers.empty? %>
|
|
99
|
+
<p class="ck-field-hint" style="padding: 1rem;">Dataset is empty.</p>
|
|
100
|
+
<% else %>
|
|
101
|
+
<div class="ck-csv-table-wrap">
|
|
102
|
+
<table class="ck-csv-table">
|
|
103
|
+
<thead>
|
|
104
|
+
<tr>
|
|
105
|
+
<th class="ck-csv-table__rownum">#</th>
|
|
106
|
+
<% headers.each do |h| %>
|
|
107
|
+
<th><%= h %></th>
|
|
108
|
+
<% end %>
|
|
109
|
+
</tr>
|
|
110
|
+
</thead>
|
|
111
|
+
<tbody>
|
|
112
|
+
<% body_rows.each_with_index do |row, idx| %>
|
|
113
|
+
<tr>
|
|
114
|
+
<td class="ck-csv-table__rownum"><%= idx + 1 %></td>
|
|
115
|
+
<% headers.each_with_index do |_, i| %>
|
|
116
|
+
<td><span class="ck-csv-cell"><%= row[i] %></span></td>
|
|
117
|
+
<% end %>
|
|
118
|
+
</tr>
|
|
119
|
+
<% end %>
|
|
120
|
+
</tbody>
|
|
121
|
+
</table>
|
|
122
|
+
</div>
|
|
123
|
+
<% if remaining > 0 %>
|
|
124
|
+
<p class="ck-modal__meta" style="margin: 0.65rem 0 0; padding: 0 0.25rem;"><%= remaining %> more rows in the full dataset.</p>
|
|
125
|
+
<% end %>
|
|
126
|
+
<% end %>
|
|
127
|
+
</div>
|
|
128
|
+
<footer class="ck-modal__footer">
|
|
129
|
+
<%= link_to "Open dataset", dataset_path(@run.dataset), class: ck_button_classes(:light, variant: :outline) %>
|
|
130
|
+
</footer>
|
|
131
|
+
</article>
|
|
132
|
+
</dialog>
|
|
64
133
|
<% end %>
|
|
65
134
|
|
|
66
135
|
<%= render "status_panel", run: @run %>
|
|
67
136
|
|
|
68
|
-
<% valid_responses = @responses.reject { |r| r.response_text&.start_with?("Error:") } %>
|
|
69
|
-
|
|
70
137
|
<%= render "sort_toolbar", run: @run %>
|
|
71
138
|
|
|
72
|
-
|
|
73
|
-
|
|
139
|
+
<p class="ck-kicker" style="margin-top: 1.5rem;<%= ' display:none;' unless @responses.any? %>" id="responses_kicker">Responses</p>
|
|
140
|
+
<table class="ck-results-table ck-responses-table"<%= ' hidden'.html_safe unless @responses.any? %> id="responses_table">
|
|
141
|
+
<thead>
|
|
142
|
+
<tr>
|
|
143
|
+
<th>#</th>
|
|
144
|
+
<th>Response</th>
|
|
145
|
+
<th>Metrics</th>
|
|
146
|
+
<th>Avg score</th>
|
|
147
|
+
<th>Status</th>
|
|
148
|
+
<th></th>
|
|
149
|
+
</tr>
|
|
150
|
+
</thead>
|
|
151
|
+
<tbody id="run_responses">
|
|
152
|
+
<% @responses.each_with_index do |response, idx| %>
|
|
153
|
+
<%= render "response_row", run: @run, response: response, index: idx + 1 %>
|
|
154
|
+
<% end %>
|
|
155
|
+
</tbody>
|
|
156
|
+
</table>
|
|
157
|
+
|
|
158
|
+
<% if @run.status.in?(%w[pending running]) %>
|
|
159
|
+
<script>
|
|
160
|
+
(function() {
|
|
161
|
+
if (window.ckRunStatusPoll) clearInterval(window.ckRunStatusPoll);
|
|
162
|
+
var url = '<%= refresh_status_run_path(@run) %>';
|
|
163
|
+
var refresh = function() {
|
|
164
|
+
fetch(url, { headers: { Accept: 'text/vnd.turbo-stream.html' }, credentials: 'same-origin' })
|
|
165
|
+
.then(function(r) { return r.ok ? r.text() : null; })
|
|
166
|
+
.then(function(html) { if (html) Turbo.renderStreamMessage(html); })
|
|
167
|
+
.catch(function() {});
|
|
168
|
+
};
|
|
169
|
+
setTimeout(refresh, 1000);
|
|
170
|
+
window.ckRunStatusPoll = setInterval(refresh, 15000);
|
|
171
|
+
})();
|
|
172
|
+
</script>
|
|
74
173
|
<% end %>
|
|
75
|
-
<div id="run_responses">
|
|
76
|
-
<% valid_responses.each_with_index do |response, idx| %>
|
|
77
|
-
<%= render "response_row", run: @run, response: response, index: idx + 1 %>
|
|
78
|
-
<% end %>
|
|
79
|
-
</div>
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
<ol class="ck-breadcrumb">
|
|
2
|
+
<% if @from == "run" %>
|
|
3
|
+
<li><%= link_to "Runs", runs_path %></li>
|
|
4
|
+
<li><%= link_to @run.name, run_path(@run) %></li>
|
|
5
|
+
<% else %>
|
|
6
|
+
<li><%= link_to "Prompts", prompts_path %></li>
|
|
7
|
+
<li><%= link_to @run.prompt.name, prompt_path(@run.prompt) %></li>
|
|
8
|
+
<% end %>
|
|
9
|
+
<li>Suggestion</li>
|
|
10
|
+
</ol>
|
|
11
|
+
|
|
12
|
+
<section class="ck-page-header">
|
|
13
|
+
<div>
|
|
14
|
+
<p class="ck-kicker">Prompt improvement</p>
|
|
15
|
+
<h1 class="ck-title"><%= @run.prompt.name %></h1>
|
|
16
|
+
<p class="ck-meta-copy">
|
|
17
|
+
Prompt: <%= link_to @run.prompt.name, prompt_path(@run.prompt), class: "ck-link" %>
|
|
18
|
+
· Run: <%= link_to @run.name, run_path(@run), class: "ck-link" %>
|
|
19
|
+
· <%= @run.responses.count %> responses scored
|
|
20
|
+
<% if @run.avg_score %>
|
|
21
|
+
<span class="<%= ck_badge_classes(ck_score_kind(@run.avg_score)) %>"><%= @run.avg_score %></span>
|
|
22
|
+
<% else %>
|
|
23
|
+
—
|
|
24
|
+
<% end %>
|
|
25
|
+
</p>
|
|
26
|
+
</div>
|
|
27
|
+
<div class="ck-actions">
|
|
28
|
+
<% if @from == "run" %>
|
|
29
|
+
<%= link_to "Back to run", run_path(@run), class: ck_button_classes(:light, variant: :outline) %>
|
|
30
|
+
<% else %>
|
|
31
|
+
<%= link_to "Back to prompt", prompt_path(@run.prompt), class: ck_button_classes(:light, variant: :outline) %>
|
|
32
|
+
<% end %>
|
|
33
|
+
<% if @suggestion.applied_at? %>
|
|
34
|
+
<span class="ck-chip" style="background: var(--ck-success-soft); color: var(--ck-success);">Applied</span>
|
|
35
|
+
<% else %>
|
|
36
|
+
<%= button_to "Apply suggestion", apply_suggestion_path(@suggestion), method: :post, class: ck_button_classes(:dark), form_class: "inline-block" %>
|
|
37
|
+
<% end %>
|
|
38
|
+
</div>
|
|
39
|
+
</section>
|
|
40
|
+
|
|
41
|
+
<div class="ck-suggest-reasoning">
|
|
42
|
+
<p class="ck-kicker">Why these changes</p>
|
|
43
|
+
<div class="ck-suggest-reasoning__body"><%= simple_format(@suggestion.reasoning) %></div>
|
|
44
|
+
</div>
|
|
45
|
+
|
|
46
|
+
<div class="ck-suggest-diff">
|
|
47
|
+
<div class="ck-suggest-diff__pane">
|
|
48
|
+
<div class="ck-suggest-diff__header">
|
|
49
|
+
<span class="ck-suggest-diff__label ck-suggest-diff__label--before">Original prompt</span>
|
|
50
|
+
<span class="ck-suggest-diff__version"><%= @suggestion.prompt.version_label %></span>
|
|
51
|
+
</div>
|
|
52
|
+
<pre class="ck-suggest-diff__code"><%= ck_word_diff_old(@suggestion.original_template, @suggestion.suggested_template) %></pre>
|
|
53
|
+
</div>
|
|
54
|
+
<div class="ck-suggest-diff__pane">
|
|
55
|
+
<div class="ck-suggest-diff__header">
|
|
56
|
+
<span class="ck-suggest-diff__label ck-suggest-diff__label--after">Suggested prompt</span>
|
|
57
|
+
</div>
|
|
58
|
+
<pre class="ck-suggest-diff__code"><%= ck_word_diff_new(@suggestion.original_template, @suggestion.suggested_template) %></pre>
|
|
59
|
+
</div>
|
|
60
|
+
</div>
|
|
61
|
+
|
|
62
|
+
<div class="ck-suggest-full">
|
|
63
|
+
<p class="ck-kicker">Full suggested prompt</p>
|
|
64
|
+
<pre class="ck-code ck-code--dark"><%= @suggestion.suggested_template %></pre>
|
|
65
|
+
</div>
|
|
@@ -48,6 +48,77 @@ document.addEventListener("turbo:load", function() {
|
|
|
48
48
|
var d = new Date(el.getAttribute("datetime"));
|
|
49
49
|
el.textContent = d.toLocaleString(undefined, {year:"numeric",month:"short",day:"numeric",hour:"2-digit",minute:"2-digit"});
|
|
50
50
|
});
|
|
51
|
+
ckTickRelativeTimes();
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
function ckRelativeTime(then) {
|
|
55
|
+
var seconds = Math.round((Date.now() - then.getTime()) / 1000);
|
|
56
|
+
if (seconds < 5) return "just now";
|
|
57
|
+
if (seconds < 60) return "less than a minute";
|
|
58
|
+
var minutes = Math.round(seconds / 60);
|
|
59
|
+
if (minutes < 60) return minutes === 1 ? "1 minute" : minutes + " minutes";
|
|
60
|
+
var hours = Math.round(minutes / 60);
|
|
61
|
+
if (hours < 24) return hours === 1 ? "about 1 hour" : "about " + hours + " hours";
|
|
62
|
+
var days = Math.round(hours / 24);
|
|
63
|
+
if (days < 30) return days === 1 ? "1 day" : days + " days";
|
|
64
|
+
var months = Math.round(days / 30);
|
|
65
|
+
if (months < 12) return months === 1 ? "about 1 month" : "about " + months + " months";
|
|
66
|
+
var years = Math.round(days / 365);
|
|
67
|
+
return years === 1 ? "about 1 year" : "about " + years + " years";
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
function ckRelativeTimeCompact(then) {
|
|
71
|
+
var seconds = Math.round((Date.now() - then.getTime()) / 1000);
|
|
72
|
+
if (seconds < 60) return "now";
|
|
73
|
+
var minutes = Math.round(seconds / 60);
|
|
74
|
+
if (minutes < 60) return minutes + "m";
|
|
75
|
+
var hours = Math.round(minutes / 60);
|
|
76
|
+
if (hours < 24) return hours + "h";
|
|
77
|
+
var days = Math.round(hours / 24);
|
|
78
|
+
if (days < 30) return days + "d";
|
|
79
|
+
var months = Math.round(days / 30);
|
|
80
|
+
if (months < 12) return months + "mo";
|
|
81
|
+
var years = Math.round(days / 365);
|
|
82
|
+
return years + "y";
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
function ckTickRelativeTimes() {
|
|
86
|
+
document.querySelectorAll("[data-relative-time]").forEach(function(el) {
|
|
87
|
+
var then = new Date(el.getAttribute("datetime"));
|
|
88
|
+
if (isNaN(then.getTime())) return;
|
|
89
|
+
var verbose = el.getAttribute("data-relative-time") === "verbose";
|
|
90
|
+
el.textContent = verbose ? ckRelativeTime(then) : ckRelativeTimeCompact(then);
|
|
91
|
+
el.setAttribute("title", then.toLocaleString());
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if (!window.ckRelativeTimeInterval) {
|
|
96
|
+
window.ckRelativeTimeInterval = setInterval(ckTickRelativeTimes, 30000);
|
|
97
|
+
}
|
|
98
|
+
document.addEventListener("turbo:before-stream-render", function() {
|
|
99
|
+
requestAnimationFrame(ckTickRelativeTimes);
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
var ckCsvHoverTimer = null;
|
|
103
|
+
var ckCsvHoverRow = null;
|
|
104
|
+
document.addEventListener("mouseover", function(e) {
|
|
105
|
+
var row = e.target.closest && e.target.closest(".ck-csv-table tbody tr");
|
|
106
|
+
if (!row || row === ckCsvHoverRow) return;
|
|
107
|
+
if (ckCsvHoverRow) ckCsvHoverRow.classList.remove("ck-csv-row--expanded");
|
|
108
|
+
ckCsvHoverRow = row;
|
|
109
|
+
clearTimeout(ckCsvHoverTimer);
|
|
110
|
+
ckCsvHoverTimer = setTimeout(function() {
|
|
111
|
+
if (ckCsvHoverRow === row) row.classList.add("ck-csv-row--expanded");
|
|
112
|
+
}, 350);
|
|
113
|
+
});
|
|
114
|
+
document.addEventListener("mouseout", function(e) {
|
|
115
|
+
var row = e.target.closest && e.target.closest(".ck-csv-table tbody tr");
|
|
116
|
+
if (!row) return;
|
|
117
|
+
var related = e.relatedTarget && e.relatedTarget.closest && e.relatedTarget.closest(".ck-csv-table tbody tr");
|
|
118
|
+
if (related === row) return;
|
|
119
|
+
clearTimeout(ckCsvHoverTimer);
|
|
120
|
+
row.classList.remove("ck-csv-row--expanded");
|
|
121
|
+
if (ckCsvHoverRow === row) ckCsvHoverRow = null;
|
|
51
122
|
});
|
|
52
123
|
|
|
53
124
|
var ckRefreshing = false;
|
data/config/routes.rb
CHANGED
|
@@ -14,14 +14,20 @@ CompletionKit::Engine.routes.draw do
|
|
|
14
14
|
resources :runs do
|
|
15
15
|
member do
|
|
16
16
|
post :generate
|
|
17
|
-
get :suggestion
|
|
18
17
|
post :suggest
|
|
19
|
-
post :apply_suggestion
|
|
20
18
|
post :retry_failures
|
|
19
|
+
post :rerun
|
|
20
|
+
get :refresh_status
|
|
21
21
|
end
|
|
22
22
|
resources :responses, only: [:show]
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
+
resources :suggestions, only: [:show] do
|
|
26
|
+
member do
|
|
27
|
+
post :apply
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
25
31
|
resources :provider_credentials, only: [:index, :new, :create, :edit, :update] do
|
|
26
32
|
post :refresh, on: :member
|
|
27
33
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: completion-kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.2
|
|
4
|
+
version: 0.4.8
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Damien Bastin
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rails
|
|
@@ -248,6 +247,7 @@ files:
|
|
|
248
247
|
- app/controllers/completion_kit/provider_credentials_controller.rb
|
|
249
248
|
- app/controllers/completion_kit/responses_controller.rb
|
|
250
249
|
- app/controllers/completion_kit/runs_controller.rb
|
|
250
|
+
- app/controllers/completion_kit/suggestions_controller.rb
|
|
251
251
|
- app/helpers/completion_kit/application_helper.rb
|
|
252
252
|
- app/jobs/completion_kit/application_job.rb
|
|
253
253
|
- app/jobs/completion_kit/generate_row_job.rb
|
|
@@ -321,6 +321,7 @@ files:
|
|
|
321
321
|
- app/views/completion_kit/runs/_actions.html.erb
|
|
322
322
|
- app/views/completion_kit/runs/_form.html.erb
|
|
323
323
|
- app/views/completion_kit/runs/_response_row.html.erb
|
|
324
|
+
- app/views/completion_kit/runs/_row.html.erb
|
|
324
325
|
- app/views/completion_kit/runs/_sort_toolbar.html.erb
|
|
325
326
|
- app/views/completion_kit/runs/_status_header.html.erb
|
|
326
327
|
- app/views/completion_kit/runs/_status_panel.html.erb
|
|
@@ -328,7 +329,7 @@ files:
|
|
|
328
329
|
- app/views/completion_kit/runs/index.html.erb
|
|
329
330
|
- app/views/completion_kit/runs/new.html.erb
|
|
330
331
|
- app/views/completion_kit/runs/show.html.erb
|
|
331
|
-
- app/views/completion_kit/runs/suggestion.html.erb
|
|
332
|
+
- app/views/completion_kit/suggestions/show.html.erb
|
|
332
333
|
- app/views/layouts/completion_kit/application.html.erb
|
|
333
334
|
- config/routes.rb
|
|
334
335
|
- db/migrate/20260311000001_create_completion_kit_tables.rb
|
|
@@ -348,6 +349,8 @@ files:
|
|
|
348
349
|
- db/migrate/20260501000003_add_status_and_error_to_reviews.rb
|
|
349
350
|
- db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb
|
|
350
351
|
- db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb
|
|
352
|
+
- db/migrate/20260507000001_add_discovery_error_to_provider_credentials.rb
|
|
353
|
+
- db/migrate/20260507150000_add_temperature_ignored_to_runs.rb
|
|
351
354
|
- lib/completion-kit.rb
|
|
352
355
|
- lib/completion_kit.rb
|
|
353
356
|
- lib/completion_kit/concurrency_check.rb
|
|
@@ -365,7 +368,6 @@ metadata:
|
|
|
365
368
|
homepage_uri: https://github.com/homemade-software-inc/completion-kit
|
|
366
369
|
source_code_uri: https://github.com/homemade-software-inc/completion-kit
|
|
367
370
|
changelog_uri: https://github.com/homemade-software-inc/completion-kit/blob/main/CHANGELOG.md
|
|
368
|
-
post_install_message:
|
|
369
371
|
rdoc_options: []
|
|
370
372
|
require_paths:
|
|
371
373
|
- lib
|
|
@@ -380,8 +382,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
380
382
|
- !ruby/object:Gem::Version
|
|
381
383
|
version: '0'
|
|
382
384
|
requirements: []
|
|
383
|
-
rubygems_version: 3.
|
|
384
|
-
signing_key:
|
|
385
|
+
rubygems_version: 3.6.9
|
|
385
386
|
specification_version: 4
|
|
386
387
|
summary: Your prompts need tests too. Run them against real data, score outputs with
|
|
387
388
|
an LLM judge, iterate until they work.
|