completion-kit 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -1
- data/Rakefile +0 -3
- data/app/assets/stylesheets/completion_kit/application.css +87 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
- data/app/controllers/completion_kit/runs_controller.rb +31 -11
- data/app/helpers/completion_kit/application_helper.rb +4 -12
- data/app/jobs/completion_kit/generate_row_job.rb +102 -0
- data/app/jobs/completion_kit/judge_review_job.rb +110 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
- data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
- data/app/models/completion_kit/prompt.rb +4 -0
- data/app/models/completion_kit/response.rb +29 -2
- data/app/models/completion_kit/review.rb +17 -2
- data/app/models/completion_kit/run.rb +90 -96
- data/app/services/completion_kit/anthropic_client.rb +13 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
- data/app/services/completion_kit/ollama_client.rb +13 -0
- data/app/services/completion_kit/open_ai_client.rb +11 -0
- data/app/services/completion_kit/open_router_client.rb +13 -0
- data/app/services/completion_kit/worker_health.rb +10 -0
- data/app/views/completion_kit/api_reference/index.html.erb +0 -5
- data/app/views/completion_kit/prompts/_form.html.erb +8 -5
- data/app/views/completion_kit/runs/_actions.html.erb +1 -1
- data/app/views/completion_kit/runs/_form.html.erb +6 -3
- data/app/views/completion_kit/runs/_progress.html.erb +1 -1
- data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
- data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
- data/app/views/completion_kit/runs/show.html.erb +1 -1
- data/app/views/layouts/completion_kit/application.html.erb +28 -2
- data/config/routes.rb +2 -2
- data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
- data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
- data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
- data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
- data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
- data/lib/completion_kit/concurrency_check.rb +16 -0
- data/lib/completion_kit/errors.rb +16 -0
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +2 -2
- data/lib/tasks/completion_kit_runs.rake +13 -0
- metadata +29 -5
- data/app/jobs/completion_kit/generate_job.rb +0 -12
- data/app/jobs/completion_kit/judge_job.rb +0 -12
|
@@ -2,7 +2,7 @@ module CompletionKit
|
|
|
2
2
|
class Run < ApplicationRecord
|
|
3
3
|
include Turbo::Broadcastable
|
|
4
4
|
|
|
5
|
-
STATUSES = %w[pending
|
|
5
|
+
STATUSES = %w[pending running completed failed].freeze
|
|
6
6
|
|
|
7
7
|
belongs_to :prompt
|
|
8
8
|
belongs_to :dataset, optional: true
|
|
@@ -17,6 +17,30 @@ module CompletionKit
|
|
|
17
17
|
before_validation :set_default_status, on: :create
|
|
18
18
|
before_validation :set_auto_name, on: :create
|
|
19
19
|
|
|
20
|
+
def mark_completed!
|
|
21
|
+
update!(status: "completed")
|
|
22
|
+
broadcast_ui
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def outstanding_work_zero?
|
|
26
|
+
return false if responses.where.not(status: Response::TERMINAL_STATUSES).exists?
|
|
27
|
+
|
|
28
|
+
metric_ids = metrics.pluck(:id)
|
|
29
|
+
return true if metric_ids.empty?
|
|
30
|
+
|
|
31
|
+
succeeded_response_ids = responses.where(status: "succeeded").pluck(:id)
|
|
32
|
+
expected_reviews = succeeded_response_ids.size * metric_ids.size
|
|
33
|
+
return true if expected_reviews.zero?
|
|
34
|
+
|
|
35
|
+
terminal_review_count = Review.where(
|
|
36
|
+
response_id: succeeded_response_ids,
|
|
37
|
+
metric_id: metric_ids,
|
|
38
|
+
status: Review::TERMINAL_STATUSES
|
|
39
|
+
).count
|
|
40
|
+
|
|
41
|
+
terminal_review_count >= expected_reviews
|
|
42
|
+
end
|
|
43
|
+
|
|
20
44
|
def judge_configured?
|
|
21
45
|
judge_model.present? && metrics.any? && ApiConfig.valid_for_model?(judge_model)
|
|
22
46
|
end
|
|
@@ -45,134 +69,104 @@ module CompletionKit
|
|
|
45
69
|
end
|
|
46
70
|
end
|
|
47
71
|
|
|
48
|
-
def generate_responses!
|
|
72
|
+
def start!
|
|
49
73
|
rows = if dataset
|
|
50
74
|
CsvProcessor.process_self(self)
|
|
51
75
|
else
|
|
52
76
|
[{}]
|
|
53
77
|
end
|
|
54
78
|
|
|
55
|
-
if rows.empty?
|
|
56
|
-
errors.add(:base, "Dataset has no rows")
|
|
57
|
-
return false
|
|
58
|
-
end
|
|
79
|
+
return fail_with_summary!("Dataset has no rows") if rows.empty?
|
|
59
80
|
|
|
60
81
|
client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
|
|
61
|
-
|
|
62
82
|
unless client.configured?
|
|
63
|
-
|
|
64
|
-
errors.add(:base, msg)
|
|
65
|
-
update_columns(status: "failed", error_message: msg) if persisted?
|
|
66
|
-
return false
|
|
83
|
+
return fail_with_summary!("LLM API not configured: #{client.configuration_errors.join(', ')}")
|
|
67
84
|
end
|
|
68
85
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
response_text = client.generate_completion(rendered, model: prompt.llm_model, temperature: temperature)
|
|
78
|
-
|
|
79
|
-
resp = responses.create!(
|
|
80
|
-
input_data: input,
|
|
81
|
-
response_text: response_text,
|
|
82
|
-
expected_output: row["expected_output"]
|
|
86
|
+
transaction do
|
|
87
|
+
responses.destroy_all
|
|
88
|
+
update!(
|
|
89
|
+
status: "running",
|
|
90
|
+
progress_current: 0,
|
|
91
|
+
progress_total: rows.length,
|
|
92
|
+
failure_summary: nil,
|
|
93
|
+
error_message: nil
|
|
83
94
|
)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
broadcast_ui
|
|
95
|
+
rows.each_with_index do |row, index|
|
|
96
|
+
input = row.empty? ? nil : row.to_json
|
|
97
|
+
response = responses.create!(
|
|
98
|
+
status: "pending",
|
|
99
|
+
row_index: index,
|
|
100
|
+
input_data: input,
|
|
101
|
+
expected_output: row["expected_output"]
|
|
102
|
+
)
|
|
103
|
+
GenerateRowJob.perform_later(id, response.id)
|
|
104
|
+
end
|
|
95
105
|
end
|
|
96
106
|
|
|
97
|
-
true
|
|
98
|
-
rescue Faraday::Error => e
|
|
99
|
-
update_columns(status: "failed", error_message: e.message)
|
|
100
|
-
errors.add(:base, e.message)
|
|
101
107
|
broadcast_ui
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
update_columns(status: "failed", error_message: e.message) if persisted?
|
|
105
|
-
errors.add(:base, e.message)
|
|
106
|
-
broadcast_ui if persisted?
|
|
107
|
-
false
|
|
108
|
+
broadcast_clear_responses
|
|
109
|
+
true
|
|
108
110
|
end
|
|
109
111
|
|
|
110
|
-
def
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
broadcast_ui
|
|
114
|
-
|
|
115
|
-
judge = JudgeService.new(ApiConfig.for_model(judge_model).merge(judge_model: judge_model))
|
|
116
|
-
evaluation_count = 0
|
|
117
|
-
|
|
118
|
-
responses.find_each do |response|
|
|
119
|
-
metrics.each do |metric|
|
|
120
|
-
evaluation = judge.evaluate(
|
|
121
|
-
response.response_text,
|
|
122
|
-
response.expected_output,
|
|
123
|
-
prompt.template,
|
|
124
|
-
criteria: metric.instruction.to_s,
|
|
125
|
-
rubric_text: metric.display_rubric_text,
|
|
126
|
-
input_data: response.input_data
|
|
127
|
-
)
|
|
112
|
+
def generate_responses!
|
|
113
|
+
start!
|
|
114
|
+
end
|
|
128
115
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
status: "evaluated",
|
|
134
|
-
ai_score: evaluation[:score],
|
|
135
|
-
ai_feedback: evaluation[:feedback]
|
|
136
|
-
)
|
|
137
|
-
review.save!
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
evaluation_count += 1
|
|
141
|
-
update_columns(progress_current: evaluation_count)
|
|
142
|
-
broadcast_progress
|
|
143
|
-
end
|
|
116
|
+
def progress_snapshot
|
|
117
|
+
generated_done = responses.where(status: "succeeded").count
|
|
118
|
+
generated_failed = responses.where(status: "failed").count
|
|
119
|
+
generated_total = progress_total
|
|
144
120
|
|
|
145
|
-
|
|
146
|
-
|
|
121
|
+
metric_count = metrics.count
|
|
122
|
+
succeeded_count = generated_done
|
|
123
|
+
judged_total = succeeded_count * metric_count
|
|
124
|
+
judged_done = Review.joins(:response)
|
|
125
|
+
.where(completion_kit_responses: { run_id: id }, status: "succeeded").count
|
|
126
|
+
judged_failed = Review.joins(:response)
|
|
127
|
+
.where(completion_kit_responses: { run_id: id }, status: "failed").count
|
|
147
128
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
rescue StandardError => e
|
|
157
|
-
update_columns(status: "failed", error_message: e.message) if persisted?
|
|
158
|
-
errors.add(:base, e.message)
|
|
159
|
-
broadcast_ui if persisted?
|
|
160
|
-
false
|
|
129
|
+
{
|
|
130
|
+
generated_done: generated_done,
|
|
131
|
+
generated_total: generated_total,
|
|
132
|
+
generated_failed: generated_failed,
|
|
133
|
+
judged_done: judged_done,
|
|
134
|
+
judged_total: judged_total,
|
|
135
|
+
judged_failed: judged_failed
|
|
136
|
+
}
|
|
161
137
|
end
|
|
162
138
|
|
|
163
139
|
def as_json(options = {})
|
|
140
|
+
snap = progress_snapshot
|
|
164
141
|
{
|
|
165
142
|
id: id, name: name, status: status, prompt_id: prompt_id,
|
|
166
143
|
dataset_id: dataset_id, judge_model: judge_model, temperature: temperature,
|
|
167
144
|
created_at: created_at, updated_at: updated_at,
|
|
168
145
|
responses_count: responses.count, avg_score: avg_score,
|
|
169
|
-
progress_current:
|
|
170
|
-
|
|
146
|
+
progress_current: snap[:generated_done],
|
|
147
|
+
progress_total: snap[:generated_total],
|
|
148
|
+
progress: {
|
|
149
|
+
generated: { done: snap[:generated_done], total: snap[:generated_total], failed: snap[:generated_failed] },
|
|
150
|
+
judged: { done: snap[:judged_done], total: snap[:judged_total], failed: snap[:judged_failed] }
|
|
151
|
+
},
|
|
152
|
+
failed_response_ids: responses.where(status: "failed").pluck(:id),
|
|
153
|
+
failure_summary: failure_summary,
|
|
154
|
+
error_message: error_message,
|
|
155
|
+
metric_ids: metric_ids
|
|
171
156
|
}
|
|
172
157
|
end
|
|
173
158
|
|
|
174
159
|
private
|
|
175
160
|
|
|
161
|
+
def fail_with_summary!(message)
|
|
162
|
+
errors.add(:base, message)
|
|
163
|
+
if persisted?
|
|
164
|
+
update_columns(status: "failed", failure_summary: message, error_message: message)
|
|
165
|
+
broadcast_ui
|
|
166
|
+
end
|
|
167
|
+
false
|
|
168
|
+
end
|
|
169
|
+
|
|
176
170
|
def broadcast_ui
|
|
177
171
|
broadcast_progress
|
|
178
172
|
broadcast_status_header
|
|
@@ -27,12 +27,25 @@ module CompletionKit
|
|
|
27
27
|
}.to_json
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
if response.status == 429
|
|
31
|
+
raise CompletionKit::RateLimitError.new(
|
|
32
|
+
response.body.to_s.truncate(500),
|
|
33
|
+
provider: "anthropic",
|
|
34
|
+
status: 429,
|
|
35
|
+
retry_after: nil
|
|
36
|
+
)
|
|
37
|
+
end
|
|
38
|
+
|
|
30
39
|
if response.success?
|
|
31
40
|
data = JSON.parse(response.body)
|
|
32
41
|
data["content"][0]["text"].strip
|
|
33
42
|
else
|
|
34
43
|
"Error: #{response.status} - #{response.body}"
|
|
35
44
|
end
|
|
45
|
+
rescue CompletionKit::RateLimitError
|
|
46
|
+
raise
|
|
47
|
+
rescue Faraday::Error
|
|
48
|
+
raise
|
|
36
49
|
rescue => e
|
|
37
50
|
"Error: #{e.message}"
|
|
38
51
|
end
|
|
@@ -49,11 +49,6 @@ module CompletionKit
|
|
|
49
49
|
description: "Generate responses for a run using its prompt and dataset",
|
|
50
50
|
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
51
51
|
handler: :generate
|
|
52
|
-
},
|
|
53
|
-
"runs_judge" => {
|
|
54
|
-
description: "Judge responses for a run using configured metrics",
|
|
55
|
-
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
56
|
-
handler: :judge
|
|
57
52
|
}
|
|
58
53
|
}.freeze
|
|
59
54
|
|
|
@@ -92,14 +87,11 @@ module CompletionKit
|
|
|
92
87
|
|
|
93
88
|
def self.generate(args)
|
|
94
89
|
run = Run.find(args["id"])
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
run = Run.find(args["id"])
|
|
101
|
-
JudgeJob.perform_later(run.id)
|
|
102
|
-
text_result(run.reload.as_json)
|
|
90
|
+
if run.start!
|
|
91
|
+
text_result(run.reload.as_json)
|
|
92
|
+
else
|
|
93
|
+
text_result(run.failure_summary || run.errors.full_messages.to_sentence)
|
|
94
|
+
end
|
|
103
95
|
end
|
|
104
96
|
end
|
|
105
97
|
end
|
|
@@ -19,12 +19,25 @@ module CompletionKit
|
|
|
19
19
|
}.to_json
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
+
if response.status == 429
|
|
23
|
+
raise CompletionKit::RateLimitError.new(
|
|
24
|
+
response.body.to_s.truncate(500),
|
|
25
|
+
provider: "ollama",
|
|
26
|
+
status: 429,
|
|
27
|
+
retry_after: nil
|
|
28
|
+
)
|
|
29
|
+
end
|
|
30
|
+
|
|
22
31
|
if response.success?
|
|
23
32
|
data = JSON.parse(response.body)
|
|
24
33
|
data["choices"][0]["text"].strip
|
|
25
34
|
else
|
|
26
35
|
"Error: #{response.status} - #{response.body}"
|
|
27
36
|
end
|
|
37
|
+
rescue CompletionKit::RateLimitError
|
|
38
|
+
raise
|
|
39
|
+
rescue Faraday::Error
|
|
40
|
+
raise
|
|
28
41
|
rescue => e
|
|
29
42
|
"Error: #{e.message}"
|
|
30
43
|
end
|
|
@@ -27,12 +27,23 @@ module CompletionKit
|
|
|
27
27
|
}.to_json
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
if response.status == 429
|
|
31
|
+
raise CompletionKit::RateLimitError.new(
|
|
32
|
+
response.body.to_s.truncate(500),
|
|
33
|
+
provider: "openai",
|
|
34
|
+
status: 429,
|
|
35
|
+
retry_after: response.headers && response.headers["Retry-After"]&.to_i
|
|
36
|
+
)
|
|
37
|
+
end
|
|
38
|
+
|
|
30
39
|
if response.success?
|
|
31
40
|
data = JSON.parse(response.body)
|
|
32
41
|
data["output"][0]["content"][0]["text"].strip
|
|
33
42
|
else
|
|
34
43
|
"Error: #{response.status} - #{response.body}"
|
|
35
44
|
end
|
|
45
|
+
rescue CompletionKit::RateLimitError
|
|
46
|
+
raise
|
|
36
47
|
rescue Faraday::Error => e
|
|
37
48
|
raise
|
|
38
49
|
rescue => e
|
|
@@ -25,12 +25,25 @@ module CompletionKit
|
|
|
25
25
|
}.to_json
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
+
if response.status == 429
|
|
29
|
+
raise CompletionKit::RateLimitError.new(
|
|
30
|
+
response.body.to_s.truncate(500),
|
|
31
|
+
provider: "openrouter",
|
|
32
|
+
status: 429,
|
|
33
|
+
retry_after: response.headers && response.headers["Retry-After"]&.to_i
|
|
34
|
+
)
|
|
35
|
+
end
|
|
36
|
+
|
|
28
37
|
if response.success?
|
|
29
38
|
data = JSON.parse(response.body)
|
|
30
39
|
data.dig("choices", 0, "message", "content").to_s.strip
|
|
31
40
|
else
|
|
32
41
|
"Error: #{response.status} - #{response.body}"
|
|
33
42
|
end
|
|
43
|
+
rescue CompletionKit::RateLimitError
|
|
44
|
+
raise
|
|
45
|
+
rescue Faraday::Error
|
|
46
|
+
raise
|
|
34
47
|
rescue => e
|
|
35
48
|
"Error: #{e.message}"
|
|
36
49
|
end
|
|
@@ -174,11 +174,6 @@ end %>
|
|
|
174
174
|
<p class="ck-meta-copy">Start generating responses. Returns 202 Accepted. Poll the run to check progress.</p>
|
|
175
175
|
<%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/generate \\\n -H \"Authorization: Bearer #{token_display}\"" %>
|
|
176
176
|
</div>
|
|
177
|
-
<div class="ck-api-endpoint">
|
|
178
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs/:id/judge</p>
|
|
179
|
-
<p class="ck-meta-copy">Start judging responses with the configured judge model and metrics. Returns 202 Accepted.</p>
|
|
180
|
-
<%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/judge \\\n -H \"Authorization: Bearer #{token_display}\"" %>
|
|
181
|
-
</div>
|
|
182
177
|
<div class="ck-api-endpoint">
|
|
183
178
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">PATCH</span> /api/v1/runs/:id</p>
|
|
184
179
|
<p class="ck-meta-copy">Update a run. Accepts same params as create.</p>
|
|
@@ -32,21 +32,24 @@
|
|
|
32
32
|
<% available = CompletionKit::ApiConfig.available_models(scope: :generation) %>
|
|
33
33
|
<% if available.any? %>
|
|
34
34
|
<div class="ck-select-with-action">
|
|
35
|
-
<%= form.select :llm_model, ck_grouped_models(available, prompt.llm_model), {}, { class: "ck-input", id: "prompt_llm_model" } %>
|
|
35
|
+
<%= form.select :llm_model, ck_grouped_models(available, prompt.llm_model), { include_blank: "— Select a model —" }, { class: "ck-input", id: "prompt_llm_model" } %>
|
|
36
36
|
<button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
|
|
37
37
|
</div>
|
|
38
38
|
<% else %>
|
|
39
39
|
<p class="ck-meta-copy">No models available. <%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %> or click refresh after configuring a provider.</p>
|
|
40
40
|
<% end %>
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
<div hidden data-refresh-progress-carriers>
|
|
42
|
+
<% CompletionKit::ProviderCredential.find_each do |pc| %>
|
|
43
|
+
<%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
|
|
44
|
+
<%= render "completion_kit/provider_credentials/discovery_status", provider_credential: pc, show_completed: false %>
|
|
45
|
+
<% end %>
|
|
46
|
+
</div>
|
|
44
47
|
<p class="ck-field-hint" id="refresh-status" style="min-height: 1.2em; margin-top: -0.25rem; font-size: 0.75rem;"> </p>
|
|
45
48
|
</div>
|
|
46
49
|
|
|
47
50
|
<div class="ck-actions">
|
|
48
51
|
<%= link_to "Cancel", prompts_path, class: ck_button_classes(:light, variant: :outline) %>
|
|
49
|
-
<%= form.submit(prompt.persisted? ? "Save prompt" : "Create prompt", class: ck_button_classes(:dark)) %>
|
|
52
|
+
<%= form.submit(prompt.persisted? ? "Save prompt" : "Create prompt", class: ck_button_classes(:dark), disabled: available.empty?) %>
|
|
50
53
|
</div>
|
|
51
54
|
</div>
|
|
52
55
|
<% end %>
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<div class="ck-actions" id="run_actions">
|
|
2
|
-
<% running = run.status == "
|
|
2
|
+
<% running = run.status == "running" %>
|
|
3
3
|
<%= button_to run_path(run), method: :delete, form_class: "inline-block", class: "ck-icon-btn", title: "Delete run", "aria-label": "Delete run", disabled: running, data: { turbo_confirm: "Delete this run and all its responses?" } do %><%= heroicon_tag "trash", variant: :outline, size: 16, "aria-hidden": "true" %><% end %>
|
|
4
4
|
<% if running %>
|
|
5
5
|
<%= link_to "Edit", edit_run_path(run), class: ck_button_classes(:light, variant: :outline) + " disabled", "aria-disabled": "true", tabindex: "-1" %>
|
|
@@ -54,9 +54,12 @@
|
|
|
54
54
|
<button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
|
|
55
55
|
</div>
|
|
56
56
|
<p class="ck-field-hint" id="judge-hint"></p>
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
57
|
+
<div hidden data-refresh-progress-carriers>
|
|
58
|
+
<% CompletionKit::ProviderCredential.find_each do |pc| %>
|
|
59
|
+
<%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
|
|
60
|
+
<%= render "completion_kit/provider_credentials/discovery_status", provider_credential: pc, show_completed: false %>
|
|
61
|
+
<% end %>
|
|
62
|
+
</div>
|
|
60
63
|
<p class="ck-field-hint" id="refresh-status" style="min-height: 1.2em; margin-top: -0.25rem; font-size: 0.75rem;"> </p>
|
|
61
64
|
<% else %>
|
|
62
65
|
<p class="ck-field-hint" style="color: var(--ck-warning);">No judge models available. <%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %></p>
|
|
@@ -1,13 +1,31 @@
|
|
|
1
|
-
<%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row", id: "response_#{response.id}" do %>
|
|
1
|
+
<%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row ck-response-row--#{response.status}", id: "response_#{response.id}" do %>
|
|
2
2
|
<span class="ck-response-row__index">#<%= index %></span>
|
|
3
|
-
<span class="ck-response-row__text"
|
|
3
|
+
<span class="ck-response-row__text">
|
|
4
|
+
<% if response.succeeded? %>
|
|
5
|
+
<%= truncate(response.response_text.to_s, length: 160) %>
|
|
6
|
+
<% elsif response.status == "failed" %>
|
|
7
|
+
<% err = response.error_payload %>
|
|
8
|
+
<span class="ck-response-row__error">
|
|
9
|
+
<%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 120) %>
|
|
10
|
+
</span>
|
|
11
|
+
<% end %>
|
|
12
|
+
</span>
|
|
4
13
|
<span class="ck-response-row__score">
|
|
5
|
-
<%
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
14
|
+
<% case response.status
|
|
15
|
+
when "succeeded" %>
|
|
16
|
+
<% if response.reviewed? %>
|
|
17
|
+
<span class="ck-score"><span class="ck-score__star">★</span> <%= response.score %></span>
|
|
18
|
+
<% elsif run.status == "running" %>
|
|
19
|
+
<span class="ck-chip">Judging</span>
|
|
20
|
+
<% end %>
|
|
21
|
+
<% when "pending" %>
|
|
22
|
+
<span class="ck-chip">Queued</span>
|
|
23
|
+
<% when "retrying" %>
|
|
24
|
+
<span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
|
|
25
|
+
<% when "failed" %>
|
|
26
|
+
<%= link_to "Retry", retry_failures_run_path(run, only: response.id),
|
|
27
|
+
data: { turbo_method: :post },
|
|
28
|
+
class: "ck-chip ck-chip--danger ck-chip--retry" %>
|
|
11
29
|
<% end %>
|
|
12
30
|
</span>
|
|
13
31
|
<% end %>
|
|
@@ -1,9 +1,17 @@
|
|
|
1
|
+
<% snap = run.progress_snapshot %>
|
|
1
2
|
<div id="run_status_header">
|
|
2
3
|
<% if run.status == "failed" %>
|
|
3
4
|
<div class="ck-flash ck-flash--alert">
|
|
4
|
-
<%= run.error_message.presence || "Run failed." %>
|
|
5
|
+
<%= run.failure_summary.presence || run.error_message.presence || "Run failed." %>
|
|
5
6
|
</div>
|
|
6
7
|
<% end %>
|
|
8
|
+
|
|
9
|
+
<% if run.status == "running" && !CompletionKit::WorkerHealth.healthy? %>
|
|
10
|
+
<div class="ck-flash ck-flash--alert">
|
|
11
|
+
No worker process is running. Generate and judge jobs are queued but nothing is processing them. Start <code>bin/jobs</code> (or your worker service) to resume.
|
|
12
|
+
</div>
|
|
13
|
+
<% end %>
|
|
14
|
+
|
|
7
15
|
<section class="ck-page-header">
|
|
8
16
|
<div>
|
|
9
17
|
<p class="ck-kicker"><span class="<%= ck_run_dot(run) %>"></span> <%= ck_run_status_label(run) %></p>
|
|
@@ -12,4 +20,31 @@
|
|
|
12
20
|
</div>
|
|
13
21
|
<%= render "completion_kit/runs/actions", run: run %>
|
|
14
22
|
</section>
|
|
23
|
+
|
|
24
|
+
<% if run.status.in?(%w[running completed]) && snap[:generated_total] > 0 %>
|
|
25
|
+
<div class="ck-progress-block">
|
|
26
|
+
<div class="ck-progress-line">
|
|
27
|
+
Generated <%= snap[:generated_done] %>/<%= snap[:generated_total] %>
|
|
28
|
+
<% if snap[:generated_failed] > 0 %>
|
|
29
|
+
<span class="ck-progress-failed">(<%= snap[:generated_failed] %> failed)</span>
|
|
30
|
+
<% end %>
|
|
31
|
+
</div>
|
|
32
|
+
<% if snap[:judged_total] > 0 %>
|
|
33
|
+
<div class="ck-progress-line">
|
|
34
|
+
Judged <%= snap[:judged_done] %>/<%= snap[:judged_total] %>
|
|
35
|
+
<% if snap[:judged_failed] > 0 %>
|
|
36
|
+
<span class="ck-progress-failed">(<%= snap[:judged_failed] %> failed)</span>
|
|
37
|
+
<% end %>
|
|
38
|
+
</div>
|
|
39
|
+
<% end %>
|
|
40
|
+
<% failed_count = snap[:generated_failed] + snap[:judged_failed] %>
|
|
41
|
+
<% if failed_count > 0 %>
|
|
42
|
+
<%= button_to "Retry #{failed_count} failed #{"row".pluralize(failed_count)}",
|
|
43
|
+
retry_failures_run_path(run),
|
|
44
|
+
method: :post,
|
|
45
|
+
class: ck_button_classes(:light, variant: :outline),
|
|
46
|
+
form_class: "inline-block" %>
|
|
47
|
+
<% end %>
|
|
48
|
+
</div>
|
|
49
|
+
<% end %>
|
|
15
50
|
</div>
|
|
@@ -52,7 +52,7 @@
|
|
|
52
52
|
</div>
|
|
53
53
|
<p class="ck-prompt-preview__text" id="prompt_text"><%= @run.prompt.template %></p>
|
|
54
54
|
<% if @run.prompt.template.length > 200 %>
|
|
55
|
-
<button type="button" class="ck-
|
|
55
|
+
<button type="button" class="ck-disclosure-toggle" id="prompt_toggle" aria-expanded="false" aria-controls="prompt_text" onclick="var t=document.getElementById('prompt_text');var l=this;var expanded=t.classList.toggle('ck-prompt-preview__text--expanded');l.firstChild.textContent=expanded?'Show less':'Show more';l.setAttribute('aria-expanded',expanded?'true':'false')"><span>Show more</span></button>
|
|
56
56
|
<% end %>
|
|
57
57
|
</div>
|
|
58
58
|
|
|
@@ -56,8 +56,7 @@ function ckRefreshModels() {
|
|
|
56
56
|
ckRefreshing = true;
|
|
57
57
|
var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
|
|
58
58
|
if (btn) btn.classList.add('ck-icon-btn--spinning');
|
|
59
|
-
|
|
60
|
-
if (status) status.textContent = 'Refreshing models\u2026';
|
|
59
|
+
ckUpdateRefreshProgress();
|
|
61
60
|
var csrfToken = document.querySelector('meta[name="csrf-token"]').getAttribute("content");
|
|
62
61
|
fetch("/completion_kit/refresh_models", {
|
|
63
62
|
method: "POST",
|
|
@@ -65,8 +64,35 @@ function ckRefreshModels() {
|
|
|
65
64
|
});
|
|
66
65
|
}
|
|
67
66
|
|
|
67
|
+
function ckUpdateRefreshProgress() {
|
|
68
|
+
var status = document.getElementById('refresh-status');
|
|
69
|
+
if (!status) return;
|
|
70
|
+
var carriers = document.querySelectorAll('[data-refresh-progress-carriers] [id^="discovery_status_"]');
|
|
71
|
+
var totalCurrent = 0, totalTotal = 0, anyDiscovering = false;
|
|
72
|
+
carriers.forEach(function(node) {
|
|
73
|
+
if (!node.querySelector('.ck-discovery-bar')) return;
|
|
74
|
+
if (node.querySelector('.ck-discovery-bar--failed') || node.querySelector('.ck-discovery-bar--completed')) return;
|
|
75
|
+
anyDiscovering = true;
|
|
76
|
+
var match = node.textContent.match(/(\d+)\s*\/\s*(\d+)/);
|
|
77
|
+
if (match) {
|
|
78
|
+
totalCurrent += parseInt(match[1], 10);
|
|
79
|
+
totalTotal += parseInt(match[2], 10);
|
|
80
|
+
}
|
|
81
|
+
});
|
|
82
|
+
if (anyDiscovering || ckRefreshing) {
|
|
83
|
+
if (totalTotal > 0) {
|
|
84
|
+
status.textContent = 'Refreshing models\u2026 ' + totalCurrent + '/' + totalTotal;
|
|
85
|
+
} else {
|
|
86
|
+
status.textContent = 'Refreshing models\u2026';
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
68
91
|
document.addEventListener("turbo:before-stream-render", function(event) {
|
|
69
92
|
var target = event.target.getAttribute("target");
|
|
93
|
+
if (target && target.indexOf("discovery_status_") === 0) {
|
|
94
|
+
requestAnimationFrame(ckUpdateRefreshProgress);
|
|
95
|
+
}
|
|
70
96
|
if (target === "prompt_llm_model" || target === "run_judge_model") {
|
|
71
97
|
ckRefreshing = false;
|
|
72
98
|
var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
|
data/config/routes.rb
CHANGED
|
@@ -14,10 +14,10 @@ CompletionKit::Engine.routes.draw do
|
|
|
14
14
|
resources :runs do
|
|
15
15
|
member do
|
|
16
16
|
post :generate
|
|
17
|
-
post :judge
|
|
18
17
|
get :suggestion
|
|
19
18
|
post :suggest
|
|
20
19
|
post :apply_suggestion
|
|
20
|
+
post :retry_failures
|
|
21
21
|
end
|
|
22
22
|
resources :responses, only: [:show]
|
|
23
23
|
end
|
|
@@ -39,7 +39,7 @@ CompletionKit::Engine.routes.draw do
|
|
|
39
39
|
resources :runs do
|
|
40
40
|
member do
|
|
41
41
|
post :generate
|
|
42
|
-
post :judge
|
|
42
|
+
post :retry_failures
|
|
43
43
|
end
|
|
44
44
|
resources :responses, only: [:index, :show]
|
|
45
45
|
end
|