completion-kit 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -1
  3. data/app/assets/stylesheets/completion_kit/application.css +87 -0
  4. data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
  5. data/app/controllers/completion_kit/runs_controller.rb +31 -11
  6. data/app/helpers/completion_kit/application_helper.rb +4 -12
  7. data/app/jobs/completion_kit/generate_row_job.rb +102 -0
  8. data/app/jobs/completion_kit/judge_review_job.rb +110 -0
  9. data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
  10. data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
  11. data/app/models/completion_kit/prompt.rb +4 -0
  12. data/app/models/completion_kit/response.rb +29 -2
  13. data/app/models/completion_kit/review.rb +17 -2
  14. data/app/models/completion_kit/run.rb +90 -96
  15. data/app/services/completion_kit/anthropic_client.rb +13 -0
  16. data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
  17. data/app/services/completion_kit/ollama_client.rb +13 -0
  18. data/app/services/completion_kit/open_ai_client.rb +11 -0
  19. data/app/services/completion_kit/open_router_client.rb +13 -0
  20. data/app/services/completion_kit/worker_health.rb +10 -0
  21. data/app/views/completion_kit/api_reference/index.html.erb +0 -5
  22. data/app/views/completion_kit/prompts/_form.html.erb +8 -5
  23. data/app/views/completion_kit/runs/_actions.html.erb +1 -1
  24. data/app/views/completion_kit/runs/_form.html.erb +6 -3
  25. data/app/views/completion_kit/runs/_progress.html.erb +1 -1
  26. data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
  27. data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
  28. data/app/views/completion_kit/runs/show.html.erb +1 -1
  29. data/app/views/layouts/completion_kit/application.html.erb +28 -2
  30. data/config/routes.rb +2 -2
  31. data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
  32. data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
  33. data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
  34. data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
  35. data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
  36. data/lib/completion_kit/concurrency_check.rb +16 -0
  37. data/lib/completion_kit/errors.rb +16 -0
  38. data/lib/completion_kit/version.rb +1 -1
  39. data/lib/completion_kit.rb +2 -2
  40. data/lib/tasks/completion_kit_runs.rake +13 -0
  41. metadata +29 -5
  42. data/app/jobs/completion_kit/generate_job.rb +0 -12
  43. data/app/jobs/completion_kit/judge_job.rb +0 -12
@@ -2,7 +2,7 @@ module CompletionKit
2
2
  class Run < ApplicationRecord
3
3
  include Turbo::Broadcastable
4
4
 
5
- STATUSES = %w[pending generating judging completed failed].freeze
5
+ STATUSES = %w[pending running completed failed].freeze
6
6
 
7
7
  belongs_to :prompt
8
8
  belongs_to :dataset, optional: true
@@ -17,6 +17,30 @@ module CompletionKit
17
17
  before_validation :set_default_status, on: :create
18
18
  before_validation :set_auto_name, on: :create
19
19
 
20
+ def mark_completed!
21
+ update!(status: "completed")
22
+ broadcast_ui
23
+ end
24
+
25
+ def outstanding_work_zero?
26
+ return false if responses.where.not(status: Response::TERMINAL_STATUSES).exists?
27
+
28
+ metric_ids = metrics.pluck(:id)
29
+ return true if metric_ids.empty?
30
+
31
+ succeeded_response_ids = responses.where(status: "succeeded").pluck(:id)
32
+ expected_reviews = succeeded_response_ids.size * metric_ids.size
33
+ return true if expected_reviews.zero?
34
+
35
+ terminal_review_count = Review.where(
36
+ response_id: succeeded_response_ids,
37
+ metric_id: metric_ids,
38
+ status: Review::TERMINAL_STATUSES
39
+ ).count
40
+
41
+ terminal_review_count >= expected_reviews
42
+ end
43
+
20
44
  def judge_configured?
21
45
  judge_model.present? && metrics.any? && ApiConfig.valid_for_model?(judge_model)
22
46
  end
@@ -45,134 +69,104 @@ module CompletionKit
45
69
  end
46
70
  end
47
71
 
48
- def generate_responses!
72
+ def start!
49
73
  rows = if dataset
50
74
  CsvProcessor.process_self(self)
51
75
  else
52
76
  [{}]
53
77
  end
54
78
 
55
- if rows.empty?
56
- errors.add(:base, "Dataset has no rows")
57
- return false
58
- end
79
+ return fail_with_summary!("Dataset has no rows") if rows.empty?
59
80
 
60
81
  client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
61
-
62
82
  unless client.configured?
63
- msg = "LLM API not configured: #{client.configuration_errors.join(', ')}"
64
- errors.add(:base, msg)
65
- update_columns(status: "failed", error_message: msg) if persisted?
66
- return false
83
+ return fail_with_summary!("LLM API not configured: #{client.configuration_errors.join(', ')}")
67
84
  end
68
85
 
69
- update!(status: "generating", progress_current: 0, progress_total: rows.length, error_message: nil)
70
- responses.destroy_all
71
- broadcast_ui
72
- broadcast_clear_responses
73
-
74
- rows.each_with_index do |row, index|
75
- input = row.empty? ? nil : row.to_json
76
- rendered = CsvProcessor.apply_variables(prompt, row)
77
- response_text = client.generate_completion(rendered, model: prompt.llm_model, temperature: temperature)
78
-
79
- resp = responses.create!(
80
- input_data: input,
81
- response_text: response_text,
82
- expected_output: row["expected_output"]
86
+ transaction do
87
+ responses.destroy_all
88
+ update!(
89
+ status: "running",
90
+ progress_current: 0,
91
+ progress_total: rows.length,
92
+ failure_summary: nil,
93
+ error_message: nil
83
94
  )
84
-
85
- update_columns(progress_current: index + 1)
86
- broadcast_progress
87
- broadcast_response(resp)
88
- end
89
-
90
- if judge_configured?
91
- judge_responses!
92
- else
93
- update!(status: "completed")
94
- broadcast_ui
95
+ rows.each_with_index do |row, index|
96
+ input = row.empty? ? nil : row.to_json
97
+ response = responses.create!(
98
+ status: "pending",
99
+ row_index: index,
100
+ input_data: input,
101
+ expected_output: row["expected_output"]
102
+ )
103
+ GenerateRowJob.perform_later(id, response.id)
104
+ end
95
105
  end
96
106
 
97
- true
98
- rescue Faraday::Error => e
99
- update_columns(status: "failed", error_message: e.message)
100
- errors.add(:base, e.message)
101
107
  broadcast_ui
102
- false
103
- rescue StandardError => e
104
- update_columns(status: "failed", error_message: e.message) if persisted?
105
- errors.add(:base, e.message)
106
- broadcast_ui if persisted?
107
- false
108
+ broadcast_clear_responses
109
+ true
108
110
  end
109
111
 
110
- def judge_responses!
111
- total_evaluations = responses.count * metrics.count
112
- update!(status: "judging", progress_current: 0, progress_total: total_evaluations, error_message: nil)
113
- broadcast_ui
114
-
115
- judge = JudgeService.new(ApiConfig.for_model(judge_model).merge(judge_model: judge_model))
116
- evaluation_count = 0
117
-
118
- responses.find_each do |response|
119
- metrics.each do |metric|
120
- evaluation = judge.evaluate(
121
- response.response_text,
122
- response.expected_output,
123
- prompt.template,
124
- criteria: metric.instruction.to_s,
125
- rubric_text: metric.display_rubric_text,
126
- input_data: response.input_data
127
- )
112
+ def generate_responses!
113
+ start!
114
+ end
128
115
 
129
- response.reviews.find_or_initialize_by(metric_id: metric.id).tap do |review|
130
- review.assign_attributes(
131
- metric_name: metric.name,
132
- instruction: metric.instruction.to_s,
133
- status: "evaluated",
134
- ai_score: evaluation[:score],
135
- ai_feedback: evaluation[:feedback]
136
- )
137
- review.save!
138
- end
139
-
140
- evaluation_count += 1
141
- update_columns(progress_current: evaluation_count)
142
- broadcast_progress
143
- end
116
+ def progress_snapshot
117
+ generated_done = responses.where(status: "succeeded").count
118
+ generated_failed = responses.where(status: "failed").count
119
+ generated_total = progress_total
144
120
 
145
- broadcast_response_update(response)
146
- end
121
+ metric_count = metrics.count
122
+ succeeded_count = generated_done
123
+ judged_total = succeeded_count * metric_count
124
+ judged_done = Review.joins(:response)
125
+ .where(completion_kit_responses: { run_id: id }, status: "succeeded").count
126
+ judged_failed = Review.joins(:response)
127
+ .where(completion_kit_responses: { run_id: id }, status: "failed").count
147
128
 
148
- update!(status: "completed")
149
- broadcast_ui
150
- true
151
- rescue Faraday::Error => e
152
- update_columns(status: "failed", error_message: e.message)
153
- errors.add(:base, e.message)
154
- broadcast_ui
155
- false
156
- rescue StandardError => e
157
- update_columns(status: "failed", error_message: e.message) if persisted?
158
- errors.add(:base, e.message)
159
- broadcast_ui if persisted?
160
- false
129
+ {
130
+ generated_done: generated_done,
131
+ generated_total: generated_total,
132
+ generated_failed: generated_failed,
133
+ judged_done: judged_done,
134
+ judged_total: judged_total,
135
+ judged_failed: judged_failed
136
+ }
161
137
  end
162
138
 
163
139
  def as_json(options = {})
140
+ snap = progress_snapshot
164
141
  {
165
142
  id: id, name: name, status: status, prompt_id: prompt_id,
166
143
  dataset_id: dataset_id, judge_model: judge_model, temperature: temperature,
167
144
  created_at: created_at, updated_at: updated_at,
168
145
  responses_count: responses.count, avg_score: avg_score,
169
- progress_current: progress_current, progress_total: progress_total,
170
- error_message: error_message, metric_ids: metric_ids
146
+ progress_current: snap[:generated_done],
147
+ progress_total: snap[:generated_total],
148
+ progress: {
149
+ generated: { done: snap[:generated_done], total: snap[:generated_total], failed: snap[:generated_failed] },
150
+ judged: { done: snap[:judged_done], total: snap[:judged_total], failed: snap[:judged_failed] }
151
+ },
152
+ failed_response_ids: responses.where(status: "failed").pluck(:id),
153
+ failure_summary: failure_summary,
154
+ error_message: error_message,
155
+ metric_ids: metric_ids
171
156
  }
172
157
  end
173
158
 
174
159
  private
175
160
 
161
+ def fail_with_summary!(message)
162
+ errors.add(:base, message)
163
+ if persisted?
164
+ update_columns(status: "failed", failure_summary: message, error_message: message)
165
+ broadcast_ui
166
+ end
167
+ false
168
+ end
169
+
176
170
  def broadcast_ui
177
171
  broadcast_progress
178
172
  broadcast_status_header
@@ -27,12 +27,25 @@ module CompletionKit
27
27
  }.to_json
28
28
  end
29
29
 
30
+ if response.status == 429
31
+ raise CompletionKit::RateLimitError.new(
32
+ response.body.to_s.truncate(500),
33
+ provider: "anthropic",
34
+ status: 429,
35
+ retry_after: nil
36
+ )
37
+ end
38
+
30
39
  if response.success?
31
40
  data = JSON.parse(response.body)
32
41
  data["content"][0]["text"].strip
33
42
  else
34
43
  "Error: #{response.status} - #{response.body}"
35
44
  end
45
+ rescue CompletionKit::RateLimitError
46
+ raise
47
+ rescue Faraday::Error
48
+ raise
36
49
  rescue => e
37
50
  "Error: #{e.message}"
38
51
  end
@@ -49,11 +49,6 @@ module CompletionKit
49
49
  description: "Generate responses for a run using its prompt and dataset",
50
50
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
51
51
  handler: :generate
52
- },
53
- "runs_judge" => {
54
- description: "Judge responses for a run using configured metrics",
55
- inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
56
- handler: :judge
57
52
  }
58
53
  }.freeze
59
54
 
@@ -92,14 +87,11 @@ module CompletionKit
92
87
 
93
88
  def self.generate(args)
94
89
  run = Run.find(args["id"])
95
- GenerateJob.perform_later(run.id)
96
- text_result(run.reload.as_json)
97
- end
98
-
99
- def self.judge(args)
100
- run = Run.find(args["id"])
101
- JudgeJob.perform_later(run.id)
102
- text_result(run.reload.as_json)
90
+ if run.start!
91
+ text_result(run.reload.as_json)
92
+ else
93
+ text_result(run.failure_summary || run.errors.full_messages.to_sentence)
94
+ end
103
95
  end
104
96
  end
105
97
  end
@@ -19,12 +19,25 @@ module CompletionKit
19
19
  }.to_json
20
20
  end
21
21
 
22
+ if response.status == 429
23
+ raise CompletionKit::RateLimitError.new(
24
+ response.body.to_s.truncate(500),
25
+ provider: "ollama",
26
+ status: 429,
27
+ retry_after: nil
28
+ )
29
+ end
30
+
22
31
  if response.success?
23
32
  data = JSON.parse(response.body)
24
33
  data["choices"][0]["text"].strip
25
34
  else
26
35
  "Error: #{response.status} - #{response.body}"
27
36
  end
37
+ rescue CompletionKit::RateLimitError
38
+ raise
39
+ rescue Faraday::Error
40
+ raise
28
41
  rescue => e
29
42
  "Error: #{e.message}"
30
43
  end
@@ -27,12 +27,23 @@ module CompletionKit
27
27
  }.to_json
28
28
  end
29
29
 
30
+ if response.status == 429
31
+ raise CompletionKit::RateLimitError.new(
32
+ response.body.to_s.truncate(500),
33
+ provider: "openai",
34
+ status: 429,
35
+ retry_after: response.headers && response.headers["Retry-After"]&.to_i
36
+ )
37
+ end
38
+
30
39
  if response.success?
31
40
  data = JSON.parse(response.body)
32
41
  data["output"][0]["content"][0]["text"].strip
33
42
  else
34
43
  "Error: #{response.status} - #{response.body}"
35
44
  end
45
+ rescue CompletionKit::RateLimitError
46
+ raise
36
47
  rescue Faraday::Error => e
37
48
  raise
38
49
  rescue => e
@@ -25,12 +25,25 @@ module CompletionKit
25
25
  }.to_json
26
26
  end
27
27
 
28
+ if response.status == 429
29
+ raise CompletionKit::RateLimitError.new(
30
+ response.body.to_s.truncate(500),
31
+ provider: "openrouter",
32
+ status: 429,
33
+ retry_after: response.headers && response.headers["Retry-After"]&.to_i
34
+ )
35
+ end
36
+
28
37
  if response.success?
29
38
  data = JSON.parse(response.body)
30
39
  data.dig("choices", 0, "message", "content").to_s.strip
31
40
  else
32
41
  "Error: #{response.status} - #{response.body}"
33
42
  end
43
+ rescue CompletionKit::RateLimitError
44
+ raise
45
+ rescue Faraday::Error
46
+ raise
34
47
  rescue => e
35
48
  "Error: #{e.message}"
36
49
  end
@@ -0,0 +1,10 @@
1
+ module CompletionKit
2
+ class WorkerHealth
3
+ HEARTBEAT_THRESHOLD = 30.seconds
4
+
5
+ def self.healthy?
6
+ return true unless defined?(::SolidQueue::Process)
7
+ ::SolidQueue::Process.where("last_heartbeat_at > ?", HEARTBEAT_THRESHOLD.ago).exists?
8
+ end
9
+ end
10
+ end
@@ -174,11 +174,6 @@ end %>
174
174
  <p class="ck-meta-copy">Start generating responses. Returns 202 Accepted. Poll the run to check progress.</p>
175
175
  <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/generate \\\n -H \"Authorization: Bearer #{token_display}\"" %>
176
176
  </div>
177
- <div class="ck-api-endpoint">
178
- <p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs/:id/judge</p>
179
- <p class="ck-meta-copy">Start judging responses with the configured judge model and metrics. Returns 202 Accepted.</p>
180
- <%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/judge \\\n -H \"Authorization: Bearer #{token_display}\"" %>
181
- </div>
182
177
  <div class="ck-api-endpoint">
183
178
  <p class="ck-api-method"><span class="ck-chip ck-chip--soft">PATCH</span> /api/v1/runs/:id</p>
184
179
  <p class="ck-meta-copy">Update a run. Accepts same params as create.</p>
@@ -32,21 +32,24 @@
32
32
  <% available = CompletionKit::ApiConfig.available_models(scope: :generation) %>
33
33
  <% if available.any? %>
34
34
  <div class="ck-select-with-action">
35
- <%= form.select :llm_model, ck_grouped_models(available, prompt.llm_model), {}, { class: "ck-input", id: "prompt_llm_model" } %>
35
+ <%= form.select :llm_model, ck_grouped_models(available, prompt.llm_model), { include_blank: "— Select a model —" }, { class: "ck-input", id: "prompt_llm_model" } %>
36
36
  <button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
37
37
  </div>
38
38
  <% else %>
39
39
  <p class="ck-meta-copy">No models available. <%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %> or click refresh after configuring a provider.</p>
40
40
  <% end %>
41
- <% CompletionKit::ProviderCredential.find_each do |pc| %>
42
- <%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
43
- <% end %>
41
+ <div hidden data-refresh-progress-carriers>
42
+ <% CompletionKit::ProviderCredential.find_each do |pc| %>
43
+ <%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
44
+ <%= render "completion_kit/provider_credentials/discovery_status", provider_credential: pc, show_completed: false %>
45
+ <% end %>
46
+ </div>
44
47
  <p class="ck-field-hint" id="refresh-status" style="min-height: 1.2em; margin-top: -0.25rem; font-size: 0.75rem;">&nbsp;</p>
45
48
  </div>
46
49
 
47
50
  <div class="ck-actions">
48
51
  <%= link_to "Cancel", prompts_path, class: ck_button_classes(:light, variant: :outline) %>
49
- <%= form.submit(prompt.persisted? ? "Save prompt" : "Create prompt", class: ck_button_classes(:dark)) %>
52
+ <%= form.submit(prompt.persisted? ? "Save prompt" : "Create prompt", class: ck_button_classes(:dark), disabled: available.empty?) %>
50
53
  </div>
51
54
  </div>
52
55
  <% end %>
@@ -1,5 +1,5 @@
1
1
  <div class="ck-actions" id="run_actions">
2
- <% running = run.status == "generating" || run.status == "judging" %>
2
+ <% running = run.status == "running" %>
3
3
  <%= button_to run_path(run), method: :delete, form_class: "inline-block", class: "ck-icon-btn", title: "Delete run", "aria-label": "Delete run", disabled: running, data: { turbo_confirm: "Delete this run and all its responses?" } do %><%= heroicon_tag "trash", variant: :outline, size: 16, "aria-hidden": "true" %><% end %>
4
4
  <% if running %>
5
5
  <%= link_to "Edit", edit_run_path(run), class: ck_button_classes(:light, variant: :outline) + " disabled", "aria-disabled": "true", tabindex: "-1" %>
@@ -54,9 +54,12 @@
54
54
  <button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
55
55
  </div>
56
56
  <p class="ck-field-hint" id="judge-hint"></p>
57
- <% CompletionKit::ProviderCredential.find_each do |pc| %>
58
- <%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
59
- <% end %>
57
+ <div hidden data-refresh-progress-carriers>
58
+ <% CompletionKit::ProviderCredential.find_each do |pc| %>
59
+ <%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
60
+ <%= render "completion_kit/provider_credentials/discovery_status", provider_credential: pc, show_completed: false %>
61
+ <% end %>
62
+ </div>
60
63
  <p class="ck-field-hint" id="refresh-status" style="min-height: 1.2em; margin-top: -0.25rem; font-size: 0.75rem;">&nbsp;</p>
61
64
  <% else %>
62
65
  <p class="ck-field-hint" style="color: var(--ck-warning);">No judge models available.&ensp;<%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %></p>
@@ -1,5 +1,5 @@
1
1
  <div id="run_progress">
2
- <% if run.status == "generating" || run.status == "judging" %>
2
+ <% if run.status == "running" %>
3
3
  <div class="ck-discovery-bar">
4
4
  <div class="ck-discovery-bar__label">
5
5
  <%= ck_run_status_label(run) %>
@@ -1,13 +1,31 @@
1
- <%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row", id: "response_#{response.id}" do %>
1
+ <%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row ck-response-row--#{response.status}", id: "response_#{response.id}" do %>
2
2
  <span class="ck-response-row__index">#<%= index %></span>
3
- <span class="ck-response-row__text"><%= truncate(response.response_text.to_s, length: 160) %></span>
3
+ <span class="ck-response-row__text">
4
+ <% if response.succeeded? %>
5
+ <%= truncate(response.response_text.to_s, length: 160) %>
6
+ <% elsif response.status == "failed" %>
7
+ <% err = response.error_payload %>
8
+ <span class="ck-response-row__error">
9
+ <%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 120) %>
10
+ </span>
11
+ <% end %>
12
+ </span>
4
13
  <span class="ck-response-row__score">
5
- <% if response.reviewed? %>
6
- <span class="ck-score"><span class="ck-score__star">★</span> <%= response.score %></span>
7
- <% elsif run.status == "failed" %>
8
- <span class="ck-chip">Failed</span>
9
- <% elsif run.status == "judging" %>
10
- <span class="ck-chip">Judging</span>
14
+ <% case response.status
15
+ when "succeeded" %>
16
+ <% if response.reviewed? %>
17
+ <span class="ck-score"><span class="ck-score__star">★</span> <%= response.score %></span>
18
+ <% elsif run.status == "running" %>
19
+ <span class="ck-chip">Judging</span>
20
+ <% end %>
21
+ <% when "pending" %>
22
+ <span class="ck-chip">Queued</span>
23
+ <% when "retrying" %>
24
+ <span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
25
+ <% when "failed" %>
26
+ <%= link_to "Retry", retry_failures_run_path(run, only: response.id),
27
+ data: { turbo_method: :post },
28
+ class: "ck-chip ck-chip--danger ck-chip--retry" %>
11
29
  <% end %>
12
30
  </span>
13
31
  <% end %>
@@ -1,9 +1,17 @@
1
+ <% snap = run.progress_snapshot %>
1
2
  <div id="run_status_header">
2
3
  <% if run.status == "failed" %>
3
4
  <div class="ck-flash ck-flash--alert">
4
- <%= run.error_message.presence || "Run failed." %>
5
+ <%= run.failure_summary.presence || run.error_message.presence || "Run failed." %>
5
6
  </div>
6
7
  <% end %>
8
+
9
+ <% if run.status == "running" && !CompletionKit::WorkerHealth.healthy? %>
10
+ <div class="ck-flash ck-flash--alert">
11
+ No worker process is running. Generate and judge jobs are queued but nothing is processing them. Start <code>bin/jobs</code> (or your worker service) to resume.
12
+ </div>
13
+ <% end %>
14
+
7
15
  <section class="ck-page-header">
8
16
  <div>
9
17
  <p class="ck-kicker"><span class="<%= ck_run_dot(run) %>"></span> <%= ck_run_status_label(run) %></p>
@@ -12,4 +20,31 @@
12
20
  </div>
13
21
  <%= render "completion_kit/runs/actions", run: run %>
14
22
  </section>
23
+
24
+ <% if run.status.in?(%w[running completed]) && snap[:generated_total] > 0 %>
25
+ <div class="ck-progress-block">
26
+ <div class="ck-progress-line">
27
+ Generated <%= snap[:generated_done] %>/<%= snap[:generated_total] %>
28
+ <% if snap[:generated_failed] > 0 %>
29
+ <span class="ck-progress-failed">(<%= snap[:generated_failed] %> failed)</span>
30
+ <% end %>
31
+ </div>
32
+ <% if snap[:judged_total] > 0 %>
33
+ <div class="ck-progress-line">
34
+ Judged <%= snap[:judged_done] %>/<%= snap[:judged_total] %>
35
+ <% if snap[:judged_failed] > 0 %>
36
+ <span class="ck-progress-failed">(<%= snap[:judged_failed] %> failed)</span>
37
+ <% end %>
38
+ </div>
39
+ <% end %>
40
+ <% failed_count = snap[:generated_failed] + snap[:judged_failed] %>
41
+ <% if failed_count > 0 %>
42
+ <%= button_to "Retry #{failed_count} failed #{"row".pluralize(failed_count)}",
43
+ retry_failures_run_path(run),
44
+ method: :post,
45
+ class: ck_button_classes(:light, variant: :outline),
46
+ form_class: "inline-block" %>
47
+ <% end %>
48
+ </div>
49
+ <% end %>
15
50
  </div>
@@ -52,7 +52,7 @@
52
52
  </div>
53
53
  <p class="ck-prompt-preview__text" id="prompt_text"><%= @run.prompt.template %></p>
54
54
  <% if @run.prompt.template.length > 200 %>
55
- <button type="button" class="ck-link" id="prompt_toggle" aria-expanded="false" aria-controls="prompt_text" onclick="var t=document.getElementById('prompt_text');var l=this;var expanded=t.classList.toggle('ck-prompt-preview__text--expanded');l.textContent=expanded?'Show less':'Show more';l.setAttribute('aria-expanded',expanded?'true':'false')">Show more</button>
55
+ <button type="button" class="ck-disclosure-toggle" id="prompt_toggle" aria-expanded="false" aria-controls="prompt_text" onclick="var t=document.getElementById('prompt_text');var l=this;var expanded=t.classList.toggle('ck-prompt-preview__text--expanded');l.firstChild.textContent=expanded?'Show less':'Show more';l.setAttribute('aria-expanded',expanded?'true':'false')"><span>Show more</span></button>
56
56
  <% end %>
57
57
  </div>
58
58
 
@@ -56,8 +56,7 @@ function ckRefreshModels() {
56
56
  ckRefreshing = true;
57
57
  var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
58
58
  if (btn) btn.classList.add('ck-icon-btn--spinning');
59
- var status = document.getElementById('refresh-status');
60
- if (status) status.textContent = 'Refreshing models\u2026';
59
+ ckUpdateRefreshProgress();
61
60
  var csrfToken = document.querySelector('meta[name="csrf-token"]').getAttribute("content");
62
61
  fetch("/completion_kit/refresh_models", {
63
62
  method: "POST",
@@ -65,8 +64,35 @@ function ckRefreshModels() {
65
64
  });
66
65
  }
67
66
 
67
+ function ckUpdateRefreshProgress() {
68
+ var status = document.getElementById('refresh-status');
69
+ if (!status) return;
70
+ var carriers = document.querySelectorAll('[data-refresh-progress-carriers] [id^="discovery_status_"]');
71
+ var totalCurrent = 0, totalTotal = 0, anyDiscovering = false;
72
+ carriers.forEach(function(node) {
73
+ if (!node.querySelector('.ck-discovery-bar')) return;
74
+ if (node.querySelector('.ck-discovery-bar--failed') || node.querySelector('.ck-discovery-bar--completed')) return;
75
+ anyDiscovering = true;
76
+ var match = node.textContent.match(/(\d+)\s*\/\s*(\d+)/);
77
+ if (match) {
78
+ totalCurrent += parseInt(match[1], 10);
79
+ totalTotal += parseInt(match[2], 10);
80
+ }
81
+ });
82
+ if (anyDiscovering || ckRefreshing) {
83
+ if (totalTotal > 0) {
84
+ status.textContent = 'Refreshing models\u2026 ' + totalCurrent + '/' + totalTotal;
85
+ } else {
86
+ status.textContent = 'Refreshing models\u2026';
87
+ }
88
+ }
89
+ }
90
+
68
91
  document.addEventListener("turbo:before-stream-render", function(event) {
69
92
  var target = event.target.getAttribute("target");
93
+ if (target && target.indexOf("discovery_status_") === 0) {
94
+ requestAnimationFrame(ckUpdateRefreshProgress);
95
+ }
70
96
  if (target === "prompt_llm_model" || target === "run_judge_model") {
71
97
  ckRefreshing = false;
72
98
  var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
data/config/routes.rb CHANGED
@@ -14,10 +14,10 @@ CompletionKit::Engine.routes.draw do
14
14
  resources :runs do
15
15
  member do
16
16
  post :generate
17
- post :judge
18
17
  get :suggestion
19
18
  post :suggest
20
19
  post :apply_suggestion
20
+ post :retry_failures
21
21
  end
22
22
  resources :responses, only: [:show]
23
23
  end
@@ -39,7 +39,7 @@ CompletionKit::Engine.routes.draw do
39
39
  resources :runs do
40
40
  member do
41
41
  post :generate
42
- post :judge
42
+ post :retry_failures
43
43
  end
44
44
  resources :responses, only: [:index, :show]
45
45
  end