completion-kit 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -1
  3. data/Rakefile +0 -3
  4. data/app/assets/stylesheets/completion_kit/application.css +87 -0
  5. data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
  6. data/app/controllers/completion_kit/runs_controller.rb +31 -11
  7. data/app/helpers/completion_kit/application_helper.rb +4 -12
  8. data/app/jobs/completion_kit/generate_row_job.rb +102 -0
  9. data/app/jobs/completion_kit/judge_review_job.rb +110 -0
  10. data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
  11. data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
  12. data/app/models/completion_kit/prompt.rb +4 -0
  13. data/app/models/completion_kit/response.rb +29 -2
  14. data/app/models/completion_kit/review.rb +17 -2
  15. data/app/models/completion_kit/run.rb +90 -96
  16. data/app/services/completion_kit/anthropic_client.rb +13 -0
  17. data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
  18. data/app/services/completion_kit/ollama_client.rb +13 -0
  19. data/app/services/completion_kit/open_ai_client.rb +11 -0
  20. data/app/services/completion_kit/open_router_client.rb +13 -0
  21. data/app/services/completion_kit/worker_health.rb +10 -0
  22. data/app/views/completion_kit/api_reference/index.html.erb +0 -5
  23. data/app/views/completion_kit/prompts/_form.html.erb +8 -5
  24. data/app/views/completion_kit/runs/_actions.html.erb +1 -1
  25. data/app/views/completion_kit/runs/_form.html.erb +6 -3
  26. data/app/views/completion_kit/runs/_progress.html.erb +1 -1
  27. data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
  28. data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
  29. data/app/views/completion_kit/runs/show.html.erb +1 -1
  30. data/app/views/layouts/completion_kit/application.html.erb +28 -2
  31. data/config/routes.rb +2 -2
  32. data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
  33. data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
  34. data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
  35. data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
  36. data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
  37. data/lib/completion_kit/concurrency_check.rb +16 -0
  38. data/lib/completion_kit/errors.rb +16 -0
  39. data/lib/completion_kit/version.rb +1 -1
  40. data/lib/completion_kit.rb +2 -2
  41. data/lib/tasks/completion_kit_runs.rake +13 -0
  42. metadata +29 -5
  43. data/app/jobs/completion_kit/generate_job.rb +0 -12
  44. data/app/jobs/completion_kit/judge_job.rb +0 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 158791345bf5503ca9dbca6d6e8374c17b7802cbee673cf783d6c2f33ac144db
4
- data.tar.gz: 6912a4de3c685d62adeaa5670a2ad6db6c847f941d225356db788ec1275d7652
3
+ metadata.gz: 2ae15d465cd8a4a617df6ff8f8cd401495b18b07813e944149e4eba940d007e9
4
+ data.tar.gz: bd35dcea6cc46ca13d388b448b5d5ee3028519786e40154931fa5724d09d894b
5
5
  SHA512:
6
- metadata.gz: '09bf06065c3c7219456f2f5f6588d0121fcbc4c7d68baf6fff9cb18f6196dae7170d0068ad0d3d7f46cb1e4e339ec775eecd355321ee8866ac1252af5234ca4d'
7
- data.tar.gz: 12e422f715cb21eed7d759ad8b2924a3f7daeff88d816c278f8c0af778e6dd9583203a25be7214d9959646a6a435420658d481f0b983b66097e765db5244eb2f
6
+ metadata.gz: f51ddf1a4d6a1eae534078d8e12b3f8a4e03576ee665cc575025038d0e63db72f44ab32627aa660b8863d1b49639286668818581c36efbbd2a21e99bba8f5e1b
7
+ data.tar.gz: 171fc9597f7b44a80c3f0d80a0ff5abd258fb766a34089632c4040940dd1bbcc9dcf0544bed77309c398d8e2187025e25271fa9a9fb2709c23f04d6cc2836cfa
data/README.md CHANGED
@@ -35,9 +35,20 @@ cd completion-kit/standalone
35
35
  bundle install
36
36
  bin/rails completion_kit:install:migrations
37
37
  bin/rails db:migrate
38
+ ```
39
+
40
+ Then run **both** processes — a web server and a Solid Queue worker. In two terminals:
41
+
42
+ ```bash
38
43
  bin/rails server
39
44
  ```
40
45
 
46
+ ```bash
47
+ bin/jobs
48
+ ```
49
+
50
+ Or with [foreman](https://github.com/ddollar/foreman) in one terminal: `foreman start -f Procfile.dev`.
51
+
41
52
  Visit `http://localhost:3000`. Add a provider credential (Settings), create a prompt, upload a CSV dataset, and run it.
42
53
 
43
54
  ### Or mount as an engine in your existing Rails app
@@ -51,7 +62,7 @@ bin/rails generate completion_kit:install
51
62
  bin/rails db:migrate
52
63
  ```
53
64
 
54
- The engine mounts at `/completion_kit` in your app.
65
+ The engine mounts at `/completion_kit` in your app. CompletionKit's generate and judge flows enqueue Active Job jobs (`CompletionKit::GenerateRowJob`, `CompletionKit::JudgeReviewJob`, `CompletionKit::RunCompletionCheckJob`), so your host app needs an Active Job adapter that actually processes them — Solid Queue, Sidekiq, GoodJob, etc. The `:async` adapter is **not** suitable for production: it runs jobs in the web Puma's thread pool with no durability and no retry, and a long LLM call will block request handling.
55
66
 
56
67
  ## Providers
57
68
 
data/Rakefile CHANGED
@@ -1,7 +1,4 @@
1
1
  require "bundler/setup"
2
-
3
- load "rails/tasks/statistics.rake"
4
-
5
2
  require "bundler/gem_tasks"
6
3
  require "rspec/core/rake_task"
7
4
 
@@ -274,6 +274,39 @@ form.button_to {
274
274
  color: var(--ck-accent);
275
275
  }
276
276
 
277
+ .ck-disclosure-toggle {
278
+ appearance: none;
279
+ background: transparent;
280
+ border: 0;
281
+ padding: 0;
282
+ margin: 0.5rem 0 0;
283
+ font-family: var(--ck-mono);
284
+ font-size: 0.75rem;
285
+ font-weight: 500;
286
+ letter-spacing: 0.12em;
287
+ text-transform: uppercase;
288
+ color: var(--ck-muted);
289
+ cursor: pointer;
290
+ transition: color 0.15s;
291
+ }
292
+
293
+ .ck-disclosure-toggle:hover,
294
+ .ck-disclosure-toggle:focus-visible {
295
+ color: var(--ck-accent);
296
+ outline: none;
297
+ }
298
+
299
+ .ck-disclosure-toggle::after {
300
+ content: " ↓";
301
+ display: inline-block;
302
+ margin-left: 0.25rem;
303
+ transition: transform 0.15s;
304
+ }
305
+
306
+ .ck-disclosure-toggle[aria-expanded="true"]::after {
307
+ transform: rotate(180deg);
308
+ }
309
+
277
310
  .ck-list {
278
311
  display: grid;
279
312
  gap: 0.5rem;
@@ -385,6 +418,18 @@ tr:hover .ck-chip--publish {
385
418
  color: var(--ck-accent);
386
419
  }
387
420
 
421
+ .ck-chip--warning {
422
+ background: var(--ck-warning-soft);
423
+ border-color: rgba(224, 164, 88, 0.3);
424
+ color: var(--ck-warning);
425
+ }
426
+
427
+ .ck-chip--danger {
428
+ background: var(--ck-danger-soft);
429
+ border-color: rgba(248, 113, 113, 0.3);
430
+ color: var(--ck-danger);
431
+ }
432
+
388
433
  .ck-badge--high {
389
434
  background: var(--ck-success-soft);
390
435
  border: 1px solid rgba(34, 197, 94, 0.25);
@@ -679,6 +724,27 @@ tr:hover .ck-chip--publish {
679
724
  color: var(--ck-text);
680
725
  }
681
726
 
727
+ .ck-progress-block {
728
+ padding: 0.5rem 1rem 0.75rem;
729
+ border-top: 1px solid var(--ck-line);
730
+ font-size: 0.72rem;
731
+ font-family: var(--ck-mono);
732
+ color: var(--ck-muted);
733
+ display: flex;
734
+ flex-direction: column;
735
+ gap: 0.25rem;
736
+ }
737
+
738
+ .ck-progress-line {
739
+ display: flex;
740
+ gap: 0.4rem;
741
+ align-items: baseline;
742
+ }
743
+
744
+ .ck-progress-failed {
745
+ color: var(--ck-danger);
746
+ }
747
+
682
748
  .ck-model-list-details summary {
683
749
  list-style: none;
684
750
  }
@@ -802,6 +868,12 @@ tr:hover .ck-chip--publish {
802
868
  color: var(--ck-muted);
803
869
  }
804
870
 
871
+ .ck-field--info #refresh-status,
872
+ .ck-field--warn #refresh-status,
873
+ .ck-field--error #refresh-status {
874
+ color: var(--ck-muted);
875
+ }
876
+
805
877
  .ck-field--info .ck-input {
806
878
  border-color: var(--ck-accent);
807
879
  }
@@ -1815,6 +1887,21 @@ select.ck-input {
1815
1887
  flex-shrink: 0;
1816
1888
  }
1817
1889
 
1890
+ .ck-response-row--pending .ck-response-row__text,
1891
+ .ck-response-row--retrying .ck-response-row__text {
1892
+ color: var(--ck-dim);
1893
+ }
1894
+
1895
+ .ck-response-row--failed .ck-response-row__text {
1896
+ color: var(--ck-danger);
1897
+ opacity: 0.8;
1898
+ }
1899
+
1900
+ .ck-response-row__error {
1901
+ font-family: var(--ck-mono);
1902
+ font-size: 0.82rem;
1903
+ }
1904
+
1818
1905
  .ck-score {
1819
1906
  font-size: 0.85rem;
1820
1907
  color: var(--ck-muted);
@@ -2,7 +2,7 @@ module CompletionKit
2
2
  module Api
3
3
  module V1
4
4
  class RunsController < BaseController
5
- before_action :set_run, only: [:show, :update, :destroy, :generate, :judge]
5
+ before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures]
6
6
 
7
7
  def index
8
8
  render json: Run.order(created_at: :desc)
@@ -37,12 +37,33 @@ module CompletionKit
37
37
  end
38
38
 
39
39
  def generate
40
- GenerateJob.perform_later(@run.id)
41
- render json: @run.reload, status: :accepted
40
+ if @run.start!
41
+ render json: @run.reload, status: :accepted
42
+ else
43
+ render json: { errors: [@run.failure_summary || @run.errors.full_messages.to_sentence] }, status: :unprocessable_entity
44
+ end
42
45
  end
43
46
 
44
- def judge
45
- JudgeJob.perform_later(@run.id)
47
+ def retry_failures
48
+ scope = @run.responses.where(status: "failed")
49
+ scope = scope.where(id: params[:only]) if params[:only].present?
50
+
51
+ ActiveRecord::Base.transaction do
52
+ failed_response_ids = scope.pluck(:id)
53
+ CompletionKit::Review.where(response_id: failed_response_ids, status: "failed").update_all(
54
+ status: "pending", attempts: 0,
55
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
56
+ ai_score: nil, ai_feedback: nil
57
+ )
58
+ scope.update_all(
59
+ status: "pending", attempts: 0,
60
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
61
+ response_text: nil
62
+ )
63
+ @run.update!(status: "running")
64
+ failed_response_ids.each { |rid| CompletionKit::GenerateRowJob.perform_later(@run.id, rid) }
65
+ end
66
+
46
67
  render json: @run.reload, status: :accepted
47
68
  end
48
69
 
@@ -1,6 +1,6 @@
1
1
  module CompletionKit
2
2
  class RunsController < ApplicationController
3
- before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :judge, :suggest, :suggestion, :apply_suggestion]
3
+ before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :suggestion, :apply_suggestion, :retry_failures]
4
4
  before_action :load_form_collections, only: [:new, :edit, :create, :update]
5
5
 
6
6
  def index
@@ -63,17 +63,11 @@ module CompletionKit
63
63
  end
64
64
 
65
65
  def generate
66
- @run.update!(status: "generating", progress_current: 0, progress_total: 0, error_message: nil)
67
- GenerateJob.perform_later(@run.id)
68
- redirect_to run_path(@run)
69
- end
70
-
71
- def judge
72
- if params[:run]
73
- @run.update(judge_model: params[:run][:judge_model])
66
+ if @run.start!
67
+ redirect_to run_path(@run)
68
+ else
69
+ redirect_to run_path(@run), alert: @run.failure_summary || @run.errors.full_messages.to_sentence
74
70
  end
75
- JudgeJob.perform_later(@run.id)
76
- redirect_to run_path(@run)
77
71
  end
78
72
 
79
73
  def suggest
@@ -93,6 +87,32 @@ module CompletionKit
93
87
  return redirect_to run_path(@run), alert: "No suggestion available. Generate one first." unless @suggestion
94
88
  end
95
89
 
90
+ def retry_failures
91
+ scope = @run.responses.where(status: "failed")
92
+ scope = scope.where(id: params[:only]) if params[:only].present?
93
+
94
+ ActiveRecord::Base.transaction do
95
+ failed_response_ids = scope.pluck(:id)
96
+ Review.where(response_id: failed_response_ids, status: "failed").update_all(
97
+ status: "pending",
98
+ attempts: 0,
99
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
100
+ ai_score: nil, ai_feedback: nil
101
+ )
102
+ scope.update_all(
103
+ status: "pending",
104
+ attempts: 0,
105
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
106
+ response_text: nil
107
+ )
108
+ @run.update!(status: "running")
109
+ failed_response_ids.each { |rid| GenerateRowJob.perform_later(@run.id, rid) }
110
+ end
111
+
112
+ @run.send(:broadcast_ui)
113
+ redirect_to run_path(@run)
114
+ end
115
+
96
116
  def apply_suggestion
97
117
  suggestion = @run.suggestions.order(created_at: :desc).first
98
118
  return redirect_to run_path(@run), alert: "No suggestion to apply." unless suggestion
@@ -35,8 +35,6 @@ module CompletionKit
35
35
  "ck-badge ck-badge--pending"
36
36
  when "running"
37
37
  "ck-badge ck-badge--running"
38
- when "generating", "judging"
39
- "ck-badge ck-badge--running"
40
38
  when "completed"
41
39
  "ck-badge ck-badge--high"
42
40
  when "failed"
@@ -48,7 +46,7 @@ module CompletionKit
48
46
 
49
47
  def ck_run_dot(run)
50
48
  case run.status
51
- when "generating", "judging" then "ck-dot ck-dot--running"
49
+ when "running" then "ck-dot ck-dot--running"
52
50
  when "failed" then "ck-dot ck-dot--failed"
53
51
  when "completed" then "ck-dot ck-dot--completed"
54
52
  else "ck-dot ck-dot--pending"
@@ -58,17 +56,11 @@ module CompletionKit
58
56
  def ck_run_status_label(run)
59
57
  case run.status
60
58
  when "pending" then "Ready to run"
61
- when "generating"
62
- if run.progress_total.to_i > 0
63
- "Generating responses (#{run.progress_current}/#{run.progress_total})"
64
- else
65
- "Generating responses…"
66
- end
67
- when "judging"
59
+ when "running"
68
60
  if run.progress_total.to_i > 0
69
- "Judging (#{run.progress_current}/#{run.progress_total} evaluations)"
61
+ "Running (#{run.progress_current}/#{run.progress_total})"
70
62
  else
71
- "Judging…"
63
+ "Running…"
72
64
  end
73
65
  when "completed" then "Completed"
74
66
  when "failed" then "Failed"
@@ -0,0 +1,102 @@
1
+ require "faraday"
2
+
3
+ module CompletionKit
4
+ class GenerateRowJob < ApplicationJob
5
+ queue_as :llm
6
+
7
+ limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
8
+ key: ->(run_id, _) { "run:#{run_id}" },
9
+ duration: 10.minutes
10
+
11
+ def self.rate_limit_wait(executions)
12
+ 30 * executions
13
+ end
14
+
15
+ retry_on Faraday::TimeoutError,
16
+ Faraday::ConnectionFailed,
17
+ wait: :polynomially_longer, attempts: 5
18
+
19
+ retry_on CompletionKit::RateLimitError,
20
+ wait: method(:rate_limit_wait), attempts: 5
21
+
22
+ discard_on ActiveJob::DeserializationError
23
+ discard_on CompletionKit::ConfigurationError
24
+
25
+ rescue_from(StandardError) do |error|
26
+ record_terminal_failure!(error)
27
+ enqueue_completion_check
28
+ end
29
+
30
+ before_perform do |job|
31
+ response = Response.find_by(id: job.arguments.last)
32
+ next unless response
33
+ response.update_columns(status: "retrying", attempts: response.attempts + 1)
34
+ response.run.send(:broadcast_response_update, response) if response.run
35
+ end
36
+
37
+ def perform(run_id, response_id)
38
+ @run_id = run_id
39
+ @response_id = response_id
40
+
41
+ response = Response.find(response_id)
42
+ run = response.run
43
+ prompt = run.prompt
44
+
45
+ row = parsed_input(response)
46
+ rendered = CsvProcessor.apply_variables(prompt, row)
47
+ client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
48
+
49
+ raise ConfigurationError, client.configuration_errors.join(", ") unless client.configured?
50
+
51
+ text = client.generate_completion(rendered, model: prompt.llm_model, temperature: run.temperature)
52
+
53
+ response.update!(
54
+ status: "succeeded",
55
+ response_text: text,
56
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil
57
+ )
58
+ run.send(:broadcast_response_update, response)
59
+
60
+ if run.judge_configured?
61
+ run.metrics.each do |metric|
62
+ JudgeReviewJob.perform_later(response.id, metric.id)
63
+ end
64
+ end
65
+
66
+ enqueue_completion_check
67
+ end
68
+
69
+ private
70
+
71
+ def parsed_input(response)
72
+ return {} if response.input_data.blank?
73
+ JSON.parse(response.input_data)
74
+ rescue JSON::ParserError
75
+ {}
76
+ end
77
+
78
+ def record_terminal_failure!(error)
79
+ response_id = @response_id || arguments.last
80
+ response = Response.find_by(id: response_id)
81
+ return unless response
82
+
83
+ response.update_columns(
84
+ status: "failed",
85
+ error_provider: provider_for(response),
86
+ error_class: error.class.name,
87
+ error_status: error.respond_to?(:status) ? error.status : nil,
88
+ error_message: error.message.to_s.truncate(2000)
89
+ )
90
+ response.run&.send(:broadcast_response_update, response)
91
+ end
92
+
93
+ def provider_for(response)
94
+ response.run&.prompt&.llm_model_provider
95
+ end
96
+
97
+ def enqueue_completion_check
98
+ run_id = @run_id || arguments.first
99
+ RunCompletionCheckJob.perform_later(run_id)
100
+ end
101
+ end
102
+ end
@@ -0,0 +1,110 @@
1
+ require "faraday"
2
+
3
+ module CompletionKit
4
+ class JudgeReviewJob < ApplicationJob
5
+ queue_as :llm
6
+
7
+ limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
8
+ key: ->(response_id, _) { "run:#{Response.find_by(id: response_id)&.run_id}" },
9
+ duration: 10.minutes
10
+
11
+ def self.rate_limit_wait(executions)
12
+ 30 * executions
13
+ end
14
+
15
+ retry_on Faraday::TimeoutError,
16
+ Faraday::ConnectionFailed,
17
+ wait: :polynomially_longer, attempts: 5
18
+
19
+ retry_on CompletionKit::RateLimitError,
20
+ wait: method(:rate_limit_wait), attempts: 5
21
+
22
+ discard_on ActiveJob::DeserializationError
23
+ discard_on CompletionKit::ConfigurationError
24
+
25
+ rescue_from(StandardError) do |error|
26
+ record_terminal_failure!(error)
27
+ enqueue_completion_check
28
+ end
29
+
30
+ before_perform do |job|
31
+ response_id, metric_id = job.arguments
32
+ response = Response.find_by(id: response_id)
33
+ next unless response
34
+ review = response.reviews.find_or_initialize_by(metric_id: metric_id)
35
+ review.metric_name ||= Metric.find_by(id: metric_id)&.name || "(deleted metric)"
36
+ review.attempts = (review.attempts || 0) + 1
37
+ review.status = "retrying"
38
+ review.save!(validate: false)
39
+ response.run.send(:broadcast_response_update, response) if response.run
40
+ end
41
+
42
+ def perform(response_id, metric_id)
43
+ @response_id = response_id
44
+ @metric_id = metric_id
45
+
46
+ response = Response.find(response_id)
47
+ metric = Metric.find(metric_id)
48
+ run = response.run
49
+
50
+ config = ApiConfig.for_model(run.judge_model).merge(judge_model: run.judge_model)
51
+ judge = JudgeService.new(config)
52
+
53
+ evaluation = judge.evaluate(
54
+ response.response_text,
55
+ response.expected_output,
56
+ run.prompt.template,
57
+ criteria: metric.instruction.to_s,
58
+ rubric_text: metric.display_rubric_text,
59
+ input_data: response.input_data
60
+ )
61
+
62
+ review = response.reviews.find_or_initialize_by(metric_id: metric.id)
63
+ review.assign_attributes(
64
+ metric_name: metric.name,
65
+ instruction: metric.instruction.to_s,
66
+ status: "succeeded",
67
+ ai_score: evaluation[:score],
68
+ ai_feedback: evaluation[:feedback],
69
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil
70
+ )
71
+ review.save!
72
+
73
+ run.send(:broadcast_response_update, response)
74
+ enqueue_completion_check
75
+ end
76
+
77
+ private
78
+
79
+ def record_terminal_failure!(error)
80
+ response_id = @response_id || arguments.first
81
+ metric_id = @metric_id || arguments.last
82
+ response = Response.find_by(id: response_id)
83
+ return unless response
84
+
85
+ review = response.reviews.find_or_initialize_by(metric_id: metric_id)
86
+ review.assign_attributes(
87
+ metric_name: review.metric_name || Metric.find_by(id: metric_id)&.name || "(deleted metric)",
88
+ status: "failed",
89
+ error_provider: provider_for(response),
90
+ error_class: error.class.name,
91
+ error_status: error.respond_to?(:status) ? error.status : nil,
92
+ error_message: error.message.to_s.truncate(2000)
93
+ )
94
+ review.save!(validate: false)
95
+ response.run&.send(:broadcast_response_update, response)
96
+ end
97
+
98
+ def provider_for(response)
99
+ run = response.run
100
+ return nil unless run&.judge_model
101
+ ApiConfig.provider_for_model(run.judge_model)
102
+ end
103
+
104
+ def enqueue_completion_check
105
+ response_id = @response_id || arguments.first
106
+ response = Response.find_by(id: response_id)
107
+ RunCompletionCheckJob.perform_later(response.run_id) if response
108
+ end
109
+ end
110
+ end
@@ -1,7 +1,29 @@
1
+ require "faraday"
2
+
1
3
  module CompletionKit
2
4
  class ModelDiscoveryJob < ApplicationJob
3
5
  queue_as :default
4
6
 
7
+ def self.rate_limit_wait(executions)
8
+ 30 * executions
9
+ end
10
+
11
+ retry_on Faraday::TimeoutError,
12
+ Faraday::ConnectionFailed,
13
+ wait: :polynomially_longer, attempts: 5
14
+
15
+ retry_on CompletionKit::RateLimitError,
16
+ wait: method(:rate_limit_wait), attempts: 5
17
+
18
+ discard_on ActiveJob::DeserializationError
19
+
20
+ rescue_from(StandardError) do |_error|
21
+ credential = ProviderCredential.find(arguments.first)
22
+ credential.update_columns(discovery_status: "failed")
23
+ credential.reload
24
+ credential.broadcast_discovery_progress
25
+ end
26
+
5
27
  def perform(provider_credential_id)
6
28
  credential = ProviderCredential.find_by(id: provider_credential_id)
7
29
  return unless credential
@@ -20,10 +42,6 @@ module CompletionKit
20
42
  credential.update_columns(discovery_status: "completed", updated_at: Time.current)
21
43
  credential.reload
22
44
  credential.broadcast_discovery_complete
23
- rescue StandardError
24
- credential.update_columns(discovery_status: "failed")
25
- credential.reload
26
- credential.broadcast_discovery_progress
27
45
  end
28
46
  end
29
47
  end
@@ -0,0 +1,18 @@
1
+ module CompletionKit
2
+ class RunCompletionCheckJob < ApplicationJob
3
+ queue_as :default
4
+
5
+ limits_concurrency to: 1,
6
+ key: ->(run_id) { "run:#{run_id}:completion" },
7
+ duration: 5.minutes
8
+
9
+ def perform(run_id)
10
+ run = Run.find_by(id: run_id)
11
+ return unless run
12
+ return unless run.status == "running"
13
+ return unless run.outstanding_work_zero?
14
+
15
+ run.mark_completed!
16
+ end
17
+ end
18
+ end
@@ -42,6 +42,10 @@ module CompletionKit
42
42
  "#{name} — #{version_label}"
43
43
  end
44
44
 
45
+ def llm_model_provider
46
+ ApiConfig.provider_for_model(llm_model)
47
+ end
48
+
45
49
  def family_versions
46
50
  self.class.where(family_key: family_key).order(version_number: :desc, created_at: :desc)
47
51
  end
@@ -1,18 +1,34 @@
1
1
  module CompletionKit
2
2
  class Response < ApplicationRecord
3
+ STATUSES = %w[pending retrying succeeded failed].freeze
4
+ TERMINAL_STATUSES = %w[succeeded failed].freeze
5
+
3
6
  belongs_to :run
4
7
  has_many :reviews, dependent: :destroy
5
8
 
6
9
  delegate :prompt, to: :run
7
10
 
8
- validates :response_text, presence: true
11
+ validates :response_text, presence: true, if: :succeeded?
12
+ validates :status, inclusion: { in: STATUSES }
13
+
14
+ before_validation :set_default_status, on: :create
15
+
16
+ def terminal?
17
+ TERMINAL_STATUSES.include?(status)
18
+ end
19
+
20
+ def succeeded?
21
+ status == "succeeded"
22
+ end
9
23
 
10
24
  def as_json(options = {})
11
25
  {
12
26
  id: id, run_id: run_id, input_data: input_data,
13
27
  response_text: response_text, expected_output: expected_output,
14
28
  created_at: created_at, score: score, reviewed: reviewed?,
15
- reviews: reviews.map(&:as_json)
29
+ reviews: reviews.map(&:as_json),
30
+ status: status, attempts: attempts, row_index: row_index,
31
+ error: error_payload
16
32
  }
17
33
  end
18
34
 
@@ -26,5 +42,16 @@ module CompletionKit
26
42
  def reviewed?
27
43
  reviews.any? { |r| r.ai_score.present? }
28
44
  end
45
+
46
+ def error_payload
47
+ return nil if error_class.blank?
48
+ { provider: error_provider, class: error_class, status: error_status, message: error_message }
49
+ end
50
+
51
+ private
52
+
53
+ def set_default_status
54
+ self.status ||= "pending"
55
+ end
29
56
  end
30
57
  end
@@ -1,6 +1,7 @@
1
1
  module CompletionKit
2
2
  class Review < ApplicationRecord
3
- STATUSES = %w[pending evaluated failed].freeze
3
+ STATUSES = %w[pending retrying succeeded failed].freeze
4
+ TERMINAL_STATUSES = %w[succeeded failed].freeze
4
5
 
5
6
  belongs_to :response
6
7
  belongs_to :metric, optional: true
@@ -11,11 +12,25 @@ module CompletionKit
11
12
 
12
13
  before_validation :set_default_status
13
14
 
15
+ def terminal?
16
+ TERMINAL_STATUSES.include?(status)
17
+ end
18
+
19
+ def succeeded?
20
+ status == "succeeded"
21
+ end
22
+
23
+ def error_payload
24
+ return nil if error_class.blank?
25
+ { provider: error_provider, class: error_class, status: error_status, message: error_message }
26
+ end
27
+
14
28
  def as_json(options = {})
15
29
  {
16
30
  id: id, response_id: response_id, metric_id: metric_id,
17
31
  metric_name: metric_name, ai_score: ai_score,
18
- ai_feedback: ai_feedback, status: status
32
+ ai_feedback: ai_feedback, status: status, attempts: attempts,
33
+ error: error_payload
19
34
  }
20
35
  end
21
36