completion-kit 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/LICENSE +97 -0
  3. data/README.md +19 -2
  4. data/app/assets/stylesheets/completion_kit/application.css +87 -0
  5. data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
  6. data/app/controllers/completion_kit/runs_controller.rb +31 -11
  7. data/app/helpers/completion_kit/application_helper.rb +4 -12
  8. data/app/jobs/completion_kit/generate_row_job.rb +102 -0
  9. data/app/jobs/completion_kit/judge_review_job.rb +110 -0
  10. data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
  11. data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
  12. data/app/models/completion_kit/prompt.rb +4 -0
  13. data/app/models/completion_kit/response.rb +29 -2
  14. data/app/models/completion_kit/review.rb +17 -2
  15. data/app/models/completion_kit/run.rb +90 -96
  16. data/app/services/completion_kit/anthropic_client.rb +13 -0
  17. data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
  18. data/app/services/completion_kit/ollama_client.rb +13 -0
  19. data/app/services/completion_kit/open_ai_client.rb +11 -0
  20. data/app/services/completion_kit/open_router_client.rb +13 -0
  21. data/app/services/completion_kit/worker_health.rb +10 -0
  22. data/app/views/completion_kit/api_reference/index.html.erb +0 -5
  23. data/app/views/completion_kit/prompts/_form.html.erb +8 -5
  24. data/app/views/completion_kit/runs/_actions.html.erb +1 -1
  25. data/app/views/completion_kit/runs/_form.html.erb +6 -3
  26. data/app/views/completion_kit/runs/_progress.html.erb +1 -1
  27. data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
  28. data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
  29. data/app/views/completion_kit/runs/show.html.erb +1 -1
  30. data/app/views/layouts/completion_kit/application.html.erb +28 -2
  31. data/config/routes.rb +2 -2
  32. data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
  33. data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
  34. data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
  35. data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
  36. data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
  37. data/lib/completion_kit/concurrency_check.rb +16 -0
  38. data/lib/completion_kit/errors.rb +16 -0
  39. data/lib/completion_kit/version.rb +1 -1
  40. data/lib/completion_kit.rb +2 -2
  41. data/lib/tasks/completion_kit_runs.rake +13 -0
  42. metadata +31 -7
  43. data/MIT-LICENSE +0 -20
  44. data/app/jobs/completion_kit/generate_job.rb +0 -12
  45. data/app/jobs/completion_kit/judge_job.rb +0 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c87481def48cfd6193ee591e9ac1ab1a2be6b7de63a3275c82f7f7175804abfc
4
- data.tar.gz: 333b23de10b7e81daeac7c118f2c4ae13b2624304a1d04404b206aa5dffce092
3
+ metadata.gz: 0111ef5469e6634ac46f899c5e78a67aa212a174027ce253c7172a326a375121
4
+ data.tar.gz: 73162904d2924d4434b724d8e14e7c38e86ef4262de73c18585a6cc38b87e0cb
5
5
  SHA512:
6
- metadata.gz: '0538aae286aeffe40644e580dc84a27ee1bc2a187fe0a060f33bc4dfb30e04a769f9daa2b8a59b19633d35b8f79172a40bcd03d75d136c5dc1d4a8af92e5aa4a'
7
- data.tar.gz: 1843ea626685029288ec9533edc07a800bca18b8e435de22d1b6c32c59e4b2d52c08999833f7b123d983855e445a95ae814cb12a31aa21c041bac11d7a70c466
6
+ metadata.gz: 5a38d31eeb9fdc4482890799fe34ac7fbf57009c77874bcbcd0b4fc6b37f1878d4890137f83bdab52db469a7e91323438ae31d491272aa022e9c7f55fc5ad16a
7
+ data.tar.gz: 64eac5ee675ed6090835b291b64b4cd6dfe30a5c7db36589c8411e9e67331c762977c97533108a9a9c17680dbad43cfcacb024e68666c5af70ec76b7772844de
data/LICENSE ADDED
@@ -0,0 +1,97 @@
1
+ Business Source License 1.1
2
+
3
+ Licensor: Homemade Software, Inc.
4
+
5
+ Licensed Work: CompletionKit
6
+ The Licensed Work is Copyright © 2026 Homemade
7
+ Software, Inc.
8
+
9
+ Additional Use Grant: You may use the Licensed Work for any purpose,
10
+ including in production, except to offer the Licensed
11
+ Work (or any derivative work) to third parties as a
12
+ hosted or managed service whose primary value is the
13
+ functionality of the Licensed Work itself.
14
+
15
+ Change Date: 2029-04-25
16
+
17
+ Change License: GNU General Public License (GPL) Version 3
18
+
19
+ For information about alternative licensing arrangements for the Licensed
20
+ Work, please contact hello@homemade.software.
21
+
22
+ --------------------------------------------------------------------------------
23
+
24
+ Business Source License 1.1
25
+
26
+ Terms
27
+
28
+ The Licensor hereby grants you the right to copy, modify, create derivative
29
+ works, redistribute, and make non-production use of the Licensed Work. The
30
+ Licensor may make an Additional Use Grant, above, permitting limited
31
+ production use.
32
+
33
+ Effective on the Change Date, or the fourth anniversary of the first publicly
34
+ available distribution of a specific version of the Licensed Work under this
35
+ License, whichever comes first, the Licensor hereby grants you rights under
36
+ the terms of the Change License, and the rights granted in the paragraph
37
+ above terminate.
38
+
39
+ If your use of the Licensed Work does not comply with the requirements
40
+ currently in effect as described in this License, you must purchase a
41
+ commercial license from the Licensor, its affiliated entities, or authorized
42
+ resellers, or you must refrain from using the Licensed Work.
43
+
44
+ All copies of the original and modified Licensed Work, and derivative works
45
+ of the Licensed Work, are subject to this License. This License applies
46
+ separately for each version of the Licensed Work and the Change Date may
47
+ vary for each version of the Licensed Work released by Licensor.
48
+
49
+ You must conspicuously display this License on each original or modified
50
+ copy of the Licensed Work. If you receive the Licensed Work in original or
51
+ modified form from a third party, the terms and conditions set forth in this
52
+ License apply to your use of that work.
53
+
54
+ Any use of the Licensed Work in violation of this License will automatically
55
+ terminate your rights under this License for the current and all other
56
+ versions of the Licensed Work.
57
+
58
+ This License does not grant you any right in any trademark or logo of
59
+ Licensor or its affiliates (provided that you may use a trademark or logo of
60
+ Licensor as expressly required by this License).
61
+
62
+ TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
63
+ AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
64
+ EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
65
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
66
+ TITLE.
67
+
68
+ MariaDB hereby grants you permission to use this License's text to license
69
+ your works, and to refer to it using the trademark "Business Source
70
+ License", as long as you comply with the Covenants of Licensor below.
71
+
72
+ Covenants of Licensor
73
+
74
+ In consideration of the right to use this License's text and the "Business
75
+ Source License" name and trademark, Licensor covenants to MariaDB, and to
76
+ all other recipients of the Licensed Work to be provided by Licensor:
77
+
78
+ 1. To specify as the Change License the GPL Version 2.0 or any later
79
+ version, or a license that is compatible with GPL Version 2.0 or a later
80
+ version, where "compatible" means that software provided under the Change
81
+ License can be included in a program with software provided under GPL
82
+ Version 2.0 or a later version. Licensor may specify additional Change
83
+ Licenses without limitation.
84
+
85
+ 2. To either: (a) specify an additional grant of rights to use that does not
86
+ impose any additional restriction on the right granted in this License,
87
+ as the Additional Use Grant; or (b) insert the text "None".
88
+
89
+ 3. To specify a Change Date.
90
+
91
+ 4. Not to modify this License in any other way.
92
+
93
+ Notice
94
+
95
+ The Business Source License (this document, or the "License") is not an
96
+ Open Source license. However, the Licensed Work will eventually be made
97
+ available under an Open Source License, as stated in this License.
data/README.md CHANGED
@@ -15,6 +15,8 @@ It's the difference between "this prompt seems to work" and "this prompt scores
15
15
 
16
16
  **[completionkit.com](https://completionkit.com)** | **[RubyGems](https://rubygems.org/gems/completion-kit)**
17
17
 
18
+ > **CompletionKit Cloud** is coming — hosted, managed CompletionKit with zero setup. Early access opening soon at [app.completionkit.com](https://app.completionkit.com).
19
+
18
20
  ![Prompts index](https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/screenshots/prompts.png)
19
21
 
20
22
  ![Prompt detail with metrics and rubrics](https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/screenshots/prompt-detail.png)
@@ -33,9 +35,20 @@ cd completion-kit/standalone
33
35
  bundle install
34
36
  bin/rails completion_kit:install:migrations
35
37
  bin/rails db:migrate
38
+ ```
39
+
40
+ Then run **both** processes — a web server and a Solid Queue worker. In two terminals:
41
+
42
+ ```bash
36
43
  bin/rails server
37
44
  ```
38
45
 
46
+ ```bash
47
+ bin/jobs
48
+ ```
49
+
50
+ Or with [foreman](https://github.com/ddollar/foreman) in one terminal: `foreman start -f Procfile.dev`.
51
+
39
52
  Visit `http://localhost:3000`. Add a provider credential (Settings), create a prompt, upload a CSV dataset, and run it.
40
53
 
41
54
  ### Or mount as an engine in your existing Rails app
@@ -49,7 +62,7 @@ bin/rails generate completion_kit:install
49
62
  bin/rails db:migrate
50
63
  ```
51
64
 
52
- The engine mounts at `/completion_kit` in your app.
65
+ The engine mounts at `/completion_kit` in your app. CompletionKit's generate and judge flows enqueue Active Job jobs (`CompletionKit::GenerateRowJob`, `CompletionKit::JudgeReviewJob`, `CompletionKit::RunCompletionCheckJob`), so your host app needs an Active Job adapter that actually processes them — Solid Queue, Sidekiq, GoodJob, etc. The `:async` adapter is **not** suitable for production: it runs jobs in the web Puma's thread pool with no durability and no retry, and a long LLM call will block request handling.
53
66
 
54
67
  ## Providers
55
68
 
@@ -200,4 +213,8 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, testing, and pull
200
213
 
201
214
  ## License
202
215
 
203
- [MIT](https://opensource.org/licenses/MIT)
216
+ CompletionKit 0.3.0 and later are licensed under the [Business Source License 1.1](LICENSE). You may use CompletionKit freely for any purpose, including production, except to offer it (or a derivative) to third parties as a hosted or managed service whose primary value is CompletionKit itself. Three years after each release, that version automatically re-licenses to GPL-3.
217
+
218
+ CompletionKit 0.2.x and earlier remain available under the [MIT License](https://github.com/homemade-software-inc/completion-kit/blob/v0.2.0/MIT-LICENSE).
219
+
220
+ For alternative licensing, contact hello@homemade.software.
data/app/assets/stylesheets/completion_kit/application.css CHANGED
@@ -274,6 +274,39 @@ form.button_to {
274
274
  color: var(--ck-accent);
275
275
  }
276
276
 
277
+ .ck-disclosure-toggle {
278
+ appearance: none;
279
+ background: transparent;
280
+ border: 0;
281
+ padding: 0;
282
+ margin: 0.5rem 0 0;
283
+ font-family: var(--ck-mono);
284
+ font-size: 0.75rem;
285
+ font-weight: 500;
286
+ letter-spacing: 0.12em;
287
+ text-transform: uppercase;
288
+ color: var(--ck-muted);
289
+ cursor: pointer;
290
+ transition: color 0.15s;
291
+ }
292
+
293
+ .ck-disclosure-toggle:hover,
294
+ .ck-disclosure-toggle:focus-visible {
295
+ color: var(--ck-accent);
296
+ outline: none;
297
+ }
298
+
299
+ .ck-disclosure-toggle::after {
300
+ content: " ↓";
301
+ display: inline-block;
302
+ margin-left: 0.25rem;
303
+ transition: transform 0.15s;
304
+ }
305
+
306
+ .ck-disclosure-toggle[aria-expanded="true"]::after {
307
+ transform: rotate(180deg);
308
+ }
309
+
277
310
  .ck-list {
278
311
  display: grid;
279
312
  gap: 0.5rem;
@@ -385,6 +418,18 @@ tr:hover .ck-chip--publish {
385
418
  color: var(--ck-accent);
386
419
  }
387
420
 
421
+ .ck-chip--warning {
422
+ background: var(--ck-warning-soft);
423
+ border-color: rgba(224, 164, 88, 0.3);
424
+ color: var(--ck-warning);
425
+ }
426
+
427
+ .ck-chip--danger {
428
+ background: var(--ck-danger-soft);
429
+ border-color: rgba(248, 113, 113, 0.3);
430
+ color: var(--ck-danger);
431
+ }
432
+
388
433
  .ck-badge--high {
389
434
  background: var(--ck-success-soft);
390
435
  border: 1px solid rgba(34, 197, 94, 0.25);
@@ -679,6 +724,27 @@ tr:hover .ck-chip--publish {
679
724
  color: var(--ck-text);
680
725
  }
681
726
 
727
+ .ck-progress-block {
728
+ padding: 0.5rem 1rem 0.75rem;
729
+ border-top: 1px solid var(--ck-line);
730
+ font-size: 0.72rem;
731
+ font-family: var(--ck-mono);
732
+ color: var(--ck-muted);
733
+ display: flex;
734
+ flex-direction: column;
735
+ gap: 0.25rem;
736
+ }
737
+
738
+ .ck-progress-line {
739
+ display: flex;
740
+ gap: 0.4rem;
741
+ align-items: baseline;
742
+ }
743
+
744
+ .ck-progress-failed {
745
+ color: var(--ck-danger);
746
+ }
747
+
682
748
  .ck-model-list-details summary {
683
749
  list-style: none;
684
750
  }
@@ -802,6 +868,12 @@ tr:hover .ck-chip--publish {
802
868
  color: var(--ck-muted);
803
869
  }
804
870
 
871
+ .ck-field--info #refresh-status,
872
+ .ck-field--warn #refresh-status,
873
+ .ck-field--error #refresh-status {
874
+ color: var(--ck-muted);
875
+ }
876
+
805
877
  .ck-field--info .ck-input {
806
878
  border-color: var(--ck-accent);
807
879
  }
@@ -1815,6 +1887,21 @@ select.ck-input {
1815
1887
  flex-shrink: 0;
1816
1888
  }
1817
1889
 
1890
+ .ck-response-row--pending .ck-response-row__text,
1891
+ .ck-response-row--retrying .ck-response-row__text {
1892
+ color: var(--ck-dim);
1893
+ }
1894
+
1895
+ .ck-response-row--failed .ck-response-row__text {
1896
+ color: var(--ck-danger);
1897
+ opacity: 0.8;
1898
+ }
1899
+
1900
+ .ck-response-row__error {
1901
+ font-family: var(--ck-mono);
1902
+ font-size: 0.82rem;
1903
+ }
1904
+
1818
1905
  .ck-score {
1819
1906
  font-size: 0.85rem;
1820
1907
  color: var(--ck-muted);
data/app/controllers/completion_kit/api/v1/runs_controller.rb CHANGED
@@ -2,7 +2,7 @@ module CompletionKit
2
2
  module Api
3
3
  module V1
4
4
  class RunsController < BaseController
5
- before_action :set_run, only: [:show, :update, :destroy, :generate, :judge]
5
+ before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures]
6
6
 
7
7
  def index
8
8
  render json: Run.order(created_at: :desc)
@@ -37,12 +37,33 @@ module CompletionKit
37
37
  end
38
38
 
39
39
  def generate
40
- GenerateJob.perform_later(@run.id)
41
- render json: @run.reload, status: :accepted
40
+ if @run.start!
41
+ render json: @run.reload, status: :accepted
42
+ else
43
+ render json: { errors: [@run.failure_summary || @run.errors.full_messages.to_sentence] }, status: :unprocessable_entity
44
+ end
42
45
  end
43
46
 
44
- def judge
45
- JudgeJob.perform_later(@run.id)
47
+ def retry_failures
48
+ scope = @run.responses.where(status: "failed")
49
+ scope = scope.where(id: params[:only]) if params[:only].present?
50
+
51
+ ActiveRecord::Base.transaction do
52
+ failed_response_ids = scope.pluck(:id)
53
+ CompletionKit::Review.where(response_id: failed_response_ids, status: "failed").update_all(
54
+ status: "pending", attempts: 0,
55
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
56
+ ai_score: nil, ai_feedback: nil
57
+ )
58
+ scope.update_all(
59
+ status: "pending", attempts: 0,
60
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
61
+ response_text: nil
62
+ )
63
+ @run.update!(status: "running")
64
+ failed_response_ids.each { |rid| CompletionKit::GenerateRowJob.perform_later(@run.id, rid) }
65
+ end
66
+
46
67
  render json: @run.reload, status: :accepted
47
68
  end
48
69
 
data/app/controllers/completion_kit/runs_controller.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  module CompletionKit
2
2
  class RunsController < ApplicationController
3
- before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :judge, :suggest, :suggestion, :apply_suggestion]
3
+ before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :suggestion, :apply_suggestion, :retry_failures]
4
4
  before_action :load_form_collections, only: [:new, :edit, :create, :update]
5
5
 
6
6
  def index
@@ -63,17 +63,11 @@ module CompletionKit
63
63
  end
64
64
 
65
65
  def generate
66
- @run.update!(status: "generating", progress_current: 0, progress_total: 0, error_message: nil)
67
- GenerateJob.perform_later(@run.id)
68
- redirect_to run_path(@run)
69
- end
70
-
71
- def judge
72
- if params[:run]
73
- @run.update(judge_model: params[:run][:judge_model])
66
+ if @run.start!
67
+ redirect_to run_path(@run)
68
+ else
69
+ redirect_to run_path(@run), alert: @run.failure_summary || @run.errors.full_messages.to_sentence
74
70
  end
75
- JudgeJob.perform_later(@run.id)
76
- redirect_to run_path(@run)
77
71
  end
78
72
 
79
73
  def suggest
@@ -93,6 +87,32 @@ module CompletionKit
93
87
  return redirect_to run_path(@run), alert: "No suggestion available. Generate one first." unless @suggestion
94
88
  end
95
89
 
90
+ def retry_failures
91
+ scope = @run.responses.where(status: "failed")
92
+ scope = scope.where(id: params[:only]) if params[:only].present?
93
+
94
+ ActiveRecord::Base.transaction do
95
+ failed_response_ids = scope.pluck(:id)
96
+ Review.where(response_id: failed_response_ids, status: "failed").update_all(
97
+ status: "pending",
98
+ attempts: 0,
99
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
100
+ ai_score: nil, ai_feedback: nil
101
+ )
102
+ scope.update_all(
103
+ status: "pending",
104
+ attempts: 0,
105
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
106
+ response_text: nil
107
+ )
108
+ @run.update!(status: "running")
109
+ failed_response_ids.each { |rid| GenerateRowJob.perform_later(@run.id, rid) }
110
+ end
111
+
112
+ @run.send(:broadcast_ui)
113
+ redirect_to run_path(@run)
114
+ end
115
+
96
116
  def apply_suggestion
97
117
  suggestion = @run.suggestions.order(created_at: :desc).first
98
118
  return redirect_to run_path(@run), alert: "No suggestion to apply." unless suggestion
data/app/helpers/completion_kit/application_helper.rb CHANGED
@@ -35,8 +35,6 @@ module CompletionKit
35
35
  "ck-badge ck-badge--pending"
36
36
  when "running"
37
37
  "ck-badge ck-badge--running"
38
- when "generating", "judging"
39
- "ck-badge ck-badge--running"
40
38
  when "completed"
41
39
  "ck-badge ck-badge--high"
42
40
  when "failed"
@@ -48,7 +46,7 @@ module CompletionKit
48
46
 
49
47
  def ck_run_dot(run)
50
48
  case run.status
51
- when "generating", "judging" then "ck-dot ck-dot--running"
49
+ when "running" then "ck-dot ck-dot--running"
52
50
  when "failed" then "ck-dot ck-dot--failed"
53
51
  when "completed" then "ck-dot ck-dot--completed"
54
52
  else "ck-dot ck-dot--pending"
@@ -58,17 +56,11 @@ module CompletionKit
58
56
  def ck_run_status_label(run)
59
57
  case run.status
60
58
  when "pending" then "Ready to run"
61
- when "generating"
62
- if run.progress_total.to_i > 0
63
- "Generating responses (#{run.progress_current}/#{run.progress_total})"
64
- else
65
- "Generating responses…"
66
- end
67
- when "judging"
59
+ when "running"
68
60
  if run.progress_total.to_i > 0
69
- "Judging (#{run.progress_current}/#{run.progress_total} evaluations)"
61
+ "Running (#{run.progress_current}/#{run.progress_total})"
70
62
  else
71
- "Judging…"
63
+ "Running…"
72
64
  end
73
65
  when "completed" then "Completed"
74
66
  when "failed" then "Failed"
data/app/jobs/completion_kit/generate_row_job.rb ADDED
@@ -0,0 +1,102 @@
1
+ require "faraday"
2
+
3
+ module CompletionKit
4
+ class GenerateRowJob < ApplicationJob
5
+ queue_as :llm
6
+
7
+ limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
8
+ key: ->(run_id, _) { "run:#{run_id}" },
9
+ duration: 10.minutes
10
+
11
+ def self.rate_limit_wait(executions)
12
+ 30 * executions
13
+ end
14
+
15
+ retry_on Faraday::TimeoutError,
16
+ Faraday::ConnectionFailed,
17
+ wait: :polynomially_longer, attempts: 5
18
+
19
+ retry_on CompletionKit::RateLimitError,
20
+ wait: method(:rate_limit_wait), attempts: 5
21
+
22
+ discard_on ActiveJob::DeserializationError
23
+ discard_on CompletionKit::ConfigurationError
24
+
25
+ rescue_from(StandardError) do |error|
26
+ record_terminal_failure!(error)
27
+ enqueue_completion_check
28
+ end
29
+
30
+ before_perform do |job|
31
+ response = Response.find_by(id: job.arguments.last)
32
+ next unless response
33
+ response.update_columns(status: "retrying", attempts: response.attempts + 1)
34
+ response.run.send(:broadcast_response_update, response) if response.run
35
+ end
36
+
37
+ def perform(run_id, response_id)
38
+ @run_id = run_id
39
+ @response_id = response_id
40
+
41
+ response = Response.find(response_id)
42
+ run = response.run
43
+ prompt = run.prompt
44
+
45
+ row = parsed_input(response)
46
+ rendered = CsvProcessor.apply_variables(prompt, row)
47
+ client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
48
+
49
+ raise ConfigurationError, client.configuration_errors.join(", ") unless client.configured?
50
+
51
+ text = client.generate_completion(rendered, model: prompt.llm_model, temperature: run.temperature)
52
+
53
+ response.update!(
54
+ status: "succeeded",
55
+ response_text: text,
56
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil
57
+ )
58
+ run.send(:broadcast_response_update, response)
59
+
60
+ if run.judge_configured?
61
+ run.metrics.each do |metric|
62
+ JudgeReviewJob.perform_later(response.id, metric.id)
63
+ end
64
+ end
65
+
66
+ enqueue_completion_check
67
+ end
68
+
69
+ private
70
+
71
+ def parsed_input(response)
72
+ return {} if response.input_data.blank?
73
+ JSON.parse(response.input_data)
74
+ rescue JSON::ParserError
75
+ {}
76
+ end
77
+
78
+ def record_terminal_failure!(error)
79
+ response_id = @response_id || arguments.last
80
+ response = Response.find_by(id: response_id)
81
+ return unless response
82
+
83
+ response.update_columns(
84
+ status: "failed",
85
+ error_provider: provider_for(response),
86
+ error_class: error.class.name,
87
+ error_status: error.respond_to?(:status) ? error.status : nil,
88
+ error_message: error.message.to_s.truncate(2000)
89
+ )
90
+ response.run&.send(:broadcast_response_update, response)
91
+ end
92
+
93
+ def provider_for(response)
94
+ response.run&.prompt&.llm_model_provider
95
+ end
96
+
97
+ def enqueue_completion_check
98
+ run_id = @run_id || arguments.first
99
+ RunCompletionCheckJob.perform_later(run_id)
100
+ end
101
+ end
102
+ end
data/app/jobs/completion_kit/judge_review_job.rb ADDED
@@ -0,0 +1,110 @@
1
+ require "faraday"
2
+
3
+ module CompletionKit
4
+ class JudgeReviewJob < ApplicationJob
5
+ queue_as :llm
6
+
7
+ limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
8
+ key: ->(response_id, _) { "run:#{Response.find_by(id: response_id)&.run_id}" },
9
+ duration: 10.minutes
10
+
11
+ def self.rate_limit_wait(executions)
12
+ 30 * executions
13
+ end
14
+
15
+ retry_on Faraday::TimeoutError,
16
+ Faraday::ConnectionFailed,
17
+ wait: :polynomially_longer, attempts: 5
18
+
19
+ retry_on CompletionKit::RateLimitError,
20
+ wait: method(:rate_limit_wait), attempts: 5
21
+
22
+ discard_on ActiveJob::DeserializationError
23
+ discard_on CompletionKit::ConfigurationError
24
+
25
+ rescue_from(StandardError) do |error|
26
+ record_terminal_failure!(error)
27
+ enqueue_completion_check
28
+ end
29
+
30
+ before_perform do |job|
31
+ response_id, metric_id = job.arguments
32
+ response = Response.find_by(id: response_id)
33
+ next unless response
34
+ review = response.reviews.find_or_initialize_by(metric_id: metric_id)
35
+ review.metric_name ||= Metric.find_by(id: metric_id)&.name || "(deleted metric)"
36
+ review.attempts = (review.attempts || 0) + 1
37
+ review.status = "retrying"
38
+ review.save!(validate: false)
39
+ response.run.send(:broadcast_response_update, response) if response.run
40
+ end
41
+
42
+ def perform(response_id, metric_id)
43
+ @response_id = response_id
44
+ @metric_id = metric_id
45
+
46
+ response = Response.find(response_id)
47
+ metric = Metric.find(metric_id)
48
+ run = response.run
49
+
50
+ config = ApiConfig.for_model(run.judge_model).merge(judge_model: run.judge_model)
51
+ judge = JudgeService.new(config)
52
+
53
+ evaluation = judge.evaluate(
54
+ response.response_text,
55
+ response.expected_output,
56
+ run.prompt.template,
57
+ criteria: metric.instruction.to_s,
58
+ rubric_text: metric.display_rubric_text,
59
+ input_data: response.input_data
60
+ )
61
+
62
+ review = response.reviews.find_or_initialize_by(metric_id: metric.id)
63
+ review.assign_attributes(
64
+ metric_name: metric.name,
65
+ instruction: metric.instruction.to_s,
66
+ status: "succeeded",
67
+ ai_score: evaluation[:score],
68
+ ai_feedback: evaluation[:feedback],
69
+ error_provider: nil, error_class: nil, error_status: nil, error_message: nil
70
+ )
71
+ review.save!
72
+
73
+ run.send(:broadcast_response_update, response)
74
+ enqueue_completion_check
75
+ end
76
+
77
+ private
78
+
79
+ def record_terminal_failure!(error)
80
+ response_id = @response_id || arguments.first
81
+ metric_id = @metric_id || arguments.last
82
+ response = Response.find_by(id: response_id)
83
+ return unless response
84
+
85
+ review = response.reviews.find_or_initialize_by(metric_id: metric_id)
86
+ review.assign_attributes(
87
+ metric_name: review.metric_name || Metric.find_by(id: metric_id)&.name || "(deleted metric)",
88
+ status: "failed",
89
+ error_provider: provider_for(response),
90
+ error_class: error.class.name,
91
+ error_status: error.respond_to?(:status) ? error.status : nil,
92
+ error_message: error.message.to_s.truncate(2000)
93
+ )
94
+ review.save!(validate: false)
95
+ response.run&.send(:broadcast_response_update, response)
96
+ end
97
+
98
+ def provider_for(response)
99
+ run = response.run
100
+ return nil unless run&.judge_model
101
+ ApiConfig.provider_for_model(run.judge_model)
102
+ end
103
+
104
+ def enqueue_completion_check
105
+ response_id = @response_id || arguments.first
106
+ response = Response.find_by(id: response_id)
107
+ RunCompletionCheckJob.perform_later(response.run_id) if response
108
+ end
109
+ end
110
+ end
data/app/jobs/completion_kit/model_discovery_job.rb CHANGED
@@ -1,7 +1,29 @@
1
+ require "faraday"
2
+
1
3
  module CompletionKit
2
4
  class ModelDiscoveryJob < ApplicationJob
3
5
  queue_as :default
4
6
 
7
+ def self.rate_limit_wait(executions)
8
+ 30 * executions
9
+ end
10
+
11
+ retry_on Faraday::TimeoutError,
12
+ Faraday::ConnectionFailed,
13
+ wait: :polynomially_longer, attempts: 5
14
+
15
+ retry_on CompletionKit::RateLimitError,
16
+ wait: method(:rate_limit_wait), attempts: 5
17
+
18
+ discard_on ActiveJob::DeserializationError
19
+
20
+ rescue_from(StandardError) do |_error|
21
+ credential = ProviderCredential.find(arguments.first)
22
+ credential.update_columns(discovery_status: "failed")
23
+ credential.reload
24
+ credential.broadcast_discovery_progress
25
+ end
26
+
5
27
  def perform(provider_credential_id)
6
28
  credential = ProviderCredential.find_by(id: provider_credential_id)
7
29
  return unless credential
@@ -20,10 +42,6 @@ module CompletionKit
20
42
  credential.update_columns(discovery_status: "completed", updated_at: Time.current)
21
43
  credential.reload
22
44
  credential.broadcast_discovery_complete
23
- rescue StandardError
24
- credential.update_columns(discovery_status: "failed")
25
- credential.reload
26
- credential.broadcast_discovery_progress
27
45
  end
28
46
  end
29
47
  end