completion-kit 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -1
- data/app/assets/stylesheets/completion_kit/application.css +87 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
- data/app/controllers/completion_kit/runs_controller.rb +31 -11
- data/app/helpers/completion_kit/application_helper.rb +4 -12
- data/app/jobs/completion_kit/generate_row_job.rb +102 -0
- data/app/jobs/completion_kit/judge_review_job.rb +110 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
- data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
- data/app/models/completion_kit/prompt.rb +4 -0
- data/app/models/completion_kit/response.rb +29 -2
- data/app/models/completion_kit/review.rb +17 -2
- data/app/models/completion_kit/run.rb +90 -96
- data/app/services/completion_kit/anthropic_client.rb +13 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
- data/app/services/completion_kit/ollama_client.rb +13 -0
- data/app/services/completion_kit/open_ai_client.rb +11 -0
- data/app/services/completion_kit/open_router_client.rb +13 -0
- data/app/services/completion_kit/worker_health.rb +10 -0
- data/app/views/completion_kit/api_reference/index.html.erb +0 -5
- data/app/views/completion_kit/prompts/_form.html.erb +8 -5
- data/app/views/completion_kit/runs/_actions.html.erb +1 -1
- data/app/views/completion_kit/runs/_form.html.erb +6 -3
- data/app/views/completion_kit/runs/_progress.html.erb +1 -1
- data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
- data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
- data/app/views/completion_kit/runs/show.html.erb +1 -1
- data/app/views/layouts/completion_kit/application.html.erb +28 -2
- data/config/routes.rb +2 -2
- data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
- data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
- data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
- data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
- data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
- data/lib/completion_kit/concurrency_check.rb +16 -0
- data/lib/completion_kit/errors.rb +16 -0
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +2 -2
- data/lib/tasks/completion_kit_runs.rake +13 -0
- metadata +29 -5
- data/app/jobs/completion_kit/generate_job.rb +0 -12
- data/app/jobs/completion_kit/judge_job.rb +0 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0111ef5469e6634ac46f899c5e78a67aa212a174027ce253c7172a326a375121
|
|
4
|
+
data.tar.gz: 73162904d2924d4434b724d8e14e7c38e86ef4262de73c18585a6cc38b87e0cb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5a38d31eeb9fdc4482890799fe34ac7fbf57009c77874bcbcd0b4fc6b37f1878d4890137f83bdab52db469a7e91323438ae31d491272aa022e9c7f55fc5ad16a
|
|
7
|
+
data.tar.gz: 64eac5ee675ed6090835b291b64b4cd6dfe30a5c7db36589c8411e9e67331c762977c97533108a9a9c17680dbad43cfcacb024e68666c5af70ec76b7772844de
|
data/README.md
CHANGED
|
@@ -35,9 +35,20 @@ cd completion-kit/standalone
|
|
|
35
35
|
bundle install
|
|
36
36
|
bin/rails completion_kit:install:migrations
|
|
37
37
|
bin/rails db:migrate
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Then run **both** processes — a web server and a Solid Queue worker. In two terminals:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
38
43
|
bin/rails server
|
|
39
44
|
```
|
|
40
45
|
|
|
46
|
+
```bash
|
|
47
|
+
bin/jobs
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or with [foreman](https://github.com/ddollar/foreman) in one terminal: `foreman start -f Procfile.dev`.
|
|
51
|
+
|
|
41
52
|
Visit `http://localhost:3000`. Add a provider credential (Settings), create a prompt, upload a CSV dataset, and run it.
|
|
42
53
|
|
|
43
54
|
### Or mount as an engine in your existing Rails app
|
|
@@ -51,7 +62,7 @@ bin/rails generate completion_kit:install
|
|
|
51
62
|
bin/rails db:migrate
|
|
52
63
|
```
|
|
53
64
|
|
|
54
|
-
The engine mounts at `/completion_kit` in your app.
|
|
65
|
+
The engine mounts at `/completion_kit` in your app. CompletionKit's generate and judge flows enqueue Active Job jobs (`CompletionKit::GenerateRowJob`, `CompletionKit::JudgeReviewJob`, `CompletionKit::RunCompletionCheckJob`), so your host app needs an Active Job adapter that actually processes them — Solid Queue, Sidekiq, GoodJob, etc. The `:async` adapter is **not** suitable for production: it runs jobs in the web Puma's thread pool with no durability and no retry, and a long LLM call will block request handling.
|
|
55
66
|
|
|
56
67
|
## Providers
|
|
57
68
|
|
|
@@ -274,6 +274,39 @@ form.button_to {
|
|
|
274
274
|
color: var(--ck-accent);
|
|
275
275
|
}
|
|
276
276
|
|
|
277
|
+
.ck-disclosure-toggle {
|
|
278
|
+
appearance: none;
|
|
279
|
+
background: transparent;
|
|
280
|
+
border: 0;
|
|
281
|
+
padding: 0;
|
|
282
|
+
margin: 0.5rem 0 0;
|
|
283
|
+
font-family: var(--ck-mono);
|
|
284
|
+
font-size: 0.75rem;
|
|
285
|
+
font-weight: 500;
|
|
286
|
+
letter-spacing: 0.12em;
|
|
287
|
+
text-transform: uppercase;
|
|
288
|
+
color: var(--ck-muted);
|
|
289
|
+
cursor: pointer;
|
|
290
|
+
transition: color 0.15s;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
.ck-disclosure-toggle:hover,
|
|
294
|
+
.ck-disclosure-toggle:focus-visible {
|
|
295
|
+
color: var(--ck-accent);
|
|
296
|
+
outline: none;
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
.ck-disclosure-toggle::after {
|
|
300
|
+
content: " ↓";
|
|
301
|
+
display: inline-block;
|
|
302
|
+
margin-left: 0.25rem;
|
|
303
|
+
transition: transform 0.15s;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
.ck-disclosure-toggle[aria-expanded="true"]::after {
|
|
307
|
+
transform: rotate(180deg);
|
|
308
|
+
}
|
|
309
|
+
|
|
277
310
|
.ck-list {
|
|
278
311
|
display: grid;
|
|
279
312
|
gap: 0.5rem;
|
|
@@ -385,6 +418,18 @@ tr:hover .ck-chip--publish {
|
|
|
385
418
|
color: var(--ck-accent);
|
|
386
419
|
}
|
|
387
420
|
|
|
421
|
+
.ck-chip--warning {
|
|
422
|
+
background: var(--ck-warning-soft);
|
|
423
|
+
border-color: rgba(224, 164, 88, 0.3);
|
|
424
|
+
color: var(--ck-warning);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
.ck-chip--danger {
|
|
428
|
+
background: var(--ck-danger-soft);
|
|
429
|
+
border-color: rgba(248, 113, 113, 0.3);
|
|
430
|
+
color: var(--ck-danger);
|
|
431
|
+
}
|
|
432
|
+
|
|
388
433
|
.ck-badge--high {
|
|
389
434
|
background: var(--ck-success-soft);
|
|
390
435
|
border: 1px solid rgba(34, 197, 94, 0.25);
|
|
@@ -679,6 +724,27 @@ tr:hover .ck-chip--publish {
|
|
|
679
724
|
color: var(--ck-text);
|
|
680
725
|
}
|
|
681
726
|
|
|
727
|
+
.ck-progress-block {
|
|
728
|
+
padding: 0.5rem 1rem 0.75rem;
|
|
729
|
+
border-top: 1px solid var(--ck-line);
|
|
730
|
+
font-size: 0.72rem;
|
|
731
|
+
font-family: var(--ck-mono);
|
|
732
|
+
color: var(--ck-muted);
|
|
733
|
+
display: flex;
|
|
734
|
+
flex-direction: column;
|
|
735
|
+
gap: 0.25rem;
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
.ck-progress-line {
|
|
739
|
+
display: flex;
|
|
740
|
+
gap: 0.4rem;
|
|
741
|
+
align-items: baseline;
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
.ck-progress-failed {
|
|
745
|
+
color: var(--ck-danger);
|
|
746
|
+
}
|
|
747
|
+
|
|
682
748
|
.ck-model-list-details summary {
|
|
683
749
|
list-style: none;
|
|
684
750
|
}
|
|
@@ -802,6 +868,12 @@ tr:hover .ck-chip--publish {
|
|
|
802
868
|
color: var(--ck-muted);
|
|
803
869
|
}
|
|
804
870
|
|
|
871
|
+
.ck-field--info #refresh-status,
|
|
872
|
+
.ck-field--warn #refresh-status,
|
|
873
|
+
.ck-field--error #refresh-status {
|
|
874
|
+
color: var(--ck-muted);
|
|
875
|
+
}
|
|
876
|
+
|
|
805
877
|
.ck-field--info .ck-input {
|
|
806
878
|
border-color: var(--ck-accent);
|
|
807
879
|
}
|
|
@@ -1815,6 +1887,21 @@ select.ck-input {
|
|
|
1815
1887
|
flex-shrink: 0;
|
|
1816
1888
|
}
|
|
1817
1889
|
|
|
1890
|
+
.ck-response-row--pending .ck-response-row__text,
|
|
1891
|
+
.ck-response-row--retrying .ck-response-row__text {
|
|
1892
|
+
color: var(--ck-dim);
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1895
|
+
.ck-response-row--failed .ck-response-row__text {
|
|
1896
|
+
color: var(--ck-danger);
|
|
1897
|
+
opacity: 0.8;
|
|
1898
|
+
}
|
|
1899
|
+
|
|
1900
|
+
.ck-response-row__error {
|
|
1901
|
+
font-family: var(--ck-mono);
|
|
1902
|
+
font-size: 0.82rem;
|
|
1903
|
+
}
|
|
1904
|
+
|
|
1818
1905
|
.ck-score {
|
|
1819
1906
|
font-size: 0.85rem;
|
|
1820
1907
|
color: var(--ck-muted);
|
|
@@ -2,7 +2,7 @@ module CompletionKit
|
|
|
2
2
|
module Api
|
|
3
3
|
module V1
|
|
4
4
|
class RunsController < BaseController
|
|
5
|
-
before_action :set_run, only: [:show, :update, :destroy, :generate, :
|
|
5
|
+
before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
8
|
render json: Run.order(created_at: :desc)
|
|
@@ -37,12 +37,33 @@ module CompletionKit
|
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
def generate
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
if @run.start!
|
|
41
|
+
render json: @run.reload, status: :accepted
|
|
42
|
+
else
|
|
43
|
+
render json: { errors: [@run.failure_summary || @run.errors.full_messages.to_sentence] }, status: :unprocessable_entity
|
|
44
|
+
end
|
|
42
45
|
end
|
|
43
46
|
|
|
44
|
-
def
|
|
45
|
-
|
|
47
|
+
# POST action: resets this run's failed responses (optionally narrowed to the
# ids in params[:only]) and re-enqueues one GenerateRowJob per reset response.
#
# Failed reviews attached to those responses are reset to "pending" as well.
# Always renders the reloaded run with status 202.
def retry_failures
  scope = @run.responses.where(status: "failed")
  scope = scope.where(id: params[:only]) if params[:only].present?
  failed_response_ids = scope.pluck(:id)

  # Nothing to retry: leave the run untouched. Marking it "running" without
  # enqueueing any GenerateRowJob would leave no job behind to enqueue the
  # completion check, so the run would never leave the "running" state.
  if failed_response_ids.any?
    ActiveRecord::Base.transaction do
      CompletionKit::Review.where(response_id: failed_response_ids, status: "failed").update_all(
        status: "pending", attempts: 0,
        error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
        ai_score: nil, ai_feedback: nil
      )
      scope.update_all(
        status: "pending", attempts: 0,
        error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
        response_text: nil
      )
      @run.update!(status: "running")
      failed_response_ids.each { |rid| CompletionKit::GenerateRowJob.perform_later(@run.id, rid) }
    end
  end

  render json: @run.reload, status: :accepted
end
|
|
48
69
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class RunsController < ApplicationController
|
|
3
|
-
before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :
|
|
3
|
+
before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :suggestion, :apply_suggestion, :retry_failures]
|
|
4
4
|
before_action :load_form_collections, only: [:new, :edit, :create, :update]
|
|
5
5
|
|
|
6
6
|
def index
|
|
@@ -63,17 +63,11 @@ module CompletionKit
|
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
def generate
|
|
66
|
-
@run.
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def judge
|
|
72
|
-
if params[:run]
|
|
73
|
-
@run.update(judge_model: params[:run][:judge_model])
|
|
66
|
+
if @run.start!
|
|
67
|
+
redirect_to run_path(@run)
|
|
68
|
+
else
|
|
69
|
+
redirect_to run_path(@run), alert: @run.failure_summary || @run.errors.full_messages.to_sentence
|
|
74
70
|
end
|
|
75
|
-
JudgeJob.perform_later(@run.id)
|
|
76
|
-
redirect_to run_path(@run)
|
|
77
71
|
end
|
|
78
72
|
|
|
79
73
|
def suggest
|
|
@@ -93,6 +87,32 @@ module CompletionKit
|
|
|
93
87
|
return redirect_to run_path(@run), alert: "No suggestion available. Generate one first." unless @suggestion
|
|
94
88
|
end
|
|
95
89
|
|
|
90
|
+
# Resets this run's failed responses (optionally narrowed to the ids in
# params[:only]) and re-enqueues one GenerateRowJob per reset response, then
# broadcasts the run UI and redirects back to the run page.
#
# Failed reviews attached to those responses are reset to "pending" as well.
def retry_failures
  scope = @run.responses.where(status: "failed")
  scope = scope.where(id: params[:only]) if params[:only].present?
  failed_response_ids = scope.pluck(:id)

  # Nothing to retry: leave the run untouched. Marking it "running" without
  # enqueueing any GenerateRowJob would leave no job behind to enqueue the
  # completion check, so the run would never leave the "running" state.
  if failed_response_ids.any?
    ActiveRecord::Base.transaction do
      Review.where(response_id: failed_response_ids, status: "failed").update_all(
        status: "pending",
        attempts: 0,
        error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
        ai_score: nil, ai_feedback: nil
      )
      scope.update_all(
        status: "pending",
        attempts: 0,
        error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
        response_text: nil
      )
      @run.update!(status: "running")
      failed_response_ids.each { |rid| GenerateRowJob.perform_later(@run.id, rid) }
    end
  end

  @run.send(:broadcast_ui)
  redirect_to run_path(@run)
end
|
|
115
|
+
|
|
96
116
|
def apply_suggestion
|
|
97
117
|
suggestion = @run.suggestions.order(created_at: :desc).first
|
|
98
118
|
return redirect_to run_path(@run), alert: "No suggestion to apply." unless suggestion
|
|
@@ -35,8 +35,6 @@ module CompletionKit
|
|
|
35
35
|
"ck-badge ck-badge--pending"
|
|
36
36
|
when "running"
|
|
37
37
|
"ck-badge ck-badge--running"
|
|
38
|
-
when "generating", "judging"
|
|
39
|
-
"ck-badge ck-badge--running"
|
|
40
38
|
when "completed"
|
|
41
39
|
"ck-badge ck-badge--high"
|
|
42
40
|
when "failed"
|
|
@@ -48,7 +46,7 @@ module CompletionKit
|
|
|
48
46
|
|
|
49
47
|
def ck_run_dot(run)
|
|
50
48
|
case run.status
|
|
51
|
-
when "
|
|
49
|
+
when "running" then "ck-dot ck-dot--running"
|
|
52
50
|
when "failed" then "ck-dot ck-dot--failed"
|
|
53
51
|
when "completed" then "ck-dot ck-dot--completed"
|
|
54
52
|
else "ck-dot ck-dot--pending"
|
|
@@ -58,17 +56,11 @@ module CompletionKit
|
|
|
58
56
|
def ck_run_status_label(run)
|
|
59
57
|
case run.status
|
|
60
58
|
when "pending" then "Ready to run"
|
|
61
|
-
when "
|
|
62
|
-
if run.progress_total.to_i > 0
|
|
63
|
-
"Generating responses (#{run.progress_current}/#{run.progress_total})"
|
|
64
|
-
else
|
|
65
|
-
"Generating responses…"
|
|
66
|
-
end
|
|
67
|
-
when "judging"
|
|
59
|
+
when "running"
|
|
68
60
|
if run.progress_total.to_i > 0
|
|
69
|
-
"
|
|
61
|
+
"Running (#{run.progress_current}/#{run.progress_total})"
|
|
70
62
|
else
|
|
71
|
-
"
|
|
63
|
+
"Running…"
|
|
72
64
|
end
|
|
73
65
|
when "completed" then "Completed"
|
|
74
66
|
when "failed" then "Failed"
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
|
|
3
|
+
module CompletionKit
  # Generates the completion for a single response row of a run.
  #
  # Arguments: (run_id, response_id). On success the response is marked
  # "succeeded", one JudgeReviewJob per run metric is enqueued when the run has
  # a judge configured, and a RunCompletionCheckJob is enqueued. On terminal
  # failure the error details are stored on the response and the completion
  # check is enqueued as well.
  class GenerateRowJob < ApplicationJob
    queue_as :llm

    # Concurrency is keyed per run; the limit is read from the environment
    # (default 5).
    limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
                       key: ->(run_id, _) { "run:#{run_id}" },
                       duration: 10.minutes

    # Seconds to wait before the next attempt after a rate-limit error; grows
    # linearly with the number of executions so far.
    def self.rate_limit_wait(executions)
      30 * executions
    end

    # Shared terminal-failure callback for retry_on (attempts exhausted) and
    # discard_on. Those macros yield (job, error) to the block in class scope,
    # hence the explicit `send` to reach the job's private helpers.
    TERMINAL_FAILURE = lambda do |job, error|
      job.send(:record_terminal_failure!, error)
      job.send(:enqueue_completion_check)
    end
    private_constant :TERMINAL_FAILURE

    # Catch-all for unexpected errors. It MUST be declared before the
    # retry_on / discard_on handlers below: rescue handlers are searched from
    # the last declared to the first, so a StandardError handler declared last
    # would shadow every more specific handler and no retry would ever happen.
    rescue_from(StandardError) do |error|
      record_terminal_failure!(error)
      enqueue_completion_check
    end

    retry_on Faraday::TimeoutError,
             Faraday::ConnectionFailed,
             wait: :polynomially_longer, attempts: 5, &TERMINAL_FAILURE

    # `wait:` must be a Proc (a Method object is not accepted by Active Job's
    # delay calculation), so wrap the class method in a lambda.
    retry_on CompletionKit::RateLimitError,
             wait: ->(executions) { rate_limit_wait(executions) }, attempts: 5, &TERMINAL_FAILURE

    discard_on ActiveJob::DeserializationError
    # A configuration error will not fix itself on retry: record it right away.
    discard_on CompletionKit::ConfigurationError, &TERMINAL_FAILURE

    # Runs before every execution (first attempt included): bumps the attempt
    # counter, flags the row as "retrying" and pushes the row update to the UI.
    before_perform do |job|
      response = Response.find_by(id: job.arguments.last)
      next unless response
      response.update_columns(status: "retrying", attempts: response.attempts + 1)
      response.run.send(:broadcast_response_update, response) if response.run
    end

    def perform(run_id, response_id)
      # Kept on the instance so the failure helpers can use them.
      @run_id = run_id
      @response_id = response_id

      response = Response.find(response_id)
      run = response.run
      prompt = run.prompt

      row = parsed_input(response)
      rendered = CsvProcessor.apply_variables(prompt, row)
      client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))

      raise ConfigurationError, client.configuration_errors.join(", ") unless client.configured?

      text = client.generate_completion(rendered, model: prompt.llm_model, temperature: run.temperature)

      # Success clears any error details left over from a previous attempt.
      response.update!(
        status: "succeeded",
        response_text: text,
        error_provider: nil, error_class: nil, error_status: nil, error_message: nil
      )
      run.send(:broadcast_response_update, response)

      if run.judge_configured?
        run.metrics.each do |metric|
          JudgeReviewJob.perform_later(response.id, metric.id)
        end
      end

      enqueue_completion_check
    end

    private

    # Returns the row's input data parsed from JSON, or an empty hash when the
    # data is blank or is not valid JSON.
    def parsed_input(response)
      return {} if response.input_data.blank?
      JSON.parse(response.input_data)
    rescue JSON::ParserError
      {}
    end

    # Marks the response as "failed" and stores the provider, error class,
    # HTTP-like status (when the error exposes one) and a truncated message.
    # Does nothing when the response row no longer exists.
    def record_terminal_failure!(error)
      response_id = @response_id || arguments.last
      response = Response.find_by(id: response_id)
      return unless response

      response.update_columns(
        status: "failed",
        error_provider: provider_for(response),
        error_class: error.class.name,
        error_status: error.respond_to?(:status) ? error.status : nil,
        error_message: error.message.to_s.truncate(2000)
      )
      response.run&.send(:broadcast_response_update, response)
    end

    # Provider of the prompt's generation model, or nil when run/prompt is gone.
    def provider_for(response)
      response.run&.prompt&.llm_model_provider
    end

    # Enqueues the job that decides whether the whole run is finished.
    def enqueue_completion_check
      run_id = @run_id || arguments.first
      RunCompletionCheckJob.perform_later(run_id)
    end
  end
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
|
|
3
|
+
module CompletionKit
  # Scores a single response against a single metric with the run's judge model.
  #
  # Arguments: (response_id, metric_id). On success the matching review is
  # stored as "succeeded" with the judge's score and feedback; on terminal
  # failure the error details are stored on the review. Either way a
  # RunCompletionCheckJob is enqueued for the response's run.
  class JudgeReviewJob < ApplicationJob
    queue_as :llm

    # Concurrency is keyed by the run the response belongs to; the limit is
    # read from the environment (default 5).
    limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
                       key: ->(response_id, _) { "run:#{Response.find_by(id: response_id)&.run_id}" },
                       duration: 10.minutes

    # Seconds to wait before the next attempt after a rate-limit error; grows
    # linearly with the number of executions so far.
    def self.rate_limit_wait(executions)
      30 * executions
    end

    # Shared terminal-failure callback for retry_on (attempts exhausted) and
    # discard_on. Those macros yield (job, error) to the block in class scope,
    # hence the explicit `send` to reach the job's private helpers.
    TERMINAL_FAILURE = lambda do |job, error|
      job.send(:record_terminal_failure!, error)
      job.send(:enqueue_completion_check)
    end
    private_constant :TERMINAL_FAILURE

    # Catch-all for unexpected errors. It MUST be declared before the
    # retry_on / discard_on handlers below: rescue handlers are searched from
    # the last declared to the first, so a StandardError handler declared last
    # would shadow every more specific handler and no retry would ever happen.
    rescue_from(StandardError) do |error|
      record_terminal_failure!(error)
      enqueue_completion_check
    end

    retry_on Faraday::TimeoutError,
             Faraday::ConnectionFailed,
             wait: :polynomially_longer, attempts: 5, &TERMINAL_FAILURE

    # `wait:` must be a Proc (a Method object is not accepted by Active Job's
    # delay calculation), so wrap the class method in a lambda.
    retry_on CompletionKit::RateLimitError,
             wait: ->(executions) { rate_limit_wait(executions) }, attempts: 5, &TERMINAL_FAILURE

    discard_on ActiveJob::DeserializationError
    # A configuration error will not fix itself on retry: record it right away.
    discard_on CompletionKit::ConfigurationError, &TERMINAL_FAILURE

    # Runs before every execution (first attempt included): finds or builds the
    # review for this response/metric pair, bumps its attempt counter, flags it
    # as "retrying" and pushes the row update to the UI.
    before_perform do |job|
      response_id, metric_id = job.arguments
      response = Response.find_by(id: response_id)
      next unless response
      review = response.reviews.find_or_initialize_by(metric_id: metric_id)
      review.metric_name ||= Metric.find_by(id: metric_id)&.name || "(deleted metric)"
      review.attempts = (review.attempts || 0) + 1
      review.status = "retrying"
      # Validations are skipped: the review has no score yet at this point.
      review.save!(validate: false)
      response.run.send(:broadcast_response_update, response) if response.run
    end

    def perform(response_id, metric_id)
      # Kept on the instance so the failure helpers can use them.
      @response_id = response_id
      @metric_id = metric_id

      response = Response.find(response_id)
      metric = Metric.find(metric_id)
      run = response.run

      config = ApiConfig.for_model(run.judge_model).merge(judge_model: run.judge_model)
      judge = JudgeService.new(config)

      evaluation = judge.evaluate(
        response.response_text,
        response.expected_output,
        run.prompt.template,
        criteria: metric.instruction.to_s,
        rubric_text: metric.display_rubric_text,
        input_data: response.input_data
      )

      # Success clears any error details left over from a previous attempt.
      review = response.reviews.find_or_initialize_by(metric_id: metric.id)
      review.assign_attributes(
        metric_name: metric.name,
        instruction: metric.instruction.to_s,
        status: "succeeded",
        ai_score: evaluation[:score],
        ai_feedback: evaluation[:feedback],
        error_provider: nil, error_class: nil, error_status: nil, error_message: nil
      )
      review.save!

      run.send(:broadcast_response_update, response)
      enqueue_completion_check
    end

    private

    # Marks the review for this response/metric pair as "failed" and stores the
    # provider, error class, HTTP-like status (when the error exposes one) and
    # a truncated message. Does nothing when the response row no longer exists.
    def record_terminal_failure!(error)
      response_id = @response_id || arguments.first
      metric_id = @metric_id || arguments.last
      response = Response.find_by(id: response_id)
      return unless response

      review = response.reviews.find_or_initialize_by(metric_id: metric_id)
      review.assign_attributes(
        metric_name: review.metric_name || Metric.find_by(id: metric_id)&.name || "(deleted metric)",
        status: "failed",
        error_provider: provider_for(response),
        error_class: error.class.name,
        error_status: error.respond_to?(:status) ? error.status : nil,
        error_message: error.message.to_s.truncate(2000)
      )
      review.save!(validate: false)
      response.run&.send(:broadcast_response_update, response)
    end

    # Provider of the run's judge model, or nil when the run or its judge
    # model is missing.
    def provider_for(response)
      run = response.run
      return nil unless run&.judge_model
      ApiConfig.provider_for_model(run.judge_model)
    end

    # Enqueues the job that decides whether the whole run is finished; skipped
    # when the response row no longer exists.
    def enqueue_completion_check
      response_id = @response_id || arguments.first
      response = Response.find_by(id: response_id)
      RunCompletionCheckJob.perform_later(response.run_id) if response
    end
  end
end
|
|
@@ -1,7 +1,29 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
|
|
1
3
|
module CompletionKit
|
|
2
4
|
class ModelDiscoveryJob < ApplicationJob
|
|
3
5
|
queue_as :default
|
|
4
6
|
|
|
7
|
+
def self.rate_limit_wait(executions)
|
|
8
|
+
30 * executions
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
retry_on Faraday::TimeoutError,
|
|
12
|
+
Faraday::ConnectionFailed,
|
|
13
|
+
wait: :polynomially_longer, attempts: 5
|
|
14
|
+
|
|
15
|
+
retry_on CompletionKit::RateLimitError,
|
|
16
|
+
wait: method(:rate_limit_wait), attempts: 5
|
|
17
|
+
|
|
18
|
+
discard_on ActiveJob::DeserializationError
|
|
19
|
+
|
|
20
|
+
rescue_from(StandardError) do |_error|
|
|
21
|
+
credential = ProviderCredential.find(arguments.first)
|
|
22
|
+
credential.update_columns(discovery_status: "failed")
|
|
23
|
+
credential.reload
|
|
24
|
+
credential.broadcast_discovery_progress
|
|
25
|
+
end
|
|
26
|
+
|
|
5
27
|
def perform(provider_credential_id)
|
|
6
28
|
credential = ProviderCredential.find_by(id: provider_credential_id)
|
|
7
29
|
return unless credential
|
|
@@ -20,10 +42,6 @@ module CompletionKit
|
|
|
20
42
|
credential.update_columns(discovery_status: "completed", updated_at: Time.current)
|
|
21
43
|
credential.reload
|
|
22
44
|
credential.broadcast_discovery_complete
|
|
23
|
-
rescue StandardError
|
|
24
|
-
credential.update_columns(discovery_status: "failed")
|
|
25
|
-
credential.reload
|
|
26
|
-
credential.broadcast_discovery_progress
|
|
27
45
|
end
|
|
28
46
|
end
|
|
29
47
|
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
module CompletionKit
  # Marks a run as completed once it has no outstanding work left.
  #
  # Takes the run id. It is a no-op when the run no longer exists, is not in
  # the "running" state, or still reports outstanding work.
  class RunCompletionCheckJob < ApplicationJob
    queue_as :default

    # At most one completion check per run at a time.
    limits_concurrency to: 1,
                       key: ->(id) { "run:#{id}:completion" },
                       duration: 5.minutes

    def perform(run_id)
      candidate = Run.find_by(id: run_id)
      finished = candidate && candidate.status == "running" && candidate.outstanding_work_zero?

      candidate.mark_completed! if finished
    end
  end
end
|
|
@@ -42,6 +42,10 @@ module CompletionKit
|
|
|
42
42
|
"#{name} — #{version_label}"
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
+
def llm_model_provider
|
|
46
|
+
ApiConfig.provider_for_model(llm_model)
|
|
47
|
+
end
|
|
48
|
+
|
|
45
49
|
def family_versions
|
|
46
50
|
self.class.where(family_key: family_key).order(version_number: :desc, created_at: :desc)
|
|
47
51
|
end
|
|
@@ -1,18 +1,34 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class Response < ApplicationRecord
|
|
3
|
+
STATUSES = %w[pending retrying succeeded failed].freeze
|
|
4
|
+
TERMINAL_STATUSES = %w[succeeded failed].freeze
|
|
5
|
+
|
|
3
6
|
belongs_to :run
|
|
4
7
|
has_many :reviews, dependent: :destroy
|
|
5
8
|
|
|
6
9
|
delegate :prompt, to: :run
|
|
7
10
|
|
|
8
|
-
validates :response_text, presence: true
|
|
11
|
+
validates :response_text, presence: true, if: :succeeded?
|
|
12
|
+
validates :status, inclusion: { in: STATUSES }
|
|
13
|
+
|
|
14
|
+
before_validation :set_default_status, on: :create
|
|
15
|
+
|
|
16
|
+
def terminal?
|
|
17
|
+
TERMINAL_STATUSES.include?(status)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def succeeded?
|
|
21
|
+
status == "succeeded"
|
|
22
|
+
end
|
|
9
23
|
|
|
10
24
|
def as_json(options = {})
|
|
11
25
|
{
|
|
12
26
|
id: id, run_id: run_id, input_data: input_data,
|
|
13
27
|
response_text: response_text, expected_output: expected_output,
|
|
14
28
|
created_at: created_at, score: score, reviewed: reviewed?,
|
|
15
|
-
reviews: reviews.map(&:as_json)
|
|
29
|
+
reviews: reviews.map(&:as_json),
|
|
30
|
+
status: status, attempts: attempts, row_index: row_index,
|
|
31
|
+
error: error_payload
|
|
16
32
|
}
|
|
17
33
|
end
|
|
18
34
|
|
|
@@ -26,5 +42,16 @@ module CompletionKit
|
|
|
26
42
|
def reviewed?
|
|
27
43
|
reviews.any? { |r| r.ai_score.present? }
|
|
28
44
|
end
|
|
45
|
+
|
|
46
|
+
def error_payload
|
|
47
|
+
return nil if error_class.blank?
|
|
48
|
+
{ provider: error_provider, class: error_class, status: error_status, message: error_message }
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
def set_default_status
|
|
54
|
+
self.status ||= "pending"
|
|
55
|
+
end
|
|
29
56
|
end
|
|
30
57
|
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class Review < ApplicationRecord
|
|
3
|
-
STATUSES = %w[pending
|
|
3
|
+
STATUSES = %w[pending retrying succeeded failed].freeze
|
|
4
|
+
TERMINAL_STATUSES = %w[succeeded failed].freeze
|
|
4
5
|
|
|
5
6
|
belongs_to :response
|
|
6
7
|
belongs_to :metric, optional: true
|
|
@@ -11,11 +12,25 @@ module CompletionKit
|
|
|
11
12
|
|
|
12
13
|
before_validation :set_default_status
|
|
13
14
|
|
|
15
|
+
def terminal?
|
|
16
|
+
TERMINAL_STATUSES.include?(status)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def succeeded?
|
|
20
|
+
status == "succeeded"
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def error_payload
|
|
24
|
+
return nil if error_class.blank?
|
|
25
|
+
{ provider: error_provider, class: error_class, status: error_status, message: error_message }
|
|
26
|
+
end
|
|
27
|
+
|
|
14
28
|
def as_json(options = {})
|
|
15
29
|
{
|
|
16
30
|
id: id, response_id: response_id, metric_id: metric_id,
|
|
17
31
|
metric_name: metric_name, ai_score: ai_score,
|
|
18
|
-
ai_feedback: ai_feedback, status: status
|
|
32
|
+
ai_feedback: ai_feedback, status: status, attempts: attempts,
|
|
33
|
+
error: error_payload
|
|
19
34
|
}
|
|
20
35
|
end
|
|
21
36
|
|