completion-kit 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +97 -0
- data/README.md +19 -2
- data/app/assets/stylesheets/completion_kit/application.css +87 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
- data/app/controllers/completion_kit/runs_controller.rb +31 -11
- data/app/helpers/completion_kit/application_helper.rb +4 -12
- data/app/jobs/completion_kit/generate_row_job.rb +102 -0
- data/app/jobs/completion_kit/judge_review_job.rb +110 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
- data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
- data/app/models/completion_kit/prompt.rb +4 -0
- data/app/models/completion_kit/response.rb +29 -2
- data/app/models/completion_kit/review.rb +17 -2
- data/app/models/completion_kit/run.rb +90 -96
- data/app/services/completion_kit/anthropic_client.rb +13 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
- data/app/services/completion_kit/ollama_client.rb +13 -0
- data/app/services/completion_kit/open_ai_client.rb +11 -0
- data/app/services/completion_kit/open_router_client.rb +13 -0
- data/app/services/completion_kit/worker_health.rb +10 -0
- data/app/views/completion_kit/api_reference/index.html.erb +0 -5
- data/app/views/completion_kit/prompts/_form.html.erb +8 -5
- data/app/views/completion_kit/runs/_actions.html.erb +1 -1
- data/app/views/completion_kit/runs/_form.html.erb +6 -3
- data/app/views/completion_kit/runs/_progress.html.erb +1 -1
- data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
- data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
- data/app/views/completion_kit/runs/show.html.erb +1 -1
- data/app/views/layouts/completion_kit/application.html.erb +28 -2
- data/config/routes.rb +2 -2
- data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
- data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
- data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
- data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
- data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
- data/lib/completion_kit/concurrency_check.rb +16 -0
- data/lib/completion_kit/errors.rb +16 -0
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +2 -2
- data/lib/tasks/completion_kit_runs.rake +13 -0
- metadata +31 -7
- data/MIT-LICENSE +0 -20
- data/app/jobs/completion_kit/generate_job.rb +0 -12
- data/app/jobs/completion_kit/judge_job.rb +0 -12
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0111ef5469e6634ac46f899c5e78a67aa212a174027ce253c7172a326a375121
|
|
4
|
+
data.tar.gz: 73162904d2924d4434b724d8e14e7c38e86ef4262de73c18585a6cc38b87e0cb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5a38d31eeb9fdc4482890799fe34ac7fbf57009c77874bcbcd0b4fc6b37f1878d4890137f83bdab52db469a7e91323438ae31d491272aa022e9c7f55fc5ad16a
|
|
7
|
+
data.tar.gz: 64eac5ee675ed6090835b291b64b4cd6dfe30a5c7db36589c8411e9e67331c762977c97533108a9a9c17680dbad43cfcacb024e68666c5af70ec76b7772844de
|
data/LICENSE
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
Business Source License 1.1
|
|
2
|
+
|
|
3
|
+
Licensor: Homemade Software, Inc.
|
|
4
|
+
|
|
5
|
+
Licensed Work: CompletionKit
|
|
6
|
+
The Licensed Work is Copyright © 2026 Homemade
|
|
7
|
+
Software, Inc.
|
|
8
|
+
|
|
9
|
+
Additional Use Grant: You may use the Licensed Work for any purpose,
|
|
10
|
+
including in production, except to offer the Licensed
|
|
11
|
+
Work (or any derivative work) to third parties as a
|
|
12
|
+
hosted or managed service whose primary value is the
|
|
13
|
+
functionality of the Licensed Work itself.
|
|
14
|
+
|
|
15
|
+
Change Date: 2029-04-25
|
|
16
|
+
|
|
17
|
+
Change License: GNU General Public License (GPL) Version 3
|
|
18
|
+
|
|
19
|
+
For information about alternative licensing arrangements for the Licensed
|
|
20
|
+
Work, please contact hello@homemade.software.
|
|
21
|
+
|
|
22
|
+
--------------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
Business Source License 1.1
|
|
25
|
+
|
|
26
|
+
Terms
|
|
27
|
+
|
|
28
|
+
The Licensor hereby grants you the right to copy, modify, create derivative
|
|
29
|
+
works, redistribute, and make non-production use of the Licensed Work. The
|
|
30
|
+
Licensor may make an Additional Use Grant, above, permitting limited
|
|
31
|
+
production use.
|
|
32
|
+
|
|
33
|
+
Effective on the Change Date, or the fourth anniversary of the first publicly
|
|
34
|
+
available distribution of a specific version of the Licensed Work under this
|
|
35
|
+
License, whichever comes first, the Licensor hereby grants you rights under
|
|
36
|
+
the terms of the Change License, and the rights granted in the paragraph
|
|
37
|
+
above terminate.
|
|
38
|
+
|
|
39
|
+
If your use of the Licensed Work does not comply with the requirements
|
|
40
|
+
currently in effect as described in this License, you must purchase a
|
|
41
|
+
commercial license from the Licensor, its affiliated entities, or authorized
|
|
42
|
+
resellers, or you must refrain from using the Licensed Work.
|
|
43
|
+
|
|
44
|
+
All copies of the original and modified Licensed Work, and derivative works
|
|
45
|
+
of the Licensed Work, are subject to this License. This License applies
|
|
46
|
+
separately for each version of the Licensed Work and the Change Date may
|
|
47
|
+
vary for each version of the Licensed Work released by Licensor.
|
|
48
|
+
|
|
49
|
+
You must conspicuously display this License on each original or modified
|
|
50
|
+
copy of the Licensed Work. If you receive the Licensed Work in original or
|
|
51
|
+
modified form from a third party, the terms and conditions set forth in this
|
|
52
|
+
License apply to your use of that work.
|
|
53
|
+
|
|
54
|
+
Any use of the Licensed Work in violation of this License will automatically
|
|
55
|
+
terminate your rights under this License for the current and all other
|
|
56
|
+
versions of the Licensed Work.
|
|
57
|
+
|
|
58
|
+
This License does not grant you any right in any trademark or logo of
|
|
59
|
+
Licensor or its affiliates (provided that you may use a trademark or logo of
|
|
60
|
+
Licensor as expressly required by this License).
|
|
61
|
+
|
|
62
|
+
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
|
63
|
+
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
|
64
|
+
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
|
65
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
|
66
|
+
TITLE.
|
|
67
|
+
|
|
68
|
+
MariaDB hereby grants you permission to use this License's text to license
|
|
69
|
+
your works, and to refer to it using the trademark "Business Source
|
|
70
|
+
License", as long as you comply with the Covenants of Licensor below.
|
|
71
|
+
|
|
72
|
+
Covenants of Licensor
|
|
73
|
+
|
|
74
|
+
In consideration of the right to use this License's text and the "Business
|
|
75
|
+
Source License" name and trademark, Licensor covenants to MariaDB, and to
|
|
76
|
+
all other recipients of the Licensed Work to be provided by Licensor:
|
|
77
|
+
|
|
78
|
+
1. To specify as the Change License the GPL Version 2.0 or any later
|
|
79
|
+
version, or a license that is compatible with GPL Version 2.0 or a later
|
|
80
|
+
version, where "compatible" means that software provided under the Change
|
|
81
|
+
License can be included in a program with software provided under GPL
|
|
82
|
+
Version 2.0 or a later version. Licensor may specify additional Change
|
|
83
|
+
Licenses without limitation.
|
|
84
|
+
|
|
85
|
+
2. To either: (a) specify an additional grant of rights to use that does not
|
|
86
|
+
impose any additional restriction on the right granted in this License,
|
|
87
|
+
as the Additional Use Grant; or (b) insert the text "None".
|
|
88
|
+
|
|
89
|
+
3. To specify a Change Date.
|
|
90
|
+
|
|
91
|
+
4. Not to modify this License in any other way.
|
|
92
|
+
|
|
93
|
+
Notice
|
|
94
|
+
|
|
95
|
+
The Business Source License (this document, or the "License") is not an
|
|
96
|
+
Open Source license. However, the Licensed Work will eventually be made
|
|
97
|
+
available under an Open Source License, as stated in this License.
|
data/README.md
CHANGED
|
@@ -15,6 +15,8 @@ It's the difference between "this prompt seems to work" and "this prompt scores
|
|
|
15
15
|
|
|
16
16
|
**[completionkit.com](https://completionkit.com)** | **[RubyGems](https://rubygems.org/gems/completion-kit)**
|
|
17
17
|
|
|
18
|
+
> **CompletionKit Cloud** is coming — hosted, managed CompletionKit with zero setup. Early access opening soon at [app.completionkit.com](https://app.completionkit.com).
|
|
19
|
+
|
|
18
20
|

|
|
19
21
|
|
|
20
22
|

|
|
@@ -33,9 +35,20 @@ cd completion-kit/standalone
|
|
|
33
35
|
bundle install
|
|
34
36
|
bin/rails completion_kit:install:migrations
|
|
35
37
|
bin/rails db:migrate
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Then run **both** processes — a web server and a Solid Queue worker. In two terminals:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
36
43
|
bin/rails server
|
|
37
44
|
```
|
|
38
45
|
|
|
46
|
+
```bash
|
|
47
|
+
bin/jobs
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or with [foreman](https://github.com/ddollar/foreman) in one terminal: `foreman start -f Procfile.dev`.
|
|
51
|
+
|
|
39
52
|
Visit `http://localhost:3000`. Add a provider credential (Settings), create a prompt, upload a CSV dataset, and run it.
|
|
40
53
|
|
|
41
54
|
### Or mount as an engine in your existing Rails app
|
|
@@ -49,7 +62,7 @@ bin/rails generate completion_kit:install
|
|
|
49
62
|
bin/rails db:migrate
|
|
50
63
|
```
|
|
51
64
|
|
|
52
|
-
The engine mounts at `/completion_kit` in your app.
|
|
65
|
+
The engine mounts at `/completion_kit` in your app. CompletionKit's generate and judge flows enqueue Active Job jobs (`CompletionKit::GenerateRowJob`, `CompletionKit::JudgeReviewJob`, `CompletionKit::RunCompletionCheckJob`), so your host app needs an Active Job adapter that actually processes them — Solid Queue, Sidekiq, GoodJob, etc. The `:async` adapter is **not** suitable for production: it runs jobs in the web Puma's thread pool with no durability and no retry, and a long LLM call will block request handling.
|
|
53
66
|
|
|
54
67
|
## Providers
|
|
55
68
|
|
|
@@ -200,4 +213,8 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, testing, and pull
|
|
|
200
213
|
|
|
201
214
|
## License
|
|
202
215
|
|
|
203
|
-
[
|
|
216
|
+
CompletionKit 0.3.0 and later are licensed under the [Business Source License 1.1](LICENSE). You may use CompletionKit freely for any purpose, including production, except to offer it (or a derivative) to third parties as a hosted or managed service whose primary value is CompletionKit itself. Three years after each release, that version automatically re-licenses to GPL-3.
|
|
217
|
+
|
|
218
|
+
CompletionKit 0.2.x and earlier remain available under the [MIT License](https://github.com/homemade-software-inc/completion-kit/blob/v0.2.0/MIT-LICENSE).
|
|
219
|
+
|
|
220
|
+
For alternative licensing, contact hello@homemade.software.
|
|
@@ -274,6 +274,39 @@ form.button_to {
|
|
|
274
274
|
color: var(--ck-accent);
|
|
275
275
|
}
|
|
276
276
|
|
|
277
|
+
.ck-disclosure-toggle {
|
|
278
|
+
appearance: none;
|
|
279
|
+
background: transparent;
|
|
280
|
+
border: 0;
|
|
281
|
+
padding: 0;
|
|
282
|
+
margin: 0.5rem 0 0;
|
|
283
|
+
font-family: var(--ck-mono);
|
|
284
|
+
font-size: 0.75rem;
|
|
285
|
+
font-weight: 500;
|
|
286
|
+
letter-spacing: 0.12em;
|
|
287
|
+
text-transform: uppercase;
|
|
288
|
+
color: var(--ck-muted);
|
|
289
|
+
cursor: pointer;
|
|
290
|
+
transition: color 0.15s;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
.ck-disclosure-toggle:hover,
|
|
294
|
+
.ck-disclosure-toggle:focus-visible {
|
|
295
|
+
color: var(--ck-accent);
|
|
296
|
+
outline: none;
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
.ck-disclosure-toggle::after {
|
|
300
|
+
content: " ↓";
|
|
301
|
+
display: inline-block;
|
|
302
|
+
margin-left: 0.25rem;
|
|
303
|
+
transition: transform 0.15s;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
.ck-disclosure-toggle[aria-expanded="true"]::after {
|
|
307
|
+
transform: rotate(180deg);
|
|
308
|
+
}
|
|
309
|
+
|
|
277
310
|
.ck-list {
|
|
278
311
|
display: grid;
|
|
279
312
|
gap: 0.5rem;
|
|
@@ -385,6 +418,18 @@ tr:hover .ck-chip--publish {
|
|
|
385
418
|
color: var(--ck-accent);
|
|
386
419
|
}
|
|
387
420
|
|
|
421
|
+
.ck-chip--warning {
|
|
422
|
+
background: var(--ck-warning-soft);
|
|
423
|
+
border-color: rgba(224, 164, 88, 0.3);
|
|
424
|
+
color: var(--ck-warning);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
.ck-chip--danger {
|
|
428
|
+
background: var(--ck-danger-soft);
|
|
429
|
+
border-color: rgba(248, 113, 113, 0.3);
|
|
430
|
+
color: var(--ck-danger);
|
|
431
|
+
}
|
|
432
|
+
|
|
388
433
|
.ck-badge--high {
|
|
389
434
|
background: var(--ck-success-soft);
|
|
390
435
|
border: 1px solid rgba(34, 197, 94, 0.25);
|
|
@@ -679,6 +724,27 @@ tr:hover .ck-chip--publish {
|
|
|
679
724
|
color: var(--ck-text);
|
|
680
725
|
}
|
|
681
726
|
|
|
727
|
+
.ck-progress-block {
|
|
728
|
+
padding: 0.5rem 1rem 0.75rem;
|
|
729
|
+
border-top: 1px solid var(--ck-line);
|
|
730
|
+
font-size: 0.72rem;
|
|
731
|
+
font-family: var(--ck-mono);
|
|
732
|
+
color: var(--ck-muted);
|
|
733
|
+
display: flex;
|
|
734
|
+
flex-direction: column;
|
|
735
|
+
gap: 0.25rem;
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
.ck-progress-line {
|
|
739
|
+
display: flex;
|
|
740
|
+
gap: 0.4rem;
|
|
741
|
+
align-items: baseline;
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
.ck-progress-failed {
|
|
745
|
+
color: var(--ck-danger);
|
|
746
|
+
}
|
|
747
|
+
|
|
682
748
|
.ck-model-list-details summary {
|
|
683
749
|
list-style: none;
|
|
684
750
|
}
|
|
@@ -802,6 +868,12 @@ tr:hover .ck-chip--publish {
|
|
|
802
868
|
color: var(--ck-muted);
|
|
803
869
|
}
|
|
804
870
|
|
|
871
|
+
.ck-field--info #refresh-status,
|
|
872
|
+
.ck-field--warn #refresh-status,
|
|
873
|
+
.ck-field--error #refresh-status {
|
|
874
|
+
color: var(--ck-muted);
|
|
875
|
+
}
|
|
876
|
+
|
|
805
877
|
.ck-field--info .ck-input {
|
|
806
878
|
border-color: var(--ck-accent);
|
|
807
879
|
}
|
|
@@ -1815,6 +1887,21 @@ select.ck-input {
|
|
|
1815
1887
|
flex-shrink: 0;
|
|
1816
1888
|
}
|
|
1817
1889
|
|
|
1890
|
+
.ck-response-row--pending .ck-response-row__text,
|
|
1891
|
+
.ck-response-row--retrying .ck-response-row__text {
|
|
1892
|
+
color: var(--ck-dim);
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1895
|
+
.ck-response-row--failed .ck-response-row__text {
|
|
1896
|
+
color: var(--ck-danger);
|
|
1897
|
+
opacity: 0.8;
|
|
1898
|
+
}
|
|
1899
|
+
|
|
1900
|
+
.ck-response-row__error {
|
|
1901
|
+
font-family: var(--ck-mono);
|
|
1902
|
+
font-size: 0.82rem;
|
|
1903
|
+
}
|
|
1904
|
+
|
|
1818
1905
|
.ck-score {
|
|
1819
1906
|
font-size: 0.85rem;
|
|
1820
1907
|
color: var(--ck-muted);
|
|
@@ -2,7 +2,7 @@ module CompletionKit
|
|
|
2
2
|
module Api
|
|
3
3
|
module V1
|
|
4
4
|
class RunsController < BaseController
|
|
5
|
-
before_action :set_run, only: [:show, :update, :destroy, :generate, :
|
|
5
|
+
before_action :set_run, only: [:show, :update, :destroy, :generate, :retry_failures]
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
8
|
render json: Run.order(created_at: :desc)
|
|
@@ -37,12 +37,33 @@ module CompletionKit
|
|
|
37
37
|
end
|
|
38
38
|
|
|
39
39
|
def generate
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
if @run.start!
|
|
41
|
+
render json: @run.reload, status: :accepted
|
|
42
|
+
else
|
|
43
|
+
render json: { errors: [@run.failure_summary || @run.errors.full_messages.to_sentence] }, status: :unprocessable_entity
|
|
44
|
+
end
|
|
42
45
|
end
|
|
43
46
|
|
|
44
|
-
def
|
|
45
|
-
|
|
47
|
+
def retry_failures
|
|
48
|
+
scope = @run.responses.where(status: "failed")
|
|
49
|
+
scope = scope.where(id: params[:only]) if params[:only].present?
|
|
50
|
+
|
|
51
|
+
ActiveRecord::Base.transaction do
|
|
52
|
+
failed_response_ids = scope.pluck(:id)
|
|
53
|
+
CompletionKit::Review.where(response_id: failed_response_ids, status: "failed").update_all(
|
|
54
|
+
status: "pending", attempts: 0,
|
|
55
|
+
error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
|
|
56
|
+
ai_score: nil, ai_feedback: nil
|
|
57
|
+
)
|
|
58
|
+
scope.update_all(
|
|
59
|
+
status: "pending", attempts: 0,
|
|
60
|
+
error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
|
|
61
|
+
response_text: nil
|
|
62
|
+
)
|
|
63
|
+
@run.update!(status: "running")
|
|
64
|
+
failed_response_ids.each { |rid| CompletionKit::GenerateRowJob.perform_later(@run.id, rid) }
|
|
65
|
+
end
|
|
66
|
+
|
|
46
67
|
render json: @run.reload, status: :accepted
|
|
47
68
|
end
|
|
48
69
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class RunsController < ApplicationController
|
|
3
|
-
before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :
|
|
3
|
+
before_action :set_run, only: [:show, :edit, :update, :destroy, :generate, :suggest, :suggestion, :apply_suggestion, :retry_failures]
|
|
4
4
|
before_action :load_form_collections, only: [:new, :edit, :create, :update]
|
|
5
5
|
|
|
6
6
|
def index
|
|
@@ -63,17 +63,11 @@ module CompletionKit
|
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
def generate
|
|
66
|
-
@run.
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def judge
|
|
72
|
-
if params[:run]
|
|
73
|
-
@run.update(judge_model: params[:run][:judge_model])
|
|
66
|
+
if @run.start!
|
|
67
|
+
redirect_to run_path(@run)
|
|
68
|
+
else
|
|
69
|
+
redirect_to run_path(@run), alert: @run.failure_summary || @run.errors.full_messages.to_sentence
|
|
74
70
|
end
|
|
75
|
-
JudgeJob.perform_later(@run.id)
|
|
76
|
-
redirect_to run_path(@run)
|
|
77
71
|
end
|
|
78
72
|
|
|
79
73
|
def suggest
|
|
@@ -93,6 +87,32 @@ module CompletionKit
|
|
|
93
87
|
return redirect_to run_path(@run), alert: "No suggestion available. Generate one first." unless @suggestion
|
|
94
88
|
end
|
|
95
89
|
|
|
90
|
+
def retry_failures
|
|
91
|
+
scope = @run.responses.where(status: "failed")
|
|
92
|
+
scope = scope.where(id: params[:only]) if params[:only].present?
|
|
93
|
+
|
|
94
|
+
ActiveRecord::Base.transaction do
|
|
95
|
+
failed_response_ids = scope.pluck(:id)
|
|
96
|
+
Review.where(response_id: failed_response_ids, status: "failed").update_all(
|
|
97
|
+
status: "pending",
|
|
98
|
+
attempts: 0,
|
|
99
|
+
error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
|
|
100
|
+
ai_score: nil, ai_feedback: nil
|
|
101
|
+
)
|
|
102
|
+
scope.update_all(
|
|
103
|
+
status: "pending",
|
|
104
|
+
attempts: 0,
|
|
105
|
+
error_provider: nil, error_class: nil, error_status: nil, error_message: nil,
|
|
106
|
+
response_text: nil
|
|
107
|
+
)
|
|
108
|
+
@run.update!(status: "running")
|
|
109
|
+
failed_response_ids.each { |rid| GenerateRowJob.perform_later(@run.id, rid) }
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
@run.send(:broadcast_ui)
|
|
113
|
+
redirect_to run_path(@run)
|
|
114
|
+
end
|
|
115
|
+
|
|
96
116
|
def apply_suggestion
|
|
97
117
|
suggestion = @run.suggestions.order(created_at: :desc).first
|
|
98
118
|
return redirect_to run_path(@run), alert: "No suggestion to apply." unless suggestion
|
|
@@ -35,8 +35,6 @@ module CompletionKit
|
|
|
35
35
|
"ck-badge ck-badge--pending"
|
|
36
36
|
when "running"
|
|
37
37
|
"ck-badge ck-badge--running"
|
|
38
|
-
when "generating", "judging"
|
|
39
|
-
"ck-badge ck-badge--running"
|
|
40
38
|
when "completed"
|
|
41
39
|
"ck-badge ck-badge--high"
|
|
42
40
|
when "failed"
|
|
@@ -48,7 +46,7 @@ module CompletionKit
|
|
|
48
46
|
|
|
49
47
|
def ck_run_dot(run)
|
|
50
48
|
case run.status
|
|
51
|
-
when "
|
|
49
|
+
when "running" then "ck-dot ck-dot--running"
|
|
52
50
|
when "failed" then "ck-dot ck-dot--failed"
|
|
53
51
|
when "completed" then "ck-dot ck-dot--completed"
|
|
54
52
|
else "ck-dot ck-dot--pending"
|
|
@@ -58,17 +56,11 @@ module CompletionKit
|
|
|
58
56
|
def ck_run_status_label(run)
|
|
59
57
|
case run.status
|
|
60
58
|
when "pending" then "Ready to run"
|
|
61
|
-
when "
|
|
62
|
-
if run.progress_total.to_i > 0
|
|
63
|
-
"Generating responses (#{run.progress_current}/#{run.progress_total})"
|
|
64
|
-
else
|
|
65
|
-
"Generating responses…"
|
|
66
|
-
end
|
|
67
|
-
when "judging"
|
|
59
|
+
when "running"
|
|
68
60
|
if run.progress_total.to_i > 0
|
|
69
|
-
"
|
|
61
|
+
"Running (#{run.progress_current}/#{run.progress_total})"
|
|
70
62
|
else
|
|
71
|
-
"
|
|
63
|
+
"Running…"
|
|
72
64
|
end
|
|
73
65
|
when "completed" then "Completed"
|
|
74
66
|
when "failed" then "Failed"
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
|
|
3
|
+
module CompletionKit
|
|
4
|
+
class GenerateRowJob < ApplicationJob
|
|
5
|
+
queue_as :llm
|
|
6
|
+
|
|
7
|
+
limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
|
|
8
|
+
key: ->(run_id, _) { "run:#{run_id}" },
|
|
9
|
+
duration: 10.minutes
|
|
10
|
+
|
|
11
|
+
def self.rate_limit_wait(executions)
|
|
12
|
+
30 * executions
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
retry_on Faraday::TimeoutError,
|
|
16
|
+
Faraday::ConnectionFailed,
|
|
17
|
+
wait: :polynomially_longer, attempts: 5
|
|
18
|
+
|
|
19
|
+
retry_on CompletionKit::RateLimitError,
|
|
20
|
+
wait: method(:rate_limit_wait), attempts: 5
|
|
21
|
+
|
|
22
|
+
discard_on ActiveJob::DeserializationError
|
|
23
|
+
discard_on CompletionKit::ConfigurationError
|
|
24
|
+
|
|
25
|
+
rescue_from(StandardError) do |error|
|
|
26
|
+
record_terminal_failure!(error)
|
|
27
|
+
enqueue_completion_check
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
before_perform do |job|
|
|
31
|
+
response = Response.find_by(id: job.arguments.last)
|
|
32
|
+
next unless response
|
|
33
|
+
response.update_columns(status: "retrying", attempts: response.attempts + 1)
|
|
34
|
+
response.run.send(:broadcast_response_update, response) if response.run
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def perform(run_id, response_id)
|
|
38
|
+
@run_id = run_id
|
|
39
|
+
@response_id = response_id
|
|
40
|
+
|
|
41
|
+
response = Response.find(response_id)
|
|
42
|
+
run = response.run
|
|
43
|
+
prompt = run.prompt
|
|
44
|
+
|
|
45
|
+
row = parsed_input(response)
|
|
46
|
+
rendered = CsvProcessor.apply_variables(prompt, row)
|
|
47
|
+
client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
|
|
48
|
+
|
|
49
|
+
raise ConfigurationError, client.configuration_errors.join(", ") unless client.configured?
|
|
50
|
+
|
|
51
|
+
text = client.generate_completion(rendered, model: prompt.llm_model, temperature: run.temperature)
|
|
52
|
+
|
|
53
|
+
response.update!(
|
|
54
|
+
status: "succeeded",
|
|
55
|
+
response_text: text,
|
|
56
|
+
error_provider: nil, error_class: nil, error_status: nil, error_message: nil
|
|
57
|
+
)
|
|
58
|
+
run.send(:broadcast_response_update, response)
|
|
59
|
+
|
|
60
|
+
if run.judge_configured?
|
|
61
|
+
run.metrics.each do |metric|
|
|
62
|
+
JudgeReviewJob.perform_later(response.id, metric.id)
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
enqueue_completion_check
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
private
|
|
70
|
+
|
|
71
|
+
def parsed_input(response)
|
|
72
|
+
return {} if response.input_data.blank?
|
|
73
|
+
JSON.parse(response.input_data)
|
|
74
|
+
rescue JSON::ParserError
|
|
75
|
+
{}
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def record_terminal_failure!(error)
|
|
79
|
+
response_id = @response_id || arguments.last
|
|
80
|
+
response = Response.find_by(id: response_id)
|
|
81
|
+
return unless response
|
|
82
|
+
|
|
83
|
+
response.update_columns(
|
|
84
|
+
status: "failed",
|
|
85
|
+
error_provider: provider_for(response),
|
|
86
|
+
error_class: error.class.name,
|
|
87
|
+
error_status: error.respond_to?(:status) ? error.status : nil,
|
|
88
|
+
error_message: error.message.to_s.truncate(2000)
|
|
89
|
+
)
|
|
90
|
+
response.run&.send(:broadcast_response_update, response)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def provider_for(response)
|
|
94
|
+
response.run&.prompt&.llm_model_provider
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
def enqueue_completion_check
|
|
98
|
+
run_id = @run_id || arguments.first
|
|
99
|
+
RunCompletionCheckJob.perform_later(run_id)
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
|
|
3
|
+
module CompletionKit
|
|
4
|
+
class JudgeReviewJob < ApplicationJob
|
|
5
|
+
queue_as :llm
|
|
6
|
+
|
|
7
|
+
limits_concurrency to: ENV.fetch("COMPLETION_KIT_PER_RUN_CONCURRENCY", 5).to_i,
|
|
8
|
+
key: ->(response_id, _) { "run:#{Response.find_by(id: response_id)&.run_id}" },
|
|
9
|
+
duration: 10.minutes
|
|
10
|
+
|
|
11
|
+
def self.rate_limit_wait(executions)
|
|
12
|
+
30 * executions
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
retry_on Faraday::TimeoutError,
|
|
16
|
+
Faraday::ConnectionFailed,
|
|
17
|
+
wait: :polynomially_longer, attempts: 5
|
|
18
|
+
|
|
19
|
+
retry_on CompletionKit::RateLimitError,
|
|
20
|
+
wait: method(:rate_limit_wait), attempts: 5
|
|
21
|
+
|
|
22
|
+
discard_on ActiveJob::DeserializationError
|
|
23
|
+
discard_on CompletionKit::ConfigurationError
|
|
24
|
+
|
|
25
|
+
rescue_from(StandardError) do |error|
|
|
26
|
+
record_terminal_failure!(error)
|
|
27
|
+
enqueue_completion_check
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
before_perform do |job|
|
|
31
|
+
response_id, metric_id = job.arguments
|
|
32
|
+
response = Response.find_by(id: response_id)
|
|
33
|
+
next unless response
|
|
34
|
+
review = response.reviews.find_or_initialize_by(metric_id: metric_id)
|
|
35
|
+
review.metric_name ||= Metric.find_by(id: metric_id)&.name || "(deleted metric)"
|
|
36
|
+
review.attempts = (review.attempts || 0) + 1
|
|
37
|
+
review.status = "retrying"
|
|
38
|
+
review.save!(validate: false)
|
|
39
|
+
response.run.send(:broadcast_response_update, response) if response.run
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def perform(response_id, metric_id)
|
|
43
|
+
@response_id = response_id
|
|
44
|
+
@metric_id = metric_id
|
|
45
|
+
|
|
46
|
+
response = Response.find(response_id)
|
|
47
|
+
metric = Metric.find(metric_id)
|
|
48
|
+
run = response.run
|
|
49
|
+
|
|
50
|
+
config = ApiConfig.for_model(run.judge_model).merge(judge_model: run.judge_model)
|
|
51
|
+
judge = JudgeService.new(config)
|
|
52
|
+
|
|
53
|
+
evaluation = judge.evaluate(
|
|
54
|
+
response.response_text,
|
|
55
|
+
response.expected_output,
|
|
56
|
+
run.prompt.template,
|
|
57
|
+
criteria: metric.instruction.to_s,
|
|
58
|
+
rubric_text: metric.display_rubric_text,
|
|
59
|
+
input_data: response.input_data
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
review = response.reviews.find_or_initialize_by(metric_id: metric.id)
|
|
63
|
+
review.assign_attributes(
|
|
64
|
+
metric_name: metric.name,
|
|
65
|
+
instruction: metric.instruction.to_s,
|
|
66
|
+
status: "succeeded",
|
|
67
|
+
ai_score: evaluation[:score],
|
|
68
|
+
ai_feedback: evaluation[:feedback],
|
|
69
|
+
error_provider: nil, error_class: nil, error_status: nil, error_message: nil
|
|
70
|
+
)
|
|
71
|
+
review.save!
|
|
72
|
+
|
|
73
|
+
run.send(:broadcast_response_update, response)
|
|
74
|
+
enqueue_completion_check
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
private
|
|
78
|
+
|
|
79
|
+
def record_terminal_failure!(error)
|
|
80
|
+
response_id = @response_id || arguments.first
|
|
81
|
+
metric_id = @metric_id || arguments.last
|
|
82
|
+
response = Response.find_by(id: response_id)
|
|
83
|
+
return unless response
|
|
84
|
+
|
|
85
|
+
review = response.reviews.find_or_initialize_by(metric_id: metric_id)
|
|
86
|
+
review.assign_attributes(
|
|
87
|
+
metric_name: review.metric_name || Metric.find_by(id: metric_id)&.name || "(deleted metric)",
|
|
88
|
+
status: "failed",
|
|
89
|
+
error_provider: provider_for(response),
|
|
90
|
+
error_class: error.class.name,
|
|
91
|
+
error_status: error.respond_to?(:status) ? error.status : nil,
|
|
92
|
+
error_message: error.message.to_s.truncate(2000)
|
|
93
|
+
)
|
|
94
|
+
review.save!(validate: false)
|
|
95
|
+
response.run&.send(:broadcast_response_update, response)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def provider_for(response)
|
|
99
|
+
run = response.run
|
|
100
|
+
return nil unless run&.judge_model
|
|
101
|
+
ApiConfig.provider_for_model(run.judge_model)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def enqueue_completion_check
|
|
105
|
+
response_id = @response_id || arguments.first
|
|
106
|
+
response = Response.find_by(id: response_id)
|
|
107
|
+
RunCompletionCheckJob.perform_later(response.run_id) if response
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
@@ -1,7 +1,29 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
|
|
1
3
|
module CompletionKit
|
|
2
4
|
class ModelDiscoveryJob < ApplicationJob
|
|
3
5
|
queue_as :default
|
|
4
6
|
|
|
7
|
+
def self.rate_limit_wait(executions)
|
|
8
|
+
30 * executions
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
retry_on Faraday::TimeoutError,
|
|
12
|
+
Faraday::ConnectionFailed,
|
|
13
|
+
wait: :polynomially_longer, attempts: 5
|
|
14
|
+
|
|
15
|
+
retry_on CompletionKit::RateLimitError,
|
|
16
|
+
wait: method(:rate_limit_wait), attempts: 5
|
|
17
|
+
|
|
18
|
+
discard_on ActiveJob::DeserializationError
|
|
19
|
+
|
|
20
|
+
rescue_from(StandardError) do |_error|
|
|
21
|
+
credential = ProviderCredential.find(arguments.first)
|
|
22
|
+
credential.update_columns(discovery_status: "failed")
|
|
23
|
+
credential.reload
|
|
24
|
+
credential.broadcast_discovery_progress
|
|
25
|
+
end
|
|
26
|
+
|
|
5
27
|
def perform(provider_credential_id)
|
|
6
28
|
credential = ProviderCredential.find_by(id: provider_credential_id)
|
|
7
29
|
return unless credential
|
|
@@ -20,10 +42,6 @@ module CompletionKit
|
|
|
20
42
|
credential.update_columns(discovery_status: "completed", updated_at: Time.current)
|
|
21
43
|
credential.reload
|
|
22
44
|
credential.broadcast_discovery_complete
|
|
23
|
-
rescue StandardError
|
|
24
|
-
credential.update_columns(discovery_status: "failed")
|
|
25
|
-
credential.reload
|
|
26
|
-
credential.broadcast_discovery_progress
|
|
27
45
|
end
|
|
28
46
|
end
|
|
29
47
|
end
|