completion-kit 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +97 -0
- data/README.md +19 -2
- data/app/assets/stylesheets/completion_kit/application.css +87 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
- data/app/controllers/completion_kit/runs_controller.rb +31 -11
- data/app/helpers/completion_kit/application_helper.rb +4 -12
- data/app/jobs/completion_kit/generate_row_job.rb +102 -0
- data/app/jobs/completion_kit/judge_review_job.rb +110 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
- data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
- data/app/models/completion_kit/prompt.rb +4 -0
- data/app/models/completion_kit/response.rb +29 -2
- data/app/models/completion_kit/review.rb +17 -2
- data/app/models/completion_kit/run.rb +90 -96
- data/app/services/completion_kit/anthropic_client.rb +13 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
- data/app/services/completion_kit/ollama_client.rb +13 -0
- data/app/services/completion_kit/open_ai_client.rb +11 -0
- data/app/services/completion_kit/open_router_client.rb +13 -0
- data/app/services/completion_kit/worker_health.rb +10 -0
- data/app/views/completion_kit/api_reference/index.html.erb +0 -5
- data/app/views/completion_kit/prompts/_form.html.erb +8 -5
- data/app/views/completion_kit/runs/_actions.html.erb +1 -1
- data/app/views/completion_kit/runs/_form.html.erb +6 -3
- data/app/views/completion_kit/runs/_progress.html.erb +1 -1
- data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
- data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
- data/app/views/completion_kit/runs/show.html.erb +1 -1
- data/app/views/layouts/completion_kit/application.html.erb +28 -2
- data/config/routes.rb +2 -2
- data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
- data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
- data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
- data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
- data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
- data/lib/completion_kit/concurrency_check.rb +16 -0
- data/lib/completion_kit/errors.rb +16 -0
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +2 -2
- data/lib/tasks/completion_kit_runs.rake +13 -0
- metadata +31 -7
- data/MIT-LICENSE +0 -20
- data/app/jobs/completion_kit/generate_job.rb +0 -12
- data/app/jobs/completion_kit/judge_job.rb +0 -12
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
class RunCompletionCheckJob < ApplicationJob
|
|
3
|
+
queue_as :default
|
|
4
|
+
|
|
5
|
+
limits_concurrency to: 1,
|
|
6
|
+
key: ->(run_id) { "run:#{run_id}:completion" },
|
|
7
|
+
duration: 5.minutes
|
|
8
|
+
|
|
9
|
+
def perform(run_id)
|
|
10
|
+
run = Run.find_by(id: run_id)
|
|
11
|
+
return unless run
|
|
12
|
+
return unless run.status == "running"
|
|
13
|
+
return unless run.outstanding_work_zero?
|
|
14
|
+
|
|
15
|
+
run.mark_completed!
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -42,6 +42,10 @@ module CompletionKit
|
|
|
42
42
|
"#{name} — #{version_label}"
|
|
43
43
|
end
|
|
44
44
|
|
|
45
|
+
def llm_model_provider
|
|
46
|
+
ApiConfig.provider_for_model(llm_model)
|
|
47
|
+
end
|
|
48
|
+
|
|
45
49
|
def family_versions
|
|
46
50
|
self.class.where(family_key: family_key).order(version_number: :desc, created_at: :desc)
|
|
47
51
|
end
|
|
@@ -1,18 +1,34 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class Response < ApplicationRecord
|
|
3
|
+
STATUSES = %w[pending retrying succeeded failed].freeze
|
|
4
|
+
TERMINAL_STATUSES = %w[succeeded failed].freeze
|
|
5
|
+
|
|
3
6
|
belongs_to :run
|
|
4
7
|
has_many :reviews, dependent: :destroy
|
|
5
8
|
|
|
6
9
|
delegate :prompt, to: :run
|
|
7
10
|
|
|
8
|
-
validates :response_text, presence: true
|
|
11
|
+
validates :response_text, presence: true, if: :succeeded?
|
|
12
|
+
validates :status, inclusion: { in: STATUSES }
|
|
13
|
+
|
|
14
|
+
before_validation :set_default_status, on: :create
|
|
15
|
+
|
|
16
|
+
def terminal?
|
|
17
|
+
TERMINAL_STATUSES.include?(status)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def succeeded?
|
|
21
|
+
status == "succeeded"
|
|
22
|
+
end
|
|
9
23
|
|
|
10
24
|
def as_json(options = {})
|
|
11
25
|
{
|
|
12
26
|
id: id, run_id: run_id, input_data: input_data,
|
|
13
27
|
response_text: response_text, expected_output: expected_output,
|
|
14
28
|
created_at: created_at, score: score, reviewed: reviewed?,
|
|
15
|
-
reviews: reviews.map(&:as_json)
|
|
29
|
+
reviews: reviews.map(&:as_json),
|
|
30
|
+
status: status, attempts: attempts, row_index: row_index,
|
|
31
|
+
error: error_payload
|
|
16
32
|
}
|
|
17
33
|
end
|
|
18
34
|
|
|
@@ -26,5 +42,16 @@ module CompletionKit
|
|
|
26
42
|
def reviewed?
|
|
27
43
|
reviews.any? { |r| r.ai_score.present? }
|
|
28
44
|
end
|
|
45
|
+
|
|
46
|
+
def error_payload
|
|
47
|
+
return nil if error_class.blank?
|
|
48
|
+
{ provider: error_provider, class: error_class, status: error_status, message: error_message }
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
def set_default_status
|
|
54
|
+
self.status ||= "pending"
|
|
55
|
+
end
|
|
29
56
|
end
|
|
30
57
|
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class Review < ApplicationRecord
|
|
3
|
-
STATUSES = %w[pending
|
|
3
|
+
STATUSES = %w[pending retrying succeeded failed].freeze
|
|
4
|
+
TERMINAL_STATUSES = %w[succeeded failed].freeze
|
|
4
5
|
|
|
5
6
|
belongs_to :response
|
|
6
7
|
belongs_to :metric, optional: true
|
|
@@ -11,11 +12,25 @@ module CompletionKit
|
|
|
11
12
|
|
|
12
13
|
before_validation :set_default_status
|
|
13
14
|
|
|
15
|
+
def terminal?
|
|
16
|
+
TERMINAL_STATUSES.include?(status)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def succeeded?
|
|
20
|
+
status == "succeeded"
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def error_payload
|
|
24
|
+
return nil if error_class.blank?
|
|
25
|
+
{ provider: error_provider, class: error_class, status: error_status, message: error_message }
|
|
26
|
+
end
|
|
27
|
+
|
|
14
28
|
def as_json(options = {})
|
|
15
29
|
{
|
|
16
30
|
id: id, response_id: response_id, metric_id: metric_id,
|
|
17
31
|
metric_name: metric_name, ai_score: ai_score,
|
|
18
|
-
ai_feedback: ai_feedback, status: status
|
|
32
|
+
ai_feedback: ai_feedback, status: status, attempts: attempts,
|
|
33
|
+
error: error_payload
|
|
19
34
|
}
|
|
20
35
|
end
|
|
21
36
|
|
|
@@ -2,7 +2,7 @@ module CompletionKit
|
|
|
2
2
|
class Run < ApplicationRecord
|
|
3
3
|
include Turbo::Broadcastable
|
|
4
4
|
|
|
5
|
-
STATUSES = %w[pending
|
|
5
|
+
STATUSES = %w[pending running completed failed].freeze
|
|
6
6
|
|
|
7
7
|
belongs_to :prompt
|
|
8
8
|
belongs_to :dataset, optional: true
|
|
@@ -17,6 +17,30 @@ module CompletionKit
|
|
|
17
17
|
before_validation :set_default_status, on: :create
|
|
18
18
|
before_validation :set_auto_name, on: :create
|
|
19
19
|
|
|
20
|
+
def mark_completed!
|
|
21
|
+
update!(status: "completed")
|
|
22
|
+
broadcast_ui
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def outstanding_work_zero?
|
|
26
|
+
return false if responses.where.not(status: Response::TERMINAL_STATUSES).exists?
|
|
27
|
+
|
|
28
|
+
metric_ids = metrics.pluck(:id)
|
|
29
|
+
return true if metric_ids.empty?
|
|
30
|
+
|
|
31
|
+
succeeded_response_ids = responses.where(status: "succeeded").pluck(:id)
|
|
32
|
+
expected_reviews = succeeded_response_ids.size * metric_ids.size
|
|
33
|
+
return true if expected_reviews.zero?
|
|
34
|
+
|
|
35
|
+
terminal_review_count = Review.where(
|
|
36
|
+
response_id: succeeded_response_ids,
|
|
37
|
+
metric_id: metric_ids,
|
|
38
|
+
status: Review::TERMINAL_STATUSES
|
|
39
|
+
).count
|
|
40
|
+
|
|
41
|
+
terminal_review_count >= expected_reviews
|
|
42
|
+
end
|
|
43
|
+
|
|
20
44
|
def judge_configured?
|
|
21
45
|
judge_model.present? && metrics.any? && ApiConfig.valid_for_model?(judge_model)
|
|
22
46
|
end
|
|
@@ -45,134 +69,104 @@ module CompletionKit
|
|
|
45
69
|
end
|
|
46
70
|
end
|
|
47
71
|
|
|
48
|
-
def
|
|
72
|
+
def start!
|
|
49
73
|
rows = if dataset
|
|
50
74
|
CsvProcessor.process_self(self)
|
|
51
75
|
else
|
|
52
76
|
[{}]
|
|
53
77
|
end
|
|
54
78
|
|
|
55
|
-
if rows.empty?
|
|
56
|
-
errors.add(:base, "Dataset has no rows")
|
|
57
|
-
return false
|
|
58
|
-
end
|
|
79
|
+
return fail_with_summary!("Dataset has no rows") if rows.empty?
|
|
59
80
|
|
|
60
81
|
client = LlmClient.for_model(prompt.llm_model, ApiConfig.for_model(prompt.llm_model))
|
|
61
|
-
|
|
62
82
|
unless client.configured?
|
|
63
|
-
|
|
64
|
-
errors.add(:base, msg)
|
|
65
|
-
update_columns(status: "failed", error_message: msg) if persisted?
|
|
66
|
-
return false
|
|
83
|
+
return fail_with_summary!("LLM API not configured: #{client.configuration_errors.join(', ')}")
|
|
67
84
|
end
|
|
68
85
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
response_text = client.generate_completion(rendered, model: prompt.llm_model, temperature: temperature)
|
|
78
|
-
|
|
79
|
-
resp = responses.create!(
|
|
80
|
-
input_data: input,
|
|
81
|
-
response_text: response_text,
|
|
82
|
-
expected_output: row["expected_output"]
|
|
86
|
+
transaction do
|
|
87
|
+
responses.destroy_all
|
|
88
|
+
update!(
|
|
89
|
+
status: "running",
|
|
90
|
+
progress_current: 0,
|
|
91
|
+
progress_total: rows.length,
|
|
92
|
+
failure_summary: nil,
|
|
93
|
+
error_message: nil
|
|
83
94
|
)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
broadcast_ui
|
|
95
|
+
rows.each_with_index do |row, index|
|
|
96
|
+
input = row.empty? ? nil : row.to_json
|
|
97
|
+
response = responses.create!(
|
|
98
|
+
status: "pending",
|
|
99
|
+
row_index: index,
|
|
100
|
+
input_data: input,
|
|
101
|
+
expected_output: row["expected_output"]
|
|
102
|
+
)
|
|
103
|
+
GenerateRowJob.perform_later(id, response.id)
|
|
104
|
+
end
|
|
95
105
|
end
|
|
96
106
|
|
|
97
|
-
true
|
|
98
|
-
rescue Faraday::Error => e
|
|
99
|
-
update_columns(status: "failed", error_message: e.message)
|
|
100
|
-
errors.add(:base, e.message)
|
|
101
107
|
broadcast_ui
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
update_columns(status: "failed", error_message: e.message) if persisted?
|
|
105
|
-
errors.add(:base, e.message)
|
|
106
|
-
broadcast_ui if persisted?
|
|
107
|
-
false
|
|
108
|
+
broadcast_clear_responses
|
|
109
|
+
true
|
|
108
110
|
end
|
|
109
111
|
|
|
110
|
-
def
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
broadcast_ui
|
|
114
|
-
|
|
115
|
-
judge = JudgeService.new(ApiConfig.for_model(judge_model).merge(judge_model: judge_model))
|
|
116
|
-
evaluation_count = 0
|
|
117
|
-
|
|
118
|
-
responses.find_each do |response|
|
|
119
|
-
metrics.each do |metric|
|
|
120
|
-
evaluation = judge.evaluate(
|
|
121
|
-
response.response_text,
|
|
122
|
-
response.expected_output,
|
|
123
|
-
prompt.template,
|
|
124
|
-
criteria: metric.instruction.to_s,
|
|
125
|
-
rubric_text: metric.display_rubric_text,
|
|
126
|
-
input_data: response.input_data
|
|
127
|
-
)
|
|
112
|
+
def generate_responses!
|
|
113
|
+
start!
|
|
114
|
+
end
|
|
128
115
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
status: "evaluated",
|
|
134
|
-
ai_score: evaluation[:score],
|
|
135
|
-
ai_feedback: evaluation[:feedback]
|
|
136
|
-
)
|
|
137
|
-
review.save!
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
evaluation_count += 1
|
|
141
|
-
update_columns(progress_current: evaluation_count)
|
|
142
|
-
broadcast_progress
|
|
143
|
-
end
|
|
116
|
+
def progress_snapshot
|
|
117
|
+
generated_done = responses.where(status: "succeeded").count
|
|
118
|
+
generated_failed = responses.where(status: "failed").count
|
|
119
|
+
generated_total = progress_total
|
|
144
120
|
|
|
145
|
-
|
|
146
|
-
|
|
121
|
+
metric_count = metrics.count
|
|
122
|
+
succeeded_count = generated_done
|
|
123
|
+
judged_total = succeeded_count * metric_count
|
|
124
|
+
judged_done = Review.joins(:response)
|
|
125
|
+
.where(completion_kit_responses: { run_id: id }, status: "succeeded").count
|
|
126
|
+
judged_failed = Review.joins(:response)
|
|
127
|
+
.where(completion_kit_responses: { run_id: id }, status: "failed").count
|
|
147
128
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
rescue StandardError => e
|
|
157
|
-
update_columns(status: "failed", error_message: e.message) if persisted?
|
|
158
|
-
errors.add(:base, e.message)
|
|
159
|
-
broadcast_ui if persisted?
|
|
160
|
-
false
|
|
129
|
+
{
|
|
130
|
+
generated_done: generated_done,
|
|
131
|
+
generated_total: generated_total,
|
|
132
|
+
generated_failed: generated_failed,
|
|
133
|
+
judged_done: judged_done,
|
|
134
|
+
judged_total: judged_total,
|
|
135
|
+
judged_failed: judged_failed
|
|
136
|
+
}
|
|
161
137
|
end
|
|
162
138
|
|
|
163
139
|
def as_json(options = {})
|
|
140
|
+
snap = progress_snapshot
|
|
164
141
|
{
|
|
165
142
|
id: id, name: name, status: status, prompt_id: prompt_id,
|
|
166
143
|
dataset_id: dataset_id, judge_model: judge_model, temperature: temperature,
|
|
167
144
|
created_at: created_at, updated_at: updated_at,
|
|
168
145
|
responses_count: responses.count, avg_score: avg_score,
|
|
169
|
-
progress_current:
|
|
170
|
-
|
|
146
|
+
progress_current: snap[:generated_done],
|
|
147
|
+
progress_total: snap[:generated_total],
|
|
148
|
+
progress: {
|
|
149
|
+
generated: { done: snap[:generated_done], total: snap[:generated_total], failed: snap[:generated_failed] },
|
|
150
|
+
judged: { done: snap[:judged_done], total: snap[:judged_total], failed: snap[:judged_failed] }
|
|
151
|
+
},
|
|
152
|
+
failed_response_ids: responses.where(status: "failed").pluck(:id),
|
|
153
|
+
failure_summary: failure_summary,
|
|
154
|
+
error_message: error_message,
|
|
155
|
+
metric_ids: metric_ids
|
|
171
156
|
}
|
|
172
157
|
end
|
|
173
158
|
|
|
174
159
|
private
|
|
175
160
|
|
|
161
|
+
def fail_with_summary!(message)
|
|
162
|
+
errors.add(:base, message)
|
|
163
|
+
if persisted?
|
|
164
|
+
update_columns(status: "failed", failure_summary: message, error_message: message)
|
|
165
|
+
broadcast_ui
|
|
166
|
+
end
|
|
167
|
+
false
|
|
168
|
+
end
|
|
169
|
+
|
|
176
170
|
def broadcast_ui
|
|
177
171
|
broadcast_progress
|
|
178
172
|
broadcast_status_header
|
|
@@ -27,12 +27,25 @@ module CompletionKit
|
|
|
27
27
|
}.to_json
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
if response.status == 429
|
|
31
|
+
raise CompletionKit::RateLimitError.new(
|
|
32
|
+
response.body.to_s.truncate(500),
|
|
33
|
+
provider: "anthropic",
|
|
34
|
+
status: 429,
|
|
35
|
+
retry_after: nil
|
|
36
|
+
)
|
|
37
|
+
end
|
|
38
|
+
|
|
30
39
|
if response.success?
|
|
31
40
|
data = JSON.parse(response.body)
|
|
32
41
|
data["content"][0]["text"].strip
|
|
33
42
|
else
|
|
34
43
|
"Error: #{response.status} - #{response.body}"
|
|
35
44
|
end
|
|
45
|
+
rescue CompletionKit::RateLimitError
|
|
46
|
+
raise
|
|
47
|
+
rescue Faraday::Error
|
|
48
|
+
raise
|
|
36
49
|
rescue => e
|
|
37
50
|
"Error: #{e.message}"
|
|
38
51
|
end
|
|
@@ -49,11 +49,6 @@ module CompletionKit
|
|
|
49
49
|
description: "Generate responses for a run using its prompt and dataset",
|
|
50
50
|
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
51
51
|
handler: :generate
|
|
52
|
-
},
|
|
53
|
-
"runs_judge" => {
|
|
54
|
-
description: "Judge responses for a run using configured metrics",
|
|
55
|
-
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
56
|
-
handler: :judge
|
|
57
52
|
}
|
|
58
53
|
}.freeze
|
|
59
54
|
|
|
@@ -92,14 +87,11 @@ module CompletionKit
|
|
|
92
87
|
|
|
93
88
|
def self.generate(args)
|
|
94
89
|
run = Run.find(args["id"])
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
run = Run.find(args["id"])
|
|
101
|
-
JudgeJob.perform_later(run.id)
|
|
102
|
-
text_result(run.reload.as_json)
|
|
90
|
+
if run.start!
|
|
91
|
+
text_result(run.reload.as_json)
|
|
92
|
+
else
|
|
93
|
+
text_result(run.failure_summary || run.errors.full_messages.to_sentence)
|
|
94
|
+
end
|
|
103
95
|
end
|
|
104
96
|
end
|
|
105
97
|
end
|
|
@@ -19,12 +19,25 @@ module CompletionKit
|
|
|
19
19
|
}.to_json
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
+
if response.status == 429
|
|
23
|
+
raise CompletionKit::RateLimitError.new(
|
|
24
|
+
response.body.to_s.truncate(500),
|
|
25
|
+
provider: "ollama",
|
|
26
|
+
status: 429,
|
|
27
|
+
retry_after: nil
|
|
28
|
+
)
|
|
29
|
+
end
|
|
30
|
+
|
|
22
31
|
if response.success?
|
|
23
32
|
data = JSON.parse(response.body)
|
|
24
33
|
data["choices"][0]["text"].strip
|
|
25
34
|
else
|
|
26
35
|
"Error: #{response.status} - #{response.body}"
|
|
27
36
|
end
|
|
37
|
+
rescue CompletionKit::RateLimitError
|
|
38
|
+
raise
|
|
39
|
+
rescue Faraday::Error
|
|
40
|
+
raise
|
|
28
41
|
rescue => e
|
|
29
42
|
"Error: #{e.message}"
|
|
30
43
|
end
|
|
@@ -27,12 +27,23 @@ module CompletionKit
|
|
|
27
27
|
}.to_json
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
if response.status == 429
|
|
31
|
+
raise CompletionKit::RateLimitError.new(
|
|
32
|
+
response.body.to_s.truncate(500),
|
|
33
|
+
provider: "openai",
|
|
34
|
+
status: 429,
|
|
35
|
+
retry_after: response.headers && response.headers["Retry-After"]&.to_i
|
|
36
|
+
)
|
|
37
|
+
end
|
|
38
|
+
|
|
30
39
|
if response.success?
|
|
31
40
|
data = JSON.parse(response.body)
|
|
32
41
|
data["output"][0]["content"][0]["text"].strip
|
|
33
42
|
else
|
|
34
43
|
"Error: #{response.status} - #{response.body}"
|
|
35
44
|
end
|
|
45
|
+
rescue CompletionKit::RateLimitError
|
|
46
|
+
raise
|
|
36
47
|
rescue Faraday::Error => e
|
|
37
48
|
raise
|
|
38
49
|
rescue => e
|
|
@@ -25,12 +25,25 @@ module CompletionKit
|
|
|
25
25
|
}.to_json
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
+
if response.status == 429
|
|
29
|
+
raise CompletionKit::RateLimitError.new(
|
|
30
|
+
response.body.to_s.truncate(500),
|
|
31
|
+
provider: "openrouter",
|
|
32
|
+
status: 429,
|
|
33
|
+
retry_after: response.headers && response.headers["Retry-After"]&.to_i
|
|
34
|
+
)
|
|
35
|
+
end
|
|
36
|
+
|
|
28
37
|
if response.success?
|
|
29
38
|
data = JSON.parse(response.body)
|
|
30
39
|
data.dig("choices", 0, "message", "content").to_s.strip
|
|
31
40
|
else
|
|
32
41
|
"Error: #{response.status} - #{response.body}"
|
|
33
42
|
end
|
|
43
|
+
rescue CompletionKit::RateLimitError
|
|
44
|
+
raise
|
|
45
|
+
rescue Faraday::Error
|
|
46
|
+
raise
|
|
34
47
|
rescue => e
|
|
35
48
|
"Error: #{e.message}"
|
|
36
49
|
end
|
|
@@ -174,11 +174,6 @@ end %>
|
|
|
174
174
|
<p class="ck-meta-copy">Start generating responses. Returns 202 Accepted. Poll the run to check progress.</p>
|
|
175
175
|
<%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/generate \\\n -H \"Authorization: Bearer #{token_display}\"" %>
|
|
176
176
|
</div>
|
|
177
|
-
<div class="ck-api-endpoint">
|
|
178
|
-
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">POST</span> /api/v1/runs/:id/judge</p>
|
|
179
|
-
<p class="ck-meta-copy">Start judging responses with the configured judge model and metrics. Returns 202 Accepted.</p>
|
|
180
|
-
<%= render "example", base_url: @base_url, token: token_display, real_token: @token, cmd: "curl -X POST #{@base_url}/api/v1/runs/1/judge \\\n -H \"Authorization: Bearer #{token_display}\"" %>
|
|
181
|
-
</div>
|
|
182
177
|
<div class="ck-api-endpoint">
|
|
183
178
|
<p class="ck-api-method"><span class="ck-chip ck-chip--soft">PATCH</span> /api/v1/runs/:id</p>
|
|
184
179
|
<p class="ck-meta-copy">Update a run. Accepts same params as create.</p>
|
|
@@ -32,21 +32,24 @@
|
|
|
32
32
|
<% available = CompletionKit::ApiConfig.available_models(scope: :generation) %>
|
|
33
33
|
<% if available.any? %>
|
|
34
34
|
<div class="ck-select-with-action">
|
|
35
|
-
<%= form.select :llm_model, ck_grouped_models(available, prompt.llm_model), {}, { class: "ck-input", id: "prompt_llm_model" } %>
|
|
35
|
+
<%= form.select :llm_model, ck_grouped_models(available, prompt.llm_model), { include_blank: "— Select a model —" }, { class: "ck-input", id: "prompt_llm_model" } %>
|
|
36
36
|
<button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
|
|
37
37
|
</div>
|
|
38
38
|
<% else %>
|
|
39
39
|
<p class="ck-meta-copy">No models available. <%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %> or click refresh after configuring a provider.</p>
|
|
40
40
|
<% end %>
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
<div hidden data-refresh-progress-carriers>
|
|
42
|
+
<% CompletionKit::ProviderCredential.find_each do |pc| %>
|
|
43
|
+
<%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
|
|
44
|
+
<%= render "completion_kit/provider_credentials/discovery_status", provider_credential: pc, show_completed: false %>
|
|
45
|
+
<% end %>
|
|
46
|
+
</div>
|
|
44
47
|
<p class="ck-field-hint" id="refresh-status" style="min-height: 1.2em; margin-top: -0.25rem; font-size: 0.75rem;"> </p>
|
|
45
48
|
</div>
|
|
46
49
|
|
|
47
50
|
<div class="ck-actions">
|
|
48
51
|
<%= link_to "Cancel", prompts_path, class: ck_button_classes(:light, variant: :outline) %>
|
|
49
|
-
<%= form.submit(prompt.persisted? ? "Save prompt" : "Create prompt", class: ck_button_classes(:dark)) %>
|
|
52
|
+
<%= form.submit(prompt.persisted? ? "Save prompt" : "Create prompt", class: ck_button_classes(:dark), disabled: available.empty?) %>
|
|
50
53
|
</div>
|
|
51
54
|
</div>
|
|
52
55
|
<% end %>
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
<div class="ck-actions" id="run_actions">
|
|
2
|
-
<% running = run.status == "
|
|
2
|
+
<% running = run.status == "running" %>
|
|
3
3
|
<%= button_to run_path(run), method: :delete, form_class: "inline-block", class: "ck-icon-btn", title: "Delete run", "aria-label": "Delete run", disabled: running, data: { turbo_confirm: "Delete this run and all its responses?" } do %><%= heroicon_tag "trash", variant: :outline, size: 16, "aria-hidden": "true" %><% end %>
|
|
4
4
|
<% if running %>
|
|
5
5
|
<%= link_to "Edit", edit_run_path(run), class: ck_button_classes(:light, variant: :outline) + " disabled", "aria-disabled": "true", tabindex: "-1" %>
|
|
@@ -54,9 +54,12 @@
|
|
|
54
54
|
<button type="button" class="ck-icon-btn" title="Refresh models" onclick="ckRefreshModels()"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 16 16" fill="currentColor" width="16" height="16"><path fill-rule="evenodd" d="M13.836 2.477a.75.75 0 0 1 .75.75v3.182a.75.75 0 0 1-.75.75h-3.182a.75.75 0 0 1 0-1.5h1.37l-.84-.841a4.5 4.5 0 0 0-7.08.681.75.75 0 0 1-1.264-.808 6 6 0 0 1 9.44-.908l.84.84V3.227a.75.75 0 0 1 .75-.75Zm-.911 7.5A.75.75 0 0 1 13.199 11a6 6 0 0 1-9.44.908l-.84-.84v1.68a.75.75 0 0 1-1.5 0V9.567a.75.75 0 0 1 .75-.75h3.182a.75.75 0 0 1 0 1.5h-1.37l.84.841a4.5 4.5 0 0 0 7.08-.681.75.75 0 0 1 1.024-.274Z" clip-rule="evenodd"/></svg></button>
|
|
55
55
|
</div>
|
|
56
56
|
<p class="ck-field-hint" id="judge-hint"></p>
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
57
|
+
<div hidden data-refresh-progress-carriers>
|
|
58
|
+
<% CompletionKit::ProviderCredential.find_each do |pc| %>
|
|
59
|
+
<%= turbo_stream_from "completion_kit_provider_#{pc.id}" %>
|
|
60
|
+
<%= render "completion_kit/provider_credentials/discovery_status", provider_credential: pc, show_completed: false %>
|
|
61
|
+
<% end %>
|
|
62
|
+
</div>
|
|
60
63
|
<p class="ck-field-hint" id="refresh-status" style="min-height: 1.2em; margin-top: -0.25rem; font-size: 0.75rem;"> </p>
|
|
61
64
|
<% else %>
|
|
62
65
|
<p class="ck-field-hint" style="color: var(--ck-warning);">No judge models available. <%= link_to "Add a provider", provider_credentials_path, class: "ck-link" %></p>
|
|
@@ -1,13 +1,31 @@
|
|
|
1
|
-
<%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row", id: "response_#{response.id}" do %>
|
|
1
|
+
<%= link_to run_response_path(run, response, sort: params[:sort]), class: "ck-response-row ck-response-row--#{response.status}", id: "response_#{response.id}" do %>
|
|
2
2
|
<span class="ck-response-row__index">#<%= index %></span>
|
|
3
|
-
<span class="ck-response-row__text"
|
|
3
|
+
<span class="ck-response-row__text">
|
|
4
|
+
<% if response.succeeded? %>
|
|
5
|
+
<%= truncate(response.response_text.to_s, length: 160) %>
|
|
6
|
+
<% elsif response.status == "failed" %>
|
|
7
|
+
<% err = response.error_payload %>
|
|
8
|
+
<span class="ck-response-row__error">
|
|
9
|
+
<%= err && err[:provider]&.titleize %><%= " #{err[:status]}" if err && err[:status] %> — <%= truncate(err && err[:message].to_s, length: 120) %>
|
|
10
|
+
</span>
|
|
11
|
+
<% end %>
|
|
12
|
+
</span>
|
|
4
13
|
<span class="ck-response-row__score">
|
|
5
|
-
<%
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
14
|
+
<% case response.status
|
|
15
|
+
when "succeeded" %>
|
|
16
|
+
<% if response.reviewed? %>
|
|
17
|
+
<span class="ck-score"><span class="ck-score__star">★</span> <%= response.score %></span>
|
|
18
|
+
<% elsif run.status == "running" %>
|
|
19
|
+
<span class="ck-chip">Judging</span>
|
|
20
|
+
<% end %>
|
|
21
|
+
<% when "pending" %>
|
|
22
|
+
<span class="ck-chip">Queued</span>
|
|
23
|
+
<% when "retrying" %>
|
|
24
|
+
<span class="ck-chip ck-chip--warning">Retrying <%= response.attempts %>/5</span>
|
|
25
|
+
<% when "failed" %>
|
|
26
|
+
<%= link_to "Retry", retry_failures_run_path(run, only: response.id),
|
|
27
|
+
data: { turbo_method: :post },
|
|
28
|
+
class: "ck-chip ck-chip--danger ck-chip--retry" %>
|
|
11
29
|
<% end %>
|
|
12
30
|
</span>
|
|
13
31
|
<% end %>
|