completion-kit 0.1.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +192 -0
- data/Rakefile +12 -0
- data/app/assets/config/completion_kit_manifest.js +1 -0
- data/app/assets/config/manifest.js +3 -0
- data/app/assets/images/completion_kit/logo.svg +6 -0
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
- data/app/assets/stylesheets/completion_kit/application.css +2214 -0
- data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
- data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
- data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
- data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
- data/app/controllers/completion_kit/application_controller.rb +31 -0
- data/app/controllers/completion_kit/criteria_controller.rb +67 -0
- data/app/controllers/completion_kit/datasets_controller.rb +53 -0
- data/app/controllers/completion_kit/mcp_controller.rb +57 -0
- data/app/controllers/completion_kit/metrics_controller.rb +52 -0
- data/app/controllers/completion_kit/prompts_controller.rb +69 -0
- data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
- data/app/controllers/completion_kit/responses_controller.rb +44 -0
- data/app/controllers/completion_kit/runs_controller.rb +131 -0
- data/app/helpers/completion_kit/application_helper.rb +193 -0
- data/app/jobs/completion_kit/application_job.rb +4 -0
- data/app/jobs/completion_kit/generate_job.rb +12 -0
- data/app/jobs/completion_kit/judge_job.rb +12 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
- data/app/mailers/completion_kit/application_mailer.rb +6 -0
- data/app/models/completion_kit/application_record.rb +5 -0
- data/app/models/completion_kit/criteria.rb +22 -0
- data/app/models/completion_kit/criteria_membership.rb +20 -0
- data/app/models/completion_kit/dataset.rb +24 -0
- data/app/models/completion_kit/metric.rb +97 -0
- data/app/models/completion_kit/model.rb +13 -0
- data/app/models/completion_kit/prompt.rb +99 -0
- data/app/models/completion_kit/provider_credential.rb +114 -0
- data/app/models/completion_kit/response.rb +30 -0
- data/app/models/completion_kit/review.rb +28 -0
- data/app/models/completion_kit/run.rb +253 -0
- data/app/models/completion_kit/run_metric.rb +6 -0
- data/app/models/completion_kit/suggestion.rb +8 -0
- data/app/services/completion_kit/anthropic_client.rb +86 -0
- data/app/services/completion_kit/api_config.rb +80 -0
- data/app/services/completion_kit/csv_processor.rb +65 -0
- data/app/services/completion_kit/judge_service.rb +87 -0
- data/app/services/completion_kit/llm_client.rb +45 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
- data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
- data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
- data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
- data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
- data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
- data/app/services/completion_kit/model_discovery_service.rb +223 -0
- data/app/services/completion_kit/ollama_client.rb +80 -0
- data/app/services/completion_kit/open_ai_client.rb +71 -0
- data/app/services/completion_kit/open_router_client.rb +69 -0
- data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
- data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
- data/app/views/completion_kit/api_reference/index.html.erb +308 -0
- data/app/views/completion_kit/criteria/_form.html.erb +46 -0
- data/app/views/completion_kit/criteria/edit.html.erb +14 -0
- data/app/views/completion_kit/criteria/index.html.erb +37 -0
- data/app/views/completion_kit/criteria/new.html.erb +13 -0
- data/app/views/completion_kit/criteria/show.html.erb +37 -0
- data/app/views/completion_kit/datasets/_form.html.erb +29 -0
- data/app/views/completion_kit/datasets/edit.html.erb +13 -0
- data/app/views/completion_kit/datasets/index.html.erb +38 -0
- data/app/views/completion_kit/datasets/new.html.erb +12 -0
- data/app/views/completion_kit/datasets/show.html.erb +45 -0
- data/app/views/completion_kit/metrics/_form.html.erb +72 -0
- data/app/views/completion_kit/metrics/edit.html.erb +13 -0
- data/app/views/completion_kit/metrics/index.html.erb +34 -0
- data/app/views/completion_kit/metrics/new.html.erb +12 -0
- data/app/views/completion_kit/metrics/show.html.erb +49 -0
- data/app/views/completion_kit/prompts/_form.html.erb +52 -0
- data/app/views/completion_kit/prompts/edit.html.erb +13 -0
- data/app/views/completion_kit/prompts/index.html.erb +46 -0
- data/app/views/completion_kit/prompts/new.html.erb +12 -0
- data/app/views/completion_kit/prompts/show.html.erb +156 -0
- data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
- data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
- data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
- data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
- data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
- data/app/views/completion_kit/responses/show.html.erb +87 -0
- data/app/views/completion_kit/runs/_actions.html.erb +14 -0
- data/app/views/completion_kit/runs/_form.html.erb +159 -0
- data/app/views/completion_kit/runs/_progress.html.erb +18 -0
- data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
- data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
- data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
- data/app/views/completion_kit/runs/edit.html.erb +14 -0
- data/app/views/completion_kit/runs/index.html.erb +43 -0
- data/app/views/completion_kit/runs/new.html.erb +12 -0
- data/app/views/completion_kit/runs/show.html.erb +79 -0
- data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
- data/app/views/layouts/completion_kit/application.html.erb +77 -0
- data/config/routes.rb +55 -0
- data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
- data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
- data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
- data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
- data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
- data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
- data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
- data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
- data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
- data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
- data/lib/completion-kit.rb +1 -0
- data/lib/completion_kit/engine.rb +35 -0
- data/lib/completion_kit/version.rb +3 -0
- data/lib/completion_kit.rb +55 -0
- data/lib/generators/completion_kit/install_generator.rb +21 -0
- data/lib/generators/completion_kit/templates/README +20 -0
- data/lib/generators/completion_kit/templates/initializer.rb +43 -0
- metadata +361 -0
|
require "faraday"
require "faraday/retry"
require "json"

module CompletionKit
  # Discovers the models currently offered by a provider, reconciles them with
  # the persisted Model records, and probes newly discovered models
  # (OpenAI/Anthropic only) for generation and judging support.
  class ModelDiscoveryService
    # Shared HTTP timeouts (seconds) for all discovery and probe requests.
    TIMEOUT = 15
    OPEN_TIMEOUT = 5

    # @param config [Hash] expects :provider ("openai", "anthropic",
    #   "openrouter" or "ollama"), :api_key, and :api_endpoint (ollama only).
    def initialize(config:)
      @provider = config[:provider]
      @api_key = config[:api_key]
      @api_endpoint = config[:api_endpoint]
    end

    # Runs a full discovery pass, yielding (current, total) to the optional
    # block as each model is probed. OpenRouter/Ollama models are created with
    # supports_generation preset (see #reconcile), so probing is skipped.
    def refresh!(&on_progress)
      reconcile(fetch_models)
      return if %w[openrouter ollama].include?(@provider)

      probe_new_models(&on_progress)
    end

    private

    # Returns [{ id:, display_name: }, ...] for the configured provider;
    # unknown providers discover nothing.
    def fetch_models
      case @provider
      when "openai" then fetch_openai_models
      when "anthropic" then fetch_anthropic_models
      when "openrouter" then fetch_openrouter_models
      when "ollama" then fetch_ollama_models
      else []
      end
    end

    # Plain connection used for model-list requests (no retry).
    def fetch_connection(base_url)
      Faraday.new(url: base_url) do |f|
        f.options.timeout = TIMEOUT
        f.options.open_timeout = OPEN_TIMEOUT
        f.adapter Faraday.default_adapter
      end
    end

    # Retrying connection used for generation/judging probes. Extracted so the
    # OpenAI and Anthropic probes no longer duplicate connection setup.
    def probe_connection(base_url)
      Faraday.new(url: base_url) do |f|
        f.options.timeout = TIMEOUT
        f.options.open_timeout = OPEN_TIMEOUT
        f.request :retry, max: 1, interval: 0.5
        f.adapter Faraday.default_adapter
      end
    end

    def fetch_openai_models
      response = fetch_connection("https://api.openai.com").get("/v1/models") do |req|
        req.headers["Authorization"] = "Bearer #{@api_key}"
      end
      return [] unless response.success?

      # OpenAI's list endpoint carries no display names.
      JSON.parse(response.body).fetch("data", []).map { |e| { id: e["id"], display_name: nil } }
    end

    def fetch_anthropic_models
      response = fetch_connection("https://api.anthropic.com").get("/v1/models?limit=100") do |req|
        req.headers["x-api-key"] = @api_key
        req.headers["anthropic-version"] = "2023-06-01"
      end
      return [] unless response.success?

      JSON.parse(response.body).fetch("data", []).map { |e| { id: e["id"], display_name: e["display_name"] } }
    end

    # Skips deprecated models and models with a context window below 8192
    # tokens.
    def fetch_openrouter_models
      response = fetch_connection("https://openrouter.ai").get("/api/v1/models") do |req|
        req.headers["Authorization"] = "Bearer #{@api_key}"
        req.headers["HTTP-Referer"] = "https://completionkit.com"
        req.headers["X-Title"] = "CompletionKit"
      end
      return [] unless response.success?

      JSON.parse(response.body).fetch("data", []).filter_map do |entry|
        next nil if entry["deprecated"] == true

        context_length = entry["context_length"].to_i
        next nil if context_length < 8192

        { id: entry["id"], display_name: entry["name"] }
      end
    end

    def fetch_ollama_models
      return [] if @api_endpoint.nil?

      base_url = @api_endpoint.to_s.delete_suffix("/")
      # NOTE(review): the leading "/" makes this path absolute, so any path
      # component already on @api_endpoint (e.g. a trailing "/v1") is dropped
      # by Faraday — confirm against how ollama credentials store endpoints.
      response = fetch_connection(base_url).get("/models") do |req|
        req.headers["Authorization"] = "Bearer #{@api_key}" if @api_key.present?
      end
      return [] unless response.success?

      JSON.parse(response.body).fetch("data", []).map { |e| { id: e["id"], display_name: e["id"] } }
    end

    # Syncs the fetched list into the Model table:
    #   * re-activates retired models that reappeared (refreshing names),
    #   * creates records for brand-new models,
    #   * retires active models the API no longer lists.
    def reconcile(models_with_names)
      api_model_ids = models_with_names.map { |m| m[:id] }
      names_by_id = models_with_names.each_with_object({}) { |m, h| h[m[:id]] = m[:display_name] }
      existing = Model.where(provider: @provider).index_by(&:model_id)

      api_model_ids.each do |model_id|
        if existing[model_id]
          attrs = { status: "active", retired_at: nil }
          attrs[:display_name] = names_by_id[model_id] if names_by_id[model_id].present?
          # Only write when something actually changed: un-retire, or refresh the name.
          existing[model_id].update!(attrs) if existing[model_id].status == "retired" || names_by_id[model_id].present?
        else
          attrs = {
            provider: @provider,
            model_id: model_id,
            display_name: names_by_id[model_id],
            status: "active",
            discovered_at: Time.current
          }
          # OpenRouter/Ollama models are never probed; assume generation works.
          if %w[openrouter ollama].include?(@provider)
            attrs[:supports_generation] = true
            attrs[:probed_at] = nil
          end
          Model.create!(attrs)
        end
      end

      active_not_in_api = Model.where(provider: @provider, status: "active")
                               .where.not(model_id: api_model_ids)
      active_not_in_api.update_all(status: "retired", retired_at: Time.current)
    end

    # Probes every active, not-yet-probed model and reports progress.
    def probe_new_models(&on_progress)
      unprobed = Model.where(provider: @provider, supports_generation: nil, status: "active")
      total = unprobed.count
      current = 0
      unprobed.find_each do |model|
        probe_generation(model)
        # Judging only makes sense for models that can generate at all.
        probe_judging(model) if model.supports_generation
        model.probed_at = Time.current
        model.status = "failed" if model.supports_generation == false
        model.save!
        current += 1
        on_progress&.call(current, total)
      end
    end

    # Sets supports_generation / generation_error on the model (not saved here).
    def probe_generation(model)
      response = send_probe(model.model_id, "Say hello", 20)
      if response.success?
        if extract_text(response).present?
          model.supports_generation = true
        else
          model.supports_generation = false
          model.generation_error = "Empty response"
        end
      else
        model.supports_generation = false
        model.generation_error = "#{response.status} - #{response.body.truncate(500)}"
      end
    rescue StandardError => e
      model.supports_generation = false
      model.generation_error = e.message
    end

    # Sets supports_judging / judging_error by checking whether the model can
    # follow the strict "Score:/Feedback:" output format (not saved here).
    def probe_judging(model)
      judge_input = <<~PROMPT
        You are an expert evaluator. You MUST respond with ONLY two lines in this exact format, nothing else:

        Score: <integer from 1 to 5>
        Feedback: <one sentence explaining why>

        AI output to evaluate: The sky is blue.
      PROMPT

      response = send_probe(model.model_id, judge_input, 50)
      if response.success?
        text = extract_text(response).to_s
        if text.match?(/Score:\s*\d/i)
          model.supports_judging = true
        else
          model.supports_judging = false
          model.judging_error = "Response not in Score/Feedback format: #{text.truncate(200)}"
        end
      else
        model.supports_judging = false
        model.judging_error = "#{response.status} - #{response.body.truncate(500)}"
      end
    rescue StandardError => e
      model.supports_judging = false
      model.judging_error = e.message
    end

    # Dispatches a tiny completion request; only openai/anthropic reach this
    # path because refresh! returns early for openrouter/ollama.
    def send_probe(model_id, input, max_tokens)
      if @provider == "openai"
        openai_probe(model_id, input, max_tokens)
      else
        anthropic_probe(model_id, input, max_tokens)
      end
    end

    # Pulls the generated text out of a provider-specific response payload.
    def extract_text(response)
      data = JSON.parse(response.body)
      if @provider == "openai"
        data.dig("output", 0, "content", 0, "text")
      else
        data.dig("content", 0, "text")
      end
    end

    def openai_probe(model_id, input, max_tokens)
      probe_connection("https://api.openai.com").post do |req|
        req.url "/v1/responses"
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{@api_key}"
        req.body = { model: model_id, input: input, max_output_tokens: max_tokens, store: false }.to_json
      end
    end

    def anthropic_probe(model_id, input, max_tokens)
      probe_connection("https://api.anthropic.com").post do |req|
        req.url "/v1/messages"
        req.headers["Content-Type"] = "application/json"
        req.headers["x-api-key"] = @api_key
        req.headers["anthropic-version"] = "2023-06-01"
        req.body = { model: model_id, messages: [{ role: "user", content: input }], max_tokens: max_tokens }.to_json
      end
    end
  end
end
|
module CompletionKit
  # LLM client for an OpenAI-compatible Ollama endpoint.
  class OllamaClient < LlmClient
    # Generates a completion via POST /v1/completions. Returns the completion
    # text, or an "Error: ..." string on any failure — this client never raises.
    #
    # options: :model, :max_tokens (default 1000), :temperature (default 0.7).
    def generate_completion(prompt, options = {})
      return "Error: API endpoint not configured" unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      model = options[:model]
      max_tokens = options[:max_tokens] || 1000
      temperature = options[:temperature] || 0.7

      conn = Faraday.new(url: api_endpoint) do |f|
        f.request :retry, max: 2, interval: 0.5
        f.adapter Faraday.default_adapter
      end

      response = conn.post do |req|
        # Leading "/" makes the path absolute, so this always targets
        # <host>/v1/completions regardless of any path on api_endpoint.
        req.url "/v1/completions"
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{api_key}" if api_key.present?
        req.body = {
          model: model,
          prompt: prompt,
          max_tokens: max_tokens,
          temperature: temperature
        }.to_json
      end

      if response.success?
        data = JSON.parse(response.body)
        data["choices"][0]["text"].strip
      else
        "Error: #{response.status} - #{response.body}"
      end
    rescue => e
      "Error: #{e.message}"
    end

    # Lists models from GET /v1/models. Returns [{ id:, name: }, ...] sorted
    # by id, or [] on any failure.
    def available_models
      return [] unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      # Use an absolute "/v1/models" path on the endpoint's host, matching
      # generate_completion. The previous concatenation
      # ("#{api_endpoint}/v1/models") produced ".../v1/v1/models" with the
      # default endpoint, which already ends in "/v1".
      response = Faraday.new(url: api_endpoint).get("/v1/models") do |req|
        req.headers["Authorization"] = "Bearer #{api_key}" if api_key.present?
      end

      return [] unless response.success?

      models = JSON.parse(response.body).fetch("data", []).map { |entry| entry["id"] }.sort
      models.map { |id| { id: id, name: id } }
    rescue StandardError
      []
    end

    # Only the endpoint is mandatory; the API key is optional for Ollama.
    def configured?
      api_endpoint.present?
    end

    def configuration_errors
      errors = []
      errors << "Ollama API endpoint is not configured" unless api_endpoint.present?
      errors
    end

    private

    # Optional bearer token; sent only when present.
    def api_key
      @config[:api_key] || ENV["OLLAMA_API_KEY"]
    end

    # Normalized endpoint (no trailing slash); defaults to the local Ollama
    # OpenAI-compatible base URL.
    def api_endpoint
      (@config[:api_endpoint] || ENV["OLLAMA_API_ENDPOINT"] || "http://localhost:11434/v1").to_s.delete_suffix("/")
    end
  end
end
|
module CompletionKit
  # LLM client for the OpenAI Responses API.
  class OpenAiClient < LlmClient
    # Fallback model list used when live discovery is unavailable.
    STATIC_MODELS = [
      { id: "gpt-5.4-mini", name: "GPT-5.4 Mini" },
      { id: "gpt-4.1-mini", name: "GPT-4.1 Mini" },
      { id: "gpt-4o-mini", name: "GPT-4o Mini" }
    ].freeze

    # Generates a completion via POST /v1/responses. Returns the text, or an
    # "Error: ..." string for HTTP/parse failures. Transport-level
    # Faraday::Error is deliberately re-raised rather than stringified.
    #
    # options: :model (default "gpt-4.1-mini"), :max_tokens (default 1000),
    # :temperature (default 0.7).
    def generate_completion(prompt, options = {})
      return "Error: API key not configured" unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      model = options[:model] || "gpt-4.1-mini"
      max_tokens = options[:max_tokens] || 1000
      temperature = options[:temperature] || 0.7

      conn = Faraday.new(url: "https://api.openai.com") do |f|
        f.request :retry, max: 2, interval: 0.5
        f.adapter Faraday.default_adapter
      end

      response = conn.post do |req|
        req.url "/v1/responses"
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{api_key}"
        req.body = {
          model: model,
          input: prompt,
          instructions: "You are a helpful assistant.",
          max_output_tokens: max_tokens,
          temperature: temperature,
          store: false
        }.to_json
      end

      if response.success?
        data = JSON.parse(response.body)
        # The Responses API "output" array can contain non-message items
        # (e.g. reasoning) before the message, so locate the message item
        # instead of assuming it is first.
        output = data["output"] || []
        message = output.find { |item| item["type"] == "message" } || output[0]
        message.dig("content", 0, "text").to_s.strip
      else
        "Error: #{response.status} - #{response.body}"
      end
    rescue Faraday::Error
      # Not swallowed into an error string; callers see transport failures.
      raise
    rescue => e
      "Error: #{e.message}"
    end

    def available_models
      STATIC_MODELS
    end

    def configured?
      api_key.present?
    end

    def configuration_errors
      errors = []
      errors << "OpenAI API key is not configured" unless api_key.present?
      errors
    end

    private

    def api_key
      @config[:api_key] || ENV["OPENAI_API_KEY"]
    end
  end
end
|
module CompletionKit
  # LLM client for OpenRouter's chat-completions API.
  class OpenRouterClient < LlmClient
    BASE_URL = "https://openrouter.ai/api/v1".freeze
    REFERER = "https://completionkit.com".freeze
    APP_TITLE = "CompletionKit".freeze

    # Generates a completion via POST /api/v1/chat/completions. Returns the
    # text, or an "Error: ..." string on any failure — this client never raises.
    #
    # options: :model (default "openai/gpt-4o-mini"), :max_tokens (default
    # 1000), :temperature (default 0.7).
    def generate_completion(prompt, options = {})
      return "Error: API key not configured" unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      model = options[:model] || "openai/gpt-4o-mini"
      max_tokens = options[:max_tokens] || 1000
      temperature = options[:temperature] || 0.7

      conn = Faraday.new(url: BASE_URL) do |f|
        f.options.timeout = 30
        f.options.open_timeout = 5
        f.request :retry, max: 2, interval: 0.5
        f.adapter Faraday.default_adapter
      end

      response = conn.post do |req|
        # Must be the full absolute path: a leading "/" replaces the
        # connection's path prefix, so the previous "/chat/completions"
        # silently dropped BASE_URL's "/api/v1" and hit the wrong URL.
        req.url "/api/v1/chat/completions"
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{api_key}"
        req.headers["HTTP-Referer"] = REFERER
        req.headers["X-Title"] = APP_TITLE
        req.body = {
          model: model,
          messages: [{ role: "user", content: prompt }],
          max_tokens: max_tokens,
          temperature: temperature
        }.to_json
      end

      if response.success?
        data = JSON.parse(response.body)
        data.dig("choices", 0, "message", "content").to_s.strip
      else
        "Error: #{response.status} - #{response.body}"
      end
    rescue => e
      "Error: #{e.message}"
    end

    # OpenRouter models are discovered elsewhere (ModelDiscoveryService).
    def available_models
      []
    end

    def configured?
      api_key.present?
    end

    def configuration_errors
      errors = []
      errors << "OpenRouter API key is not configured" unless api_key.present?
      errors
    end

    private

    def api_key
      @config[:api_key] || ENV["OPENROUTER_API_KEY"]
    end
  end
end
|
module CompletionKit
  # Builds a meta-prompt from a run's test results and asks the prompt's own
  # model to propose an improved prompt template.
  class PromptImprovementService
    def initialize(run)
      @run = run
      @prompt = run.prompt
    end

    # Returns a hash with "reasoning", "suggested_template" and
    # "original_template" keys parsed out of the model's reply.
    def suggest
      model_config = ApiConfig.for_model(@prompt.llm_model)
      client = LlmClient.for_model(@prompt.llm_model, model_config)
      reply = client.generate_completion(build_meta_prompt, model: @prompt.llm_model, max_tokens: 2000, temperature: 0.4)
      parse_response(reply)
    end

    private

    # Assembles the analysis prompt: current template, up to 20 responses with
    # their reviews, aggregate scores, and strict output-format instructions.
    def build_meta_prompt
      lines = [
        "You are an expert prompt engineer. Analyze the following prompt and its test results, then suggest an improved version.",
        "",
        "## Current Prompt",
        "```",
        @prompt.template,
        "```",
        "",
        "## Test Results",
        ""
      ]

      sampled_responses = @run.responses.includes(:reviews).limit(20)

      sampled_responses.each.with_index(1) do |resp, number|
        lines << "### Response #{number}"
        lines << "Input: #{resp.input_data.truncate(200)}" if resp.input_data.present?
        lines << "Output: #{resp.response_text.to_s.truncate(300)}"
        lines << "Expected: #{resp.expected_output.truncate(200)}" if resp.expected_output.present?
        resp.reviews.each do |review|
          lines << "  #{review.metric_name}: #{review.ai_score}/5 — #{review.ai_feedback}"
        end
        lines << ""
      end

      avg = @run.avg_score
      lines << "## Overall Score: #{avg}/5" if avg

      metric_avgs = @run.metric_averages
      if metric_avgs.any?
        lines << "## Metric Averages"
        metric_avgs.each { |m| lines << "  #{m[:name]}: #{m[:avg]}/5" }
        lines << ""
      end

      lines.concat([
        "## Instructions",
        "Based on the test results above, suggest an improved version of the prompt.",
        "Focus on addressing the weakest scoring areas while preserving what works well.",
        "",
        "Respond in EXACTLY this format:",
        "",
        "REASONING:",
        "<2-4 bullet points explaining what you'd change and why>",
        "",
        "IMPROVED_PROMPT:",
        "<the full improved prompt template, preserving all {{variable}} placeholders>"
      ])

      lines.join("\n")
    end

    # Splits the model output on the REASONING:/IMPROVED_PROMPT: markers,
    # falling back to the whole text as the suggested template.
    def parse_response(text)
      reasoning = text.match(/REASONING:\s*\n(.*?)(?=IMPROVED_PROMPT:)/m)
      improved = text.match(/IMPROVED_PROMPT:\s*\n(.*)/m)

      {
        "reasoning" => reasoning ? reasoning[1].strip : "No reasoning provided.",
        "suggested_template" => improved ? improved[1].strip : text.strip,
        "original_template" => @prompt.template
      }
    end
  end
end
|
<%# Renders one copyable API example. Locals: `cmd` (the example text shown in
    the <pre>), `token` (display token) and `real_token` — both tokens are
    exposed as data attributes for the ckCopyExample JS handler, which
    presumably substitutes the real token on copy (defined elsewhere; verify). %>
<div class="ck-api-example">
  <pre class="ck-code ck-code--dark"><%= cmd %></pre>
  <button type="button" class="ck-api-copy" aria-label="Copy example to clipboard" onclick="ckCopyExample(this)" data-real-token="<%= real_token %>" data-display-token="<%= token %>">
    <%= heroicon_tag "clipboard-document", variant: :outline, size: 14, "aria-hidden": "true" %>
  </button>
</div>