completion-kit 0.1.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +192 -0
  4. data/Rakefile +12 -0
  5. data/app/assets/config/completion_kit_manifest.js +1 -0
  6. data/app/assets/config/manifest.js +3 -0
  7. data/app/assets/images/completion_kit/logo.svg +6 -0
  8. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
  9. data/app/assets/stylesheets/completion_kit/application.css +2214 -0
  10. data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
  11. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
  12. data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
  13. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
  14. data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
  15. data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
  16. data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
  17. data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
  18. data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
  19. data/app/controllers/completion_kit/application_controller.rb +31 -0
  20. data/app/controllers/completion_kit/criteria_controller.rb +67 -0
  21. data/app/controllers/completion_kit/datasets_controller.rb +53 -0
  22. data/app/controllers/completion_kit/mcp_controller.rb +57 -0
  23. data/app/controllers/completion_kit/metrics_controller.rb +52 -0
  24. data/app/controllers/completion_kit/prompts_controller.rb +69 -0
  25. data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
  26. data/app/controllers/completion_kit/responses_controller.rb +44 -0
  27. data/app/controllers/completion_kit/runs_controller.rb +131 -0
  28. data/app/helpers/completion_kit/application_helper.rb +193 -0
  29. data/app/jobs/completion_kit/application_job.rb +4 -0
  30. data/app/jobs/completion_kit/generate_job.rb +12 -0
  31. data/app/jobs/completion_kit/judge_job.rb +12 -0
  32. data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
  33. data/app/mailers/completion_kit/application_mailer.rb +6 -0
  34. data/app/models/completion_kit/application_record.rb +5 -0
  35. data/app/models/completion_kit/criteria.rb +22 -0
  36. data/app/models/completion_kit/criteria_membership.rb +20 -0
  37. data/app/models/completion_kit/dataset.rb +24 -0
  38. data/app/models/completion_kit/metric.rb +97 -0
  39. data/app/models/completion_kit/model.rb +13 -0
  40. data/app/models/completion_kit/prompt.rb +99 -0
  41. data/app/models/completion_kit/provider_credential.rb +114 -0
  42. data/app/models/completion_kit/response.rb +30 -0
  43. data/app/models/completion_kit/review.rb +28 -0
  44. data/app/models/completion_kit/run.rb +253 -0
  45. data/app/models/completion_kit/run_metric.rb +6 -0
  46. data/app/models/completion_kit/suggestion.rb +8 -0
  47. data/app/services/completion_kit/anthropic_client.rb +86 -0
  48. data/app/services/completion_kit/api_config.rb +80 -0
  49. data/app/services/completion_kit/csv_processor.rb +65 -0
  50. data/app/services/completion_kit/judge_service.rb +87 -0
  51. data/app/services/completion_kit/llm_client.rb +45 -0
  52. data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
  53. data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
  54. data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
  55. data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
  56. data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
  57. data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
  58. data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
  59. data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
  60. data/app/services/completion_kit/model_discovery_service.rb +223 -0
  61. data/app/services/completion_kit/ollama_client.rb +80 -0
  62. data/app/services/completion_kit/open_ai_client.rb +71 -0
  63. data/app/services/completion_kit/open_router_client.rb +69 -0
  64. data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
  65. data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
  66. data/app/views/completion_kit/api_reference/index.html.erb +308 -0
  67. data/app/views/completion_kit/criteria/_form.html.erb +46 -0
  68. data/app/views/completion_kit/criteria/edit.html.erb +14 -0
  69. data/app/views/completion_kit/criteria/index.html.erb +37 -0
  70. data/app/views/completion_kit/criteria/new.html.erb +13 -0
  71. data/app/views/completion_kit/criteria/show.html.erb +37 -0
  72. data/app/views/completion_kit/datasets/_form.html.erb +29 -0
  73. data/app/views/completion_kit/datasets/edit.html.erb +13 -0
  74. data/app/views/completion_kit/datasets/index.html.erb +38 -0
  75. data/app/views/completion_kit/datasets/new.html.erb +12 -0
  76. data/app/views/completion_kit/datasets/show.html.erb +45 -0
  77. data/app/views/completion_kit/metrics/_form.html.erb +72 -0
  78. data/app/views/completion_kit/metrics/edit.html.erb +13 -0
  79. data/app/views/completion_kit/metrics/index.html.erb +34 -0
  80. data/app/views/completion_kit/metrics/new.html.erb +12 -0
  81. data/app/views/completion_kit/metrics/show.html.erb +49 -0
  82. data/app/views/completion_kit/prompts/_form.html.erb +52 -0
  83. data/app/views/completion_kit/prompts/edit.html.erb +13 -0
  84. data/app/views/completion_kit/prompts/index.html.erb +46 -0
  85. data/app/views/completion_kit/prompts/new.html.erb +12 -0
  86. data/app/views/completion_kit/prompts/show.html.erb +156 -0
  87. data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
  88. data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
  89. data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
  90. data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
  91. data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
  92. data/app/views/completion_kit/responses/show.html.erb +87 -0
  93. data/app/views/completion_kit/runs/_actions.html.erb +14 -0
  94. data/app/views/completion_kit/runs/_form.html.erb +159 -0
  95. data/app/views/completion_kit/runs/_progress.html.erb +18 -0
  96. data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
  97. data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
  98. data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
  99. data/app/views/completion_kit/runs/edit.html.erb +14 -0
  100. data/app/views/completion_kit/runs/index.html.erb +43 -0
  101. data/app/views/completion_kit/runs/new.html.erb +12 -0
  102. data/app/views/completion_kit/runs/show.html.erb +79 -0
  103. data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
  104. data/app/views/layouts/completion_kit/application.html.erb +77 -0
  105. data/config/routes.rb +55 -0
  106. data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
  107. data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
  108. data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
  109. data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
  110. data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
  111. data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
  112. data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
  113. data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
  114. data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
  115. data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
  116. data/lib/completion-kit.rb +1 -0
  117. data/lib/completion_kit/engine.rb +35 -0
  118. data/lib/completion_kit/version.rb +3 -0
  119. data/lib/completion_kit.rb +55 -0
  120. data/lib/generators/completion_kit/install_generator.rb +21 -0
  121. data/lib/generators/completion_kit/templates/README +20 -0
  122. data/lib/generators/completion_kit/templates/initializer.rb +43 -0
  123. metadata +361 -0
@@ -0,0 +1,223 @@
1
+ require "faraday"
2
+ require "faraday/retry"
3
+ require "json"
4
+
5
module CompletionKit
  # Discovers the models a provider offers, mirrors that list into Model
  # records, and (for providers that are probed) checks whether each newly
  # discovered model can generate text and follow the judge output format.
  class ModelDiscoveryService
    # Providers whose models are created as generation-capable and never probed.
    UNPROBED_PROVIDERS = %w[openrouter ollama].freeze

    # @param config [Hash] reads :provider, :api_key and :api_endpoint
    def initialize(config:)
      @provider = config[:provider]
      @api_key = config[:api_key]
      @api_endpoint = config[:api_endpoint]
    end

    # Fetches the provider's model list, reconciles it with stored Model
    # records and probes models that have not been probed yet.
    #
    # @yield [current, total] after each probed model
    # @return [void]
    def refresh!(&on_progress)
      models_with_names = fetch_models
      # nil means the listing request itself failed (non-2xx). Reconciling
      # against an empty list would retire every active model of this provider
      # because of a transient error or a bad key, so leave the records alone.
      return if models_with_names.nil?

      reconcile(models_with_names)
      return if UNPROBED_PROVIDERS.include?(@provider)

      probe_new_models(&on_progress)
    end

    private

    # @return [Array<Hash>, nil] hashes with :id and :display_name; nil when
    #   the provider's listing request did not succeed
    def fetch_models
      case @provider
      when "openai" then fetch_openai_models
      when "anthropic" then fetch_anthropic_models
      when "openrouter" then fetch_openrouter_models
      when "ollama" then fetch_ollama_models
      else []
      end
    end

    # Builds the Faraday connection shared by listing and probe requests.
    #
    # @param base_url [String]
    # @param retries [Integer] when positive, installs the retry middleware
    def fetch_connection(base_url, retries: 0)
      Faraday.new(url: base_url) do |f|
        f.options.timeout = 15
        f.options.open_timeout = 5
        # NOTE(review): faraday-retry only retries idempotent HTTP methods by
        # default, so this may be a no-op for the POST probes — confirm.
        f.request :retry, max: retries, interval: 0.5 if retries.positive?
        f.adapter Faraday.default_adapter
      end
    end

    # @return [Array, nil] the "data" array of a listing response, or nil when
    #   the request was not successful
    def listed_entries(response)
      return nil unless response.success?

      JSON.parse(response.body).fetch("data", [])
    end

    def fetch_openai_models
      response = fetch_connection("https://api.openai.com").get("/v1/models") do |req|
        req.headers["Authorization"] = "Bearer #{@api_key}"
      end
      listed_entries(response)&.map { |entry| { id: entry["id"], display_name: nil } }
    end

    def fetch_anthropic_models
      response = fetch_connection("https://api.anthropic.com").get("/v1/models?limit=100") do |req|
        req.headers["x-api-key"] = @api_key
        req.headers["anthropic-version"] = "2023-06-01"
      end
      listed_entries(response)&.map { |entry| { id: entry["id"], display_name: entry["display_name"] } }
    end

    def fetch_openrouter_models
      response = fetch_connection("https://openrouter.ai").get("/api/v1/models") do |req|
        req.headers["Authorization"] = "Bearer #{@api_key}"
        req.headers["HTTP-Referer"] = "https://completionkit.com"
        req.headers["X-Title"] = "CompletionKit"
      end
      listed_entries(response)&.filter_map do |entry|
        next nil if entry["deprecated"] == true
        next nil if entry["context_length"].to_i < 8192

        { id: entry["id"], display_name: entry["name"] }
      end
    end

    def fetch_ollama_models
      return [] if @api_endpoint.to_s.strip.empty?

      # Pass the full URL: a request path starting with "/" replaces the path
      # of the connection's base URL, which used to drop the "/v1" prefix.
      response = fetch_connection(ollama_url("")).get(ollama_url("models")) do |req|
        req.headers["Authorization"] = "Bearer #{@api_key}" if @api_key.present?
      end
      listed_entries(response)&.map { |entry| { id: entry["id"], display_name: entry["id"] } }
    end

    # Builds a URL under the endpoint's /v1 API, accepting an endpoint
    # configured with or without a trailing "/v1" or "/".
    def ollama_url(path)
      root = @api_endpoint.to_s.delete_suffix("/").delete_suffix("/v1")
      "#{root}/v1/#{path}"
    end

    # Creates records for new models, revives retired ones that reappeared,
    # refreshes display names and retires active models missing from the list.
    def reconcile(models_with_names)
      # Keyed by id so a model listed twice is only created once.
      names_by_id = models_with_names.each_with_object({}) { |m, h| h[m[:id]] = m[:display_name] }
      existing = Model.where(provider: @provider).index_by(&:model_id)

      names_by_id.each do |model_id, display_name|
        record = existing[model_id]
        if record
          update_existing(record, display_name)
        else
          create_discovered(model_id, display_name)
        end
      end

      Model.where(provider: @provider, status: "active")
           .where.not(model_id: names_by_id.keys)
           .update_all(status: "retired", retired_at: Time.current)
    end

    def update_existing(record, display_name)
      changes = {}
      # Only a retired model is revived. Previously any update also forced
      # status back to "active", which un-failed models whose probe had failed.
      changes.merge!(status: "active", retired_at: nil) if record.status == "retired"
      changes[:display_name] = display_name if display_name.present?
      record.update!(changes) unless changes.empty?
    end

    def create_discovered(model_id, display_name)
      attrs = {
        provider: @provider,
        model_id: model_id,
        display_name: display_name,
        status: "active",
        discovered_at: Time.current
      }
      if UNPROBED_PROVIDERS.include?(@provider)
        attrs[:supports_generation] = true
        attrs[:probed_at] = nil
      end
      Model.create!(attrs)
    end

    # Probes every active model of this provider that has no generation result
    # yet, marking models that cannot generate as "failed".
    def probe_new_models(&on_progress)
      unprobed = Model.where(provider: @provider, supports_generation: nil, status: "active")
      total = unprobed.count
      current = 0
      unprobed.find_each do |model|
        probe_generation(model)
        probe_judging(model) if model.supports_generation
        model.probed_at = Time.current
        model.status = "failed" if model.supports_generation == false
        model.save!
        current += 1
        on_progress&.call(current, total)
      end
    end

    # Sets supports_generation / generation_error on the model (not saved here).
    def probe_generation(model)
      response = send_probe(model.model_id, "Say hello", 20)
      if response.success?
        if extract_text(response).present?
          model.supports_generation = true
        else
          model.supports_generation = false
          model.generation_error = "Empty response"
        end
      else
        model.supports_generation = false
        model.generation_error = "#{response.status} - #{response.body.to_s.truncate(500)}"
      end
    rescue StandardError => e
      model.supports_generation = false
      model.generation_error = e.message
    end

    # Sets supports_judging / judging_error on the model (not saved here).
    def probe_judging(model)
      judge_input = <<~PROMPT
        You are an expert evaluator. You MUST respond with ONLY two lines in this exact format, nothing else:

        Score: <integer from 1 to 5>
        Feedback: <one sentence explaining why>

        AI output to evaluate: The sky is blue.
      PROMPT

      response = send_probe(model.model_id, judge_input, 50)
      if response.success?
        text = extract_text(response).to_s
        if text.match?(/Score:\s*\d/i)
          model.supports_judging = true
        else
          model.supports_judging = false
          model.judging_error = "Response not in Score/Feedback format: #{text.truncate(200)}"
        end
      else
        model.supports_judging = false
        model.judging_error = "#{response.status} - #{response.body.to_s.truncate(500)}"
      end
    rescue StandardError => e
      model.supports_judging = false
      model.judging_error = e.message
    end

    def send_probe(model_id, input, max_tokens)
      if @provider == "openai"
        openai_probe(model_id, input, max_tokens)
      else
        anthropic_probe(model_id, input, max_tokens)
      end
    end

    # @return [String, nil] the generated text of a probe response
    def extract_text(response)
      data = JSON.parse(response.body)
      return data.dig("content", 0, "text") unless @provider == "openai"

      openai_output_text(data)
    end

    # Returns the first non-empty text part of any output item. The first item
    # is not necessarily the message (reasoning models emit a reasoning item
    # first), so reading output[0] alone reports working models as empty.
    def openai_output_text(data)
      Array(data["output"]).each do |item|
        next unless item.is_a?(Hash)

        Array(item["content"]).each do |part|
          text = part["text"] if part.is_a?(Hash)
          return text if text.is_a?(String) && !text.empty?
        end
      end
      nil
    end

    def openai_probe(model_id, input, max_tokens)
      fetch_connection("https://api.openai.com", retries: 1).post do |req|
        req.url "/v1/responses"
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{@api_key}"
        req.body = { model: model_id, input: input, max_output_tokens: max_tokens, store: false }.to_json
      end
    end

    def anthropic_probe(model_id, input, max_tokens)
      fetch_connection("https://api.anthropic.com", retries: 1).post do |req|
        req.url "/v1/messages"
        req.headers["Content-Type"] = "application/json"
        req.headers["x-api-key"] = @api_key
        req.headers["anthropic-version"] = "2023-06-01"
        req.body = { model: model_id, messages: [{ role: "user", content: input }], max_tokens: max_tokens }.to_json
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
module CompletionKit
  # LLM client for an Ollama server, reached through the OpenAI-compatible
  # endpoints under /v1.
  class OllamaClient < LlmClient
    # Generates a completion for the prompt.
    #
    # @param prompt [String]
    # @param options [Hash] reads :model, :max_tokens (default 1000) and
    #   :temperature (default 0.7)
    # @return [String] the completion text, or a string starting with "Error:"
    def generate_completion(prompt, options = {})
      return "Error: API endpoint not configured" unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      model = options[:model]
      max_tokens = options[:max_tokens] || 1000
      temperature = options[:temperature] || 0.7

      conn = Faraday.new do |f|
        f.request :retry, max: 2, interval: 0.5
        f.adapter Faraday.default_adapter
      end

      # The full URL is passed on purpose: a request path starting with "/"
      # replaces the path of a connection's base URL, so a base of ".../v1"
      # plus "/v1/completions" only worked by accident.
      response = conn.post(v1_url("completions")) do |req|
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{api_key}" if api_key.present?
        req.body = {
          model: model,
          prompt: prompt,
          max_tokens: max_tokens,
          temperature: temperature
        }.to_json
      end

      return "Error: #{response.status} - #{response.body}" unless response.success?

      JSON.parse(response.body).dig("choices", 0, "text").to_s.strip
    rescue => e
      "Error: #{e.message}"
    end

    # Lists the models the server reports.
    #
    # @return [Array<Hash>] hashes with :id and :name, sorted by id; empty on
    #   any failure
    def available_models
      return [] unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      # Previously "#{api_endpoint}/v1/models", which became ".../v1/v1/models"
      # with the default endpoint.
      response = Faraday.get(v1_url("models")) do |req|
        req.headers["Authorization"] = "Bearer #{api_key}" if api_key.present?
      end

      return [] unless response.success?

      ids = JSON.parse(response.body).fetch("data", []).map { |entry| entry["id"] }.sort
      ids.map { |id| { id: id, name: id } }
    rescue StandardError
      []
    end

    def configured?
      api_endpoint.present?
    end

    def configuration_errors
      errors = []
      errors << "Ollama API endpoint is not configured" unless api_endpoint.present?
      errors
    end

    private

    def api_key
      @config[:api_key] || ENV["OLLAMA_API_KEY"]
    end

    def api_endpoint
      (@config[:api_endpoint] || ENV["OLLAMA_API_ENDPOINT"] || "http://localhost:11434/v1").to_s.delete_suffix("/")
    end

    # Builds a URL under the server's /v1 API, whether or not the configured
    # endpoint already ends in "/v1".
    def v1_url(path)
      "#{api_endpoint.delete_suffix('/v1')}/v1/#{path}"
    end
  end
end
@@ -0,0 +1,71 @@
1
module CompletionKit
  # LLM client that generates completions through the OpenAI Responses API.
  class OpenAiClient < LlmClient
    STATIC_MODELS = [
      { id: "gpt-5.4-mini", name: "GPT-5.4 Mini" },
      { id: "gpt-4.1-mini", name: "GPT-4.1 Mini" },
      { id: "gpt-4o-mini", name: "GPT-4o Mini" }
    ].freeze

    # Generates a completion for the prompt.
    #
    # @param prompt [String]
    # @param options [Hash] reads :model (default "gpt-4.1-mini"),
    #   :max_tokens (default 1000) and :temperature (default 0.7)
    # @return [String] the completion text, or a string starting with "Error:"
    # @raise [Faraday::Error] connection-level failures are re-raised
    def generate_completion(prompt, options = {})
      return "Error: API key not configured" unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      model = options[:model] || "gpt-4.1-mini"
      max_tokens = options[:max_tokens] || 1000
      temperature = options[:temperature] || 0.7

      conn = Faraday.new(url: "https://api.openai.com") do |f|
        f.request :retry, max: 2, interval: 0.5
        f.adapter Faraday.default_adapter
      end

      response = conn.post do |req|
        req.url "/v1/responses"
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{api_key}"
        req.body = {
          model: model,
          input: prompt,
          instructions: "You are a helpful assistant.",
          max_output_tokens: max_tokens,
          temperature: temperature,
          store: false
        }.to_json
      end

      if response.success?
        extract_output_text(JSON.parse(response.body))
      else
        "Error: #{response.status} - #{response.body}"
      end
    rescue Faraday::Error
      # Deliberately not converted to an "Error:" string; let the caller see it.
      raise
    rescue => e
      "Error: #{e.message}"
    end

    def available_models
      STATIC_MODELS
    end

    def configured?
      api_key.present?
    end

    def configuration_errors
      errors = []
      errors << "OpenAI API key is not configured" unless api_key.present?
      errors
    end

    private

    def api_key
      @config[:api_key] || ENV["OPENAI_API_KEY"]
    end

    # Joins the text parts of every output item. The message is not always
    # output[0] (reasoning models emit a reasoning item first), so indexing
    # the first item raised on otherwise valid responses.
    def extract_output_text(data)
      parts = Array(data["output"]).flat_map { |item| item.is_a?(Hash) ? Array(item["content"]) : [] }
      texts = parts.filter_map { |part| part["text"] if part.is_a?(Hash) }
      return "Error: response contained no output text" if texts.empty?

      texts.join.strip
    end
  end
end
@@ -0,0 +1,69 @@
1
module CompletionKit
  # LLM client that generates completions through OpenRouter's chat
  # completions endpoint.
  class OpenRouterClient < LlmClient
    BASE_URL = "https://openrouter.ai/api/v1".freeze
    REFERER = "https://completionkit.com".freeze
    APP_TITLE = "CompletionKit".freeze

    # Generates a completion for the prompt.
    #
    # @param prompt [String]
    # @param options [Hash] reads :model (default "openai/gpt-4o-mini"),
    #   :max_tokens (default 1000) and :temperature (default 0.7)
    # @return [String] the completion text, or a string starting with "Error:"
    def generate_completion(prompt, options = {})
      return "Error: API key not configured" unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      model = options[:model] || "openai/gpt-4o-mini"
      max_tokens = options[:max_tokens] || 1000
      temperature = options[:temperature] || 0.7

      conn = Faraday.new(url: BASE_URL) do |f|
        f.options.timeout = 30
        f.options.open_timeout = 5
        f.request :retry, max: 2, interval: 0.5
        f.adapter Faraday.default_adapter
      end

      response = conn.post do |req|
        # Relative path (no leading slash) so it is appended to BASE_URL's
        # "/api/v1"; an absolute "/chat/completions" replaces that prefix and
        # the request goes to https://openrouter.ai/chat/completions.
        req.url "chat/completions"
        req.headers["Content-Type"] = "application/json"
        req.headers["Authorization"] = "Bearer #{api_key}"
        req.headers["HTTP-Referer"] = REFERER
        req.headers["X-Title"] = APP_TITLE
        req.body = {
          model: model,
          messages: [{ role: "user", content: prompt }],
          max_tokens: max_tokens,
          temperature: temperature
        }.to_json
      end

      if response.success?
        data = JSON.parse(response.body)
        data.dig("choices", 0, "message", "content").to_s.strip
      else
        "Error: #{response.status} - #{response.body}"
      end
    rescue => e
      "Error: #{e.message}"
    end

    # @return [Array] always empty in this client
    def available_models
      []
    end

    def configured?
      api_key.present?
    end

    def configuration_errors
      errors = []
      errors << "OpenRouter API key is not configured" unless api_key.present?
      errors
    end

    private

    def api_key
      @config[:api_key] || ENV["OPENROUTER_API_KEY"]
    end
  end
end
@@ -0,0 +1,81 @@
1
module CompletionKit
  # Asks the prompt's own model to propose an improved prompt template based
  # on the responses and review scores recorded for a run.
  class PromptImprovementService
    # @param run [Object] must respond to #prompt; #responses, #avg_score and
    #   #metric_averages are read when the meta prompt is built
    def initialize(run)
      @run = run
      @prompt = run.prompt
    end

    # @return [Hash] with string keys "reasoning", "suggested_template" and
    #   "original_template"
    def suggest
      client = LlmClient.for_model(@prompt.llm_model, ApiConfig.for_model(@prompt.llm_model))
      response = client.generate_completion(build_meta_prompt, model: @prompt.llm_model, max_tokens: 2000, temperature: 0.4)
      parse_response(response)
    end

    private

    # Assembles the instruction text: the current template, up to 20 responses
    # with their reviews, the score summaries and the required answer format.
    def build_meta_prompt
      sections = []
      sections << "You are an expert prompt engineer. Analyze the following prompt and its test results, then suggest an improved version."
      sections << ""
      sections << "## Current Prompt"
      sections << "```"
      sections << @prompt.template
      sections << "```"
      sections << ""
      sections << "## Test Results"
      sections << ""

      sampled_responses = @run.responses.includes(:reviews).limit(20)

      sampled_responses.each_with_index do |resp, i|
        sections << "### Response #{i + 1}"
        sections << "Input: #{resp.input_data.truncate(200)}" if resp.input_data.present?
        sections << "Output: #{resp.response_text.to_s.truncate(300)}"
        sections << "Expected: #{resp.expected_output.truncate(200)}" if resp.expected_output.present?
        resp.reviews.each do |review|
          sections << "  #{review.metric_name}: #{review.ai_score}/5 — #{review.ai_feedback}"
        end
        sections << ""
      end

      avg = @run.avg_score
      sections << "## Overall Score: #{avg}/5" if avg

      metric_avgs = @run.metric_averages
      if metric_avgs.any?
        sections << "## Metric Averages"
        metric_avgs.each { |m| sections << "  #{m[:name]}: #{m[:avg]}/5" }
        sections << ""
      end

      sections << "## Instructions"
      sections << "Based on the test results above, suggest an improved version of the prompt."
      sections << "Focus on addressing the weakest scoring areas while preserving what works well."
      sections << ""
      sections << "Respond in EXACTLY this format:"
      sections << ""
      sections << "REASONING:"
      sections << "<2-4 bullet points explaining what you'd change and why>"
      sections << ""
      sections << "IMPROVED_PROMPT:"
      sections << "<the full improved prompt template, preserving all {{variable}} placeholders>"

      sections.join("\n")
    end

    # Splits the model's answer into reasoning and the suggested template.
    #
    # The markers are matched whether their content starts on the same line or
    # on the next one; requiring a newline after the marker made a same-line
    # answer fall through to "whole reply is the template". When no
    # IMPROVED_PROMPT marker is present the whole reply is still used.
    #
    # @param text [String, nil] raw completion text
    # @return [Hash]
    def parse_response(text)
      text = text.to_s
      reasoning = text[/REASONING:\s*(.*?)\s*IMPROVED_PROMPT:/m, 1]
      improved = text[/IMPROVED_PROMPT:\s*(.*)/m, 1]

      {
        "reasoning" => reasoning ? reasoning.strip : "No reasoning provided.",
        "suggested_template" => improved ? improved.strip : text.strip,
        "original_template" => @prompt.template
      }
    end
  end
end
@@ -0,0 +1,6 @@
1
<%# Renders one API example: the command text in a code block plus a copy-to-clipboard button. %>
<%# Locals read here: cmd (text shown in the block), real_token and token (emitted as data attributes on the button). %>
<%# NOTE(review): ckCopyExample is defined elsewhere; presumably it swaps the display token for the real one when copying — confirm. %>
<div class="ck-api-example">
  <pre class="ck-code ck-code--dark"><%= cmd %></pre>
  <button type="button" class="ck-api-copy" aria-label="Copy example to clipboard" onclick="ckCopyExample(this)" data-real-token="<%= real_token %>" data-display-token="<%= token %>">
    <%= heroicon_tag "clipboard-document", variant: :outline, size: 14, "aria-hidden": "true" %>
  </button>
</div>