completion-kit 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/LICENSE +97 -0
  3. data/README.md +19 -2
  4. data/app/assets/stylesheets/completion_kit/application.css +87 -0
  5. data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
  6. data/app/controllers/completion_kit/runs_controller.rb +31 -11
  7. data/app/helpers/completion_kit/application_helper.rb +4 -12
  8. data/app/jobs/completion_kit/generate_row_job.rb +102 -0
  9. data/app/jobs/completion_kit/judge_review_job.rb +110 -0
  10. data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
  11. data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
  12. data/app/models/completion_kit/prompt.rb +4 -0
  13. data/app/models/completion_kit/response.rb +29 -2
  14. data/app/models/completion_kit/review.rb +17 -2
  15. data/app/models/completion_kit/run.rb +90 -96
  16. data/app/services/completion_kit/anthropic_client.rb +13 -0
  17. data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
  18. data/app/services/completion_kit/ollama_client.rb +13 -0
  19. data/app/services/completion_kit/open_ai_client.rb +11 -0
  20. data/app/services/completion_kit/open_router_client.rb +13 -0
  21. data/app/services/completion_kit/worker_health.rb +10 -0
  22. data/app/views/completion_kit/api_reference/index.html.erb +0 -5
  23. data/app/views/completion_kit/prompts/_form.html.erb +8 -5
  24. data/app/views/completion_kit/runs/_actions.html.erb +1 -1
  25. data/app/views/completion_kit/runs/_form.html.erb +6 -3
  26. data/app/views/completion_kit/runs/_progress.html.erb +1 -1
  27. data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
  28. data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
  29. data/app/views/completion_kit/runs/show.html.erb +1 -1
  30. data/app/views/layouts/completion_kit/application.html.erb +28 -2
  31. data/config/routes.rb +2 -2
  32. data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
  33. data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
  34. data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
  35. data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
  36. data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
  37. data/lib/completion_kit/concurrency_check.rb +16 -0
  38. data/lib/completion_kit/errors.rb +16 -0
  39. data/lib/completion_kit/version.rb +1 -1
  40. data/lib/completion_kit.rb +2 -2
  41. data/lib/tasks/completion_kit_runs.rake +13 -0
  42. metadata +31 -7
  43. data/MIT-LICENSE +0 -20
  44. data/app/jobs/completion_kit/generate_job.rb +0 -12
  45. data/app/jobs/completion_kit/judge_job.rb +0 -12
@@ -1,9 +1,17 @@
1
+ <% snap = run.progress_snapshot %>
1
2
  <div id="run_status_header">
2
3
  <% if run.status == "failed" %>
3
4
  <div class="ck-flash ck-flash--alert">
4
- <%= run.error_message.presence || "Run failed." %>
5
+ <%= run.failure_summary.presence || run.error_message.presence || "Run failed." %>
5
6
  </div>
6
7
  <% end %>
8
+
9
+ <% if run.status == "running" && !CompletionKit::WorkerHealth.healthy? %>
10
+ <div class="ck-flash ck-flash--alert">
11
+ No worker process is running. Generate and judge jobs are queued but nothing is processing them. Start <code>bin/jobs</code> (or your worker service) to resume.
12
+ </div>
13
+ <% end %>
14
+
7
15
  <section class="ck-page-header">
8
16
  <div>
9
17
  <p class="ck-kicker"><span class="<%= ck_run_dot(run) %>"></span> <%= ck_run_status_label(run) %></p>
@@ -12,4 +20,31 @@
12
20
  </div>
13
21
  <%= render "completion_kit/runs/actions", run: run %>
14
22
  </section>
23
+
24
+ <% if run.status.in?(%w[running completed]) && snap[:generated_total] > 0 %>
25
+ <div class="ck-progress-block">
26
+ <div class="ck-progress-line">
27
+ Generated <%= snap[:generated_done] %>/<%= snap[:generated_total] %>
28
+ <% if snap[:generated_failed] > 0 %>
29
+ <span class="ck-progress-failed">(<%= snap[:generated_failed] %> failed)</span>
30
+ <% end %>
31
+ </div>
32
+ <% if snap[:judged_total] > 0 %>
33
+ <div class="ck-progress-line">
34
+ Judged <%= snap[:judged_done] %>/<%= snap[:judged_total] %>
35
+ <% if snap[:judged_failed] > 0 %>
36
+ <span class="ck-progress-failed">(<%= snap[:judged_failed] %> failed)</span>
37
+ <% end %>
38
+ </div>
39
+ <% end %>
40
+ <% failed_count = snap[:generated_failed] + snap[:judged_failed] %>
41
+ <% if failed_count > 0 %>
42
+ <%= button_to "Retry #{failed_count} failed #{"row".pluralize(failed_count)}",
43
+ retry_failures_run_path(run),
44
+ method: :post,
45
+ class: ck_button_classes(:light, variant: :outline),
46
+ form_class: "inline-block" %>
47
+ <% end %>
48
+ </div>
49
+ <% end %>
15
50
  </div>
@@ -52,7 +52,7 @@
52
52
  </div>
53
53
  <p class="ck-prompt-preview__text" id="prompt_text"><%= @run.prompt.template %></p>
54
54
  <% if @run.prompt.template.length > 200 %>
55
- <button type="button" class="ck-link" id="prompt_toggle" aria-expanded="false" aria-controls="prompt_text" onclick="var t=document.getElementById('prompt_text');var l=this;var expanded=t.classList.toggle('ck-prompt-preview__text--expanded');l.textContent=expanded?'Show less':'Show more';l.setAttribute('aria-expanded',expanded?'true':'false')">Show more</button>
55
+ <button type="button" class="ck-disclosure-toggle" id="prompt_toggle" aria-expanded="false" aria-controls="prompt_text" onclick="var t=document.getElementById('prompt_text');var l=this;var expanded=t.classList.toggle('ck-prompt-preview__text--expanded');l.firstChild.textContent=expanded?'Show less':'Show more';l.setAttribute('aria-expanded',expanded?'true':'false')"><span>Show more</span></button>
56
56
  <% end %>
57
57
  </div>
58
58
 
@@ -56,8 +56,7 @@ function ckRefreshModels() {
56
56
  ckRefreshing = true;
57
57
  var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
58
58
  if (btn) btn.classList.add('ck-icon-btn--spinning');
59
- var status = document.getElementById('refresh-status');
60
- if (status) status.textContent = 'Refreshing models\u2026';
59
+ ckUpdateRefreshProgress();
61
60
  var csrfToken = document.querySelector('meta[name="csrf-token"]').getAttribute("content");
62
61
  fetch("/completion_kit/refresh_models", {
63
62
  method: "POST",
@@ -65,8 +64,35 @@ function ckRefreshModels() {
65
64
  });
66
65
  }
67
66
 
// Refreshes the text of the #refresh-status element while model discovery is
// in flight.
//
// Scans every element whose id starts with "discovery_status_" inside a
// [data-refresh-progress-carriers] container. A carrier counts as "in
// progress" when it contains a .ck-discovery-bar that is neither
// --failed nor --completed. For each such carrier the first "current/total"
// number pair found in its text is added to the running totals.
//
// The status text is only rewritten when at least one carrier is in progress
// or the ckRefreshing flag (declared outside this function) is truthy;
// otherwise the element is left untouched.
function ckUpdateRefreshProgress() {
  var statusEl = document.getElementById('refresh-status');
  if (!statusEl) return;

  var nodes = document.querySelectorAll('[data-refresh-progress-carriers] [id^="discovery_status_"]');
  var done = 0;
  var expected = 0;
  var inFlight = false;

  for (var i = 0; i < nodes.length; i++) {
    var node = nodes[i];

    // Skip carriers with no bar at all, and bars that already finished.
    if (!node.querySelector('.ck-discovery-bar')) continue;
    if (node.querySelector('.ck-discovery-bar--failed') || node.querySelector('.ck-discovery-bar--completed')) continue;

    inFlight = true;

    // Pull "12 / 40"-style counters out of the rendered text.
    var counts = node.textContent.match(/(\d+)\s*\/\s*(\d+)/);
    if (counts) {
      done += parseInt(counts[1], 10);
      expected += parseInt(counts[2], 10);
    }
  }

  if (!(inFlight || ckRefreshing)) return;

  statusEl.textContent = expected > 0
    ? 'Refreshing models\u2026 ' + done + '/' + expected
    : 'Refreshing models\u2026';
}
90
+
68
91
  document.addEventListener("turbo:before-stream-render", function(event) {
69
92
  var target = event.target.getAttribute("target");
93
+ if (target && target.indexOf("discovery_status_") === 0) {
94
+ requestAnimationFrame(ckUpdateRefreshProgress);
95
+ }
70
96
  if (target === "prompt_llm_model" || target === "run_judge_model") {
71
97
  ckRefreshing = false;
72
98
  var btn = document.querySelector('.ck-icon-btn[title="Refresh models"]');
data/config/routes.rb CHANGED
@@ -14,10 +14,10 @@ CompletionKit::Engine.routes.draw do
14
14
  resources :runs do
15
15
  member do
16
16
  post :generate
17
- post :judge
18
17
  get :suggestion
19
18
  post :suggest
20
19
  post :apply_suggestion
20
+ post :retry_failures
21
21
  end
22
22
  resources :responses, only: [:show]
23
23
  end
@@ -39,7 +39,7 @@ CompletionKit::Engine.routes.draw do
39
39
  resources :runs do
40
40
  member do
41
41
  post :generate
42
- post :judge
42
+ post :retry_failures
43
43
  end
44
44
  resources :responses, only: [:index, :show]
45
45
  end
@@ -0,0 +1,21 @@
# Adds per-response state tracking to completion_kit_responses:
# a non-null `status` (default "pending"), four error-detail columns,
# an `attempts` counter (default 0) and a nullable `row_index`.
#
# When migrating up, rows that already hold a non-empty `response_text`
# are backfilled to status 'succeeded'; all other rows keep the
# "pending" default. Nothing extra runs on rollback.
class AddStatusAndErrorToResponses < ActiveRecord::Migration[7.1]
  def change
    change_table :completion_kit_responses do |t|
      t.string  :status, default: "pending", null: false
      t.string  :error_provider
      t.string  :error_class
      t.integer :error_status
      t.text    :error_message
      t.integer :attempts, default: 0, null: false
      t.integer :row_index
    end

    # Data backfill only makes sense in the up direction.
    up_only do
      execute <<~SQL
        UPDATE completion_kit_responses
        SET status = 'succeeded'
        WHERE response_text IS NOT NULL AND length(response_text) > 0
      SQL
    end
  end
end
@@ -0,0 +1,9 @@
# Adds a composite index on completion_kit_responses (run_id, status).
#
# The index is created with `if_not_exists: true`, so re-running the
# migration against a database that already has it is a no-op.
class IndexResponsesOnRunIdAndStatus < ActiveRecord::Migration[7.1]
  # Concurrent index builds cannot run inside a transaction, so the
  # migration opts out of the wrapping DDL transaction.
  disable_ddl_transaction!

  # Adapters whose add_index accepts `algorithm: :concurrently`.
  # Other adapters (SQLite, MySQL, ...) either ignore the concept or raise
  # on an unknown algorithm, so they get a plain index build instead.
  CONCURRENT_INDEX_ADAPTERS = %w[PostgreSQL PostGIS].freeze

  def change
    options = { if_not_exists: true }
    options[:algorithm] = :concurrently if CONCURRENT_INDEX_ADAPTERS.include?(connection.adapter_name)
    add_index :completion_kit_responses, %i[run_id status], **options
  end
end
@@ -0,0 +1,25 @@
# Adds error-detail columns and an `attempts` counter (default 0, non-null)
# to completion_kit_reviews, then normalises existing review statuses.
#
# NOTE(review): despite the class name, no `status` column is added here;
# the backfill below writes to `status`, so it presumably already exists
# on the table - confirm against the earlier schema.
#
# When migrating up:
#   * reviews that have an `ai_score` are marked 'succeeded';
#   * reviews still carrying the 'evaluated' status value are renamed to
#     'succeeded'.
# Nothing extra runs on rollback.
class AddStatusAndErrorToReviews < ActiveRecord::Migration[7.1]
  def change
    change_table :completion_kit_reviews do |t|
      t.string  :error_provider
      t.string  :error_class
      t.integer :error_status
      t.text    :error_message
      t.integer :attempts, default: 0, null: false
    end

    up_only do
      execute <<~SQL
        UPDATE completion_kit_reviews
        SET status = 'succeeded'
        WHERE ai_score IS NOT NULL
      SQL

      execute <<~SQL
        UPDATE completion_kit_reviews
        SET status = 'succeeded'
        WHERE status = 'evaluated'
      SQL
    end
  end
end
@@ -0,0 +1,9 @@
# Adds a composite index on completion_kit_reviews (response_id, status).
#
# The index is created with `if_not_exists: true`, so re-running the
# migration against a database that already has it is a no-op.
class IndexReviewsOnResponseIdAndStatus < ActiveRecord::Migration[7.1]
  # Concurrent index builds cannot run inside a transaction, so the
  # migration opts out of the wrapping DDL transaction.
  disable_ddl_transaction!

  # Adapters whose add_index accepts `algorithm: :concurrently`.
  # Other adapters (SQLite, MySQL, ...) either ignore the concept or raise
  # on an unknown algorithm, so they get a plain index build instead.
  CONCURRENT_INDEX_ADAPTERS = %w[PostgreSQL PostGIS].freeze

  def change
    options = { if_not_exists: true }
    options[:algorithm] = :concurrently if CONCURRENT_INDEX_ADAPTERS.include?(connection.adapter_name)
    add_index :completion_kit_reviews, %i[response_id status], **options
  end
end
@@ -0,0 +1,15 @@
# Adds a nullable `failure_summary` text column to completion_kit_runs and,
# when migrating up, rewrites the run statuses 'generating' and 'judging'
# to the single value 'running'.
#
# The status rewrite is one-way: rolling back drops the column but does not
# restore the original 'generating' / 'judging' values.
class CollapseRunStatusAndAddFailureSummary < ActiveRecord::Migration[7.1]
  def change
    add_column :completion_kit_runs, :failure_summary, :text

    up_only do
      execute <<~SQL
        UPDATE completion_kit_runs
        SET status = 'running'
        WHERE status IN ('generating', 'judging')
      SQL
    end
  end
end
@@ -0,0 +1,16 @@
module CompletionKit
  # Sanity check comparing two environment-driven concurrency settings.
  module ConcurrencyCheck
    class << self
      # Logs a warning when SOLID_QUEUE_THREADS is lower than
      # COMPLETION_KIT_LLM_CONCURRENCY. Both variables default to 10 when
      # unset and are coerced with String#to_i.
      #
      # @param logger [#warn] receives the warning message
      # @return [nil, Object] nil when no warning is needed, otherwise
      #   whatever logger.warn returns
      def warn_if_misconfigured(logger)
        threads, llm_cap = %w[SOLID_QUEUE_THREADS COMPLETION_KIT_LLM_CONCURRENCY].map do |key|
          ENV.fetch(key, 10).to_i
        end
        return unless threads < llm_cap

        message = "[CompletionKit] SOLID_QUEUE_THREADS=#{threads} is less than " \
                  "COMPLETION_KIT_LLM_CONCURRENCY=#{llm_cap}; " \
                  "threads will be the actual bottleneck and the " \
                  "per-provider cap will never be reached."
        logger.warn(message)
      end
    end
  end
end
@@ -0,0 +1,16 @@
module CompletionKit
  # Base class for the exceptions defined in this file, so callers can
  # rescue CompletionKit::Error to catch any of them.
  class Error < StandardError; end

  # NOTE(review): presumably raised for invalid or missing configuration -
  # the raise sites are not visible here.
  class ConfigurationError < Error; end

  # Error carrying optional rate-limit metadata alongside the message.
  class RateLimitError < Error
    # provider, status and retry_after are stored exactly as passed in;
    # NOTE(review): their expected types/units are not visible here.
    attr_reader :provider, :status, :retry_after

    # @param message [String, nil] error message (nil falls back to the
    #   standard exception default)
    # @param provider [Object, nil] stored and exposed via #provider
    # @param status [Object, nil] stored and exposed via #status
    # @param retry_after [Object, nil] stored and exposed via #retry_after
    def initialize(message = nil, provider: nil, status: nil, retry_after: nil)
      @provider, @status, @retry_after = provider, status, retry_after
      super(message)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.2.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -1,9 +1,9 @@
1
+ require "completion_kit/errors"
1
2
  require "completion_kit/version"
2
3
  require "completion_kit/engine"
4
+ require "completion_kit/concurrency_check"
3
5
 
4
6
  module CompletionKit
5
- class ConfigurationError < StandardError; end
6
-
7
7
  class Configuration
8
8
  attr_accessor :openai_api_key, :anthropic_api_key, :ollama_api_key, :ollama_api_endpoint
9
9
  attr_accessor :judge_model, :high_quality_threshold, :medium_quality_threshold
@@ -0,0 +1,13 @@
namespace :completion_kit do
  desc "Mark in-flight runs as failed (for use after the queue adapter cutover)"
  # Flips every run whose status is "running" to "failed", records
  # "Interrupted by deploy" as its failure_summary, and prints how many
  # rows were changed.
  task mark_interrupted_runs_failed: :environment do
    # update_all returns the number of rows it affected, so the reported
    # figure always matches what was actually changed (a separate COUNT
    # beforehand could drift if runs finish or start in between) and one
    # query is saved. Like any update_all, this bypasses callbacks and
    # validations, which is why updated_at is set explicitly.
    count = CompletionKit::Run.where(status: "running").update_all(
      status: "failed",
      failure_summary: "Interrupted by deploy",
      updated_at: Time.current
    )
    puts "Marked #{count} runs as failed."
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-24 00:00:00.000000000 Z
11
+ date: 2026-05-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -198,18 +198,32 @@ dependencies:
198
198
  - - "~>"
199
199
  - !ruby/object:Gem::Version
200
200
  version: '0.22'
201
+ - !ruby/object:Gem::Dependency
202
+ name: solid_queue
203
+ requirement: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - "~>"
206
+ - !ruby/object:Gem::Version
207
+ version: '1.0'
208
+ type: :development
209
+ prerelease: false
210
+ version_requirements: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - "~>"
213
+ - !ruby/object:Gem::Version
214
+ version: '1.0'
201
215
  description: CompletionKit is a prompt testing platform that runs as a Rails engine
202
216
  or a standalone app. Run prompts against real datasets, score every output with
203
217
  an LLM judge against criteria you define, track prompt versions, and get AI-generated
204
218
  improvement suggestions grounded in your actual results. Includes a web UI, REST
205
- API, and a built-in MCP server with 36 tools.
219
+ API, and a built-in MCP server with 34 tools.
206
220
  email:
207
221
  - damien@homemade.software
208
222
  executables: []
209
223
  extensions: []
210
224
  extra_rdoc_files: []
211
225
  files:
212
- - MIT-LICENSE
226
+ - LICENSE
213
227
  - README.md
214
228
  - Rakefile
215
229
  - app/assets/config/completion_kit_manifest.js
@@ -236,9 +250,10 @@ files:
236
250
  - app/controllers/completion_kit/runs_controller.rb
237
251
  - app/helpers/completion_kit/application_helper.rb
238
252
  - app/jobs/completion_kit/application_job.rb
239
- - app/jobs/completion_kit/generate_job.rb
240
- - app/jobs/completion_kit/judge_job.rb
253
+ - app/jobs/completion_kit/generate_row_job.rb
254
+ - app/jobs/completion_kit/judge_review_job.rb
241
255
  - app/jobs/completion_kit/model_discovery_job.rb
256
+ - app/jobs/completion_kit/run_completion_check_job.rb
242
257
  - app/mailers/completion_kit/application_mailer.rb
243
258
  - app/models/completion_kit/application_record.rb
244
259
  - app/models/completion_kit/dataset.rb
@@ -272,6 +287,7 @@ files:
272
287
  - app/services/completion_kit/open_ai_client.rb
273
288
  - app/services/completion_kit/open_router_client.rb
274
289
  - app/services/completion_kit/prompt_improvement_service.rb
290
+ - app/services/completion_kit/worker_health.rb
275
291
  - app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb
276
292
  - app/views/completion_kit/api_reference/_example.html.erb
277
293
  - app/views/completion_kit/api_reference/index.html.erb
@@ -326,16 +342,24 @@ files:
326
342
  - db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb
327
343
  - db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb
328
344
  - db/migrate/20260417000001_rename_criteria_to_metric_groups.rb
345
+ - db/migrate/20260501000001_add_status_and_error_to_responses.rb
346
+ - db/migrate/20260501000002_index_responses_on_run_id_and_status.rb
347
+ - db/migrate/20260501000003_add_status_and_error_to_reviews.rb
348
+ - db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb
349
+ - db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb
329
350
  - lib/completion-kit.rb
330
351
  - lib/completion_kit.rb
352
+ - lib/completion_kit/concurrency_check.rb
331
353
  - lib/completion_kit/engine.rb
354
+ - lib/completion_kit/errors.rb
332
355
  - lib/completion_kit/version.rb
333
356
  - lib/generators/completion_kit/install_generator.rb
334
357
  - lib/generators/completion_kit/templates/README
335
358
  - lib/generators/completion_kit/templates/initializer.rb
359
+ - lib/tasks/completion_kit_runs.rake
336
360
  homepage: https://github.com/homemade-software-inc/completion-kit
337
361
  licenses:
338
- - MIT
362
+ - BUSL-1.1
339
363
  metadata:
340
364
  homepage_uri: https://github.com/homemade-software-inc/completion-kit
341
365
  source_code_uri: https://github.com/homemade-software-inc/completion-kit
data/MIT-LICENSE DELETED
@@ -1,20 +0,0 @@
1
- Copyright
2
-
3
- Permission is hereby granted, free of charge, to any person obtaining
4
- a copy of this software and associated documentation files (the
5
- "Software"), to deal in the Software without restriction, including
6
- without limitation the rights to use, copy, modify, merge, publish,
7
- distribute, sublicense, and/or sell copies of the Software, and to
8
- permit persons to whom the Software is furnished to do so, subject to
9
- the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be
12
- included in all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,12 +0,0 @@
1
- module CompletionKit
2
- class GenerateJob < ApplicationJob
3
- queue_as :default
4
-
5
- def perform(run_id)
6
- run = Run.find_by(id: run_id)
7
- return unless run
8
-
9
- run.generate_responses!
10
- end
11
- end
12
- end
@@ -1,12 +0,0 @@
1
- module CompletionKit
2
- class JudgeJob < ApplicationJob
3
- queue_as :default
4
-
5
- def perform(run_id)
6
- run = Run.find_by(id: run_id)
7
- return unless run
8
-
9
- run.judge_responses!
10
- end
11
- end
12
- end