completion-kit 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -1
  3. data/app/assets/stylesheets/completion_kit/application.css +87 -0
  4. data/app/controllers/completion_kit/api/v1/runs_controller.rb +26 -5
  5. data/app/controllers/completion_kit/runs_controller.rb +31 -11
  6. data/app/helpers/completion_kit/application_helper.rb +4 -12
  7. data/app/jobs/completion_kit/generate_row_job.rb +102 -0
  8. data/app/jobs/completion_kit/judge_review_job.rb +110 -0
  9. data/app/jobs/completion_kit/model_discovery_job.rb +22 -4
  10. data/app/jobs/completion_kit/run_completion_check_job.rb +18 -0
  11. data/app/models/completion_kit/prompt.rb +4 -0
  12. data/app/models/completion_kit/response.rb +29 -2
  13. data/app/models/completion_kit/review.rb +17 -2
  14. data/app/models/completion_kit/run.rb +90 -96
  15. data/app/services/completion_kit/anthropic_client.rb +13 -0
  16. data/app/services/completion_kit/mcp_tools/runs.rb +5 -13
  17. data/app/services/completion_kit/ollama_client.rb +13 -0
  18. data/app/services/completion_kit/open_ai_client.rb +11 -0
  19. data/app/services/completion_kit/open_router_client.rb +13 -0
  20. data/app/services/completion_kit/worker_health.rb +10 -0
  21. data/app/views/completion_kit/api_reference/index.html.erb +0 -5
  22. data/app/views/completion_kit/prompts/_form.html.erb +8 -5
  23. data/app/views/completion_kit/runs/_actions.html.erb +1 -1
  24. data/app/views/completion_kit/runs/_form.html.erb +6 -3
  25. data/app/views/completion_kit/runs/_progress.html.erb +1 -1
  26. data/app/views/completion_kit/runs/_response_row.html.erb +26 -8
  27. data/app/views/completion_kit/runs/_status_header.html.erb +36 -1
  28. data/app/views/completion_kit/runs/show.html.erb +1 -1
  29. data/app/views/layouts/completion_kit/application.html.erb +28 -2
  30. data/config/routes.rb +2 -2
  31. data/db/migrate/20260501000001_add_status_and_error_to_responses.rb +21 -0
  32. data/db/migrate/20260501000002_index_responses_on_run_id_and_status.rb +9 -0
  33. data/db/migrate/20260501000003_add_status_and_error_to_reviews.rb +25 -0
  34. data/db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb +9 -0
  35. data/db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb +15 -0
  36. data/lib/completion_kit/concurrency_check.rb +16 -0
  37. data/lib/completion_kit/errors.rb +16 -0
  38. data/lib/completion_kit/version.rb +1 -1
  39. data/lib/completion_kit.rb +2 -2
  40. data/lib/tasks/completion_kit_runs.rake +13 -0
  41. metadata +29 -5
  42. data/app/jobs/completion_kit/generate_job.rb +0 -12
  43. data/app/jobs/completion_kit/judge_job.rb +0 -12
@@ -0,0 +1,21 @@
1
# Adds per-response lifecycle tracking to completion_kit_responses: a status
# column (defaulting to "pending"), error detail columns, an attempt counter
# and a row index. On the way up, rows that already hold non-empty
# response_text are marked as succeeded.
class AddStatusAndErrorToResponses < ActiveRecord::Migration[7.1]
  def change
    table = :completion_kit_responses

    add_column table, :status, :string, default: "pending", null: false
    add_column table, :error_provider, :string
    add_column table, :error_class, :string
    add_column table, :error_status, :integer
    add_column table, :error_message, :text
    add_column table, :attempts, :integer, default: 0, null: false
    add_column table, :row_index, :integer

    # Data backfill only makes sense when migrating up; rolling back simply
    # drops the columns added above.
    up_only do
      execute <<~SQL
        UPDATE completion_kit_responses
        SET status = 'succeeded'
        WHERE response_text IS NOT NULL AND length(response_text) > 0
      SQL
    end
  end
end
@@ -0,0 +1,9 @@
1
# Adds a composite index on completion_kit_responses (run_id, status).
#
# The index is built with the CONCURRENTLY algorithm only when the current
# adapter advertises support for it. Adapters that do not list the algorithm
# (SQLite, MySQL/Trilogy, ...) get a regular index build instead of failing
# on an unknown algorithm.
class IndexResponsesOnRunIdAndStatus < ActiveRecord::Migration[7.1]
  # Required so a concurrent index build is not wrapped in a DDL transaction.
  disable_ddl_transaction!

  def change
    options = { if_not_exists: true }
    # Feature-detect instead of comparing adapter names: each adapter reports
    # the index algorithms it understands through #index_algorithms.
    options[:algorithm] = :concurrently if connection.index_algorithms.key?(:concurrently)
    add_index :completion_kit_responses, [:run_id, :status], **options
  end
end
@@ -0,0 +1,25 @@
1
# Adds error detail columns and an attempt counter to completion_kit_reviews.
# On the way up, reviews that already have an ai_score, or that carry the
# 'evaluated' status, are moved to the 'succeeded' status.
class AddStatusAndErrorToReviews < ActiveRecord::Migration[7.1]
  def change
    table = :completion_kit_reviews

    add_column table, :error_provider, :string
    add_column table, :error_class, :string
    add_column table, :error_status, :integer
    add_column table, :error_message, :text
    add_column table, :attempts, :integer, default: 0, null: false

    # Status rewrites are applied only when migrating up.
    up_only do
      # Reviews that already received a score.
      execute <<~SQL
        UPDATE completion_kit_reviews
        SET status = 'succeeded'
        WHERE ai_score IS NOT NULL
      SQL

      # Reviews still labelled with the 'evaluated' status value.
      execute <<~SQL
        UPDATE completion_kit_reviews
        SET status = 'succeeded'
        WHERE status = 'evaluated'
      SQL
    end
  end
end
@@ -0,0 +1,9 @@
1
# Adds a composite index on completion_kit_reviews (response_id, status).
#
# The index is built with the CONCURRENTLY algorithm only when the current
# adapter advertises support for it. Adapters that do not list the algorithm
# (SQLite, MySQL/Trilogy, ...) get a regular index build instead of failing
# on an unknown algorithm.
class IndexReviewsOnResponseIdAndStatus < ActiveRecord::Migration[7.1]
  # Required so a concurrent index build is not wrapped in a DDL transaction.
  disable_ddl_transaction!

  def change
    options = { if_not_exists: true }
    # Feature-detect instead of comparing adapter names: each adapter reports
    # the index algorithms it understands through #index_algorithms.
    options[:algorithm] = :concurrently if connection.index_algorithms.key?(:concurrently)
    add_index :completion_kit_reviews, [:response_id, :status], **options
  end
end
@@ -0,0 +1,15 @@
1
# Adds a failure_summary text column to completion_kit_runs and, when
# migrating up, folds the 'generating' and 'judging' run statuses into the
# single 'running' status.
class CollapseRunStatusAndAddFailureSummary < ActiveRecord::Migration[7.1]
  def change
    add_column :completion_kit_runs, :failure_summary, :text

    # The status rewrite is one-way: nothing is restored on rollback.
    up_only do
      execute <<~SQL
        UPDATE completion_kit_runs
        SET status = 'running'
        WHERE status IN ('generating', 'judging')
      SQL
    end
  end
end
@@ -0,0 +1,16 @@
1
module CompletionKit
  # Start-up sanity check comparing the worker thread count with the LLM
  # concurrency cap, both read from the environment.
  module ConcurrencyCheck
    # Value assumed for either setting when its environment variable is
    # unset, blank, or not a base-10 integer.
    DEFAULT_LIMIT = 10

    # Logs a warning when SOLID_QUEUE_THREADS is lower than
    # COMPLETION_KIT_LLM_CONCURRENCY.
    #
    # @param logger [#warn] receiver of the warning message
    # @return [Object, nil] whatever +logger.warn+ returns, or nil when the
    #   configuration is consistent and nothing is logged
    def self.warn_if_misconfigured(logger)
      threads = integer_env("SOLID_QUEUE_THREADS")
      llm_cap = integer_env("COMPLETION_KIT_LLM_CONCURRENCY")
      return unless threads < llm_cap

      logger.warn(
        "[CompletionKit] SOLID_QUEUE_THREADS=#{threads} is less than " \
        "COMPLETION_KIT_LLM_CONCURRENCY=#{llm_cap}; threads will be the " \
        "actual bottleneck and the per-provider cap will never be reached."
      )
    end

    # Reads an integer setting from ENV. A plain +to_i+ would turn a blank or
    # malformed value into 0 and trigger a misleading warning, so anything
    # that is not a valid base-10 integer falls back to DEFAULT_LIMIT.
    #
    # @param name [String] environment variable name
    # @return [Integer]
    def self.integer_env(name)
      raw = ENV.fetch(name, DEFAULT_LIMIT).to_s.strip
      Integer(raw, 10, exception: false) || DEFAULT_LIMIT
    end
    private_class_method :integer_env
  end
end
@@ -0,0 +1,16 @@
1
module CompletionKit
  # Root of the CompletionKit exception hierarchy.
  class Error < StandardError
  end

  # Error class for configuration problems.
  class ConfigurationError < Error
  end

  # Error carrying rate-limit details alongside the message.
  class RateLimitError < Error
    attr_reader :provider, :status, :retry_after

    # @param message [String, nil] error message passed on to StandardError
    # @param provider [Object, nil] value exposed through #provider
    # @param status [Object, nil] value exposed through #status
    # @param retry_after [Object, nil] value exposed through #retry_after
    def initialize(message = nil, provider: nil, status: nil, retry_after: nil)
      @provider, @status, @retry_after = provider, status, retry_after
      super(message)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -1,9 +1,9 @@
1
+ require "completion_kit/errors"
1
2
  require "completion_kit/version"
2
3
  require "completion_kit/engine"
4
+ require "completion_kit/concurrency_check"
3
5
 
4
6
  module CompletionKit
5
- class ConfigurationError < StandardError; end
6
-
7
7
  class Configuration
8
8
  attr_accessor :openai_api_key, :anthropic_api_key, :ollama_api_key, :ollama_api_endpoint
9
9
  attr_accessor :judge_model, :high_quality_threshold, :medium_quality_threshold
@@ -0,0 +1,13 @@
1
namespace :completion_kit do
  desc "Mark in-flight runs as failed (for use after the queue adapter cutover)"
  task mark_interrupted_runs_failed: :environment do
    # update_all issues a single UPDATE and returns the number of rows it
    # affected. Reporting that value avoids a separate COUNT query and keeps
    # the printed figure in step with what was actually modified, even if
    # runs change state while the task is executing.
    marked = CompletionKit::Run.where(status: "running").update_all(
      status: "failed",
      failure_summary: "Interrupted by deploy",
      updated_at: Time.current
    )
    puts "Marked #{marked} runs as failed."
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-26 00:00:00.000000000 Z
11
+ date: 2026-05-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -198,11 +198,25 @@ dependencies:
198
198
  - - "~>"
199
199
  - !ruby/object:Gem::Version
200
200
  version: '0.22'
201
+ - !ruby/object:Gem::Dependency
202
+ name: solid_queue
203
+ requirement: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - "~>"
206
+ - !ruby/object:Gem::Version
207
+ version: '1.0'
208
+ type: :development
209
+ prerelease: false
210
+ version_requirements: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - "~>"
213
+ - !ruby/object:Gem::Version
214
+ version: '1.0'
201
215
  description: CompletionKit is a prompt testing platform that runs as a Rails engine
202
216
  or a standalone app. Run prompts against real datasets, score every output with
203
217
  an LLM judge against criteria you define, track prompt versions, and get AI-generated
204
218
  improvement suggestions grounded in your actual results. Includes a web UI, REST
205
- API, and a built-in MCP server with 36 tools.
219
+ API, and a built-in MCP server with 34 tools.
206
220
  email:
207
221
  - damien@homemade.software
208
222
  executables: []
@@ -236,9 +250,10 @@ files:
236
250
  - app/controllers/completion_kit/runs_controller.rb
237
251
  - app/helpers/completion_kit/application_helper.rb
238
252
  - app/jobs/completion_kit/application_job.rb
239
- - app/jobs/completion_kit/generate_job.rb
240
- - app/jobs/completion_kit/judge_job.rb
253
+ - app/jobs/completion_kit/generate_row_job.rb
254
+ - app/jobs/completion_kit/judge_review_job.rb
241
255
  - app/jobs/completion_kit/model_discovery_job.rb
256
+ - app/jobs/completion_kit/run_completion_check_job.rb
242
257
  - app/mailers/completion_kit/application_mailer.rb
243
258
  - app/models/completion_kit/application_record.rb
244
259
  - app/models/completion_kit/dataset.rb
@@ -272,6 +287,7 @@ files:
272
287
  - app/services/completion_kit/open_ai_client.rb
273
288
  - app/services/completion_kit/open_router_client.rb
274
289
  - app/services/completion_kit/prompt_improvement_service.rb
290
+ - app/services/completion_kit/worker_health.rb
275
291
  - app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb
276
292
  - app/views/completion_kit/api_reference/_example.html.erb
277
293
  - app/views/completion_kit/api_reference/index.html.erb
@@ -326,13 +342,21 @@ files:
326
342
  - db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb
327
343
  - db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb
328
344
  - db/migrate/20260417000001_rename_criteria_to_metric_groups.rb
345
+ - db/migrate/20260501000001_add_status_and_error_to_responses.rb
346
+ - db/migrate/20260501000002_index_responses_on_run_id_and_status.rb
347
+ - db/migrate/20260501000003_add_status_and_error_to_reviews.rb
348
+ - db/migrate/20260501000004_index_reviews_on_response_id_and_status.rb
349
+ - db/migrate/20260501000005_collapse_run_status_and_add_failure_summary.rb
329
350
  - lib/completion-kit.rb
330
351
  - lib/completion_kit.rb
352
+ - lib/completion_kit/concurrency_check.rb
331
353
  - lib/completion_kit/engine.rb
354
+ - lib/completion_kit/errors.rb
332
355
  - lib/completion_kit/version.rb
333
356
  - lib/generators/completion_kit/install_generator.rb
334
357
  - lib/generators/completion_kit/templates/README
335
358
  - lib/generators/completion_kit/templates/initializer.rb
359
+ - lib/tasks/completion_kit_runs.rake
336
360
  homepage: https://github.com/homemade-software-inc/completion-kit
337
361
  licenses:
338
362
  - BUSL-1.1
@@ -1,12 +0,0 @@
1
module CompletionKit
  # Job on the default queue that calls generate_responses! on the run with
  # the given id.
  class GenerateJob < ApplicationJob
    queue_as :default

    # @param run_id [Object] id used to look up the Run
    # @return [Object, nil] result of Run#generate_responses!, or nil when no
    #   run with that id exists
    def perform(run_id)
      Run.find_by(id: run_id)&.generate_responses!
    end
  end
end
@@ -1,12 +0,0 @@
1
module CompletionKit
  # Job on the default queue that calls judge_responses! on the run with the
  # given id.
  class JudgeJob < ApplicationJob
    queue_as :default

    # @param run_id [Object] id used to look up the Run
    # @return [Object, nil] result of Run#judge_responses!, or nil when no
    #   run with that id exists
    def perform(run_id)
      Run.find_by(id: run_id)&.judge_responses!
    end
  end
end