completion-kit 0.1.0.rc1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/README.md +97 -86
  3. data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +53 -0
  4. data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
  5. data/app/controllers/completion_kit/api/v1/runs_controller.rb +2 -10
  6. data/app/controllers/completion_kit/metric_groups_controller.rb +59 -0
  7. data/app/controllers/completion_kit/metrics_controller.rb +2 -2
  8. data/app/controllers/completion_kit/runs_controller.rb +4 -11
  9. data/app/helpers/completion_kit/application_helper.rb +1 -8
  10. data/app/models/completion_kit/application_record.rb +7 -0
  11. data/app/models/completion_kit/metric.rb +4 -6
  12. data/app/models/completion_kit/metric_group.rb +30 -0
  13. data/app/models/completion_kit/metric_group_membership.rb +20 -0
  14. data/app/models/completion_kit/model.rb +1 -1
  15. data/app/models/completion_kit/provider_credential.rb +2 -1
  16. data/app/models/completion_kit/run.rb +11 -4
  17. data/app/services/completion_kit/anthropic_client.rb +4 -17
  18. data/app/services/completion_kit/judge_service.rb +3 -7
  19. data/app/services/completion_kit/llm_client.rb +15 -0
  20. data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
  21. data/app/services/completion_kit/mcp_tools/base.rb +23 -0
  22. data/app/services/completion_kit/mcp_tools/datasets.rb +2 -18
  23. data/app/services/completion_kit/mcp_tools/metric_groups.rb +82 -0
  24. data/app/services/completion_kit/mcp_tools/metrics.rb +4 -22
  25. data/app/services/completion_kit/mcp_tools/prompts.rb +2 -18
  26. data/app/services/completion_kit/mcp_tools/provider_credentials.rb +2 -18
  27. data/app/services/completion_kit/mcp_tools/responses.rb +2 -13
  28. data/app/services/completion_kit/mcp_tools/runs.rb +4 -28
  29. data/app/services/completion_kit/ollama_client.rb +2 -15
  30. data/app/services/completion_kit/open_ai_client.rb +1 -10
  31. data/app/services/completion_kit/open_router_client.rb +1 -12
  32. data/app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb +15 -0
  33. data/app/views/completion_kit/api_reference/index.html.erb +11 -11
  34. data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
  35. data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
  36. data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
  37. data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
  38. data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
  39. data/app/views/completion_kit/metrics/_form.html.erb +2 -23
  40. data/app/views/completion_kit/metrics/index.html.erb +13 -5
  41. data/app/views/completion_kit/metrics/show.html.erb +1 -12
  42. data/app/views/completion_kit/runs/_form.html.erb +5 -5
  43. data/app/views/layouts/completion_kit/application.html.erb +4 -1
  44. data/config/routes.rb +2 -2
  45. data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
  46. data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
  47. data/lib/completion_kit/engine.rb +1 -7
  48. data/lib/completion_kit/version.rb +1 -1
  49. data/lib/completion_kit.rb +5 -0
  50. metadata +23 -21
  51. data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
  52. data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
  53. data/app/controllers/completion_kit/criteria_controller.rb +0 -67
  54. data/app/models/completion_kit/criteria.rb +0 -22
  55. data/app/models/completion_kit/criteria_membership.rb +0 -20
  56. data/app/services/completion_kit/mcp_tools/criteria.rb +0 -106
  57. data/app/views/completion_kit/criteria/_form.html.erb +0 -46
  58. data/app/views/completion_kit/criteria/edit.html.erb +0 -14
  59. data/app/views/completion_kit/criteria/index.html.erb +0 -37
  60. data/app/views/completion_kit/criteria/new.html.erb +0 -13
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cdecf15deb685a524186a2bd4ba48268e10475da0d2cc2914969318893268f70
4
- data.tar.gz: ce395abf147434f9a825f79902e47b171b074038d20c06bf7b86b80ba70eaa00
3
+ metadata.gz: 621db92d1653ef6d326f46a164593ea731817000771aeaa649263fcf5d0a35e4
4
+ data.tar.gz: 7fbee94e29658df3508530710600a862e18b84d3b0c7b7b0f9d6ee96de1a6bbd
5
5
  SHA512:
6
- metadata.gz: 9d13fd1d1863c87ca0f7ed78eb1853ac921ff39862b84c678e5b5c9977f1832edcfb50c52b2371f2dc88ad6360be22a78d39ef53385239939a1ac9297df444ff
7
- data.tar.gz: bf95df5b178ccfe455350d216a894836a85cfeff5c117d0206b87211ef638c1d690790350168b5a9ad84e8e3895add8c89acdea98738a1c0d2025115679d939f
6
+ metadata.gz: c76b3407321ed225a97516ace2a14f39786514c2fcc393e105c2be4c04f572a01395058d9af0854164135321b2cea3b711d9675431c9d62e19fe6ca235c514df
7
+ data.tar.gz: a175cf106b5ccfb2d6605c70be85c3fe96a52962187a3a50de6bfe75322c06a4aa818b8a17ac65208479acb6eba51b1de5fe9004505ed2068881faebfb7c1334
data/README.md CHANGED
@@ -1,29 +1,44 @@
1
1
  <p align="center">
2
- <img src="https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/logo.png" alt="CompletionKit logo" width="120" />
2
+ <img src="https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/logo.png" alt="CompletionKit" width="360" />
3
3
  </p>
4
4
 
5
- # CompletionKit
6
-
7
- [![CI](https://github.com/homemade-software-inc/completion-kit/actions/workflows/ci.yml/badge.svg)](https://github.com/homemade-software-inc/completion-kit/actions/workflows/ci.yml)
8
- ![coverage](https://img.shields.io/badge/coverage-100%25-brightgreen)
9
- ![dependencies](https://img.shields.io/badge/dependencies-7-blue)
10
- [![Dependabot](https://img.shields.io/badge/dependabot-enabled-blue?logo=dependabot)](https://github.com/homemade-software-inc/completion-kit/network/updates)
11
-
12
- You need to know whether your prompts produce the output you expect, consistently, across real data. CompletionKit gives you that, inside your Rails app.
5
+ <p align="center">
6
+ <a href="https://github.com/homemade-software-inc/completion-kit/actions/workflows/ci.yml"><img src="https://github.com/homemade-software-inc/completion-kit/actions/workflows/ci.yml/badge.svg" alt="CI" /></a>
7
+ <img src="https://img.shields.io/badge/coverage-100%25-brightgreen" alt="coverage" />
8
+ </p>
13
9
 
14
- Mount the engine, bring your prompts and datasets, and every input runs through a model you pick. Each output is scored against your own metrics and rubrics by an LLM-as-judge. When you change a prompt, re-run the same dataset and see exactly what got better and what broke — and when the scores tell you something's off, CompletionKit can suggest an improved version of the prompt based on the reviews, which you inspect as a diff and apply as a new version.
10
+ Your prompts need tests too.
15
11
 
16
- Drive it from the web UI, from the REST API, or from Claude Code and other MCP-aware agents via the built-in Model Context Protocol server. All three share the same state — your prompts, runs, datasets, and scores are one source of truth.
12
+ Run every prompt against real data. Score each output with an LLM judge against criteria you define. Change anything: the prompt, the model, the temperature, the dataset. Re-run and see exactly what got better and what broke. When the scores tell you something's off, CompletionKit suggests an improved prompt based on the judge's actual feedback on your runs. You inspect the diff, apply it as a new version, and verify the improvement.
17
13
 
18
14
  It's the difference between "this prompt seems to work" and "this prompt scores 4.3 out of 5 across 200 inputs, up from 3.8 last version."
19
15
 
16
+ **[completionkit.com](https://completionkit.com)** | **[RubyGems](https://rubygems.org/gems/completion-kit)**
17
+
20
18
  ![Prompts index](https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/screenshots/prompts.png)
21
19
 
22
20
  ![Prompt detail with metrics and rubrics](https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/screenshots/prompt-detail.png)
23
21
 
24
22
  ![Test run with scored results](https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/screenshots/test-run.png)
25
23
 
26
- ## Setup
24
+ ## Quick Start
25
+
26
+ ### Run the standalone app
27
+
28
+ The fastest way to start. No existing Rails app needed.
29
+
30
+ ```bash
31
+ git clone https://github.com/homemade-software-inc/completion-kit.git
32
+ cd completion-kit/standalone
33
+ bundle install
34
+ bin/rails completion_kit:install:migrations
35
+ bin/rails db:migrate
36
+ bin/rails server
37
+ ```
38
+
39
+ Visit `http://localhost:3000`. Add a provider credential (Settings), create a prompt, upload a CSV dataset, and run it.
40
+
41
+ ### Or mount as an engine in your existing Rails app
27
42
 
28
43
  ```ruby
29
44
  gem "completion-kit"
@@ -34,26 +49,40 @@ bin/rails generate completion_kit:install
34
49
  bin/rails db:migrate
35
50
  ```
36
51
 
37
- Set your provider keys via environment variables or the generated initializer:
52
+ The engine mounts at `/completion_kit` in your app.
38
53
 
39
- ```bash
40
- OPENAI_API_KEY=...
41
- ANTHROPIC_API_KEY=...
42
- LLAMA_API_KEY=...
43
- LLAMA_API_ENDPOINT=...
44
- ```
54
+ ## Providers
55
+
56
+ CompletionKit discovers available models from each provider's API automatically.
57
+
58
+ | Provider | Env vars | What it covers |
59
+ |----------|----------|----------------|
60
+ | **OpenAI** | `OPENAI_API_KEY` | GPT-5, GPT-4.1, GPT-4o, etc. |
61
+ | **Anthropic** | `ANTHROPIC_API_KEY` | Claude Opus, Sonnet, Haiku |
62
+ | **Ollama / local endpoint** | `OLLAMA_API_ENDPOINT` (default: `http://localhost:11434/v1`) | Any model you've `ollama pull`-ed, or any OpenAI-compatible local server (vLLM, LM Studio, llama.cpp) |
63
+ | **OpenRouter** | `OPENROUTER_API_KEY` | 100+ models from 30+ providers through one API key |
64
+
65
+ Set these as environment variables or configure them in the generated initializer. You can also add provider credentials through the web UI under Settings.
45
66
 
46
- Available models are discovered dynamically from each provider's API.
67
+ ### Encryption
47
68
 
48
- ### Encryption keys
69
+ Provider API keys are encrypted at rest using [Active Record encryption](https://guides.rubyonrails.org/active_record_encryption.html). You need three encryption keys configured before the app will boot in production.
49
70
 
50
- Provider API keys are stored using [Rails Active Record encryption](https://guides.rubyonrails.org/active_record_encryption.html), so the host app must have encryption keys configured. If you haven't set them up already:
71
+ Generate them:
51
72
 
52
73
  ```bash
53
74
  bin/rails db:encryption:init
54
75
  ```
55
76
 
56
- Copy the generated keys into `config/credentials.yml.enc` under `active_record_encryption`, or set the equivalent environment variables. CompletionKit won't boot without valid keys in production.
77
+ Then set them as environment variables:
78
+
79
+ ```bash
80
+ COMPLETION_KIT_ENCRYPTION_PRIMARY_KEY=<generated value>
81
+ COMPLETION_KIT_ENCRYPTION_DETERMINISTIC_KEY=<generated value>
82
+ COMPLETION_KIT_ENCRYPTION_KEY_DERIVATION_SALT=<generated value>
83
+ ```
84
+
85
+ Or add them to `config/credentials.yml.enc` under `active_record_encryption`. In development, the standalone app uses built-in fallback values so you can skip this step locally.
57
86
 
58
87
  ## Authentication
59
88
 
@@ -62,7 +91,6 @@ CompletionKit requires authentication in production. In development, routes are
62
91
  ### Basic Auth (recommended for simple setups)
63
92
 
64
93
  ```ruby
65
- # config/initializers/completion_kit.rb
66
94
  CompletionKit.configure do |c|
67
95
  c.username = "admin"
68
96
  c.password = ENV["COMPLETION_KIT_PASSWORD"]
@@ -72,58 +100,49 @@ end
72
100
  ### Custom Auth (Devise, etc.)
73
101
 
74
102
  ```ruby
75
- # config/initializers/completion_kit.rb
76
103
  CompletionKit.configure do |c|
77
104
  c.auth_strategy = ->(controller) { controller.authenticate_user! }
78
105
  end
79
106
  ```
80
107
 
81
- Only one mode can be active — setting both raises a `ConfigurationError`.
108
+ Only one mode can be active.
109
+
110
+ ## How it works
111
+
112
+ 1. **Create a prompt** with `{{variable}}` placeholders.
113
+ 2. **Upload a dataset.** A CSV where column headers match the variable names.
114
+ 3. **Run it** against a model and score outputs with an LLM judge against criteria you define.
115
+ 4. **Iterate.** Change the prompt, the model, the temperature, or the dataset and re-run. CompletionKit versions your prompts so you can always compare against previous results.
116
+ 5. **Get suggestions.** When scores drop, ask CompletionKit for an AI-generated improvement. The suggestion is based on the judge's actual per-response feedback, not generic prompt-engineering advice. Inspect the diff and apply it as a new version.
82
117
 
83
- ## Usage
118
+ ## Concepts
84
119
 
85
- 1. Create a prompt with `{{variable}}` placeholders
86
- 2. Create a test run and paste CSV data (headers match variable names)
87
- 3. Generate outputs, run AI review, inspect scored results
120
+ - **Prompt.** A versioned template with `{{variable}}` placeholders. Publishing freezes the template; editing a published prompt creates a new version.
121
+ - **Dataset.** A CSV of real inputs. Each row becomes one test case.
122
+ - **Run.** One execution of a prompt against a dataset. Captures every input (model, temperature, metrics) and stores all outputs and scores.
123
+ - **Response.** The model's output for one dataset row, with reviews attached.
124
+ - **Metric.** An evaluation dimension with a name, instruction, and a 1-5 star scoring scale. The LLM judge uses this to score each response.
125
+ - **Metric Group.** A reusable group of metrics you can apply to a run as a set.
126
+ - **Provider Credential.** An API key for a model provider. Encrypted at rest, never returned through the API.
88
127
 
89
- ## Programmatic access
128
+ ## REST API
90
129
 
91
- CompletionKit exposes every resource through both a REST JSON API and an MCP server. Both share the same bearer-token auth, so configure once and use either interface:
130
+ Every resource is accessible via a bearer-token JSON API:
92
131
 
93
132
  ```ruby
94
- # config/initializers/completion_kit.rb
95
133
  CompletionKit.configure { |c| c.api_token = ENV["COMPLETION_KIT_API_TOKEN"] }
96
134
  ```
97
135
 
98
- ### Concepts
99
-
100
- These are the objects you'll work with, whether through the UI, the REST API, or the MCP server:
101
-
102
- - **Prompt** — A named, versioned template with `{{variable}}` placeholders. Publishing a prompt freezes its template so runs always reference a known version; editing a published prompt creates a new version.
103
- - **Dataset** — A CSV of real inputs. Column headers match the prompt's `{{variable}}` names, and each row becomes one test case.
104
- - **Run** — A single execution of a prompt against a dataset. Tracks progress, stores outputs, and records which metrics were used for scoring.
105
- - **Response** — The model's output for one row of the dataset, with any reviews attached.
106
- - **Metric** — One evaluation dimension: a name, an instruction, evaluation steps, and 1–5-star rubric bands. The judge uses a metric to score a response.
107
- - **Criteria** — A named, reusable bundle of metrics you can apply to a run in one step.
108
- - **Provider Credential** — An API key for a model provider (OpenAI, Anthropic, Ollama, OpenRouter). Encrypted at rest using Rails' Active Record encryption, and never returned through the API.
109
-
110
- ### REST API
111
-
112
136
  ```bash
113
137
  curl -H "Authorization: Bearer $TOKEN" \
114
138
  http://localhost:3000/completion_kit/api/v1/prompts
115
-
116
- curl -X POST http://localhost:3000/completion_kit/api/v1/prompts \
117
- -H "Authorization: Bearer $TOKEN" \
118
- -H "Content-Type: application/json" \
119
- -d '{"name":"summarizer","template":"Summarize: {{text}}","llm_model":"gpt-4.1"}'
120
139
  ```
121
140
 
122
- Mount the engine, then visit **`/completion_kit/api_reference`** in your running app for per-endpoint documentation with copy-to-clipboard curl examples pre-filled with your token.
141
+ Visit `/completion_kit/api_reference` in your running app for per-endpoint docs with copy-to-clipboard curl examples.
123
142
 
124
- ### MCP server
143
+ ## MCP server
125
144
 
126
- CompletionKit also runs a [Model Context Protocol](https://modelcontextprotocol.io) server at the `/mcp` path within the engine mount, exposing the same resources as 36 tools (one per CRUD action plus process actions like `runs_generate` and `prompts_publish`). Point Claude Code, Cursor, or any other MCP client at it:
145
+ CompletionKit runs a [Model Context Protocol](https://modelcontextprotocol.io) server at `/completion_kit/mcp`, exposing every resource as tools that MCP-aware clients (Claude Code, Cursor, etc.) can drive directly:
127
146
 
128
147
  ```json
129
148
  {
@@ -136,40 +155,21 @@ CompletionKit also runs a [Model Context Protocol](https://modelcontextprotocol.
136
155
  }
137
156
  ```
138
157
 
139
- The in-app API reference page also ships install snippets you can copy straight into your MCP client config.
140
-
141
- ## Standalone App
142
-
143
- CompletionKit ships with a standalone Rails app you can deploy as a hosted service.
144
-
145
- ### Quick Start
146
-
147
- ```bash
148
- cd standalone
149
- bundle install
150
- bin/rails completion_kit:install:migrations
151
- bin/rails db:migrate
152
- bin/rails server
153
- ```
154
-
155
- Visit `http://localhost:3000` for the home page, or `http://localhost:3000/completion_kit` for the engine UI.
158
+ The in-app API reference page has install snippets you can copy straight into your MCP client config.
156
159
 
157
- ### Configuration
160
+ ## Deploying the standalone app
158
161
 
159
- Set environment variables:
162
+ Any Rails-friendly host works (Fly, Heroku, Render, Docker, etc.). Point it at a Postgres instance via `DATABASE_URL`, set your provider env vars, and run `cd standalone && bin/rails db:migrate` on each deploy.
160
163
 
161
164
  | Variable | Purpose | Default |
162
165
  |----------|---------|---------|
163
- | `COMPLETION_KIT_API_TOKEN` | Bearer token for REST API and MCP access | (none — API disabled) |
166
+ | `COMPLETION_KIT_API_TOKEN` | Bearer token for REST API and MCP | (none, API disabled) |
164
167
  | `COMPLETION_KIT_USERNAME` | Web UI login username | `admin` |
165
- | `COMPLETION_KIT_PASSWORD` | Web UI login password | (none — open in dev) |
166
- | `DATABASE_URL` | PostgreSQL connection string (production) | SQLite in dev |
168
+ | `COMPLETION_KIT_PASSWORD` | Web UI login password | (none, open in dev) |
167
169
 
168
- ### Deploying
170
+ You also need the three `COMPLETION_KIT_ENCRYPTION_*` keys from the [Encryption](#encryption) section above.
169
171
 
170
- Any Rails-friendly host works — Fly, Heroku, Render, self-managed Docker, etc. Point your host at a Postgres instance via `DATABASE_URL`, set the environment variables above, and run `cd standalone && bin/rails db:migrate` on each deploy.
171
-
172
- When the gem ships a new engine migration, install it into your standalone app locally and commit the generated file before pushing:
172
+ When the gem ships a new migration, install it locally and commit before pushing:
173
173
 
174
174
  ```bash
175
175
  cd standalone
@@ -178,15 +178,26 @@ bin/rails db:migrate
178
178
  git add db/migrate/ && git commit -m "install new engine migration"
179
179
  ```
180
180
 
181
- That way your host's `db:migrate` picks up the new file on the next deploy. Don't run `completion_kit:install:migrations` on the host itself — migration files are source artifacts, they belong in git.
181
+ ## Multi-tenant host apps (advanced)
182
182
 
183
- ## Development
183
+ For hosts that mount CompletionKit in a multi-tenant app, two optional hooks scope engine records per tenant without forking the engine:
184
184
 
185
- ```bash
186
- bundle install
187
- bundle exec rspec
185
+ ```ruby
186
+ CompletionKit.configure do |config|
187
+ config.tenant_scope = -> {
188
+ org = Current.organization&.id
189
+ org ? where(organization_id: org) : where("1=0")
190
+ }
191
+ config.tenant_scope_columns = [:organization_id]
192
+ end
188
193
  ```
189
194
 
195
+ `tenant_scope` runs as each engine model's `default_scope` (use `unscoped` to bypass). `tenant_scope_columns` is appended to every engine uniqueness validation. Adding the tenant columns and composite unique indexes lives in your host migrations. Both defaults (`nil`, `[]`) are no-ops.
196
+
197
+ ## Contributing
198
+
199
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, testing, and pull request guidelines.
200
+
190
201
  ## License
191
202
 
192
203
  [MIT](https://opensource.org/licenses/MIT)
@@ -0,0 +1,53 @@
1
+ module CompletionKit
2
+ module Api
3
+ module V1
4
+ class MetricGroupsController < BaseController
5
+ before_action :set_metric_group, only: [:show, :update, :destroy]
6
+
7
+ def index
8
+ render json: MetricGroup.order(created_at: :desc)
9
+ end
10
+
11
+ def show
12
+ render json: @metric_group
13
+ end
14
+
15
+ def create
16
+ metric_group = MetricGroup.new(metric_group_params.except(:metric_ids))
17
+ if metric_group.save
18
+ metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
19
+ render json: metric_group.reload, status: :created
20
+ else
21
+ render json: {errors: metric_group.errors}, status: :unprocessable_entity
22
+ end
23
+ end
24
+
25
+ def update
26
+ if @metric_group.update(metric_group_params.except(:metric_ids))
27
+ @metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
28
+ render json: @metric_group.reload
29
+ else
30
+ render json: {errors: @metric_group.errors}, status: :unprocessable_entity
31
+ end
32
+ end
33
+
34
+ def destroy
35
+ @metric_group.destroy!
36
+ head :no_content
37
+ end
38
+
39
+ private
40
+
41
+ def set_metric_group
42
+ @metric_group = MetricGroup.find(params[:id])
43
+ rescue ActiveRecord::RecordNotFound
44
+ not_found
45
+ end
46
+
47
+ def metric_group_params
48
+ params.permit(:name, :description, metric_ids: [])
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
@@ -43,7 +43,7 @@ module CompletionKit
43
43
  end
44
44
 
45
45
  def metric_params
46
- params.permit(:name, :instruction, evaluation_steps: [], rubric_bands: [:stars, :description])
46
+ params.permit(:name, :instruction, rubric_bands: [:stars, :description])
47
47
  end
48
48
  end
49
49
  end
@@ -15,7 +15,7 @@ module CompletionKit
15
15
  def create
16
16
  run = Run.new(run_params.except(:metric_ids))
17
17
  if run.save
18
- replace_run_metrics(run, params[:metric_ids])
18
+ run.replace_metrics!(params[:metric_ids])
19
19
  render json: run.reload, status: :created
20
20
  else
21
21
  render json: {errors: run.errors}, status: :unprocessable_entity
@@ -24,7 +24,7 @@ module CompletionKit
24
24
 
25
25
  def update
26
26
  if @run.update(run_params.except(:metric_ids))
27
- replace_run_metrics(@run, params[:metric_ids]) if params.key?(:metric_ids)
27
+ @run.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
28
28
  render json: @run.reload
29
29
  else
30
30
  render json: {errors: @run.errors}, status: :unprocessable_entity
@@ -57,14 +57,6 @@ module CompletionKit
57
57
  def run_params
58
58
  params.permit(:name, :prompt_id, :dataset_id, :judge_model, :temperature, metric_ids: [])
59
59
  end
60
-
61
- def replace_run_metrics(run, metric_ids)
62
- return unless metric_ids
63
- run.run_metrics.delete_all
64
- Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
65
- run.run_metrics.create!(metric_id: metric_id, position: index + 1)
66
- end
67
- end
68
60
  end
69
61
  end
70
62
  end
@@ -0,0 +1,59 @@
1
+ module CompletionKit
2
+ class MetricGroupsController < ApplicationController
3
+ before_action :set_metric_group, only: [:show, :edit, :update, :destroy]
4
+
5
+ def index
6
+ @metric_groups = MetricGroup.includes(:metrics).order(:name)
7
+ end
8
+
9
+ def show
10
+ end
11
+
12
+ def new
13
+ @metric_group = MetricGroup.new
14
+ @metrics = Metric.order(:name)
15
+ end
16
+
17
+ def edit
18
+ @metrics = Metric.order(:name)
19
+ end
20
+
21
+ def create
22
+ @metric_group = MetricGroup.new(metric_group_params.except(:metric_ids))
23
+ @metrics = Metric.order(:name)
24
+
25
+ if @metric_group.save
26
+ @metric_group.replace_metrics!(metric_group_params[:metric_ids])
27
+ redirect_to metric_group_path(@metric_group), notice: "Metric group was successfully created."
28
+ else
29
+ render :new, status: :unprocessable_entity
30
+ end
31
+ end
32
+
33
+ def update
34
+ @metrics = Metric.order(:name)
35
+
36
+ if @metric_group.update(metric_group_params.except(:metric_ids))
37
+ @metric_group.replace_metrics!(metric_group_params[:metric_ids])
38
+ redirect_to metric_group_path(@metric_group), notice: "Metric group was successfully updated."
39
+ else
40
+ render :edit, status: :unprocessable_entity
41
+ end
42
+ end
43
+
44
+ def destroy
45
+ @metric_group.destroy
46
+ redirect_to metric_groups_path, notice: "Metric group was successfully destroyed."
47
+ end
48
+
49
+ private
50
+
51
+ def set_metric_group
52
+ @metric_group = MetricGroup.find(params[:id])
53
+ end
54
+
55
+ def metric_group_params
56
+ params.require(:metric_group).permit(:name, :description, metric_ids: [])
57
+ end
58
+ end
59
+ end
@@ -3,7 +3,7 @@ module CompletionKit
3
3
  before_action :set_metric, only: [:show, :edit, :update, :destroy]
4
4
 
5
5
  def index
6
- @metrics = Metric.order(:name)
6
+ @metrics = Metric.includes(:metric_groups).order(:name)
7
7
  end
8
8
 
9
9
  def show
@@ -46,7 +46,7 @@ module CompletionKit
46
46
  end
47
47
 
48
48
  def metric_params
49
- params.require(:metric).permit(:name, :instruction, evaluation_steps: [], rubric_bands: [:stars, :description])
49
+ params.require(:metric).permit(:name, :instruction, rubric_bands: [:stars, :description])
50
50
  end
51
51
  end
52
52
  end
@@ -35,7 +35,7 @@ module CompletionKit
35
35
  def create
36
36
  @run = Run.new(run_params.except(:metric_ids))
37
37
  if @run.save
38
- replace_run_metrics(@run, params[:run][:metric_ids])
38
+ @run.replace_metrics!(params[:run][:metric_ids])
39
39
  redirect_to run_path(@run), notice: "Run was successfully created."
40
40
  else
41
41
  load_form_collections
@@ -46,10 +46,10 @@ module CompletionKit
46
46
  def update
47
47
  if @run.responses.any?
48
48
  new_run = Run.create!(run_params.except(:metric_ids).to_h.merge(status: "pending"))
49
- replace_run_metrics(new_run, params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
49
+ new_run.replace_metrics!(params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
50
50
  redirect_to run_path(new_run), notice: "Saved as a new run. The previous run and its results are preserved."
51
51
  elsif @run.update(run_params.except(:metric_ids))
52
- replace_run_metrics(@run, params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
52
+ @run.replace_metrics!(params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
53
53
  redirect_to run_path(@run), notice: "Run saved."
54
54
  else
55
55
  load_form_collections
@@ -112,7 +112,7 @@ module CompletionKit
112
112
  def load_form_collections
113
113
  @prompts = Prompt.order(:name)
114
114
  @datasets = Dataset.order(:name)
115
- @criterias = Criteria.includes(:metrics).order(:name)
115
+ @metric_groups = MetricGroup.includes(:metrics).order(:name)
116
116
  @all_metrics = Metric.order(:name)
117
117
  end
118
118
 
@@ -120,12 +120,5 @@ module CompletionKit
120
120
  params.require(:run).permit(:name, :prompt_id, :dataset_id, :judge_model, :temperature, metric_ids: [])
121
121
  end
122
122
 
123
- def replace_run_metrics(run, metric_ids)
124
- return unless metric_ids
125
- run.run_metrics.delete_all
126
- Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
127
- run.run_metrics.create!(metric_id: metric_id, position: index + 1)
128
- end
129
- end
130
123
  end
131
124
  end
@@ -76,15 +76,8 @@ module CompletionKit
76
76
  end
77
77
  end
78
78
 
79
- PROVIDER_LABELS = {
80
- "openai" => "OpenAI",
81
- "anthropic" => "Anthropic",
82
- "ollama" => "Ollama / local endpoint",
83
- "openrouter" => "OpenRouter"
84
- }.freeze
85
-
86
79
  def ck_provider_label(provider)
87
- PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
80
+ CompletionKit::ProviderCredential::PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
88
81
  end
89
82
 
90
83
  def ck_grouped_models(models, selected = nil)
@@ -1,5 +1,12 @@
1
1
  module CompletionKit
2
2
  class ApplicationRecord < ActiveRecord::Base
3
3
  self.abstract_class = true
4
+
5
+ TenantScopedUniquenessValidator = CompletionKit::TenantScopedUniquenessValidator
6
+
7
+ default_scope do
8
+ scope_proc = CompletionKit.config.tenant_scope
9
+ scope_proc ? instance_exec(&scope_proc) : all
10
+ end
4
11
  end
5
12
  end
@@ -8,15 +8,14 @@ module CompletionKit
8
8
  { "stars" => 1, "description" => "Fails to meet the criteria. Major errors or completely off-target." }
9
9
  ].freeze
10
10
 
11
- has_many :criteria_memberships, dependent: :destroy
12
- has_many :criterias, through: :criteria_memberships, source: :criteria
11
+ has_many :metric_group_memberships, dependent: :destroy
12
+ has_many :metric_groups, through: :metric_group_memberships, source: :metric_group
13
13
  has_many :reviews, dependent: :nullify
14
14
 
15
15
  serialize :rubric_bands, coder: JSON
16
- serialize :evaluation_steps, coder: JSON
17
16
 
18
17
  validates :name, presence: true
19
- validates :key, uniqueness: true, allow_nil: true
18
+ validates :key, tenant_scoped_uniqueness: { allow_nil: true }
20
19
 
21
20
  before_validation :generate_key
22
21
  before_validation :normalize_rubric_bands
@@ -74,7 +73,7 @@ module CompletionKit
74
73
  def as_json(options = {})
75
74
  {
76
75
  id: id, name: name, key: key, instruction: instruction,
77
- evaluation_steps: evaluation_steps, rubric_bands: rubric_bands,
76
+ rubric_bands: rubric_bands,
78
77
  created_at: created_at, updated_at: updated_at
79
78
  }
80
79
  end
@@ -86,7 +85,6 @@ module CompletionKit
86
85
  end
87
86
 
88
87
  def set_defaults
89
- self.evaluation_steps ||= []
90
88
  self.rubric_bands = self.class.default_rubric_bands if rubric_bands.blank?
91
89
  end
92
90
 
@@ -0,0 +1,30 @@
1
+ module CompletionKit
2
+ class MetricGroup < ApplicationRecord
3
+ self.table_name = "completion_kit_metric_groups"
4
+
5
+ has_many :metric_group_memberships, -> { order(:position, :id) }, dependent: :destroy
6
+ has_many :metrics, through: :metric_group_memberships
7
+
8
+ validates :name, presence: true
9
+
10
+ def ordered_metrics
11
+ metric_group_memberships.includes(:metric).map(&:metric).compact
12
+ end
13
+
14
+ def replace_metrics!(metric_ids)
15
+ return unless metric_ids
16
+ metric_group_memberships.delete_all
17
+ Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
18
+ metric_group_memberships.create!(metric_id: metric_id, position: index + 1)
19
+ end
20
+ end
21
+
22
+ def as_json(options = {})
23
+ {
24
+ id: id, name: name, description: description,
25
+ created_at: created_at, updated_at: updated_at,
26
+ metric_ids: metric_ids
27
+ }
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,20 @@
1
+ module CompletionKit
2
+ class MetricGroupMembership < ApplicationRecord
3
+ self.table_name = "completion_kit_metric_group_memberships"
4
+
5
+ belongs_to :metric_group, class_name: "CompletionKit::MetricGroup", foreign_key: "metric_group_id"
6
+ belongs_to :metric
7
+
8
+ validates :metric_id, uniqueness: { scope: :metric_group_id }
9
+
10
+ before_validation :set_default_position
11
+
12
+ private
13
+
14
+ def set_default_position
15
+ return if position.present? || metric_group.blank?
16
+
17
+ self.position = metric_group.metric_group_memberships.maximum(:position).to_i + 1
18
+ end
19
+ end
20
+ end
@@ -3,7 +3,7 @@ module CompletionKit
3
3
  STATUSES = %w[active retired failed].freeze
4
4
 
5
5
  validates :provider, presence: true
6
- validates :model_id, presence: true, uniqueness: { scope: :provider }
6
+ validates :model_id, presence: true, tenant_scoped_uniqueness: { scope: :provider }
7
7
  validates :status, presence: true, inclusion: { in: STATUSES }
8
8
 
9
9
  scope :active, -> { where(status: "active") }
@@ -22,7 +22,8 @@ module CompletionKit
22
22
  PROVIDER_LABELS[provider] || provider.titleize
23
23
  end
24
24
 
25
- validates :provider, presence: true, inclusion: { in: PROVIDERS }, uniqueness: true
25
+ validates :provider, presence: true, inclusion: { in: PROVIDERS }
26
+ validates :provider, tenant_scoped_uniqueness: true
26
27
 
27
28
  after_save :enqueue_discovery
28
29