completion-kit 0.1.0.rc1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +97 -86
- data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +53 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +2 -10
- data/app/controllers/completion_kit/metric_groups_controller.rb +59 -0
- data/app/controllers/completion_kit/metrics_controller.rb +2 -2
- data/app/controllers/completion_kit/runs_controller.rb +4 -11
- data/app/helpers/completion_kit/application_helper.rb +1 -8
- data/app/models/completion_kit/application_record.rb +7 -0
- data/app/models/completion_kit/metric.rb +4 -6
- data/app/models/completion_kit/metric_group.rb +30 -0
- data/app/models/completion_kit/metric_group_membership.rb +20 -0
- data/app/models/completion_kit/model.rb +1 -1
- data/app/models/completion_kit/provider_credential.rb +2 -1
- data/app/models/completion_kit/run.rb +11 -4
- data/app/services/completion_kit/anthropic_client.rb +4 -17
- data/app/services/completion_kit/judge_service.rb +3 -7
- data/app/services/completion_kit/llm_client.rb +15 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
- data/app/services/completion_kit/mcp_tools/base.rb +23 -0
- data/app/services/completion_kit/mcp_tools/datasets.rb +2 -18
- data/app/services/completion_kit/mcp_tools/metric_groups.rb +82 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +4 -22
- data/app/services/completion_kit/mcp_tools/prompts.rb +2 -18
- data/app/services/completion_kit/mcp_tools/provider_credentials.rb +2 -18
- data/app/services/completion_kit/mcp_tools/responses.rb +2 -13
- data/app/services/completion_kit/mcp_tools/runs.rb +4 -28
- data/app/services/completion_kit/ollama_client.rb +2 -15
- data/app/services/completion_kit/open_ai_client.rb +1 -10
- data/app/services/completion_kit/open_router_client.rb +1 -12
- data/app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb +15 -0
- data/app/views/completion_kit/api_reference/index.html.erb +11 -11
- data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
- data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
- data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
- data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
- data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
- data/app/views/completion_kit/metrics/_form.html.erb +2 -23
- data/app/views/completion_kit/metrics/index.html.erb +13 -5
- data/app/views/completion_kit/metrics/show.html.erb +1 -12
- data/app/views/completion_kit/runs/_form.html.erb +5 -5
- data/app/views/layouts/completion_kit/application.html.erb +4 -1
- data/config/routes.rb +2 -2
- data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
- data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
- data/lib/completion_kit/engine.rb +1 -7
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +5 -0
- metadata +23 -21
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
- data/app/controllers/completion_kit/criteria_controller.rb +0 -67
- data/app/models/completion_kit/criteria.rb +0 -22
- data/app/models/completion_kit/criteria_membership.rb +0 -20
- data/app/services/completion_kit/mcp_tools/criteria.rb +0 -106
- data/app/views/completion_kit/criteria/_form.html.erb +0 -46
- data/app/views/completion_kit/criteria/edit.html.erb +0 -14
- data/app/views/completion_kit/criteria/index.html.erb +0 -37
- data/app/views/completion_kit/criteria/new.html.erb +0 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 621db92d1653ef6d326f46a164593ea731817000771aeaa649263fcf5d0a35e4
|
|
4
|
+
data.tar.gz: 7fbee94e29658df3508530710600a862e18b84d3b0c7b7b0f9d6ee96de1a6bbd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c76b3407321ed225a97516ace2a14f39786514c2fcc393e105c2be4c04f572a01395058d9af0854164135321b2cea3b711d9675431c9d62e19fe6ca235c514df
|
|
7
|
+
data.tar.gz: a175cf106b5ccfb2d6605c70be85c3fe96a52962187a3a50de6bfe75322c06a4aa818b8a17ac65208479acb6eba51b1de5fe9004505ed2068881faebfb7c1334
|
data/README.md
CHANGED
|
@@ -1,29 +1,44 @@
|
|
|
1
1
|
<p align="center">
|
|
2
|
-
<img src="https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/logo.png" alt="CompletionKit
|
|
2
|
+
<img src="https://raw.githubusercontent.com/homemade-software-inc/completion-kit/main/docs/logo.png" alt="CompletionKit" width="360" />
|
|
3
3
|
</p>
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-

|
|
10
|
-
[](https://github.com/homemade-software-inc/completion-kit/network/updates)
|
|
11
|
-
|
|
12
|
-
You need to know whether your prompts produce the output you expect, consistently, across real data. CompletionKit gives you that, inside your Rails app.
|
|
5
|
+
<p align="center">
|
|
6
|
+
<a href="https://github.com/homemade-software-inc/completion-kit/actions/workflows/ci.yml"><img src="https://github.com/homemade-software-inc/completion-kit/actions/workflows/ci.yml/badge.svg" alt="CI" /></a>
|
|
7
|
+
<img src="https://img.shields.io/badge/coverage-100%25-brightgreen" alt="coverage" />
|
|
8
|
+
</p>
|
|
13
9
|
|
|
14
|
-
|
|
10
|
+
Your prompts need tests too.
|
|
15
11
|
|
|
16
|
-
|
|
12
|
+
Run every prompt against real data. Score each output with an LLM judge against criteria you define. Change anything: the prompt, the model, the temperature, the dataset. Re-run and see exactly what got better and what broke. When the scores tell you something's off, CompletionKit suggests an improved prompt based on the judge's actual feedback on your runs. You inspect the diff, apply it as a new version, and verify the improvement.
|
|
17
13
|
|
|
18
14
|
It's the difference between "this prompt seems to work" and "this prompt scores 4.3 out of 5 across 200 inputs, up from 3.8 last version."
|
|
19
15
|
|
|
16
|
+
**[completionkit.com](https://completionkit.com)** | **[RubyGems](https://rubygems.org/gems/completion-kit)**
|
|
17
|
+
|
|
20
18
|

|
|
21
19
|
|
|
22
20
|

|
|
23
21
|
|
|
24
22
|

|
|
25
23
|
|
|
26
|
-
##
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
### Run the standalone app
|
|
27
|
+
|
|
28
|
+
The fastest way to start. No existing Rails app needed.
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
git clone https://github.com/homemade-software-inc/completion-kit.git
|
|
32
|
+
cd completion-kit/standalone
|
|
33
|
+
bundle install
|
|
34
|
+
bin/rails completion_kit:install:migrations
|
|
35
|
+
bin/rails db:migrate
|
|
36
|
+
bin/rails server
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Visit `http://localhost:3000`. Add a provider credential (Settings), create a prompt, upload a CSV dataset, and run it.
|
|
40
|
+
|
|
41
|
+
### Or mount as an engine in your existing Rails app
|
|
27
42
|
|
|
28
43
|
```ruby
|
|
29
44
|
gem "completion-kit"
|
|
@@ -34,26 +49,40 @@ bin/rails generate completion_kit:install
|
|
|
34
49
|
bin/rails db:migrate
|
|
35
50
|
```
|
|
36
51
|
|
|
37
|
-
|
|
52
|
+
The engine mounts at `/completion_kit` in your app.
|
|
38
53
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
54
|
+
## Providers
|
|
55
|
+
|
|
56
|
+
CompletionKit discovers available models from each provider's API automatically.
|
|
57
|
+
|
|
58
|
+
| Provider | Env vars | What it covers |
|
|
59
|
+
|----------|----------|----------------|
|
|
60
|
+
| **OpenAI** | `OPENAI_API_KEY` | GPT-5, GPT-4.1, GPT-4o, etc. |
|
|
61
|
+
| **Anthropic** | `ANTHROPIC_API_KEY` | Claude Opus, Sonnet, Haiku |
|
|
62
|
+
| **Ollama / local endpoint** | `OLLAMA_API_ENDPOINT` (default: `http://localhost:11434/v1`) | Any model you've `ollama pull`-ed, or any OpenAI-compatible local server (vLLM, LM Studio, llama.cpp) |
|
|
63
|
+
| **OpenRouter** | `OPENROUTER_API_KEY` | 100+ models from 30+ providers through one API key |
|
|
64
|
+
|
|
65
|
+
Set these as environment variables or configure them in the generated initializer. You can also add provider credentials through the web UI under Settings.
|
|
45
66
|
|
|
46
|
-
|
|
67
|
+
### Encryption
|
|
47
68
|
|
|
48
|
-
|
|
69
|
+
Provider API keys are encrypted at rest using [Active Record encryption](https://guides.rubyonrails.org/active_record_encryption.html). You need three encryption keys configured before the app will boot in production.
|
|
49
70
|
|
|
50
|
-
|
|
71
|
+
Generate them:
|
|
51
72
|
|
|
52
73
|
```bash
|
|
53
74
|
bin/rails db:encryption:init
|
|
54
75
|
```
|
|
55
76
|
|
|
56
|
-
|
|
77
|
+
Then set them as environment variables:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
COMPLETION_KIT_ENCRYPTION_PRIMARY_KEY=<generated value>
|
|
81
|
+
COMPLETION_KIT_ENCRYPTION_DETERMINISTIC_KEY=<generated value>
|
|
82
|
+
COMPLETION_KIT_ENCRYPTION_KEY_DERIVATION_SALT=<generated value>
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Or add them to `config/credentials.yml.enc` under `active_record_encryption`. In development, the standalone app uses built-in fallback values so you can skip this step locally.
|
|
57
86
|
|
|
58
87
|
## Authentication
|
|
59
88
|
|
|
@@ -62,7 +91,6 @@ CompletionKit requires authentication in production. In development, routes are
|
|
|
62
91
|
### Basic Auth (recommended for simple setups)
|
|
63
92
|
|
|
64
93
|
```ruby
|
|
65
|
-
# config/initializers/completion_kit.rb
|
|
66
94
|
CompletionKit.configure do |c|
|
|
67
95
|
c.username = "admin"
|
|
68
96
|
c.password = ENV["COMPLETION_KIT_PASSWORD"]
|
|
@@ -72,58 +100,49 @@ end
|
|
|
72
100
|
### Custom Auth (Devise, etc.)
|
|
73
101
|
|
|
74
102
|
```ruby
|
|
75
|
-
# config/initializers/completion_kit.rb
|
|
76
103
|
CompletionKit.configure do |c|
|
|
77
104
|
c.auth_strategy = ->(controller) { controller.authenticate_user! }
|
|
78
105
|
end
|
|
79
106
|
```
|
|
80
107
|
|
|
81
|
-
Only one mode can be active
|
|
108
|
+
Only one mode can be active.
|
|
109
|
+
|
|
110
|
+
## How it works
|
|
111
|
+
|
|
112
|
+
1. **Create a prompt** with `{{variable}}` placeholders.
|
|
113
|
+
2. **Upload a dataset.** A CSV where column headers match the variable names.
|
|
114
|
+
3. **Run it** against a model and score outputs with an LLM judge against criteria you define.
|
|
115
|
+
4. **Iterate.** Change the prompt, the model, the temperature, or the dataset and re-run. CompletionKit versions your prompts so you can always compare against previous results.
|
|
116
|
+
5. **Get suggestions.** When scores drop, ask CompletionKit for an AI-generated improvement. The suggestion is based on the judge's actual per-response feedback, not generic prompt-engineering advice. Inspect the diff and apply it as a new version.
|
|
82
117
|
|
|
83
|
-
##
|
|
118
|
+
## Concepts
|
|
84
119
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
120
|
+
- **Prompt.** A versioned template with `{{variable}}` placeholders. Publishing freezes the template; editing a published prompt creates a new version.
|
|
121
|
+
- **Dataset.** A CSV of real inputs. Each row becomes one test case.
|
|
122
|
+
- **Run.** One execution of a prompt against a dataset. Captures every input (model, temperature, metrics) and stores all outputs and scores.
|
|
123
|
+
- **Response.** The model's output for one dataset row, with reviews attached.
|
|
124
|
+
- **Metric.** An evaluation dimension with a name, an instruction, and a 1-5 star scoring scale. The LLM judge uses this to score each response.
|
|
125
|
+
- **Metric Group.** A reusable group of metrics you can apply to a run as a set.
|
|
126
|
+
- **Provider Credential.** An API key for a model provider. Encrypted at rest, never returned through the API.
|
|
88
127
|
|
|
89
|
-
##
|
|
128
|
+
## REST API
|
|
90
129
|
|
|
91
|
-
|
|
130
|
+
Every resource is accessible via a bearer-token JSON API:
|
|
92
131
|
|
|
93
132
|
```ruby
|
|
94
|
-
# config/initializers/completion_kit.rb
|
|
95
133
|
CompletionKit.configure { |c| c.api_token = ENV["COMPLETION_KIT_API_TOKEN"] }
|
|
96
134
|
```
|
|
97
135
|
|
|
98
|
-
### Concepts
|
|
99
|
-
|
|
100
|
-
These are the objects you'll work with, whether through the UI, the REST API, or the MCP server:
|
|
101
|
-
|
|
102
|
-
- **Prompt** — A named, versioned template with `{{variable}}` placeholders. Publishing a prompt freezes its template so runs always reference a known version; editing a published prompt creates a new version.
|
|
103
|
-
- **Dataset** — A CSV of real inputs. Column headers match the prompt's `{{variable}}` names, and each row becomes one test case.
|
|
104
|
-
- **Run** — A single execution of a prompt against a dataset. Tracks progress, stores outputs, and records which metrics were used for scoring.
|
|
105
|
-
- **Response** — The model's output for one row of the dataset, with any reviews attached.
|
|
106
|
-
- **Metric** — One evaluation dimension: a name, an instruction, evaluation steps, and 1–5-star rubric bands. The judge uses a metric to score a response.
|
|
107
|
-
- **Criteria** — A named, reusable bundle of metrics you can apply to a run in one step.
|
|
108
|
-
- **Provider Credential** — An API key for a model provider (OpenAI, Anthropic, Ollama, OpenRouter). Encrypted at rest using Rails' Active Record encryption, and never returned through the API.
|
|
109
|
-
|
|
110
|
-
### REST API
|
|
111
|
-
|
|
112
136
|
```bash
|
|
113
137
|
curl -H "Authorization: Bearer $TOKEN" \
|
|
114
138
|
http://localhost:3000/completion_kit/api/v1/prompts
|
|
115
|
-
|
|
116
|
-
curl -X POST http://localhost:3000/completion_kit/api/v1/prompts \
|
|
117
|
-
-H "Authorization: Bearer $TOKEN" \
|
|
118
|
-
-H "Content-Type: application/json" \
|
|
119
|
-
-d '{"name":"summarizer","template":"Summarize: {{text}}","llm_model":"gpt-4.1"}'
|
|
120
139
|
```
|
|
121
140
|
|
|
122
|
-
|
|
141
|
+
Visit `/completion_kit/api_reference` in your running app for per-endpoint docs with copy-to-clipboard curl examples.
|
|
123
142
|
|
|
124
|
-
|
|
143
|
+
## MCP server
|
|
125
144
|
|
|
126
|
-
CompletionKit
|
|
145
|
+
CompletionKit runs a [Model Context Protocol](https://modelcontextprotocol.io) server at `/completion_kit/mcp`, exposing every resource as tools that MCP-aware clients (Claude Code, Cursor, etc.) can drive directly:
|
|
127
146
|
|
|
128
147
|
```json
|
|
129
148
|
{
|
|
@@ -136,40 +155,21 @@ CompletionKit also runs a [Model Context Protocol](https://modelcontextprotocol.
|
|
|
136
155
|
}
|
|
137
156
|
```
|
|
138
157
|
|
|
139
|
-
The in-app API reference page
|
|
140
|
-
|
|
141
|
-
## Standalone App
|
|
142
|
-
|
|
143
|
-
CompletionKit ships with a standalone Rails app you can deploy as a hosted service.
|
|
144
|
-
|
|
145
|
-
### Quick Start
|
|
146
|
-
|
|
147
|
-
```bash
|
|
148
|
-
cd standalone
|
|
149
|
-
bundle install
|
|
150
|
-
bin/rails completion_kit:install:migrations
|
|
151
|
-
bin/rails db:migrate
|
|
152
|
-
bin/rails server
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
Visit `http://localhost:3000` for the home page, or `http://localhost:3000/completion_kit` for the engine UI.
|
|
158
|
+
The in-app API reference page has install snippets you can copy straight into your MCP client config.
|
|
156
159
|
|
|
157
|
-
|
|
160
|
+
## Deploying the standalone app
|
|
158
161
|
|
|
159
|
-
|
|
162
|
+
Any Rails-friendly host works (Fly, Heroku, Render, Docker, etc.). Point it at a Postgres instance via `DATABASE_URL`, set your provider env vars, and run `cd standalone && bin/rails db:migrate` on each deploy.
|
|
160
163
|
|
|
161
164
|
| Variable | Purpose | Default |
|
|
162
165
|
|----------|---------|---------|
|
|
163
|
-
| `COMPLETION_KIT_API_TOKEN` | Bearer token for REST API and MCP
|
|
166
|
+
| `COMPLETION_KIT_API_TOKEN` | Bearer token for REST API and MCP | (none, API disabled) |
|
|
164
167
|
| `COMPLETION_KIT_USERNAME` | Web UI login username | `admin` |
|
|
165
|
-
| `COMPLETION_KIT_PASSWORD` | Web UI login password | (none
|
|
166
|
-
| `DATABASE_URL` | PostgreSQL connection string (production) | SQLite in dev |
|
|
168
|
+
| `COMPLETION_KIT_PASSWORD` | Web UI login password | (none, open in dev) |
|
|
167
169
|
|
|
168
|
-
|
|
170
|
+
You also need the three `COMPLETION_KIT_ENCRYPTION_*` keys from the [Encryption](#encryption) section above.
|
|
169
171
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
When the gem ships a new engine migration, install it into your standalone app locally and commit the generated file before pushing:
|
|
172
|
+
When the gem ships a new migration, install it locally and commit before pushing:
|
|
173
173
|
|
|
174
174
|
```bash
|
|
175
175
|
cd standalone
|
|
@@ -178,15 +178,26 @@ bin/rails db:migrate
|
|
|
178
178
|
git add db/migrate/ && git commit -m "install new engine migration"
|
|
179
179
|
```
|
|
180
180
|
|
|
181
|
-
|
|
181
|
+
## Multi-tenant host apps (advanced)
|
|
182
182
|
|
|
183
|
-
|
|
183
|
+
For hosts that mount CompletionKit in a multi-tenant app, two optional hooks scope engine records per tenant without forking the engine:
|
|
184
184
|
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
|
|
185
|
+
```ruby
|
|
186
|
+
CompletionKit.configure do |config|
|
|
187
|
+
config.tenant_scope = -> {
|
|
188
|
+
org = Current.organization&.id
|
|
189
|
+
org ? where(organization_id: org) : where("1=0")
|
|
190
|
+
}
|
|
191
|
+
config.tenant_scope_columns = [:organization_id]
|
|
192
|
+
end
|
|
188
193
|
```
|
|
189
194
|
|
|
195
|
+
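`tenant_scope` is evaluated with `instance_exec` inside each engine model's `default_scope`, so a bare `where(...)` in the lambda applies to that model (use `unscoped` to bypass it). The columns listed in `tenant_scope_columns` are appended to the engine's tenant-scoped uniqueness validations. Adding the tenant columns and composite unique indexes is the responsibility of your host app's migrations. Both defaults (`nil` and `[]`) are no-ops.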
|
|
196
|
+
|
|
197
|
+
## Contributing
|
|
198
|
+
|
|
199
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, testing, and pull request guidelines.
|
|
200
|
+
|
|
190
201
|
## License
|
|
191
202
|
|
|
192
203
|
[MIT](https://opensource.org/licenses/MIT)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
module Api
|
|
3
|
+
module V1
|
|
4
|
+
class MetricGroupsController < BaseController
|
|
5
|
+
before_action :set_metric_group, only: [:show, :update, :destroy]
|
|
6
|
+
|
|
7
|
+
def index
|
|
8
|
+
render json: MetricGroup.order(created_at: :desc)
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def show
|
|
12
|
+
render json: @metric_group
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def create
|
|
16
|
+
metric_group = MetricGroup.new(metric_group_params.except(:metric_ids))
|
|
17
|
+
if metric_group.save
|
|
18
|
+
metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
19
|
+
render json: metric_group.reload, status: :created
|
|
20
|
+
else
|
|
21
|
+
render json: {errors: metric_group.errors}, status: :unprocessable_entity
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def update
|
|
26
|
+
if @metric_group.update(metric_group_params.except(:metric_ids))
|
|
27
|
+
@metric_group.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
28
|
+
render json: @metric_group.reload
|
|
29
|
+
else
|
|
30
|
+
render json: {errors: @metric_group.errors}, status: :unprocessable_entity
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def destroy
|
|
35
|
+
@metric_group.destroy!
|
|
36
|
+
head :no_content
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def set_metric_group
|
|
42
|
+
@metric_group = MetricGroup.find(params[:id])
|
|
43
|
+
rescue ActiveRecord::RecordNotFound
|
|
44
|
+
not_found
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def metric_group_params
|
|
48
|
+
params.permit(:name, :description, metric_ids: [])
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -15,7 +15,7 @@ module CompletionKit
|
|
|
15
15
|
def create
|
|
16
16
|
run = Run.new(run_params.except(:metric_ids))
|
|
17
17
|
if run.save
|
|
18
|
-
|
|
18
|
+
run.replace_metrics!(params[:metric_ids])
|
|
19
19
|
render json: run.reload, status: :created
|
|
20
20
|
else
|
|
21
21
|
render json: {errors: run.errors}, status: :unprocessable_entity
|
|
@@ -24,7 +24,7 @@ module CompletionKit
|
|
|
24
24
|
|
|
25
25
|
def update
|
|
26
26
|
if @run.update(run_params.except(:metric_ids))
|
|
27
|
-
|
|
27
|
+
@run.replace_metrics!(params[:metric_ids]) if params.key?(:metric_ids)
|
|
28
28
|
render json: @run.reload
|
|
29
29
|
else
|
|
30
30
|
render json: {errors: @run.errors}, status: :unprocessable_entity
|
|
@@ -57,14 +57,6 @@ module CompletionKit
|
|
|
57
57
|
def run_params
|
|
58
58
|
params.permit(:name, :prompt_id, :dataset_id, :judge_model, :temperature, metric_ids: [])
|
|
59
59
|
end
|
|
60
|
-
|
|
61
|
-
def replace_run_metrics(run, metric_ids)
|
|
62
|
-
return unless metric_ids
|
|
63
|
-
run.run_metrics.delete_all
|
|
64
|
-
Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
|
|
65
|
-
run.run_metrics.create!(metric_id: metric_id, position: index + 1)
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
60
|
end
|
|
69
61
|
end
|
|
70
62
|
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
class MetricGroupsController < ApplicationController
|
|
3
|
+
before_action :set_metric_group, only: [:show, :edit, :update, :destroy]
|
|
4
|
+
|
|
5
|
+
def index
|
|
6
|
+
@metric_groups = MetricGroup.includes(:metrics).order(:name)
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def show
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
def new
|
|
13
|
+
@metric_group = MetricGroup.new
|
|
14
|
+
@metrics = Metric.order(:name)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def edit
|
|
18
|
+
@metrics = Metric.order(:name)
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def create
|
|
22
|
+
@metric_group = MetricGroup.new(metric_group_params.except(:metric_ids))
|
|
23
|
+
@metrics = Metric.order(:name)
|
|
24
|
+
|
|
25
|
+
if @metric_group.save
|
|
26
|
+
@metric_group.replace_metrics!(metric_group_params[:metric_ids])
|
|
27
|
+
redirect_to metric_group_path(@metric_group), notice: "Metric group was successfully created."
|
|
28
|
+
else
|
|
29
|
+
render :new, status: :unprocessable_entity
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def update
|
|
34
|
+
@metrics = Metric.order(:name)
|
|
35
|
+
|
|
36
|
+
if @metric_group.update(metric_group_params.except(:metric_ids))
|
|
37
|
+
@metric_group.replace_metrics!(metric_group_params[:metric_ids])
|
|
38
|
+
redirect_to metric_group_path(@metric_group), notice: "Metric group was successfully updated."
|
|
39
|
+
else
|
|
40
|
+
render :edit, status: :unprocessable_entity
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def destroy
|
|
45
|
+
@metric_group.destroy
|
|
46
|
+
redirect_to metric_groups_path, notice: "Metric group was successfully destroyed."
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
def set_metric_group
|
|
52
|
+
@metric_group = MetricGroup.find(params[:id])
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def metric_group_params
|
|
56
|
+
params.require(:metric_group).permit(:name, :description, metric_ids: [])
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
end
|
|
@@ -3,7 +3,7 @@ module CompletionKit
|
|
|
3
3
|
before_action :set_metric, only: [:show, :edit, :update, :destroy]
|
|
4
4
|
|
|
5
5
|
def index
|
|
6
|
-
@metrics = Metric.order(:name)
|
|
6
|
+
@metrics = Metric.includes(:metric_groups).order(:name)
|
|
7
7
|
end
|
|
8
8
|
|
|
9
9
|
def show
|
|
@@ -46,7 +46,7 @@ module CompletionKit
|
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def metric_params
|
|
49
|
-
params.require(:metric).permit(:name, :instruction,
|
|
49
|
+
params.require(:metric).permit(:name, :instruction, rubric_bands: [:stars, :description])
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
52
|
end
|
|
@@ -35,7 +35,7 @@ module CompletionKit
|
|
|
35
35
|
def create
|
|
36
36
|
@run = Run.new(run_params.except(:metric_ids))
|
|
37
37
|
if @run.save
|
|
38
|
-
|
|
38
|
+
@run.replace_metrics!(params[:run][:metric_ids])
|
|
39
39
|
redirect_to run_path(@run), notice: "Run was successfully created."
|
|
40
40
|
else
|
|
41
41
|
load_form_collections
|
|
@@ -46,10 +46,10 @@ module CompletionKit
|
|
|
46
46
|
def update
|
|
47
47
|
if @run.responses.any?
|
|
48
48
|
new_run = Run.create!(run_params.except(:metric_ids).to_h.merge(status: "pending"))
|
|
49
|
-
|
|
49
|
+
new_run.replace_metrics!(params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
|
|
50
50
|
redirect_to run_path(new_run), notice: "Saved as a new run. The previous run and its results are preserved."
|
|
51
51
|
elsif @run.update(run_params.except(:metric_ids))
|
|
52
|
-
|
|
52
|
+
@run.replace_metrics!(params[:run][:metric_ids]) if params[:run].key?(:metric_ids)
|
|
53
53
|
redirect_to run_path(@run), notice: "Run saved."
|
|
54
54
|
else
|
|
55
55
|
load_form_collections
|
|
@@ -112,7 +112,7 @@ module CompletionKit
|
|
|
112
112
|
def load_form_collections
|
|
113
113
|
@prompts = Prompt.order(:name)
|
|
114
114
|
@datasets = Dataset.order(:name)
|
|
115
|
-
@
|
|
115
|
+
@metric_groups = MetricGroup.includes(:metrics).order(:name)
|
|
116
116
|
@all_metrics = Metric.order(:name)
|
|
117
117
|
end
|
|
118
118
|
|
|
@@ -120,12 +120,5 @@ module CompletionKit
|
|
|
120
120
|
params.require(:run).permit(:name, :prompt_id, :dataset_id, :judge_model, :temperature, metric_ids: [])
|
|
121
121
|
end
|
|
122
122
|
|
|
123
|
-
def replace_run_metrics(run, metric_ids)
|
|
124
|
-
return unless metric_ids
|
|
125
|
-
run.run_metrics.delete_all
|
|
126
|
-
Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
|
|
127
|
-
run.run_metrics.create!(metric_id: metric_id, position: index + 1)
|
|
128
|
-
end
|
|
129
|
-
end
|
|
130
123
|
end
|
|
131
124
|
end
|
|
@@ -76,15 +76,8 @@ module CompletionKit
|
|
|
76
76
|
end
|
|
77
77
|
end
|
|
78
78
|
|
|
79
|
-
PROVIDER_LABELS = {
|
|
80
|
-
"openai" => "OpenAI",
|
|
81
|
-
"anthropic" => "Anthropic",
|
|
82
|
-
"ollama" => "Ollama / local endpoint",
|
|
83
|
-
"openrouter" => "OpenRouter"
|
|
84
|
-
}.freeze
|
|
85
|
-
|
|
86
79
|
def ck_provider_label(provider)
|
|
87
|
-
PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
|
|
80
|
+
CompletionKit::ProviderCredential::PROVIDER_LABELS[provider.to_s] || provider.to_s.titleize
|
|
88
81
|
end
|
|
89
82
|
|
|
90
83
|
def ck_grouped_models(models, selected = nil)
|
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
class ApplicationRecord < ActiveRecord::Base
|
|
3
3
|
self.abstract_class = true
|
|
4
|
+
|
|
5
|
+
TenantScopedUniquenessValidator = CompletionKit::TenantScopedUniquenessValidator
|
|
6
|
+
|
|
7
|
+
default_scope do
|
|
8
|
+
scope_proc = CompletionKit.config.tenant_scope
|
|
9
|
+
scope_proc ? instance_exec(&scope_proc) : all
|
|
10
|
+
end
|
|
4
11
|
end
|
|
5
12
|
end
|
|
@@ -8,15 +8,14 @@ module CompletionKit
|
|
|
8
8
|
{ "stars" => 1, "description" => "Fails to meet the criteria. Major errors or completely off-target." }
|
|
9
9
|
].freeze
|
|
10
10
|
|
|
11
|
-
has_many :
|
|
12
|
-
has_many :
|
|
11
|
+
has_many :metric_group_memberships, dependent: :destroy
|
|
12
|
+
has_many :metric_groups, through: :metric_group_memberships, source: :metric_group
|
|
13
13
|
has_many :reviews, dependent: :nullify
|
|
14
14
|
|
|
15
15
|
serialize :rubric_bands, coder: JSON
|
|
16
|
-
serialize :evaluation_steps, coder: JSON
|
|
17
16
|
|
|
18
17
|
validates :name, presence: true
|
|
19
|
-
validates :key,
|
|
18
|
+
validates :key, tenant_scoped_uniqueness: { allow_nil: true }
|
|
20
19
|
|
|
21
20
|
before_validation :generate_key
|
|
22
21
|
before_validation :normalize_rubric_bands
|
|
@@ -74,7 +73,7 @@ module CompletionKit
|
|
|
74
73
|
def as_json(options = {})
|
|
75
74
|
{
|
|
76
75
|
id: id, name: name, key: key, instruction: instruction,
|
|
77
|
-
|
|
76
|
+
rubric_bands: rubric_bands,
|
|
78
77
|
created_at: created_at, updated_at: updated_at
|
|
79
78
|
}
|
|
80
79
|
end
|
|
@@ -86,7 +85,6 @@ module CompletionKit
|
|
|
86
85
|
end
|
|
87
86
|
|
|
88
87
|
def set_defaults
|
|
89
|
-
self.evaluation_steps ||= []
|
|
90
88
|
self.rubric_bands = self.class.default_rubric_bands if rubric_bands.blank?
|
|
91
89
|
end
|
|
92
90
|
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
class MetricGroup < ApplicationRecord
|
|
3
|
+
self.table_name = "completion_kit_metric_groups"
|
|
4
|
+
|
|
5
|
+
has_many :metric_group_memberships, -> { order(:position, :id) }, dependent: :destroy
|
|
6
|
+
has_many :metrics, through: :metric_group_memberships
|
|
7
|
+
|
|
8
|
+
validates :name, presence: true
|
|
9
|
+
|
|
10
|
+
def ordered_metrics
|
|
11
|
+
metric_group_memberships.includes(:metric).map(&:metric).compact
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def replace_metrics!(metric_ids)
|
|
15
|
+
return unless metric_ids
|
|
16
|
+
metric_group_memberships.delete_all
|
|
17
|
+
Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
|
|
18
|
+
metric_group_memberships.create!(metric_id: metric_id, position: index + 1)
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def as_json(options = {})
|
|
23
|
+
{
|
|
24
|
+
id: id, name: name, description: description,
|
|
25
|
+
created_at: created_at, updated_at: updated_at,
|
|
26
|
+
metric_ids: metric_ids
|
|
27
|
+
}
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
class MetricGroupMembership < ApplicationRecord
|
|
3
|
+
self.table_name = "completion_kit_metric_group_memberships"
|
|
4
|
+
|
|
5
|
+
belongs_to :metric_group, class_name: "CompletionKit::MetricGroup", foreign_key: "metric_group_id"
|
|
6
|
+
belongs_to :metric
|
|
7
|
+
|
|
8
|
+
validates :metric_id, uniqueness: { scope: :metric_group_id }
|
|
9
|
+
|
|
10
|
+
before_validation :set_default_position
|
|
11
|
+
|
|
12
|
+
private
|
|
13
|
+
|
|
14
|
+
def set_default_position
|
|
15
|
+
return if position.present? || metric_group.blank?
|
|
16
|
+
|
|
17
|
+
self.position = metric_group.metric_group_memberships.maximum(:position).to_i + 1
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -3,7 +3,7 @@ module CompletionKit
|
|
|
3
3
|
STATUSES = %w[active retired failed].freeze
|
|
4
4
|
|
|
5
5
|
validates :provider, presence: true
|
|
6
|
-
validates :model_id, presence: true,
|
|
6
|
+
validates :model_id, presence: true, tenant_scoped_uniqueness: { scope: :provider }
|
|
7
7
|
validates :status, presence: true, inclusion: { in: STATUSES }
|
|
8
8
|
|
|
9
9
|
scope :active, -> { where(status: "active") }
|
|
@@ -22,7 +22,8 @@ module CompletionKit
|
|
|
22
22
|
PROVIDER_LABELS[provider] || provider.titleize
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
validates :provider, presence: true, inclusion: { in: PROVIDERS }
|
|
25
|
+
validates :provider, presence: true, inclusion: { in: PROVIDERS }
|
|
26
|
+
validates :provider, tenant_scoped_uniqueness: true
|
|
26
27
|
|
|
27
28
|
after_save :enqueue_discovery
|
|
28
29
|
|