completion-kit 0.1.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +192 -0
- data/Rakefile +12 -0
- data/app/assets/config/completion_kit_manifest.js +1 -0
- data/app/assets/config/manifest.js +3 -0
- data/app/assets/images/completion_kit/logo.svg +6 -0
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
- data/app/assets/stylesheets/completion_kit/application.css +2214 -0
- data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
- data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
- data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
- data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
- data/app/controllers/completion_kit/application_controller.rb +31 -0
- data/app/controllers/completion_kit/criteria_controller.rb +67 -0
- data/app/controllers/completion_kit/datasets_controller.rb +53 -0
- data/app/controllers/completion_kit/mcp_controller.rb +57 -0
- data/app/controllers/completion_kit/metrics_controller.rb +52 -0
- data/app/controllers/completion_kit/prompts_controller.rb +69 -0
- data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
- data/app/controllers/completion_kit/responses_controller.rb +44 -0
- data/app/controllers/completion_kit/runs_controller.rb +131 -0
- data/app/helpers/completion_kit/application_helper.rb +193 -0
- data/app/jobs/completion_kit/application_job.rb +4 -0
- data/app/jobs/completion_kit/generate_job.rb +12 -0
- data/app/jobs/completion_kit/judge_job.rb +12 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
- data/app/mailers/completion_kit/application_mailer.rb +6 -0
- data/app/models/completion_kit/application_record.rb +5 -0
- data/app/models/completion_kit/criteria.rb +22 -0
- data/app/models/completion_kit/criteria_membership.rb +20 -0
- data/app/models/completion_kit/dataset.rb +24 -0
- data/app/models/completion_kit/metric.rb +97 -0
- data/app/models/completion_kit/model.rb +13 -0
- data/app/models/completion_kit/prompt.rb +99 -0
- data/app/models/completion_kit/provider_credential.rb +114 -0
- data/app/models/completion_kit/response.rb +30 -0
- data/app/models/completion_kit/review.rb +28 -0
- data/app/models/completion_kit/run.rb +253 -0
- data/app/models/completion_kit/run_metric.rb +6 -0
- data/app/models/completion_kit/suggestion.rb +8 -0
- data/app/services/completion_kit/anthropic_client.rb +86 -0
- data/app/services/completion_kit/api_config.rb +80 -0
- data/app/services/completion_kit/csv_processor.rb +65 -0
- data/app/services/completion_kit/judge_service.rb +87 -0
- data/app/services/completion_kit/llm_client.rb +45 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
- data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
- data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
- data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
- data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
- data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
- data/app/services/completion_kit/model_discovery_service.rb +223 -0
- data/app/services/completion_kit/ollama_client.rb +80 -0
- data/app/services/completion_kit/open_ai_client.rb +71 -0
- data/app/services/completion_kit/open_router_client.rb +69 -0
- data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
- data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
- data/app/views/completion_kit/api_reference/index.html.erb +308 -0
- data/app/views/completion_kit/criteria/_form.html.erb +46 -0
- data/app/views/completion_kit/criteria/edit.html.erb +14 -0
- data/app/views/completion_kit/criteria/index.html.erb +37 -0
- data/app/views/completion_kit/criteria/new.html.erb +13 -0
- data/app/views/completion_kit/criteria/show.html.erb +37 -0
- data/app/views/completion_kit/datasets/_form.html.erb +29 -0
- data/app/views/completion_kit/datasets/edit.html.erb +13 -0
- data/app/views/completion_kit/datasets/index.html.erb +38 -0
- data/app/views/completion_kit/datasets/new.html.erb +12 -0
- data/app/views/completion_kit/datasets/show.html.erb +45 -0
- data/app/views/completion_kit/metrics/_form.html.erb +72 -0
- data/app/views/completion_kit/metrics/edit.html.erb +13 -0
- data/app/views/completion_kit/metrics/index.html.erb +34 -0
- data/app/views/completion_kit/metrics/new.html.erb +12 -0
- data/app/views/completion_kit/metrics/show.html.erb +49 -0
- data/app/views/completion_kit/prompts/_form.html.erb +52 -0
- data/app/views/completion_kit/prompts/edit.html.erb +13 -0
- data/app/views/completion_kit/prompts/index.html.erb +46 -0
- data/app/views/completion_kit/prompts/new.html.erb +12 -0
- data/app/views/completion_kit/prompts/show.html.erb +156 -0
- data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
- data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
- data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
- data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
- data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
- data/app/views/completion_kit/responses/show.html.erb +87 -0
- data/app/views/completion_kit/runs/_actions.html.erb +14 -0
- data/app/views/completion_kit/runs/_form.html.erb +159 -0
- data/app/views/completion_kit/runs/_progress.html.erb +18 -0
- data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
- data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
- data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
- data/app/views/completion_kit/runs/edit.html.erb +14 -0
- data/app/views/completion_kit/runs/index.html.erb +43 -0
- data/app/views/completion_kit/runs/new.html.erb +12 -0
- data/app/views/completion_kit/runs/show.html.erb +79 -0
- data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
- data/app/views/layouts/completion_kit/application.html.erb +77 -0
- data/config/routes.rb +55 -0
- data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
- data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
- data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
- data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
- data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
- data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
- data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
- data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
- data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
- data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
- data/lib/completion-kit.rb +1 -0
- data/lib/completion_kit/engine.rb +35 -0
- data/lib/completion_kit/version.rb +3 -0
- data/lib/completion_kit.rb +55 -0
- data/lib/generators/completion_kit/install_generator.rb +21 -0
- data/lib/generators/completion_kit/templates/README +20 -0
- data/lib/generators/completion_kit/templates/initializer.rb +43 -0
- metadata +361 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
module CompletionKit
  module McpTools
    # MCP tool catalogue and handlers for metrics.
    #
    # TOOLS maps every tool name to its human-readable description, the JSON
    # schema of its arguments, and the module method that serves it. Handlers
    # receive the string-keyed arguments hash and answer with an MCP content
    # hash built by text_result / error_result.
    module Metrics
      TOOLS = {
        "metrics_list" => {
          description: "List all metrics",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "metrics_get" => {
          description: "Get a metric by ID",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer"}},
            required: ["id"]
          },
          handler: :get
        },
        "metrics_create" => {
          description: "Create a metric with evaluation criteria",
          inputSchema: {
            type: "object",
            properties: {
              name: {type: "string"},
              instruction: {type: "string"},
              evaluation_steps: {type: "array", items: {type: "string"}},
              rubric_bands: {
                type: "array",
                items: {
                  type: "object",
                  properties: {stars: {type: "integer"}, description: {type: "string"}}
                }
              }
            },
            required: ["name"]
          },
          handler: :create
        },
        "metrics_update" => {
          description: "Update a metric",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"},
              name: {type: "string"},
              instruction: {type: "string"},
              evaluation_steps: {type: "array", items: {type: "string"}},
              rubric_bands: {
                type: "array",
                items: {
                  type: "object",
                  properties: {stars: {type: "integer"}, description: {type: "string"}}
                }
              }
            },
            required: ["id"]
          },
          handler: :update
        },
        "metrics_delete" => {
          description: "Delete a metric",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer"}},
            required: ["id"]
          },
          handler: :delete
        }
      }.freeze

      # Public tool descriptors (name, description, inputSchema), in TOOLS
      # order. The internal :handler entry is deliberately left out.
      def self.definitions
        TOOLS.map do |tool_name, spec|
          {name: tool_name, description: spec[:description], inputSchema: spec[:inputSchema]}
        end
      end

      # Runs the handler registered for +name+ with +arguments+.
      # An unregistered name raises KeyError (Hash#fetch).
      def self.call(name, arguments)
        handler = TOOLS.fetch(name)[:handler]
        public_send(handler, arguments)
      end

      # Every metric, ordered by created_at descending, serialized with as_json.
      def self.list(_args)
        metrics = Metric.order(created_at: :desc)
        text_result(metrics.map { |metric| metric.as_json })
      end

      # The metric identified by args["id"]; whatever Metric.find raises for a
      # missing id propagates to the caller.
      def self.get(args)
        metric = Metric.find(args["id"])
        text_result(metric.as_json)
      end

      # Builds a metric from the whitelisted keys of +args+ and saves it.
      # Returns the saved metric, or an error result carrying the validation
      # messages joined with ", ".
      def self.create(args)
        attributes = args.slice("name", "instruction", "evaluation_steps", "rubric_bands")
        metric = Metric.new(attributes)
        return text_result(metric.as_json) if metric.save

        error_result(metric.errors.full_messages.join(", "))
      end

      # Applies the whitelisted keys of +args+ to the metric args["id"].
      # "id" is not in the whitelist, so it can never be reassigned.
      def self.update(args)
        metric = Metric.find(args["id"])
        changes = args.slice("name", "instruction", "evaluation_steps", "rubric_bands")
        return text_result(metric.as_json) if metric.update(changes)

        error_result(metric.errors.full_messages.join(", "))
      end

      # Destroys the metric args["id"] (destroy! raises on failure) and
      # returns a plain-text confirmation.
      def self.delete(args)
        metric_id = args["id"]
        Metric.find(metric_id).destroy!
        text_result("Metric #{metric_id} deleted")
      end

      # Wraps +data+ in an MCP text content hash. Strings pass through
      # untouched; anything else is rendered with to_json.
      def self.text_result(data)
        body =
          case data
          when String then data
          else data.to_json
          end
        {content: [{type: "text", text: body}]}
      end

      # MCP content hash flagged with isError: true.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end
    end
  end
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
module CompletionKit
  module McpTools
    # MCP tool catalogue and handlers for prompts.
    #
    # TOOLS maps every tool name to its description, argument schema and the
    # module method that serves it. Handlers take the string-keyed arguments
    # hash and answer with an MCP content hash.
    module Prompts
      TOOLS = {
        "prompts_list" => {
          description: "List all prompts",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "prompts_get" => {
          description: "Get a prompt by ID",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer", description: "Prompt ID"}},
            required: ["id"]
          },
          handler: :get
        },
        "prompts_create" => {
          description: "Create a prompt",
          inputSchema: {
            type: "object",
            properties: {
              name: {type: "string"},
              description: {type: "string"},
              template: {type: "string"},
              llm_model: {type: "string"}
            },
            required: ["name", "template", "llm_model"]
          },
          handler: :create
        },
        "prompts_update" => {
          description: "Update a prompt",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"},
              name: {type: "string"},
              description: {type: "string"},
              template: {type: "string"},
              llm_model: {type: "string"}
            },
            required: ["id"]
          },
          handler: :update
        },
        "prompts_delete" => {
          description: "Delete a prompt",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer"}},
            required: ["id"]
          },
          handler: :delete
        },
        "prompts_publish" => {
          description: "Publish a prompt version, making it the current version",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer"}},
            required: ["id"]
          },
          handler: :publish
        }
      }.freeze

      # Public tool descriptors (name, description, inputSchema), in TOOLS
      # order; the internal :handler entry is omitted.
      def self.definitions
        TOOLS.map do |tool_name, spec|
          {name: tool_name, description: spec[:description], inputSchema: spec[:inputSchema]}
        end
      end

      # Runs the handler registered for +name+ with +arguments+.
      # An unregistered name raises KeyError (Hash#fetch).
      def self.call(name, arguments)
        handler = TOOLS.fetch(name)[:handler]
        public_send(handler, arguments)
      end

      # Every prompt, ordered by created_at descending, serialized with as_json.
      def self.list(_args)
        prompts = Prompt.order(created_at: :desc)
        text_result(prompts.map { |prompt| prompt.as_json })
      end

      # The prompt identified by args["id"]; lookup failures propagate.
      def self.get(args)
        prompt = Prompt.find(args["id"])
        text_result(prompt.as_json)
      end

      # Builds a prompt from the whitelisted keys of +args+ and saves it.
      # Returns the saved prompt, or an error result carrying the validation
      # messages joined with ", ".
      def self.create(args)
        attributes = args.slice("name", "description", "template", "llm_model")
        prompt = Prompt.new(attributes)
        return text_result(prompt.as_json) if prompt.save

        error_result(prompt.errors.full_messages.join(", "))
      end

      # Changes the prompt args["id"] using the whitelisted keys of +args+.
      #
      # A prompt that already has runs is not modified in place: the changes
      # go into a copy made by clone_as_new_version, that copy is published,
      # and the copy is what gets returned. A prompt without runs is updated
      # directly.
      def self.update(args)
        prompt = Prompt.find(args["id"])
        changes = args.slice("name", "description", "template", "llm_model")

        if prompt.runs.exists?
          successor = prompt.clone_as_new_version(changes)
          successor.publish!
          return text_result(successor.as_json)
        end

        return text_result(prompt.as_json) if prompt.update(changes)

        error_result(prompt.errors.full_messages.join(", "))
      end

      # Destroys the prompt args["id"] (destroy! raises on failure) and
      # returns a plain-text confirmation.
      def self.delete(args)
        prompt_id = args["id"]
        Prompt.find(prompt_id).destroy!
        text_result("Prompt #{prompt_id} deleted")
      end

      # Publishes the prompt args["id"] and returns it as it looks after a
      # reload.
      def self.publish(args)
        prompt = Prompt.find(args["id"]).tap(&:publish!)
        text_result(prompt.reload.as_json)
      end

      # Wraps +data+ in an MCP text content hash. Strings pass through
      # untouched; anything else is rendered with to_json.
      def self.text_result(data)
        body =
          case data
          when String then data
          else data.to_json
          end
        {content: [{type: "text", text: body}]}
      end

      # MCP content hash flagged with isError: true.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end
    end
  end
end
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
module CompletionKit
  module McpTools
    # MCP tool catalogue and handlers for provider credentials.
    #
    # TOOLS maps every tool name to its description, argument schema and the
    # module method that serves it. Handlers take the string-keyed arguments
    # hash and answer with an MCP content hash.
    #
    # The tool descriptions promise that API keys are never returned, so every
    # credential leaving this module goes through +serialize+, which removes
    # the api_key entry no matter what the model's as_json contains.
    module ProviderCredentials
      # Provider identifiers advertised in the create and update schemas.
      # NOTE(review): assumed to match the providers the ProviderCredential
      # model accepts — confirm against its validation.
      PROVIDERS = %w[openai anthropic ollama openrouter].freeze

      TOOLS = {
        "provider_credentials_list" => {
          description: "List all provider credentials (API keys are not exposed)",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "provider_credentials_get" => {
          description: "Get a provider credential by ID (API key is not exposed)",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer"}},
            required: ["id"]
          },
          handler: :get
        },
        "provider_credentials_create" => {
          description: "Create a provider credential",
          inputSchema: {
            type: "object",
            properties: {
              provider: {type: "string", enum: PROVIDERS},
              api_key: {type: "string"},
              api_endpoint: {type: "string"}
            },
            required: ["provider", "api_key"]
          },
          handler: :create
        },
        "provider_credentials_update" => {
          description: "Update a provider credential",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"},
              # Same enum as create, so both tools advertise one provider set.
              provider: {type: "string", enum: PROVIDERS},
              api_key: {type: "string"},
              api_endpoint: {type: "string"}
            },
            required: ["id"]
          },
          handler: :update
        },
        "provider_credentials_delete" => {
          description: "Delete a provider credential",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer"}},
            required: ["id"]
          },
          handler: :delete
        }
      }.freeze

      # Public tool descriptors (name, description, inputSchema), in TOOLS
      # order; the internal :handler entry is omitted.
      def self.definitions
        TOOLS.map do |tool_name, spec|
          {name: tool_name, description: spec[:description], inputSchema: spec[:inputSchema]}
        end
      end

      # Runs the handler registered for +name+ with +arguments+.
      # An unregistered name raises KeyError (Hash#fetch).
      def self.call(name, arguments)
        tool = TOOLS.fetch(name)
        send(tool[:handler], arguments)
      end

      # Every credential, ordered by created_at descending, without API keys.
      def self.list(_args)
        credentials = ProviderCredential.order(created_at: :desc)
        text_result(credentials.map { |credential| serialize(credential) })
      end

      # The credential identified by args["id"], without its API key.
      # Lookup failures propagate to the caller.
      def self.get(args)
        text_result(serialize(ProviderCredential.find(args["id"])))
      end

      # Builds a credential from the whitelisted keys of +args+ and saves it.
      # Returns the saved credential (API key removed), or an error result
      # carrying the validation messages joined with ", ".
      def self.create(args)
        credential = ProviderCredential.new(args.slice("provider", "api_key", "api_endpoint"))
        return text_result(serialize(credential)) if credential.save

        error_result(credential.errors.full_messages.join(", "))
      end

      # Applies the whitelisted keys of +args+ to the credential args["id"].
      # "id" is not in the whitelist, so it can never be reassigned.
      def self.update(args)
        credential = ProviderCredential.find(args["id"])
        changes = args.slice("provider", "api_key", "api_endpoint")
        return text_result(serialize(credential)) if credential.update(changes)

        error_result(credential.errors.full_messages.join(", "))
      end

      # Destroys the credential args["id"] (destroy! raises on failure) and
      # returns a plain-text confirmation.
      def self.delete(args)
        ProviderCredential.find(args["id"]).destroy!
        text_result("Provider credential #{args["id"]} deleted")
      end

      # Wraps +data+ in an MCP text content hash. Strings pass through
      # untouched; anything else is rendered with to_json.
      def self.text_result(data)
        text = data.is_a?(String) ? data : data.to_json
        {content: [{type: "text", text: text}]}
      end

      # MCP content hash flagged with isError: true.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end

      # as_json representation of +credential+ with the api_key entry removed
      # (string or symbol key). Non-hash representations are returned as-is.
      def self.serialize(credential)
        payload = credential.as_json
        return payload unless payload.is_a?(Hash)

        payload.reject { |key, _value| key.to_s == "api_key" }
      end
      private_class_method :serialize
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
module CompletionKit
  module McpTools
    # Read-only MCP tools for the responses that belong to a run.
    #
    # TOOLS maps every tool name to its description, argument schema and the
    # module method that serves it. Both handlers look the run up first and
    # then read through its +responses+ association.
    module Responses
      TOOLS = {
        "responses_list" => {
          description: "List responses for a run",
          inputSchema: {
            type: "object",
            properties: {run_id: {type: "integer"}},
            required: ["run_id"]
          },
          handler: :list
        },
        "responses_get" => {
          description: "Get a specific response",
          inputSchema: {
            type: "object",
            properties: {run_id: {type: "integer"}, id: {type: "integer"}},
            required: ["run_id", "id"]
          },
          handler: :get
        }
      }.freeze

      # Public tool descriptors (name, description, inputSchema), in TOOLS
      # order; the internal :handler entry is omitted.
      def self.definitions
        TOOLS.map do |tool_name, spec|
          {name: tool_name, description: spec[:description], inputSchema: spec[:inputSchema]}
        end
      end

      # Runs the handler registered for +name+ with +arguments+.
      # An unregistered name raises KeyError (Hash#fetch).
      def self.call(name, arguments)
        handler = TOOLS.fetch(name)[:handler]
        public_send(handler, arguments)
      end

      # All responses of the run args["run_id"], loaded with includes(:reviews)
      # and serialized with as_json.
      def self.list(args)
        responses = Run.find(args["run_id"]).responses.includes(:reviews)
        text_result(responses.map { |response| response.as_json })
      end

      # The response args["id"], looked up through the run args["run_id"]
      # rather than globally.
      def self.get(args)
        owner = Run.find(args["run_id"])
        response = owner.responses.find(args["id"])
        text_result(response.as_json)
      end

      # Wraps the to_json rendering of +data+ in an MCP text content hash.
      def self.text_result(data)
        body = data.to_json
        {content: [{type: "text", text: body}]}
      end
    end
  end
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
module CompletionKit
  module McpTools
    # MCP tool catalogue and handlers for runs.
    #
    # TOOLS maps every tool name to its description, argument schema and the
    # module method that serves it. Handlers take the string-keyed arguments
    # hash and answer with an MCP content hash.
    #
    # Writes that touch both the run row and its run_metrics rows happen
    # inside a single Run.transaction, so a failure while rebuilding the
    # metric list cannot leave a half-written run behind.
    module Runs
      TOOLS = {
        "runs_list" => {
          description: "List all runs",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "runs_get" => {
          description: "Get a run by ID",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :get
        },
        "runs_create" => {
          description: "Create a run",
          inputSchema: {
            type: "object",
            properties: {
              name: {type: "string"}, prompt_id: {type: "integer"},
              dataset_id: {type: "integer"}, judge_model: {type: "string"},
              metric_ids: {type: "array", items: {type: "integer"}}
            },
            required: ["name", "prompt_id"]
          },
          handler: :create
        },
        "runs_update" => {
          description: "Update a run",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"}, name: {type: "string"},
              dataset_id: {type: "integer"}, judge_model: {type: "string"},
              metric_ids: {type: "array", items: {type: "integer"}}
            },
            required: ["id"]
          },
          handler: :update
        },
        "runs_delete" => {
          description: "Delete a run",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :delete
        },
        "runs_generate" => {
          description: "Generate responses for a run using its prompt and dataset",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :generate
        },
        "runs_judge" => {
          description: "Judge responses for a run using configured metrics",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :judge
        }
      }.freeze

      # Public tool descriptors (name, description, inputSchema), in TOOLS
      # order; the internal :handler entry is omitted.
      def self.definitions
        TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
      end

      # Runs the handler registered for +name+ with +arguments+.
      # An unregistered name raises KeyError (Hash#fetch).
      def self.call(name, arguments)
        tool = TOOLS.fetch(name)
        send(tool[:handler], arguments)
      end

      # Every run, ordered by created_at descending, serialized with as_json.
      def self.list(_args)
        text_result(Run.order(created_at: :desc).map(&:as_json))
      end

      # The run identified by args["id"]; lookup failures propagate.
      def self.get(args)
        text_result(Run.find(args["id"]).as_json)
      end

      # Creates a run from the whitelisted keys of +args+ and, when
      # args["metric_ids"] is given, attaches those metrics in order.
      #
      # Saving the run and attaching its metrics share one transaction: if
      # attaching raises, the new run is rolled back and the exception
      # propagates unchanged. Validation failures of the run itself come back
      # as an error result.
      def self.create(args)
        run = Run.new(args.slice("name", "prompt_id", "dataset_id", "judge_model"))
        saved = Run.transaction do
          next false unless run.save

          replace_run_metrics(run, args["metric_ids"])
          true
        end
        return error_result(run.errors.full_messages.join(", ")) unless saved

        text_result(run.reload.as_json)
      end

      # Updates the run args["id"] with the whitelisted keys of +args+
      # ("prompt_id" is not updatable) and, when the "metric_ids" key is
      # present, replaces its metric list.
      #
      # Both steps share one transaction, so a failure while replacing the
      # metrics also rolls back the attribute changes and the exception
      # propagates unchanged.
      def self.update(args)
        run = Run.find(args["id"])
        updated = Run.transaction do
          next false unless run.update(args.slice("name", "dataset_id", "judge_model"))

          replace_run_metrics(run, args["metric_ids"]) if args.key?("metric_ids")
          true
        end
        return error_result(run.errors.full_messages.join(", ")) unless updated

        text_result(run.reload.as_json)
      end

      # Destroys the run args["id"] (destroy! raises on failure) and returns
      # a plain-text confirmation.
      def self.delete(args)
        Run.find(args["id"]).destroy!
        text_result("Run #{args["id"]} deleted")
      end

      # Enqueues GenerateJob for the run args["id"] and returns the reloaded
      # run. The job is only queued here (perform_later).
      def self.generate(args)
        run = Run.find(args["id"])
        GenerateJob.perform_later(run.id)
        text_result(run.reload.as_json)
      end

      # Enqueues JudgeJob for the run args["id"] and returns the reloaded
      # run. The job is only queued here (perform_later).
      def self.judge(args)
        run = Run.find(args["id"])
        JudgeJob.perform_later(run.id)
        text_result(run.reload.as_json)
      end

      # Wraps +data+ in an MCP text content hash. Strings pass through
      # untouched; anything else is rendered with to_json.
      def self.text_result(data)
        text = data.is_a?(String) ? data : data.to_json
        {content: [{type: "text", text: text}]}
      end

      # MCP content hash flagged with isError: true.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end

      # Replaces the run's run_metrics with one row per entry of +metric_ids+,
      # skipping blank entries and numbering positions from 1 in the given
      # order. Does nothing when +metric_ids+ is nil or false.
      #
      # The delete and the inserts run in a transaction so that a failing
      # create! restores the previous metric list instead of leaving the run
      # with none (or only some) of its metrics.
      def self.replace_run_metrics(run, metric_ids)
        return unless metric_ids

        Run.transaction do
          run.run_metrics.delete_all
          Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
            run.run_metrics.create!(metric_id: metric_id, position: index + 1)
          end
        end
      end
    end
  end
end
|