completion-kit 0.1.0.rc1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +97 -86
- data/app/controllers/completion_kit/api/v1/metric_groups_controller.rb +53 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +1 -1
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +2 -10
- data/app/controllers/completion_kit/metric_groups_controller.rb +59 -0
- data/app/controllers/completion_kit/metrics_controller.rb +2 -2
- data/app/controllers/completion_kit/runs_controller.rb +4 -11
- data/app/helpers/completion_kit/application_helper.rb +1 -8
- data/app/models/completion_kit/application_record.rb +7 -0
- data/app/models/completion_kit/metric.rb +4 -6
- data/app/models/completion_kit/metric_group.rb +30 -0
- data/app/models/completion_kit/metric_group_membership.rb +20 -0
- data/app/models/completion_kit/model.rb +1 -1
- data/app/models/completion_kit/provider_credential.rb +2 -1
- data/app/models/completion_kit/run.rb +11 -4
- data/app/services/completion_kit/anthropic_client.rb +4 -17
- data/app/services/completion_kit/judge_service.rb +3 -7
- data/app/services/completion_kit/llm_client.rb +15 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +2 -2
- data/app/services/completion_kit/mcp_tools/base.rb +23 -0
- data/app/services/completion_kit/mcp_tools/datasets.rb +2 -18
- data/app/services/completion_kit/mcp_tools/metric_groups.rb +82 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +4 -22
- data/app/services/completion_kit/mcp_tools/prompts.rb +2 -18
- data/app/services/completion_kit/mcp_tools/provider_credentials.rb +2 -18
- data/app/services/completion_kit/mcp_tools/responses.rb +2 -13
- data/app/services/completion_kit/mcp_tools/runs.rb +4 -28
- data/app/services/completion_kit/ollama_client.rb +2 -15
- data/app/services/completion_kit/open_ai_client.rb +1 -10
- data/app/services/completion_kit/open_router_client.rb +1 -12
- data/app/validators/completion_kit/tenant_scoped_uniqueness_validator.rb +15 -0
- data/app/views/completion_kit/api_reference/index.html.erb +11 -11
- data/app/views/completion_kit/metric_groups/_form.html.erb +46 -0
- data/app/views/completion_kit/metric_groups/edit.html.erb +13 -0
- data/app/views/completion_kit/metric_groups/index.html.erb +41 -0
- data/app/views/completion_kit/metric_groups/new.html.erb +12 -0
- data/app/views/completion_kit/{criteria → metric_groups}/show.html.erb +8 -9
- data/app/views/completion_kit/metrics/_form.html.erb +2 -23
- data/app/views/completion_kit/metrics/index.html.erb +13 -5
- data/app/views/completion_kit/metrics/show.html.erb +1 -12
- data/app/views/completion_kit/runs/_form.html.erb +5 -5
- data/app/views/layouts/completion_kit/application.html.erb +4 -1
- data/config/routes.rb +2 -2
- data/db/migrate/20260416000001_remove_evaluation_steps_from_metrics.rb +5 -0
- data/db/migrate/20260417000001_rename_criteria_to_metric_groups.rb +13 -0
- data/lib/completion_kit/engine.rb +1 -7
- data/lib/completion_kit/version.rb +1 -1
- data/lib/completion_kit.rb +5 -0
- metadata +23 -21
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +0 -25
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +0 -62
- data/app/controllers/completion_kit/criteria_controller.rb +0 -67
- data/app/models/completion_kit/criteria.rb +0 -22
- data/app/models/completion_kit/criteria_membership.rb +0 -20
- data/app/services/completion_kit/mcp_tools/criteria.rb +0 -106
- data/app/views/completion_kit/criteria/_form.html.erb +0 -46
- data/app/views/completion_kit/criteria/edit.html.erb +0 -14
- data/app/views/completion_kit/criteria/index.html.erb +0 -37
- data/app/views/completion_kit/criteria/new.html.erb +0 -13
|
@@ -21,6 +21,14 @@ module CompletionKit
|
|
|
21
21
|
judge_model.present? && metrics.any? && ApiConfig.valid_for_model?(judge_model)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
+
def replace_metrics!(metric_ids)
|
|
25
|
+
return unless metric_ids
|
|
26
|
+
run_metrics.delete_all
|
|
27
|
+
Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
|
|
28
|
+
run_metrics.create!(metric_id: metric_id, position: index + 1)
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
24
32
|
def avg_score
|
|
25
33
|
all_reviews = responses.flat_map(&:reviews)
|
|
26
34
|
scores = all_reviews.map(&:ai_score).compact.map(&:to_f)
|
|
@@ -113,16 +121,15 @@ module CompletionKit
|
|
|
113
121
|
response.response_text,
|
|
114
122
|
response.expected_output,
|
|
115
123
|
prompt.template,
|
|
116
|
-
criteria: metric.
|
|
117
|
-
|
|
118
|
-
rubric_text: metric.respond_to?(:display_rubric_text) ? metric.display_rubric_text : nil,
|
|
124
|
+
criteria: metric.instruction.to_s,
|
|
125
|
+
rubric_text: metric.display_rubric_text,
|
|
119
126
|
input_data: response.input_data
|
|
120
127
|
)
|
|
121
128
|
|
|
122
129
|
response.reviews.find_or_initialize_by(metric_id: metric.id).tap do |review|
|
|
123
130
|
review.assign_attributes(
|
|
124
131
|
metric_name: metric.name,
|
|
125
|
-
instruction: metric.
|
|
132
|
+
instruction: metric.instruction.to_s,
|
|
126
133
|
status: "evaluated",
|
|
127
134
|
ai_score: evaluation[:score],
|
|
128
135
|
ai_feedback: evaluation[:feedback]
|
|
@@ -7,21 +7,12 @@ module CompletionKit
|
|
|
7
7
|
|
|
8
8
|
def generate_completion(prompt, options = {})
|
|
9
9
|
return "Error: API key not configured" unless configured?
|
|
10
|
-
|
|
11
|
-
require "faraday"
|
|
12
|
-
require "faraday/retry"
|
|
13
|
-
require "json"
|
|
14
|
-
|
|
10
|
+
|
|
15
11
|
model = options[:model] || "claude-3-7-sonnet-latest"
|
|
16
12
|
max_tokens = options[:max_tokens] || 1000
|
|
17
13
|
temperature = options[:temperature] || 0.7
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
f.request :retry, max: 2, interval: 0.5
|
|
21
|
-
f.adapter Faraday.default_adapter
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
response = conn.post do |req|
|
|
14
|
+
|
|
15
|
+
response = build_connection("https://api.anthropic.com").post do |req|
|
|
25
16
|
req.url "/v1/messages"
|
|
26
17
|
req.headers["Content-Type"] = "application/json"
|
|
27
18
|
req.headers["x-api-key"] = api_key
|
|
@@ -49,11 +40,7 @@ module CompletionKit
|
|
|
49
40
|
def available_models
|
|
50
41
|
return STATIC_MODELS unless configured?
|
|
51
42
|
|
|
52
|
-
|
|
53
|
-
require "faraday/retry"
|
|
54
|
-
require "json"
|
|
55
|
-
|
|
56
|
-
response = Faraday.get("https://api.anthropic.com/v1/models?limit=100") do |req|
|
|
43
|
+
response = build_connection("https://api.anthropic.com").get("/v1/models?limit=100") do |req|
|
|
57
44
|
req.headers["x-api-key"] = api_key
|
|
58
45
|
req.headers["anthropic-version"] = "2023-06-01"
|
|
59
46
|
end
|
|
@@ -8,11 +8,11 @@ module CompletionKit
|
|
|
8
8
|
@judge_client = LlmClient.for_model(@judge_model, ApiConfig.for_model(@judge_model))
|
|
9
9
|
end
|
|
10
10
|
|
|
11
|
-
def evaluate(output, expected_output = nil, prompt = nil, criteria: nil, evaluation_steps: nil, rubric_text: nil, human_examples: nil, input_data: nil)
|
|
11
|
+
def evaluate(output, expected_output = nil, prompt = nil, criteria: nil, rubric_text: nil, human_examples: nil, input_data: nil, **_extras)
|
|
12
12
|
return { score: 1, feedback: "Judge not configured" } unless @judge_client.configured?
|
|
13
13
|
|
|
14
14
|
judge_prompt = build_judge_prompt(output, expected_output, prompt,
|
|
15
|
-
criteria: criteria, evaluation_steps: evaluation_steps,
|
|
15
|
+
criteria: criteria,
|
|
16
16
|
rubric_text: rubric_text, human_examples: human_examples,
|
|
17
17
|
input_data: input_data)
|
|
18
18
|
|
|
@@ -27,7 +27,7 @@ module CompletionKit
|
|
|
27
27
|
|
|
28
28
|
private
|
|
29
29
|
|
|
30
|
-
def build_judge_prompt(output, expected_output, prompt, criteria: nil, evaluation_steps: nil, rubric_text: nil, human_examples: nil, input_data: nil)
|
|
30
|
+
def build_judge_prompt(output, expected_output, prompt, criteria: nil, rubric_text: nil, human_examples: nil, input_data: nil)
|
|
31
31
|
judge_prompt = <<~PROMPT
|
|
32
32
|
You are an expert evaluator. You MUST respond with ONLY two lines in this exact format, nothing else:
|
|
33
33
|
|
|
@@ -44,10 +44,6 @@ module CompletionKit
|
|
|
44
44
|
judge_prompt += "\nCriteria: #{criteria}\n"
|
|
45
45
|
end
|
|
46
46
|
|
|
47
|
-
if evaluation_steps.present? && evaluation_steps.any?
|
|
48
|
-
judge_prompt += "\nEvaluation steps:\n#{evaluation_steps.each_with_index.map { |step, i| "#{i + 1}. #{step}" }.join("\n")}\n"
|
|
49
|
-
end
|
|
50
|
-
|
|
51
47
|
if human_examples.present?
|
|
52
48
|
judge_prompt += "\nCalibration examples:\n"
|
|
53
49
|
human_examples.each_with_index do |example, index|
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
require "faraday/retry"
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
1
5
|
module CompletionKit
|
|
2
6
|
class LlmClient
|
|
3
7
|
def initialize(config = {})
|
|
@@ -41,5 +45,16 @@ module CompletionKit
|
|
|
41
45
|
|
|
42
46
|
for_provider(provider, config)
|
|
43
47
|
end
|
|
48
|
+
|
|
49
|
+
protected
|
|
50
|
+
|
|
51
|
+
def build_connection(url, timeout: nil, open_timeout: nil)
|
|
52
|
+
Faraday.new(url: url) do |f|
|
|
53
|
+
f.options.timeout = timeout if timeout
|
|
54
|
+
f.options.open_timeout = open_timeout if open_timeout
|
|
55
|
+
f.request :retry, max: 2, interval: 0.5
|
|
56
|
+
f.adapter Faraday.default_adapter
|
|
57
|
+
end
|
|
58
|
+
end
|
|
44
59
|
end
|
|
45
60
|
end
|
|
@@ -33,7 +33,7 @@ module CompletionKit
|
|
|
33
33
|
McpTools::Responses.definitions +
|
|
34
34
|
McpTools::Datasets.definitions +
|
|
35
35
|
McpTools::Metrics.definitions +
|
|
36
|
-
McpTools::Criteria.definitions +
|
|
36
|
+
McpTools::MetricGroups.definitions +
|
|
37
37
|
McpTools::ProviderCredentials.definitions
|
|
38
38
|
end
|
|
39
39
|
|
|
@@ -44,7 +44,7 @@ module CompletionKit
|
|
|
44
44
|
when /\Aresponses_/ then McpTools::Responses.call(name, arguments)
|
|
45
45
|
when /\Adatasets_/ then McpTools::Datasets.call(name, arguments)
|
|
46
46
|
when /\Ametrics_/ then McpTools::Metrics.call(name, arguments)
|
|
47
|
-
when /\Acriteria_/ then McpTools::Criteria.call(name, arguments)
|
|
47
|
+
when /\Ametric_groups_/ then McpTools::MetricGroups.call(name, arguments)
|
|
48
48
|
when /\Aprovider_credentials_/ then McpTools::ProviderCredentials.call(name, arguments)
|
|
49
49
|
else raise MethodNotFound, "Unknown tool: #{name}"
|
|
50
50
|
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
module McpTools
|
|
3
|
+
module Base
|
|
4
|
+
def definitions
|
|
5
|
+
self::TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def call(name, arguments)
|
|
9
|
+
tool = self::TOOLS.fetch(name)
|
|
10
|
+
send(tool[:handler], arguments)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def text_result(data)
|
|
14
|
+
text = data.is_a?(String) ? data : data.to_json
|
|
15
|
+
{content: [{type: "text", text: text}]}
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def error_result(message)
|
|
19
|
+
{content: [{type: "text", text: message}], isError: true}
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module McpTools
|
|
3
3
|
module Datasets
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
4
6
|
TOOLS = {
|
|
5
7
|
"datasets_list" => {
|
|
6
8
|
description: "List all datasets",
|
|
@@ -37,15 +39,6 @@ module CompletionKit
|
|
|
37
39
|
}
|
|
38
40
|
}.freeze
|
|
39
41
|
|
|
40
|
-
def self.definitions
|
|
41
|
-
TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def self.call(name, arguments)
|
|
45
|
-
tool = TOOLS.fetch(name)
|
|
46
|
-
send(tool[:handler], arguments)
|
|
47
|
-
end
|
|
48
|
-
|
|
49
42
|
def self.list(_args)
|
|
50
43
|
text_result(Dataset.order(created_at: :desc).map(&:as_json))
|
|
51
44
|
end
|
|
@@ -76,15 +69,6 @@ module CompletionKit
|
|
|
76
69
|
Dataset.find(args["id"]).destroy!
|
|
77
70
|
text_result("Dataset #{args["id"]} deleted")
|
|
78
71
|
end
|
|
79
|
-
|
|
80
|
-
def self.text_result(data)
|
|
81
|
-
text = data.is_a?(String) ? data : data.to_json
|
|
82
|
-
{content: [{type: "text", text: text}]}
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
def self.error_result(message)
|
|
86
|
-
{content: [{type: "text", text: message}], isError: true}
|
|
87
|
-
end
|
|
88
72
|
end
|
|
89
73
|
end
|
|
90
74
|
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
module McpTools
|
|
3
|
+
module MetricGroups
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
6
|
+
TOOLS = {
|
|
7
|
+
"metric_groups_list" => {
|
|
8
|
+
description: "List all metric groups",
|
|
9
|
+
inputSchema: {type: "object", properties: {}, required: []},
|
|
10
|
+
handler: :list
|
|
11
|
+
},
|
|
12
|
+
"metric_groups_get" => {
|
|
13
|
+
description: "Get a metric group by ID",
|
|
14
|
+
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
15
|
+
handler: :get
|
|
16
|
+
},
|
|
17
|
+
"metric_groups_create" => {
|
|
18
|
+
description: "Create a metric group",
|
|
19
|
+
inputSchema: {
|
|
20
|
+
type: "object",
|
|
21
|
+
properties: {
|
|
22
|
+
name: {type: "string"}, description: {type: "string"},
|
|
23
|
+
metric_ids: {type: "array", items: {type: "integer"}}
|
|
24
|
+
},
|
|
25
|
+
required: ["name"]
|
|
26
|
+
},
|
|
27
|
+
handler: :create
|
|
28
|
+
},
|
|
29
|
+
"metric_groups_update" => {
|
|
30
|
+
description: "Update a metric group",
|
|
31
|
+
inputSchema: {
|
|
32
|
+
type: "object",
|
|
33
|
+
properties: {
|
|
34
|
+
id: {type: "integer"}, name: {type: "string"}, description: {type: "string"},
|
|
35
|
+
metric_ids: {type: "array", items: {type: "integer"}}
|
|
36
|
+
},
|
|
37
|
+
required: ["id"]
|
|
38
|
+
},
|
|
39
|
+
handler: :update
|
|
40
|
+
},
|
|
41
|
+
"metric_groups_delete" => {
|
|
42
|
+
description: "Delete a metric group",
|
|
43
|
+
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
44
|
+
handler: :delete
|
|
45
|
+
}
|
|
46
|
+
}.freeze
|
|
47
|
+
|
|
48
|
+
def self.list(_args)
|
|
49
|
+
text_result(CompletionKit::MetricGroup.order(created_at: :desc).map(&:as_json))
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def self.get(args)
|
|
53
|
+
text_result(CompletionKit::MetricGroup.find(args["id"]).as_json)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def self.create(args)
|
|
57
|
+
metric_group = CompletionKit::MetricGroup.new(args.slice("name", "description"))
|
|
58
|
+
if metric_group.save
|
|
59
|
+
metric_group.replace_metrics!(args["metric_ids"])
|
|
60
|
+
text_result(metric_group.reload.as_json)
|
|
61
|
+
else
|
|
62
|
+
error_result(metric_group.errors.full_messages.join(", "))
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def self.update(args)
|
|
67
|
+
metric_group = CompletionKit::MetricGroup.find(args["id"])
|
|
68
|
+
if metric_group.update(args.except("id", "metric_ids").slice("name", "description"))
|
|
69
|
+
metric_group.replace_metrics!(args["metric_ids"]) if args.key?("metric_ids")
|
|
70
|
+
text_result(metric_group.reload.as_json)
|
|
71
|
+
else
|
|
72
|
+
error_result(metric_group.errors.full_messages.join(", "))
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def self.delete(args)
|
|
77
|
+
CompletionKit::MetricGroup.find(args["id"]).destroy!
|
|
78
|
+
text_result("Metric group #{args["id"]} deleted")
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module McpTools
|
|
3
3
|
module Metrics
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
4
6
|
TOOLS = {
|
|
5
7
|
"metrics_list" => {
|
|
6
8
|
description: "List all metrics",
|
|
@@ -18,7 +20,6 @@ module CompletionKit
|
|
|
18
20
|
type: "object",
|
|
19
21
|
properties: {
|
|
20
22
|
name: {type: "string"}, instruction: {type: "string"},
|
|
21
|
-
evaluation_steps: {type: "array", items: {type: "string"}},
|
|
22
23
|
rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}}
|
|
23
24
|
},
|
|
24
25
|
required: ["name"]
|
|
@@ -31,7 +32,6 @@ module CompletionKit
|
|
|
31
32
|
type: "object",
|
|
32
33
|
properties: {
|
|
33
34
|
id: {type: "integer"}, name: {type: "string"}, instruction: {type: "string"},
|
|
34
|
-
evaluation_steps: {type: "array", items: {type: "string"}},
|
|
35
35
|
rubric_bands: {type: "array", items: {type: "object", properties: {stars: {type: "integer"}, description: {type: "string"}}}}
|
|
36
36
|
},
|
|
37
37
|
required: ["id"]
|
|
@@ -45,15 +45,6 @@ module CompletionKit
|
|
|
45
45
|
}
|
|
46
46
|
}.freeze
|
|
47
47
|
|
|
48
|
-
def self.definitions
|
|
49
|
-
TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def self.call(name, arguments)
|
|
53
|
-
tool = TOOLS.fetch(name)
|
|
54
|
-
send(tool[:handler], arguments)
|
|
55
|
-
end
|
|
56
|
-
|
|
57
48
|
def self.list(_args)
|
|
58
49
|
text_result(Metric.order(created_at: :desc).map(&:as_json))
|
|
59
50
|
end
|
|
@@ -63,7 +54,7 @@ module CompletionKit
|
|
|
63
54
|
end
|
|
64
55
|
|
|
65
56
|
def self.create(args)
|
|
66
|
-
metric = Metric.new(args.slice("name", "instruction", "evaluation_steps", "rubric_bands"))
|
|
57
|
+
metric = Metric.new(args.slice("name", "instruction", "rubric_bands"))
|
|
67
58
|
if metric.save
|
|
68
59
|
text_result(metric.as_json)
|
|
69
60
|
else
|
|
@@ -73,7 +64,7 @@ module CompletionKit
|
|
|
73
64
|
|
|
74
65
|
def self.update(args)
|
|
75
66
|
metric = Metric.find(args["id"])
|
|
76
|
-
if metric.update(args.except("id").slice("name", "instruction", "evaluation_steps", "rubric_bands"))
|
|
67
|
+
if metric.update(args.except("id").slice("name", "instruction", "rubric_bands"))
|
|
77
68
|
text_result(metric.as_json)
|
|
78
69
|
else
|
|
79
70
|
error_result(metric.errors.full_messages.join(", "))
|
|
@@ -84,15 +75,6 @@ module CompletionKit
|
|
|
84
75
|
Metric.find(args["id"]).destroy!
|
|
85
76
|
text_result("Metric #{args["id"]} deleted")
|
|
86
77
|
end
|
|
87
|
-
|
|
88
|
-
def self.text_result(data)
|
|
89
|
-
text = data.is_a?(String) ? data : data.to_json
|
|
90
|
-
{content: [{type: "text", text: text}]}
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
def self.error_result(message)
|
|
94
|
-
{content: [{type: "text", text: message}], isError: true}
|
|
95
|
-
end
|
|
96
78
|
end
|
|
97
79
|
end
|
|
98
80
|
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module McpTools
|
|
3
3
|
module Prompts
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
4
6
|
TOOLS = {
|
|
5
7
|
"prompts_list" => {
|
|
6
8
|
description: "List all prompts",
|
|
@@ -48,15 +50,6 @@ module CompletionKit
|
|
|
48
50
|
},
|
|
49
51
|
}.freeze
|
|
50
52
|
|
|
51
|
-
def self.definitions
|
|
52
|
-
TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
def self.call(name, arguments)
|
|
56
|
-
tool = TOOLS.fetch(name)
|
|
57
|
-
send(tool[:handler], arguments)
|
|
58
|
-
end
|
|
59
|
-
|
|
60
53
|
def self.list(_args)
|
|
61
54
|
text_result(Prompt.order(created_at: :desc).map(&:as_json))
|
|
62
55
|
end
|
|
@@ -98,15 +91,6 @@ module CompletionKit
|
|
|
98
91
|
prompt.publish!
|
|
99
92
|
text_result(prompt.reload.as_json)
|
|
100
93
|
end
|
|
101
|
-
|
|
102
|
-
def self.text_result(data)
|
|
103
|
-
text = data.is_a?(String) ? data : data.to_json
|
|
104
|
-
{content: [{type: "text", text: text}]}
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def self.error_result(message)
|
|
108
|
-
{content: [{type: "text", text: message}], isError: true}
|
|
109
|
-
end
|
|
110
94
|
end
|
|
111
95
|
end
|
|
112
96
|
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module McpTools
|
|
3
3
|
module ProviderCredentials
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
4
6
|
TOOLS = {
|
|
5
7
|
"provider_credentials_list" => {
|
|
6
8
|
description: "List all provider credentials (API keys are not exposed)",
|
|
@@ -44,15 +46,6 @@ module CompletionKit
|
|
|
44
46
|
}
|
|
45
47
|
}.freeze
|
|
46
48
|
|
|
47
|
-
def self.definitions
|
|
48
|
-
TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def self.call(name, arguments)
|
|
52
|
-
tool = TOOLS.fetch(name)
|
|
53
|
-
send(tool[:handler], arguments)
|
|
54
|
-
end
|
|
55
|
-
|
|
56
49
|
def self.list(_args)
|
|
57
50
|
text_result(ProviderCredential.order(created_at: :desc).map(&:as_json))
|
|
58
51
|
end
|
|
@@ -83,15 +76,6 @@ module CompletionKit
|
|
|
83
76
|
ProviderCredential.find(args["id"]).destroy!
|
|
84
77
|
text_result("Provider credential #{args["id"]} deleted")
|
|
85
78
|
end
|
|
86
|
-
|
|
87
|
-
def self.text_result(data)
|
|
88
|
-
text = data.is_a?(String) ? data : data.to_json
|
|
89
|
-
{content: [{type: "text", text: text}]}
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
def self.error_result(message)
|
|
93
|
-
{content: [{type: "text", text: message}], isError: true}
|
|
94
|
-
end
|
|
95
79
|
end
|
|
96
80
|
end
|
|
97
81
|
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module McpTools
|
|
3
3
|
module Responses
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
4
6
|
TOOLS = {
|
|
5
7
|
"responses_list" => {
|
|
6
8
|
description: "List responses for a run",
|
|
@@ -18,15 +20,6 @@ module CompletionKit
|
|
|
18
20
|
}
|
|
19
21
|
}.freeze
|
|
20
22
|
|
|
21
|
-
def self.definitions
|
|
22
|
-
TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def self.call(name, arguments)
|
|
26
|
-
tool = TOOLS.fetch(name)
|
|
27
|
-
send(tool[:handler], arguments)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
23
|
def self.list(args)
|
|
31
24
|
run = Run.find(args["run_id"])
|
|
32
25
|
text_result(run.responses.includes(:reviews).map(&:as_json))
|
|
@@ -36,10 +29,6 @@ module CompletionKit
|
|
|
36
29
|
run = Run.find(args["run_id"])
|
|
37
30
|
text_result(run.responses.find(args["id"]).as_json)
|
|
38
31
|
end
|
|
39
|
-
|
|
40
|
-
def self.text_result(data)
|
|
41
|
-
{content: [{type: "text", text: data.to_json}]}
|
|
42
|
-
end
|
|
43
32
|
end
|
|
44
33
|
end
|
|
45
34
|
end
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
module CompletionKit
|
|
2
2
|
module McpTools
|
|
3
3
|
module Runs
|
|
4
|
+
extend Base
|
|
5
|
+
|
|
4
6
|
TOOLS = {
|
|
5
7
|
"runs_list" => {
|
|
6
8
|
description: "List all runs",
|
|
@@ -55,15 +57,6 @@ module CompletionKit
|
|
|
55
57
|
}
|
|
56
58
|
}.freeze
|
|
57
59
|
|
|
58
|
-
def self.definitions
|
|
59
|
-
TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def self.call(name, arguments)
|
|
63
|
-
tool = TOOLS.fetch(name)
|
|
64
|
-
send(tool[:handler], arguments)
|
|
65
|
-
end
|
|
66
|
-
|
|
67
60
|
def self.list(_args)
|
|
68
61
|
text_result(Run.order(created_at: :desc).map(&:as_json))
|
|
69
62
|
end
|
|
@@ -75,7 +68,7 @@ module CompletionKit
|
|
|
75
68
|
def self.create(args)
|
|
76
69
|
run = Run.new(args.slice("name", "prompt_id", "dataset_id", "judge_model"))
|
|
77
70
|
if run.save
|
|
78
|
-
|
|
71
|
+
run.replace_metrics!(args["metric_ids"])
|
|
79
72
|
text_result(run.reload.as_json)
|
|
80
73
|
else
|
|
81
74
|
error_result(run.errors.full_messages.join(", "))
|
|
@@ -85,7 +78,7 @@ module CompletionKit
|
|
|
85
78
|
def self.update(args)
|
|
86
79
|
run = Run.find(args["id"])
|
|
87
80
|
if run.update(args.except("id", "metric_ids").slice("name", "dataset_id", "judge_model"))
|
|
88
|
-
|
|
81
|
+
run.replace_metrics!(args["metric_ids"]) if args.key?("metric_ids")
|
|
89
82
|
text_result(run.reload.as_json)
|
|
90
83
|
else
|
|
91
84
|
error_result(run.errors.full_messages.join(", "))
|
|
@@ -108,23 +101,6 @@ module CompletionKit
|
|
|
108
101
|
JudgeJob.perform_later(run.id)
|
|
109
102
|
text_result(run.reload.as_json)
|
|
110
103
|
end
|
|
111
|
-
|
|
112
|
-
def self.text_result(data)
|
|
113
|
-
text = data.is_a?(String) ? data : data.to_json
|
|
114
|
-
{content: [{type: "text", text: text}]}
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
def self.error_result(message)
|
|
118
|
-
{content: [{type: "text", text: message}], isError: true}
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
def self.replace_run_metrics(run, metric_ids)
|
|
122
|
-
return unless metric_ids
|
|
123
|
-
run.run_metrics.delete_all
|
|
124
|
-
Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
|
|
125
|
-
run.run_metrics.create!(metric_id: metric_id, position: index + 1)
|
|
126
|
-
end
|
|
127
|
-
end
|
|
128
104
|
end
|
|
129
105
|
end
|
|
130
106
|
end
|
|
@@ -3,20 +3,11 @@ module CompletionKit
|
|
|
3
3
|
def generate_completion(prompt, options = {})
|
|
4
4
|
return "Error: API endpoint not configured" unless configured?
|
|
5
5
|
|
|
6
|
-
require "faraday"
|
|
7
|
-
require "faraday/retry"
|
|
8
|
-
require "json"
|
|
9
|
-
|
|
10
6
|
model = options[:model]
|
|
11
7
|
max_tokens = options[:max_tokens] || 1000
|
|
12
8
|
temperature = options[:temperature] || 0.7
|
|
13
9
|
|
|
14
|
-
|
|
15
|
-
f.request :retry, max: 2, interval: 0.5
|
|
16
|
-
f.adapter Faraday.default_adapter
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
response = conn.post do |req|
|
|
10
|
+
response = build_connection(api_endpoint).post do |req|
|
|
20
11
|
req.url "/v1/completions"
|
|
21
12
|
req.headers["Content-Type"] = "application/json"
|
|
22
13
|
req.headers["Authorization"] = "Bearer #{api_key}" if api_key.present?
|
|
@@ -41,11 +32,7 @@ module CompletionKit
|
|
|
41
32
|
def available_models
|
|
42
33
|
return [] unless configured?
|
|
43
34
|
|
|
44
|
-
|
|
45
|
-
require "faraday/retry"
|
|
46
|
-
require "json"
|
|
47
|
-
|
|
48
|
-
response = Faraday.get("#{api_endpoint}/v1/models") do |req|
|
|
35
|
+
response = build_connection(api_endpoint).get("/v1/models") do |req|
|
|
49
36
|
req.headers["Authorization"] = "Bearer #{api_key}" if api_key.present?
|
|
50
37
|
end
|
|
51
38
|
|
|
@@ -9,20 +9,11 @@ module CompletionKit
|
|
|
9
9
|
def generate_completion(prompt, options = {})
|
|
10
10
|
return "Error: API key not configured" unless configured?
|
|
11
11
|
|
|
12
|
-
require "faraday"
|
|
13
|
-
require "faraday/retry"
|
|
14
|
-
require "json"
|
|
15
|
-
|
|
16
12
|
model = options[:model] || "gpt-4.1-mini"
|
|
17
13
|
max_tokens = options[:max_tokens] || 1000
|
|
18
14
|
temperature = options[:temperature] || 0.7
|
|
19
15
|
|
|
20
|
-
|
|
21
|
-
f.request :retry, max: 2, interval: 0.5
|
|
22
|
-
f.adapter Faraday.default_adapter
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
response = conn.post do |req|
|
|
16
|
+
response = build_connection("https://api.openai.com").post do |req|
|
|
26
17
|
req.url "/v1/responses"
|
|
27
18
|
req.headers["Content-Type"] = "application/json"
|
|
28
19
|
req.headers["Authorization"] = "Bearer #{api_key}"
|
|
@@ -7,22 +7,11 @@ module CompletionKit
|
|
|
7
7
|
def generate_completion(prompt, options = {})
|
|
8
8
|
return "Error: API key not configured" unless configured?
|
|
9
9
|
|
|
10
|
-
require "faraday"
|
|
11
|
-
require "faraday/retry"
|
|
12
|
-
require "json"
|
|
13
|
-
|
|
14
10
|
model = options[:model] || "openai/gpt-4o-mini"
|
|
15
11
|
max_tokens = options[:max_tokens] || 1000
|
|
16
12
|
temperature = options[:temperature] || 0.7
|
|
17
13
|
|
|
18
|
-
|
|
19
|
-
f.options.timeout = 30
|
|
20
|
-
f.options.open_timeout = 5
|
|
21
|
-
f.request :retry, max: 2, interval: 0.5
|
|
22
|
-
f.adapter Faraday.default_adapter
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
response = conn.post do |req|
|
|
14
|
+
response = build_connection(BASE_URL, timeout: 30, open_timeout: 5).post do |req|
|
|
26
15
|
req.url "/chat/completions"
|
|
27
16
|
req.headers["Content-Type"] = "application/json"
|
|
28
17
|
req.headers["Authorization"] = "Bearer #{api_key}"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module CompletionKit
|
|
2
|
+
class TenantScopedUniquenessValidator < ActiveRecord::Validations::UniquenessValidator
|
|
3
|
+
def validate_each(record, attribute, value)
|
|
4
|
+
extra = Array(CompletionKit.config.tenant_scope_columns)
|
|
5
|
+
return super if extra.empty? && options[:scope].nil?
|
|
6
|
+
|
|
7
|
+
merged = options.merge(
|
|
8
|
+
scope: Array(options[:scope]) + extra,
|
|
9
|
+
attributes: [attribute],
|
|
10
|
+
class: @klass
|
|
11
|
+
)
|
|
12
|
+
self.class.superclass.new(merged).validate(record)
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|