completion-kit 0.1.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +192 -0
- data/Rakefile +12 -0
- data/app/assets/config/completion_kit_manifest.js +1 -0
- data/app/assets/config/manifest.js +3 -0
- data/app/assets/images/completion_kit/logo.svg +6 -0
- data/app/assets/javascripts/completion_kit/evaluation_steps_controller.js +25 -0
- data/app/assets/stylesheets/completion_kit/application.css +2214 -0
- data/app/controllers/completion_kit/api/v1/base_controller.rb +29 -0
- data/app/controllers/completion_kit/api/v1/criteria_controller.rb +62 -0
- data/app/controllers/completion_kit/api/v1/datasets_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/metrics_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/prompts_controller.rb +64 -0
- data/app/controllers/completion_kit/api/v1/provider_credentials_controller.rb +51 -0
- data/app/controllers/completion_kit/api/v1/responses_controller.rb +32 -0
- data/app/controllers/completion_kit/api/v1/runs_controller.rb +71 -0
- data/app/controllers/completion_kit/api_reference_controller.rb +9 -0
- data/app/controllers/completion_kit/application_controller.rb +31 -0
- data/app/controllers/completion_kit/criteria_controller.rb +67 -0
- data/app/controllers/completion_kit/datasets_controller.rb +53 -0
- data/app/controllers/completion_kit/mcp_controller.rb +57 -0
- data/app/controllers/completion_kit/metrics_controller.rb +52 -0
- data/app/controllers/completion_kit/prompts_controller.rb +69 -0
- data/app/controllers/completion_kit/provider_credentials_controller.rb +63 -0
- data/app/controllers/completion_kit/responses_controller.rb +44 -0
- data/app/controllers/completion_kit/runs_controller.rb +131 -0
- data/app/helpers/completion_kit/application_helper.rb +193 -0
- data/app/jobs/completion_kit/application_job.rb +4 -0
- data/app/jobs/completion_kit/generate_job.rb +12 -0
- data/app/jobs/completion_kit/judge_job.rb +12 -0
- data/app/jobs/completion_kit/model_discovery_job.rb +29 -0
- data/app/mailers/completion_kit/application_mailer.rb +6 -0
- data/app/models/completion_kit/application_record.rb +5 -0
- data/app/models/completion_kit/criteria.rb +22 -0
- data/app/models/completion_kit/criteria_membership.rb +20 -0
- data/app/models/completion_kit/dataset.rb +24 -0
- data/app/models/completion_kit/metric.rb +97 -0
- data/app/models/completion_kit/model.rb +13 -0
- data/app/models/completion_kit/prompt.rb +99 -0
- data/app/models/completion_kit/provider_credential.rb +114 -0
- data/app/models/completion_kit/response.rb +30 -0
- data/app/models/completion_kit/review.rb +28 -0
- data/app/models/completion_kit/run.rb +253 -0
- data/app/models/completion_kit/run_metric.rb +6 -0
- data/app/models/completion_kit/suggestion.rb +8 -0
- data/app/services/completion_kit/anthropic_client.rb +86 -0
- data/app/services/completion_kit/api_config.rb +80 -0
- data/app/services/completion_kit/csv_processor.rb +65 -0
- data/app/services/completion_kit/judge_service.rb +87 -0
- data/app/services/completion_kit/llm_client.rb +45 -0
- data/app/services/completion_kit/mcp_dispatcher.rb +53 -0
- data/app/services/completion_kit/mcp_tools/criteria.rb +106 -0
- data/app/services/completion_kit/mcp_tools/datasets.rb +90 -0
- data/app/services/completion_kit/mcp_tools/metrics.rb +98 -0
- data/app/services/completion_kit/mcp_tools/prompts.rb +112 -0
- data/app/services/completion_kit/mcp_tools/provider_credentials.rb +97 -0
- data/app/services/completion_kit/mcp_tools/responses.rb +45 -0
- data/app/services/completion_kit/mcp_tools/runs.rb +130 -0
- data/app/services/completion_kit/model_discovery_service.rb +223 -0
- data/app/services/completion_kit/ollama_client.rb +80 -0
- data/app/services/completion_kit/open_ai_client.rb +71 -0
- data/app/services/completion_kit/open_router_client.rb +69 -0
- data/app/services/completion_kit/prompt_improvement_service.rb +81 -0
- data/app/views/completion_kit/api_reference/_example.html.erb +6 -0
- data/app/views/completion_kit/api_reference/index.html.erb +308 -0
- data/app/views/completion_kit/criteria/_form.html.erb +46 -0
- data/app/views/completion_kit/criteria/edit.html.erb +14 -0
- data/app/views/completion_kit/criteria/index.html.erb +37 -0
- data/app/views/completion_kit/criteria/new.html.erb +13 -0
- data/app/views/completion_kit/criteria/show.html.erb +37 -0
- data/app/views/completion_kit/datasets/_form.html.erb +29 -0
- data/app/views/completion_kit/datasets/edit.html.erb +13 -0
- data/app/views/completion_kit/datasets/index.html.erb +38 -0
- data/app/views/completion_kit/datasets/new.html.erb +12 -0
- data/app/views/completion_kit/datasets/show.html.erb +45 -0
- data/app/views/completion_kit/metrics/_form.html.erb +72 -0
- data/app/views/completion_kit/metrics/edit.html.erb +13 -0
- data/app/views/completion_kit/metrics/index.html.erb +34 -0
- data/app/views/completion_kit/metrics/new.html.erb +12 -0
- data/app/views/completion_kit/metrics/show.html.erb +49 -0
- data/app/views/completion_kit/prompts/_form.html.erb +52 -0
- data/app/views/completion_kit/prompts/edit.html.erb +13 -0
- data/app/views/completion_kit/prompts/index.html.erb +46 -0
- data/app/views/completion_kit/prompts/new.html.erb +12 -0
- data/app/views/completion_kit/prompts/show.html.erb +156 -0
- data/app/views/completion_kit/provider_credentials/_discovery_status.html.erb +30 -0
- data/app/views/completion_kit/provider_credentials/_form.html.erb +71 -0
- data/app/views/completion_kit/provider_credentials/edit.html.erb +12 -0
- data/app/views/completion_kit/provider_credentials/index.html.erb +41 -0
- data/app/views/completion_kit/provider_credentials/new.html.erb +12 -0
- data/app/views/completion_kit/responses/show.html.erb +87 -0
- data/app/views/completion_kit/runs/_actions.html.erb +14 -0
- data/app/views/completion_kit/runs/_form.html.erb +159 -0
- data/app/views/completion_kit/runs/_progress.html.erb +18 -0
- data/app/views/completion_kit/runs/_response_row.html.erb +13 -0
- data/app/views/completion_kit/runs/_sort_toolbar.html.erb +8 -0
- data/app/views/completion_kit/runs/_status_header.html.erb +15 -0
- data/app/views/completion_kit/runs/edit.html.erb +14 -0
- data/app/views/completion_kit/runs/index.html.erb +43 -0
- data/app/views/completion_kit/runs/new.html.erb +12 -0
- data/app/views/completion_kit/runs/show.html.erb +79 -0
- data/app/views/completion_kit/runs/suggestion.html.erb +47 -0
- data/app/views/layouts/completion_kit/application.html.erb +77 -0
- data/config/routes.rb +55 -0
- data/db/migrate/20260311000001_create_completion_kit_tables.rb +87 -0
- data/db/migrate/20260326000001_rename_criteria_to_instruction_on_metrics_and_reviews.rb +6 -0
- data/db/migrate/20260327000001_add_progress_to_runs.rb +6 -0
- data/db/migrate/20260327100001_replace_criteria_with_direct_metrics_on_runs.rb +12 -0
- data/db/migrate/20260328000001_add_error_message_to_runs.rb +5 -0
- data/db/migrate/20260329000001_create_completion_kit_models.rb +20 -0
- data/db/migrate/20260401170001_add_discovery_columns_to_completion_kit_provider_credentials.rb +7 -0
- data/db/migrate/20260403000001_add_temperature_to_completion_kit_runs.rb +5 -0
- data/db/migrate/20260403000002_create_completion_kit_suggestions.rb +13 -0
- data/db/migrate/20260403000003_add_applied_at_to_completion_kit_suggestions.rb +5 -0
- data/lib/completion-kit.rb +1 -0
- data/lib/completion_kit/engine.rb +35 -0
- data/lib/completion_kit/version.rb +3 -0
- data/lib/completion_kit.rb +55 -0
- data/lib/generators/completion_kit/install_generator.rb +21 -0
- data/lib/generators/completion_kit/templates/README +20 -0
- data/lib/generators/completion_kit/templates/initializer.rb +43 -0
- metadata +361 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
module CompletionKit
  # Client for Anthropic's HTTP API: text generation through
  # `POST /v1/messages` and model discovery through `GET /v1/models`.
  #
  # The API key is taken from the `:api_key` entry of the config hash and
  # falls back to the `ANTHROPIC_API_KEY` environment variable.
  class AnthropicClient < LlmClient
    # Models reported when discovery is not possible (no key) or fails.
    STATIC_MODELS = [
      { id: "claude-3-7-sonnet-latest", name: "Claude 3.7 Sonnet" },
      { id: "claude-3-5-haiku-latest", name: "Claude 3.5 Haiku" }
    ].freeze

    API_BASE_URL = "https://api.anthropic.com"
    API_VERSION = "2023-06-01"
    DEFAULT_MODEL = "claude-3-7-sonnet-latest"

    # Sends +prompt+ as a single user message and returns the generated text.
    #
    # @param prompt [String] content of the user message
    # @param options [Hash] optional overrides: `:model` (default
    #   DEFAULT_MODEL), `:max_tokens` (default 1000), `:temperature`
    #   (default 0.7)
    # @return [String] the stripped completion text, or a string starting with
    #   "Error:" when the key is missing, the request fails, the response has
    #   no text content, or any StandardError is raised along the way
    def generate_completion(prompt, options = {})
      return "Error: API key not configured" unless configured?

      # Loaded lazily so the HTTP stack is only required when a call is made.
      require "faraday"
      require "faraday/retry"
      require "json"

      payload = {
        model: options[:model] || DEFAULT_MODEL,
        messages: [
          { role: "user", content: prompt }
        ],
        max_tokens: options[:max_tokens] || 1000,
        temperature: options[:temperature] || 0.7
      }

      conn = Faraday.new(url: API_BASE_URL) do |f|
        # NOTE(review): faraday-retry appears to retry only idempotent HTTP
        # methods by default, so this may never retry the POST below —
        # confirm, and pass `methods:` explicitly if retries are intended.
        f.request :retry, max: 2, interval: 0.5
        f.adapter Faraday.default_adapter
      end

      response = conn.post do |req|
        req.url "/v1/messages"
        req.headers["Content-Type"] = "application/json"
        apply_auth_headers(req)
        req.body = payload.to_json
      end

      return "Error: #{response.status} - #{response.body}" unless response.success?

      extract_completion_text(JSON.parse(response.body))
    rescue => e
      "Error: #{e.message}"
    end

    # Lists the models the configured key can use.
    #
    # @return [Array<Hash>] hashes with `:id` and `:name`; STATIC_MODELS when
    #   the client is not configured, the request fails, the listing is empty,
    #   or any StandardError is raised
    def available_models
      return STATIC_MODELS unless configured?

      require "faraday"
      require "faraday/retry"
      require "json"

      response = Faraday.get("#{API_BASE_URL}/v1/models?limit=100") do |req|
        apply_auth_headers(req)
      end

      return STATIC_MODELS unless response.success?

      models = JSON.parse(response.body).fetch("data", []).map do |entry|
        { id: entry["id"], name: entry["display_name"] || entry["id"] }
      end
      models.presence || STATIC_MODELS
    rescue StandardError
      STATIC_MODELS
    end

    # @return [Boolean] true when a non-blank API key is available
    def configured?
      api_key.present?
    end

    # @return [Array<String>] human-readable configuration problems (empty
    #   when the client is usable)
    def configuration_errors
      configured? ? [] : ["Anthropic API key is not configured"]
    end

    private

    # A blank key in the config hash is treated like a missing one so that it
    # does not shadow the environment variable.
    def api_key
      @config[:api_key].presence || ENV["ANTHROPIC_API_KEY"]
    end

    # Sets the authentication and API-version headers shared by every request.
    def apply_auth_headers(req)
      req.headers["x-api-key"] = api_key
      req.headers["anthropic-version"] = API_VERSION
    end

    # Joins the text of every text-bearing content block in a parsed Messages
    # response. Returns an "Error:" string when there is none, instead of
    # failing on a missing or non-text first block.
    def extract_completion_text(data)
      blocks = data.is_a?(Hash) ? Array(data["content"]) : []
      texts = blocks
              .select { |block| block.is_a?(Hash) && block["text"].is_a?(String) }
              .map { |block| block["text"] }
      return "Error: Anthropic response contained no text content" if texts.empty?

      texts.join.strip
    end
  end
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
module CompletionKit
  # Resolves which provider serves a model and assembles the configuration
  # hash (API key, endpoint) handed to that provider's LLM client.
  class ApiConfig
    # @param model_name [#to_s] model identifier
    # @return [Hash] provider configuration, or `{}` when no provider is known
    #   for the model
    def self.for_model(model_name)
      provider = provider_for_model(model_name)
      provider ? for_provider(provider) : {}
    end

    # Builds the configuration for a provider: defaults from
    # `CompletionKit.config` / environment variables, overridden by the
    # non-nil entries of the stored ProviderCredential's `config_hash`.
    #
    # @param provider_name [#to_s] e.g. "openai", "anthropic", "ollama",
    #   "openrouter"
    # @return [Hash]
    def self.for_provider(provider_name)
      provider = provider_name.to_s
      stored = ProviderCredential.find_by(provider: provider)&.config_hash || {}

      default_config_for(provider).merge(stored.compact)
    end

    # Finds the provider for a model: first by exact id among
    # `available_models` (default scope), then by name prefix.
    #
    # @param model_name [#to_s]
    # @return [String, nil] provider name, or nil when it cannot be determined
    def self.provider_for_model(model_name)
      available_match = available_models.find { |model| model[:id] == model_name.to_s }
      return available_match[:provider] if available_match

      case model_name.to_s
      when /\Agpt-/
        "openai"
      when /\Aclaude-/
        "anthropic"
      else
        nil
      end
    end

    # @param model_name [#to_s]
    # @return [Boolean] whether the model's client reports itself configured
    # @raise [ArgumentError] when no provider is known for the model
    def self.valid_for_model?(model_name)
      client_for_model(model_name).configured?
    end

    # @param model_name [#to_s]
    # @return [Array<String>] configuration errors reported by the model's client
    # @raise [ArgumentError] when no provider is known for the model
    def self.errors_for_model(model_name)
      client_for_model(model_name).configuration_errors
    end

    # Lists models as hashes with `:id`, `:name` and `:provider`.
    #
    # Stored Model records are preferred (`:judging` -> `Model.for_judging`,
    # `:generation` -> `Model.for_generation`, anything else ->
    # `Model.active`). When none match, each provider that has a stored
    # ProviderCredential is asked through its client; a provider whose lookup
    # raises contributes nothing.
    #
    # @param provider [String, Symbol, nil] restrict the result to one provider
    # @param scope [Symbol] which stored models to consider
    # @return [Array<Hash>]
    def self.available_models(provider: nil, scope: :generation)
      query = case scope
              when :judging then Model.for_judging
              when :generation then Model.for_generation
              else Model.active
              end
      query = query.where(provider: provider) if provider.present?
      models = query.order(:provider, :display_name).map do |m|
        { id: m.model_id, name: m.display_name || m.model_id, provider: m.provider }
      end

      return models if models.any?

      configured = ProviderCredential.pluck(:provider)
      providers = provider.present? ? [provider.to_s] : configured
      providers.flat_map do |provider_name|
        next [] unless configured.include?(provider_name)
        client = LlmClient.for_provider(provider_name, for_provider(provider_name))
        client.available_models.map { |model| model.symbolize_keys.merge(provider: provider_name) }
      rescue StandardError
        []
      end.uniq { |model| model[:id] }
    end

    # Default (non-stored) configuration for a provider name.
    def self.default_config_for(provider)
      case provider
      when "openai"
        { provider: "openai", api_key: CompletionKit.config.openai_api_key || ENV["OPENAI_API_KEY"] }
      when "anthropic"
        { provider: "anthropic", api_key: CompletionKit.config.anthropic_api_key || ENV["ANTHROPIC_API_KEY"] }
      when "ollama"
        {
          provider: "ollama",
          api_key: CompletionKit.config.ollama_api_key || ENV["OLLAMA_API_KEY"],
          api_endpoint: CompletionKit.config.ollama_api_endpoint || ENV["OLLAMA_API_ENDPOINT"]
        }
      when "openrouter"
        { provider: "openrouter", api_key: ENV["OPENROUTER_API_KEY"] }
      else
        {}
      end
    end
    private_class_method :default_config_for

    # Builds the client for a model, resolving its provider exactly once.
    # Provider resolution queries the model list (and may fall back to remote
    # discovery), so it is not repeated for the config and for the client.
    def self.client_for_model(model_name)
      provider = provider_for_model(model_name)
      raise ArgumentError, "Unsupported model: #{model_name}" unless provider

      LlmClient.for_provider(provider, for_provider(provider))
    end
    private_class_method :client_for_model
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
module CompletionKit
  # Parses CSV input for runs and substitutes `{{variable}}` placeholders in
  # prompt templates with row values.
  class CsvProcessor
    require 'csv'

    # Parses `run.csv_data` into row hashes keyed by header.
    #
    # Adds an error on `run.errors[:csv_data]` and returns `[]` when the CSV is
    # malformed, has no data rows, or lacks a column for a template variable.
    #
    # @param run [#csv_data, #errors, #prompt]
    # @return [Array<Hash>] one hash per data row, or `[]`
    def self.process(run)
      return [] if run.csv_data.blank?

      rows = CSV.parse(run.csv_data, headers: true).map(&:to_h)

      if rows.empty?
        run.errors.add(:csv_data, "No data rows found in CSV")
        return []
      end

      validate_variables(run, rows.first.keys) ? rows : []
    rescue CSV::MalformedCSVError => e
      run.errors.add(:csv_data, "Invalid CSV format: #{e.message}")
      []
    end

    # Parses the CSV of the run's dataset without any validation.
    #
    # @param run [#dataset]
    # @return [Array<Hash>] row hashes; `[]` when there is no dataset CSV or
    #   it is malformed (no error is recorded in that case)
    def self.process_self(run)
      return [] unless run.dataset&.csv_data.present?

      CSV.parse(run.dataset.csv_data, headers: true).map(&:to_h)
    rescue CSV::MalformedCSVError
      []
    end

    # @param prompt [#template, nil]
    # @return [Array<String>] unique, stripped names found inside `{{ }}` in
    #   the template; `[]` when the prompt is nil or its template is blank
    def self.extract_variables(prompt)
      return [] if prompt.nil? || prompt.template.blank?

      prompt.template.scan(/\{\{([^}]+)\}\}/).flatten.map(&:strip).uniq
    end

    # Checks that every template variable of the run's prompt has a CSV column.
    #
    # @param run [#prompt, #errors]
    # @param headers [Array<String>] CSV header names
    # @return [Boolean] false (with an error added to `run.errors[:csv_data]`)
    #   when variables are missing, true otherwise
    def self.validate_variables(run, headers)
      missing_variables = extract_variables(run.prompt) - headers
      return true if missing_variables.empty?

      run.errors.add(:csv_data, "Missing required variables in CSV: #{missing_variables.join(', ')}")
      false
    end

    # Returns a copy of the prompt template with each `{{ name }}` placeholder
    # replaced by the matching value. The template itself is not modified.
    #
    # @param prompt [#template]
    # @param variables [Hash] placeholder name => value (both coerced with `to_s`)
    # @return [String]
    def self.apply_variables(prompt, variables)
      result = prompt.template.dup

      variables.each do |name, value|
        replacement = value.to_s
        # Block form: the value is inserted literally. A replacement *string*
        # would treat backslash sequences in the data (`\0`, `\1`, `\\`) as
        # back-references and corrupt the substituted text.
        result.gsub!(/\{\{\s*#{Regexp.escape(name.to_s)}\s*\}\}/) { replacement }
      end

      result
    end
  end
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
require "faraday"
|
|
2
|
+
|
|
3
|
+
module CompletionKit
  # Asks a "judge" LLM to grade an output on a 1-5 scale and parses the
  # judge's `Score:` / `Feedback:` reply.
  class JudgeService
    SCORE_PATTERN = /\*{0,2}Score:?\*{0,2}\s*(\d+(?:\.\d+)?)/i
    FEEDBACK_PATTERN = /\*{0,2}Feedback:?\*{0,2}\s*(.+)/mi

    # @param config [Hash] may carry `:judge_model`; otherwise
    #   `CompletionKit.config.judge_model` is used
    def initialize(config = {})
      @config = config
      @judge_model = config[:judge_model] || CompletionKit.config.judge_model
      judge_config = ApiConfig.for_model(@judge_model)
      @judge_client = LlmClient.for_model(@judge_model, judge_config)
    end

    # Grades +output+ with the judge model.
    #
    # @return [Hash] `{ score:, feedback: }`. Score 1 with an explanatory
    #   feedback is returned when the judge is not configured or when
    #   evaluation fails with anything other than a Faraday::Error.
    # @raise [Faraday::Error] re-raised untouched
    def evaluate(output, expected_output = nil, prompt = nil, criteria: nil, evaluation_steps: nil, rubric_text: nil, human_examples: nil, input_data: nil, **_extras)
      unless @judge_client.configured?
        return { score: 1, feedback: "Judge not configured" }
      end

      context = {
        criteria: criteria,
        evaluation_steps: evaluation_steps,
        rubric_text: rubric_text,
        human_examples: human_examples,
        input_data: input_data
      }
      judge_prompt = build_judge_prompt(output, expected_output, prompt, **context)

      reply = @judge_client.generate_completion(judge_prompt, model: @judge_model)
      # Clients report failures as "Error: ..." strings; turn those into the
      # generic failure result below.
      raise StandardError, reply if reply.start_with?("Error:")

      parse_judge_response(reply)
    rescue Faraday::Error
      raise
    rescue => e
      { score: 1, feedback: "Error during evaluation: #{e.message}" }
    end

    private

    # Assembles the judge prompt: fixed instructions and rubric, then the
    # optional criteria / evaluation steps / calibration examples, then the
    # material to evaluate.
    def build_judge_prompt(output, expected_output, prompt, criteria: nil, evaluation_steps: nil, rubric_text: nil, human_examples: nil, input_data: nil)
      sections = []

      sections << <<~PROMPT
        You are an expert evaluator. You MUST respond with ONLY two lines in this exact format, nothing else:

        Score: <integer from 1 to 5>
        Feedback: <one sentence explaining why>

        Do not include any other text, markdown, or explanation. Just those two lines.

        Use this rubric to choose the score:
        #{rubric_text.presence || CompletionKit::Metric.default_rubric_text}
      PROMPT

      sections << "\nCriteria: #{criteria}\n" if criteria.present?

      if evaluation_steps.present? && evaluation_steps.any?
        numbered_steps = evaluation_steps.each_with_index.map { |step, i| "#{i + 1}. #{step}" }
        sections << "\nEvaluation steps:\n#{numbered_steps.join("\n")}\n"
      end

      if human_examples.present?
        sections << "\nCalibration examples:\n"
        human_examples.each_with_index do |example, index|
          sections << "Example #{index + 1}: score=#{example[:human_score]} output=#{example[:response_text].to_s.truncate(200)}\n"
        end
      end

      input_line = input_data.present? ? "Input data: #{input_data}" : ""
      expected_line = expected_output.present? ? "Expected output: #{expected_output}" : ""

      sections << <<~PROMPT

        Original prompt: #{prompt || "Not provided"}
        #{input_line}
        #{expected_line}
        AI output to evaluate: #{output}
      PROMPT

      sections.join
    end

    # Extracts score and feedback from the judge's reply. A missing score
    # counts as 1; the score is kept within 1..5.
    def parse_judge_response(response)
      score_match = SCORE_PATTERN.match(response)
      feedback_match = FEEDBACK_PATTERN.match(response)

      raw_score = score_match ? score_match[1].to_f : 1

      feedback =
        if feedback_match
          feedback_match[1].strip
        elsif score_match
          "No feedback provided"
        else
          "Could not parse judge response: #{response.truncate(500)}"
        end

      { score: raw_score.clamp(1, 5), feedback: feedback }
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
module CompletionKit
  # Abstract base for provider-specific LLM clients, plus factories that pick
  # the concrete client for a provider or model name.
  class LlmClient
    # @param config [Hash] provider configuration kept for subclasses
    def initialize(config = {})
      @config = config
    end

    # Abstract: generate a completion for +prompt+.
    # @raise [NotImplementedError] always, in the base class
    def generate_completion(prompt, options = {})
      subclass_must_implement(__method__)
    end

    # Abstract: list the models the provider offers.
    # @raise [NotImplementedError] always, in the base class
    def available_models
      subclass_must_implement(__method__)
    end

    # Abstract: whether the client has what it needs to make requests.
    # @raise [NotImplementedError] always, in the base class
    def configured?
      subclass_must_implement(__method__)
    end

    # @return [Array] configuration problems; none in the base class
    def configuration_errors
      []
    end

    # Builds the client for a provider name.
    #
    # @param provider_name [#to_s] "openai", "anthropic", "ollama" or "openrouter"
    # @param config [Hash] passed to the client's constructor
    # @raise [ArgumentError] for any other provider name
    def self.for_provider(provider_name, config = {})
      client_class =
        case provider_name.to_s
        when "openai" then OpenAiClient
        when "anthropic" then AnthropicClient
        when "ollama" then OllamaClient
        when "openrouter" then OpenRouterClient
        else raise ArgumentError, "Unsupported provider: #{provider_name}"
        end

      client_class.new(config)
    end

    # Builds the client for the provider that `ApiConfig.provider_for_model`
    # reports for +model_name+.
    #
    # @raise [ArgumentError] when no provider is reported
    def self.for_model(model_name, config = {})
      provider = ApiConfig.provider_for_model(model_name)
      return for_provider(provider, config) if provider

      raise ArgumentError, "Unsupported model: #{model_name}"
    end

    private

    # Raises the error shared by all abstract methods.
    def subclass_must_implement(method_name)
      raise NotImplementedError, "Subclasses must implement #{method_name}"
    end
  end
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
module CompletionKit
  # Entry point for MCP requests: opens sessions and routes `tools/list` and
  # `tools/call` to the tool modules under McpTools.
  class McpDispatcher
    class MethodNotFound < StandardError; end
    class InvalidParams < StandardError; end

    PROTOCOL_VERSION = "2025-03-26"

    # Tool-name prefix => tool module, in listing order. The modules are
    # wrapped in lambdas so they are only resolved when a request needs them.
    TOOL_ROUTES = {
      /\Aprompts_/ => -> { McpTools::Prompts },
      /\Aruns_/ => -> { McpTools::Runs },
      /\Aresponses_/ => -> { McpTools::Responses },
      /\Adatasets_/ => -> { McpTools::Datasets },
      /\Ametrics_/ => -> { McpTools::Metrics },
      /\Acriteria_/ => -> { McpTools::Criteria },
      /\Aprovider_credentials_/ => -> { McpTools::ProviderCredentials }
    }.freeze

    # Creates a session id, records it in the Rails cache for one hour and
    # returns the initialization payload.
    #
    # @return [Hash] session id, protocol version, server info, capabilities
    def self.initialize_session
      new_session_id = SecureRandom.uuid
      Rails.cache.write("mcp_session:#{new_session_id}", true, expires_in: 1.hour)

      {
        session_id: new_session_id,
        protocolVersion: PROTOCOL_VERSION,
        serverInfo: { name: "CompletionKit", version: CompletionKit::VERSION },
        capabilities: { tools: { listChanged: false } }
      }
    end

    # Routes an MCP method.
    #
    # @param method [String] "tools/list" or "tools/call"
    # @param params [#dig, nil] for "tools/call": "name" and optional "arguments"
    # @raise [MethodNotFound] for any other method, or an unroutable tool name
    def self.dispatch(method, params)
      return { tools: tool_definitions } if method == "tools/list"
      raise MethodNotFound, "Method not found: #{method}" unless method == "tools/call"

      call_tool(params&.dig("name"), params&.dig("arguments") || {})
    end

    # @return [Array] the definitions of every tool module, concatenated in
    #   route order
    def self.tool_definitions
      TOOL_ROUTES.each_value.inject([]) { |all, resolver| all + resolver.call.definitions }
    end

    # Hands the call to the first tool module whose prefix matches +name+.
    #
    # @raise [MethodNotFound] when no prefix matches
    def self.call_tool(name, arguments)
      # `===` (not `match?`) so that nil or non-string names simply fail to
      # match instead of raising.
      _pattern, resolver = TOOL_ROUTES.find { |pattern, _| pattern === name }
      raise MethodNotFound, "Unknown tool: #{name}" unless resolver

      resolver.call.call(name, arguments)
    end
  end
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
module CompletionKit
  module McpTools
    # MCP tools for listing, reading, creating, updating and deleting
    # criteria (named groups of metrics).
    module Criteria
      # Tool name => description, JSON input schema and handler method.
      TOOLS = {
        "criteria_list" => {
          description: "List all criteria",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "criteria_get" => {
          description: "Get a criteria by ID",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :get
        },
        "criteria_create" => {
          description: "Create a criteria grouping metrics",
          inputSchema: {
            type: "object",
            properties: {
              name: {type: "string"}, description: {type: "string"},
              metric_ids: {type: "array", items: {type: "integer"}}
            },
            required: ["name"]
          },
          handler: :create
        },
        "criteria_update" => {
          description: "Update a criteria",
          inputSchema: {
            type: "object",
            properties: {
              id: {type: "integer"}, name: {type: "string"}, description: {type: "string"},
              metric_ids: {type: "array", items: {type: "integer"}}
            },
            required: ["id"]
          },
          handler: :update
        },
        "criteria_delete" => {
          description: "Delete a criteria",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :delete
        }
      }.freeze

      # @return [Array<Hash>] tool definitions (name, description,
      #   inputSchema) without the internal handler
      def self.definitions
        TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
      end

      # Runs the handler registered for +name+.
      #
      # @param name [String] tool name, e.g. "criteria_get"
      # @param arguments [Hash] string-keyed tool arguments
      # @raise [McpDispatcher::MethodNotFound] when +name+ is not one of TOOLS
      #   (the same error the dispatcher raises for unknown tools, rather than
      #   a KeyError)
      def self.call(name, arguments)
        tool = TOOLS.fetch(name) { raise McpDispatcher::MethodNotFound, "Unknown tool: #{name}" }
        public_send(tool[:handler], arguments)
      end

      # Lists all criteria, newest first.
      def self.list(_args)
        text_result(CompletionKit::Criteria.order(created_at: :desc).map(&:as_json))
      end

      # Returns the criteria with id `args["id"]`.
      def self.get(args)
        text_result(CompletionKit::Criteria.find(args["id"]).as_json)
      end

      # Creates a criteria and its metric memberships as one unit: when a
      # membership is invalid nothing is persisted and an error result is
      # returned.
      def self.create(args)
        criteria = CompletionKit::Criteria.new(args.slice("name", "description"))

        saved = CompletionKit::Criteria.transaction do
          next false unless criteria.save

          replace_metric_memberships(criteria, args["metric_ids"])
          true
        end

        if saved
          text_result(criteria.reload.as_json)
        else
          error_result(criteria.errors.full_messages.join(", "))
        end
      rescue ActiveRecord::RecordInvalid => e
        error_result(e.message)
      end

      # Updates name/description and, when "metric_ids" is given, replaces the
      # memberships. Both happen in one transaction, so an invalid membership
      # leaves the criteria unchanged and yields an error result.
      def self.update(args)
        criteria = CompletionKit::Criteria.find(args["id"])

        updated = CompletionKit::Criteria.transaction do
          next false unless criteria.update(args.slice("name", "description"))

          replace_metric_memberships(criteria, args["metric_ids"]) if args.key?("metric_ids")
          true
        end

        if updated
          text_result(criteria.reload.as_json)
        else
          error_result(criteria.errors.full_messages.join(", "))
        end
      rescue ActiveRecord::RecordInvalid => e
        error_result(e.message)
      end

      # Destroys the criteria with id `args["id"]`.
      def self.delete(args)
        CompletionKit::Criteria.find(args["id"]).destroy!
        text_result("Criteria #{args["id"]} deleted")
      end

      # Wraps +data+ (a String as-is, anything else as JSON) in an MCP text
      # content result.
      def self.text_result(data)
        text = data.is_a?(String) ? data : data.to_json
        {content: [{type: "text", text: text}]}
      end

      # Wraps +message+ in an MCP text content result flagged as an error.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end

      # Replaces all memberships of +criteria+ with one per non-blank metric
      # id, positioned in the given order (starting at 1). Does nothing when
      # +metric_ids+ is nil. Runs in a transaction so that a failing
      # `create!` does not leave the old memberships deleted.
      def self.replace_metric_memberships(criteria, metric_ids)
        return unless metric_ids

        CompletionKit::Criteria.transaction do
          criteria.criteria_memberships.delete_all
          Array(metric_ids).reject(&:blank?).each_with_index do |metric_id, index|
            criteria.criteria_memberships.create!(metric_id: metric_id, position: index + 1)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
module CompletionKit
  module McpTools
    # MCP tools for listing, reading, creating, updating and deleting
    # datasets (named CSV payloads).
    module Datasets
      # Tool name => description, JSON input schema and handler method.
      TOOLS = {
        "datasets_list" => {
          description: "List all datasets",
          inputSchema: {type: "object", properties: {}, required: []},
          handler: :list
        },
        "datasets_get" => {
          description: "Get a dataset by ID",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :get
        },
        "datasets_create" => {
          description: "Create a dataset with CSV data",
          inputSchema: {
            type: "object",
            properties: {name: {type: "string"}, csv_data: {type: "string"}},
            required: ["name", "csv_data"]
          },
          handler: :create
        },
        "datasets_update" => {
          description: "Update a dataset",
          inputSchema: {
            type: "object",
            properties: {id: {type: "integer"}, name: {type: "string"}, csv_data: {type: "string"}},
            required: ["id"]
          },
          handler: :update
        },
        "datasets_delete" => {
          description: "Delete a dataset",
          inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
          handler: :delete
        }
      }.freeze

      # @return [Array<Hash>] tool definitions (name, description,
      #   inputSchema) without the internal handler
      def self.definitions
        TOOLS.map { |name, config| {name: name, description: config[:description], inputSchema: config[:inputSchema]} }
      end

      # Runs the handler registered for +name+.
      #
      # @param name [String] tool name, e.g. "datasets_get"
      # @param arguments [Hash] string-keyed tool arguments
      # @raise [McpDispatcher::MethodNotFound] when +name+ is not one of TOOLS
      #   (the same error the dispatcher raises for unknown tools, rather than
      #   a KeyError)
      def self.call(name, arguments)
        tool = TOOLS.fetch(name) { raise McpDispatcher::MethodNotFound, "Unknown tool: #{name}" }
        public_send(tool[:handler], arguments)
      end

      # Lists all datasets, newest first.
      def self.list(_args)
        text_result(Dataset.order(created_at: :desc).map(&:as_json))
      end

      # Returns the dataset with id `args["id"]`.
      def self.get(args)
        text_result(Dataset.find(args["id"]).as_json)
      end

      # Creates a dataset from "name" and "csv_data"; validation failures are
      # returned as an error result.
      def self.create(args)
        dataset = Dataset.new(args.slice("name", "csv_data"))
        return text_result(dataset.as_json) if dataset.save

        error_result(dataset.errors.full_messages.join(", "))
      end

      # Updates "name" and/or "csv_data" of the dataset with id `args["id"]`;
      # validation failures are returned as an error result.
      def self.update(args)
        dataset = Dataset.find(args["id"])
        return text_result(dataset.as_json) if dataset.update(args.slice("name", "csv_data"))

        error_result(dataset.errors.full_messages.join(", "))
      end

      # Destroys the dataset with id `args["id"]`.
      def self.delete(args)
        Dataset.find(args["id"]).destroy!
        text_result("Dataset #{args["id"]} deleted")
      end

      # Wraps +data+ (a String as-is, anything else as JSON) in an MCP text
      # content result.
      def self.text_result(data)
        text = data.is_a?(String) ? data : data.to_json
        {content: [{type: "text", text: text}]}
      end

      # Wraps +message+ in an MCP text content result flagged as an error.
      def self.error_result(message)
        {content: [{type: "text", text: message}], isError: true}
      end
    end
  end
end
|