completion-kit 0.12.4 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfff016e2f30d073116ddcac7e1deb59dd6a6f5cbbc56452004912691a28d07b
4
- data.tar.gz: 536e3a578f45d762c37d0115a390b034f581a5e3e2dcd7f036200aef8ef6db22
3
+ metadata.gz: 7a3fda7b41271efa8b169adff5bc72e3f347fa67641aa26ab2d52f84ad4d0526
4
+ data.tar.gz: fd9fcca6cc6ac22e32a3f8ba7f917fd8b0ccb30a270f45aa63fb7fc3ee59343b
5
5
  SHA512:
6
- metadata.gz: 1d7498497e03fdcd2797eb361250eb70bb378884f9593f617dc4e8a5dc9e61dbad15feef83bfd1b811b56f07bdd1a7573e65c291c05bd5233ff4b0442f16e089
7
- data.tar.gz: 729db67d4ab9151e3103c4ae75cfdfc720311608cdb3f7aba15609bbabc5221d292528fb514a54b9c058e62ce98f4f1e74971edde05aba53073313f427c6803f
6
+ metadata.gz: '02917101ab65551a18974c80b382ba508b5f0a80f67182bf630998aacfebc4584f85d03d9fc0c59d6de7b89c1881391340cd81c3cd1631bca6b545067f57a513'
7
+ data.tar.gz: 255c33ab87a13e1e4ed5387c8cff70c952cf0c68c3d3829ddc4c401208bc1d94019f121fd0cfbd7ec425a4669f3420b96b988ade2bba50f0113f5e714957295e
@@ -45,7 +45,10 @@ module CompletionKit
45
45
  end
46
46
 
47
47
  def dataset_params
48
- params.permit(:name, :csv_data, tag_names: [])
48
+ permitted = params.permit(:name, :csv_data, tag_names: [])
49
+ upload = params[:file]
50
+ permitted[:csv_data] = upload.read.to_s.force_encoding("UTF-8") if upload.respond_to?(:read)
51
+ permitted
49
52
  end
50
53
  end
51
54
  end
@@ -1,8 +1,12 @@
1
+ require "faraday"
2
+
1
3
  module CompletionKit
2
4
  module McpTools
3
5
  module Datasets
4
6
  extend Base
5
7
 
8
+ MAX_CSV_BYTES = 10 * 1024 * 1024
9
+
6
10
  TOOLS = {
7
11
  "datasets_list" => {
8
12
  description: "List all datasets",
@@ -36,6 +40,19 @@ module CompletionKit
36
40
  description: "Delete a dataset",
37
41
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
38
42
  handler: :delete
43
+ },
44
+ "datasets_create_from_url" => {
45
+ description: "Create a dataset by downloading CSV from a URL instead of inlining it. Use this for large datasets: pass a public http(s) URL and the server fetches the CSV directly, so the data never has to pass through the tool-call arguments. The URL is SSRF-checked and the download is capped at 10MB.",
46
+ inputSchema: {
47
+ type: "object",
48
+ properties: {
49
+ name: {type: "string"},
50
+ url: {type: "string", description: "Public http(s) URL of the CSV file to download."},
51
+ tag_names: {type: "array", items: {type: "string"}}
52
+ },
53
+ required: ["name", "url"]
54
+ },
55
+ handler: :create_from_url
39
56
  }
40
57
  }.freeze
41
58
 
@@ -71,6 +88,35 @@ module CompletionKit
71
88
  Dataset.find(args["id"]).destroy!
72
89
  text_result("Dataset #{args["id"]} deleted")
73
90
  end
91
+
92
+ def self.create_from_url(args)
93
+ issues = ProviderEndpoint.validate(args["url"])
94
+ return error_result("URL is not allowed (#{issues.join(", ")}).") if issues.any?
95
+
96
+ response = csv_connection.get(args["url"])
97
+ return error_result("Could not download CSV (HTTP #{response.status}).") unless response.success?
98
+
99
+ body = response.body.to_s
100
+ return error_result("CSV is larger than the #{MAX_CSV_BYTES / (1024 * 1024)}MB limit.") if body.bytesize > MAX_CSV_BYTES
101
+
102
+ dataset = Dataset.new(name: args["name"], csv_data: body.dup.force_encoding("UTF-8"))
103
+ dataset.tag_names = args["tag_names"] if args.key?("tag_names")
104
+ if dataset.save
105
+ text_result(dataset.reload.as_json)
106
+ else
107
+ error_result(dataset.errors.full_messages.join(", "))
108
+ end
109
+ rescue Faraday::Error => e
110
+ error_result("Could not download CSV: #{e.message}")
111
+ end
112
+
113
+ def self.csv_connection
114
+ Faraday.new do |f|
115
+ f.options.timeout = 30
116
+ f.options.open_timeout = 5
117
+ f.adapter Faraday.default_adapter
118
+ end
119
+ end
74
120
  end
75
121
  end
76
122
  end
@@ -4,19 +4,6 @@ module CompletionKit
4
4
  extend Base
5
5
 
6
6
  TOOLS = {
7
- "judges_suggest" => {
8
- description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft MetricVersion with source=\"suggestion\". Returns the persisted drafts. Stripe-metering hooks fire via ActiveSupport::Notifications under completion_kit.judge_suggestion.generated.",
9
- inputSchema: {
10
- type: "object",
11
- properties: {
12
- metric_id: { type: "integer" },
13
- count: { type: "integer", description: "How many variants to request (default 1, max 3). One focused rewrite beats five reworded copies." },
14
- model: { type: "string", description: "Override the model used to generate variants. Defaults to CompletionKit.config.judge_model." }
15
- },
16
- required: ["metric_id"]
17
- },
18
- handler: :suggest
19
- },
20
7
  "judges_replay" => {
21
8
  description: "Run the current judge against a dataset (judge-only run). Wraps runs_create with prompt_id omitted and output_column supplied. Re-judges existing dataset outputs so you can compare against human verdicts.",
22
9
  inputSchema: {
@@ -47,15 +34,6 @@ module CompletionKit
47
34
  }
48
35
  }.freeze
49
36
 
50
- def self.suggest(args)
51
- metric = CompletionKit::Metric.find(args["metric_id"])
52
- generator = CompletionKit::MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
53
- variants = generator.call
54
- return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
55
- versions = generator.persist!(variants)
56
- text_result(versions.map(&:as_json))
57
- end
58
-
59
37
  def self.replay(args)
60
38
  metric = CompletionKit::Metric.find(args["metric_id"])
61
39
  dataset = CompletionKit::Dataset.find(args["dataset_id"])
@@ -44,6 +44,19 @@ module CompletionKit
44
44
  description: "Delete a metric",
45
45
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
46
46
  handler: :delete
47
+ },
48
+ "metrics_suggest_variants" => {
49
+ description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft MetricVersion with source=\"suggestion\". Returns the persisted drafts. Stripe-metering hooks fire via ActiveSupport::Notifications under completion_kit.judge_suggestion.generated.",
50
+ inputSchema: {
51
+ type: "object",
52
+ properties: {
53
+ metric_id: {type: "integer"},
54
+ count: {type: "integer", description: "How many variants to request (default 1, max 3). One focused rewrite beats five reworded copies."},
55
+ model: {type: "string", description: "Override the model used to generate variants. Defaults to CompletionKit.config.judge_model."}
56
+ },
57
+ required: ["metric_id"]
58
+ },
59
+ handler: :suggest_variants
47
60
  }
48
61
  }.freeze
49
62
 
@@ -79,6 +92,15 @@ module CompletionKit
79
92
  Metric.find(args["id"]).destroy!
80
93
  text_result("Metric #{args["id"]} deleted")
81
94
  end
95
+
96
+ def self.suggest_variants(args)
97
+ metric = Metric.find(args["metric_id"])
98
+ generator = MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
99
+ variants = generator.call
100
+ return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
101
+ versions = generator.persist!(variants)
102
+ text_result(versions.map(&:as_json))
103
+ end
82
104
  end
83
105
  end
84
106
  end
@@ -50,6 +50,15 @@ module CompletionKit
50
50
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
51
51
  handler: :publish
52
52
  },
53
+ "prompts_suggest_improvement" => {
54
+ description: "Suggest an improved version of a prompt, grounded in a run's test results and judge feedback. Analyzes the run's responses, scores, and reviews, then returns reasoning plus a rewritten template (preserving {{variables}}) and persists it as a Suggestion. Requires a run that has a prompt (not a judge-only run).",
55
+ inputSchema: {
56
+ type: "object",
57
+ properties: {run_id: {type: "integer", description: "The run whose results ground the improvement."}},
58
+ required: ["run_id"]
59
+ },
60
+ handler: :suggest_improvement
61
+ },
53
62
  }.freeze
54
63
 
55
64
  def self.list(_args)
@@ -96,6 +105,26 @@ module CompletionKit
96
105
  prompt.publish!
97
106
  text_result(prompt.reload.as_json)
98
107
  end
108
+
109
+ def self.suggest_improvement(args)
110
+ run = Run.find(args["run_id"])
111
+ return error_result("Judge-only runs don't have a prompt to improve.") if run.prompt.nil?
112
+
113
+ result = PromptImprovementService.new(run).suggest
114
+ suggestion = run.suggestions.create!(
115
+ prompt: run.prompt,
116
+ reasoning: result["reasoning"],
117
+ suggested_template: result["suggested_template"],
118
+ original_template: result["original_template"]
119
+ )
120
+ text_result(
121
+ suggestion_id: suggestion.id,
122
+ prompt_id: run.prompt.id,
123
+ reasoning: suggestion.reasoning,
124
+ suggested_template: suggestion.suggested_template,
125
+ original_template: suggestion.original_template
126
+ )
127
+ end
99
128
  end
100
129
  end
101
130
  end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.12.4"
2
+ VERSION = "0.14.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.4
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin