completion-kit 0.12.4 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfff016e2f30d073116ddcac7e1deb59dd6a6f5cbbc56452004912691a28d07b
4
- data.tar.gz: 536e3a578f45d762c37d0115a390b034f581a5e3e2dcd7f036200aef8ef6db22
3
+ metadata.gz: 83e9019cabf58eef8e2110428561e9a4a14763e2f7b7334d77684e6714b85560
4
+ data.tar.gz: c05f8a637a7727091af35c2a673a7cca07e9c3bf424daff49cca9208da1a96ee
5
5
  SHA512:
6
- metadata.gz: 1d7498497e03fdcd2797eb361250eb70bb378884f9593f617dc4e8a5dc9e61dbad15feef83bfd1b811b56f07bdd1a7573e65c291c05bd5233ff4b0442f16e089
7
- data.tar.gz: 729db67d4ab9151e3103c4ae75cfdfc720311608cdb3f7aba15609bbabc5221d292528fb514a54b9c058e62ce98f4f1e74971edde05aba53073313f427c6803f
6
+ metadata.gz: 7bd1d7943255b84a81706d7977aaf2d224c88d6884c831302f669151b24659bfd60debb1f3c2558ac71e29b9f14ddc6b4140597955658576d8c39003d62b89dc
7
+ data.tar.gz: 71f23d6a1ac7dfc1b32792ee30116a9b3a32062836798b28fc3c07adc12ebe1ca79345a5249375c7b380af26df4a2fd78cf33976591ccee682c8eb3fd9bce0a9
@@ -4,19 +4,6 @@ module CompletionKit
4
4
  extend Base
5
5
 
6
6
  TOOLS = {
7
- "judges_suggest" => {
8
- description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft MetricVersion with source=\"suggestion\". Returns the persisted drafts. Stripe-metering hooks fire via ActiveSupport::Notifications under completion_kit.judge_suggestion.generated.",
9
- inputSchema: {
10
- type: "object",
11
- properties: {
12
- metric_id: { type: "integer" },
13
- count: { type: "integer", description: "How many variants to request (default 1, max 3). One focused rewrite beats five reworded copies." },
14
- model: { type: "string", description: "Override the model used to generate variants. Defaults to CompletionKit.config.judge_model." }
15
- },
16
- required: ["metric_id"]
17
- },
18
- handler: :suggest
19
- },
20
7
  "judges_replay" => {
21
8
  description: "Run the current judge against a dataset (judge-only run). Wraps runs_create with prompt_id omitted and output_column supplied. Re-judges existing dataset outputs so you can compare against human verdicts.",
22
9
  inputSchema: {
@@ -47,15 +34,6 @@ module CompletionKit
47
34
  }
48
35
  }.freeze
49
36
 
50
- def self.suggest(args)
51
- metric = CompletionKit::Metric.find(args["metric_id"])
52
- generator = CompletionKit::MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
53
- variants = generator.call
54
- return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
55
- versions = generator.persist!(variants)
56
- text_result(versions.map(&:as_json))
57
- end
58
-
59
37
  def self.replay(args)
60
38
  metric = CompletionKit::Metric.find(args["metric_id"])
61
39
  dataset = CompletionKit::Dataset.find(args["dataset_id"])
@@ -44,6 +44,19 @@ module CompletionKit
44
44
  description: "Delete a metric",
45
45
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
46
46
  handler: :delete
47
+ },
48
+ "metrics_suggest_variants" => {
49
+ description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft MetricVersion with source=\"suggestion\". Returns the persisted drafts. Stripe-metering hooks fire via ActiveSupport::Notifications under completion_kit.judge_suggestion.generated.",
50
+ inputSchema: {
51
+ type: "object",
52
+ properties: {
53
+ metric_id: {type: "integer"},
54
+ count: {type: "integer", description: "How many variants to request (default 1, max 3). One focused rewrite beats five reworded copies."},
55
+ model: {type: "string", description: "Override the model used to generate variants. Defaults to CompletionKit.config.judge_model."}
56
+ },
57
+ required: ["metric_id"]
58
+ },
59
+ handler: :suggest_variants
47
60
  }
48
61
  }.freeze
49
62
 
@@ -79,6 +92,15 @@ module CompletionKit
79
92
  Metric.find(args["id"]).destroy!
80
93
  text_result("Metric #{args["id"]} deleted")
81
94
  end
95
+
96
+ def self.suggest_variants(args)
97
+ metric = Metric.find(args["metric_id"])
98
+ generator = MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
99
+ variants = generator.call
100
+ return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
101
+ versions = generator.persist!(variants)
102
+ text_result(versions.map(&:as_json))
103
+ end
82
104
  end
83
105
  end
84
106
  end
@@ -50,6 +50,15 @@ module CompletionKit
50
50
  inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
51
51
  handler: :publish
52
52
  },
53
+ "prompts_suggest_improvement" => {
54
+ description: "Suggest an improved version of a prompt, grounded in a run's test results and judge feedback. Analyzes the run's responses, scores, and reviews, then returns reasoning plus a rewritten template (preserving {{variables}}) and persists it as a Suggestion. Requires a run that has a prompt (not a judge-only run).",
55
+ inputSchema: {
56
+ type: "object",
57
+ properties: {run_id: {type: "integer", description: "The run whose results ground the improvement."}},
58
+ required: ["run_id"]
59
+ },
60
+ handler: :suggest_improvement
61
+ },
53
62
  }.freeze
54
63
 
55
64
  def self.list(_args)
@@ -96,6 +105,26 @@ module CompletionKit
96
105
  prompt.publish!
97
106
  text_result(prompt.reload.as_json)
98
107
  end
108
+
109
+ def self.suggest_improvement(args)
110
+ run = Run.find(args["run_id"])
111
+ return error_result("Judge-only runs don't have a prompt to improve.") if run.prompt.nil?
112
+
113
+ result = PromptImprovementService.new(run).suggest
114
+ suggestion = run.suggestions.create!(
115
+ prompt: run.prompt,
116
+ reasoning: result["reasoning"],
117
+ suggested_template: result["suggested_template"],
118
+ original_template: result["original_template"]
119
+ )
120
+ text_result(
121
+ suggestion_id: suggestion.id,
122
+ prompt_id: run.prompt.id,
123
+ reasoning: suggestion.reasoning,
124
+ suggested_template: suggestion.suggested_template,
125
+ original_template: suggestion.original_template
126
+ )
127
+ end
99
128
  end
100
129
  end
101
130
  end
@@ -1,3 +1,3 @@
1
1
  module CompletionKit
2
- VERSION = "0.12.4"
2
+ VERSION = "0.13.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: completion-kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.4
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damien Bastin