completion-kit 0.12.4 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 83e9019cabf58eef8e2110428561e9a4a14763e2f7b7334d77684e6714b85560
|
|
4
|
+
data.tar.gz: c05f8a637a7727091af35c2a673a7cca07e9c3bf424daff49cca9208da1a96ee
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7bd1d7943255b84a81706d7977aaf2d224c88d6884c831302f669151b24659bfd60debb1f3c2558ac71e29b9f14ddc6b4140597955658576d8c39003d62b89dc
|
|
7
|
+
data.tar.gz: 71f23d6a1ac7dfc1b32792ee30116a9b3a32062836798b28fc3c07adc12ebe1ca79345a5249375c7b380af26df4a2fd78cf33976591ccee682c8eb3fd9bce0a9
|
|
@@ -4,19 +4,6 @@ module CompletionKit
|
|
|
4
4
|
extend Base
|
|
5
5
|
|
|
6
6
|
TOOLS = {
|
|
7
|
-
"judges_suggest" => {
|
|
8
|
-
description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft MetricVersion with source=\"suggestion\". Returns the persisted drafts. Stripe-metering hooks fire via ActiveSupport::Notifications under completion_kit.judge_suggestion.generated.",
|
|
9
|
-
inputSchema: {
|
|
10
|
-
type: "object",
|
|
11
|
-
properties: {
|
|
12
|
-
metric_id: { type: "integer" },
|
|
13
|
-
count: { type: "integer", description: "How many variants to request (default 1, max 3). One focused rewrite beats five reworded copies." },
|
|
14
|
-
model: { type: "string", description: "Override the model used to generate variants. Defaults to CompletionKit.config.judge_model." }
|
|
15
|
-
},
|
|
16
|
-
required: ["metric_id"]
|
|
17
|
-
},
|
|
18
|
-
handler: :suggest
|
|
19
|
-
},
|
|
20
7
|
"judges_replay" => {
|
|
21
8
|
description: "Run the current judge against a dataset (judge-only run). Wraps runs_create with prompt_id omitted and output_column supplied. Re-judges existing dataset outputs so you can compare against human verdicts.",
|
|
22
9
|
inputSchema: {
|
|
@@ -47,15 +34,6 @@ module CompletionKit
|
|
|
47
34
|
}
|
|
48
35
|
}.freeze
|
|
49
36
|
|
|
50
|
-
def self.suggest(args)
|
|
51
|
-
metric = CompletionKit::Metric.find(args["metric_id"])
|
|
52
|
-
generator = CompletionKit::MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
|
|
53
|
-
variants = generator.call
|
|
54
|
-
return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
|
|
55
|
-
versions = generator.persist!(variants)
|
|
56
|
-
text_result(versions.map(&:as_json))
|
|
57
|
-
end
|
|
58
|
-
|
|
59
37
|
def self.replay(args)
|
|
60
38
|
metric = CompletionKit::Metric.find(args["metric_id"])
|
|
61
39
|
dataset = CompletionKit::Dataset.find(args["dataset_id"])
|
|
@@ -44,6 +44,19 @@ module CompletionKit
|
|
|
44
44
|
description: "Delete a metric",
|
|
45
45
|
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
46
46
|
handler: :delete
|
|
47
|
+
},
|
|
48
|
+
"metrics_suggest_variants" => {
|
|
49
|
+
description: "Ask the model to rewrite the metric's judge instruction in N variants targeted at the recent disagreements. Each variant is saved as a draft MetricVersion with source=\"suggestion\". Returns the persisted drafts. Stripe-metering hooks fire via ActiveSupport::Notifications under completion_kit.judge_suggestion.generated.",
|
|
50
|
+
inputSchema: {
|
|
51
|
+
type: "object",
|
|
52
|
+
properties: {
|
|
53
|
+
metric_id: {type: "integer"},
|
|
54
|
+
count: {type: "integer", description: "How many variants to request (default 1, max 3). One focused rewrite beats five reworded copies."},
|
|
55
|
+
model: {type: "string", description: "Override the model used to generate variants. Defaults to CompletionKit.config.judge_model."}
|
|
56
|
+
},
|
|
57
|
+
required: ["metric_id"]
|
|
58
|
+
},
|
|
59
|
+
handler: :suggest_variants
|
|
47
60
|
}
|
|
48
61
|
}.freeze
|
|
49
62
|
|
|
@@ -79,6 +92,15 @@ module CompletionKit
|
|
|
79
92
|
Metric.find(args["id"]).destroy!
|
|
80
93
|
text_result("Metric #{args["id"]} deleted")
|
|
81
94
|
end
|
|
95
|
+
|
|
96
|
+
def self.suggest_variants(args)
|
|
97
|
+
metric = Metric.find(args["metric_id"])
|
|
98
|
+
generator = MetricVariantGenerator.new(metric, count: args["count"].to_i, model: args["model"])
|
|
99
|
+
variants = generator.call
|
|
100
|
+
return error_result("Variant generator returned no parseable variants. Try again or change the model.") if variants.empty?
|
|
101
|
+
versions = generator.persist!(variants)
|
|
102
|
+
text_result(versions.map(&:as_json))
|
|
103
|
+
end
|
|
82
104
|
end
|
|
83
105
|
end
|
|
84
106
|
end
|
|
@@ -50,6 +50,15 @@ module CompletionKit
|
|
|
50
50
|
inputSchema: {type: "object", properties: {id: {type: "integer"}}, required: ["id"]},
|
|
51
51
|
handler: :publish
|
|
52
52
|
},
|
|
53
|
+
"prompts_suggest_improvement" => {
|
|
54
|
+
description: "Suggest an improved version of a prompt, grounded in a run's test results and judge feedback. Analyzes the run's responses, scores, and reviews, then returns reasoning plus a rewritten template (preserving {{variables}}) and persists it as a Suggestion. Requires a run that has a prompt (not a judge-only run).",
|
|
55
|
+
inputSchema: {
|
|
56
|
+
type: "object",
|
|
57
|
+
properties: {run_id: {type: "integer", description: "The run whose results ground the improvement."}},
|
|
58
|
+
required: ["run_id"]
|
|
59
|
+
},
|
|
60
|
+
handler: :suggest_improvement
|
|
61
|
+
},
|
|
53
62
|
}.freeze
|
|
54
63
|
|
|
55
64
|
def self.list(_args)
|
|
@@ -96,6 +105,26 @@ module CompletionKit
|
|
|
96
105
|
prompt.publish!
|
|
97
106
|
text_result(prompt.reload.as_json)
|
|
98
107
|
end
|
|
108
|
+
|
|
109
|
+
def self.suggest_improvement(args)
|
|
110
|
+
run = Run.find(args["run_id"])
|
|
111
|
+
return error_result("Judge-only runs don't have a prompt to improve.") if run.prompt.nil?
|
|
112
|
+
|
|
113
|
+
result = PromptImprovementService.new(run).suggest
|
|
114
|
+
suggestion = run.suggestions.create!(
|
|
115
|
+
prompt: run.prompt,
|
|
116
|
+
reasoning: result["reasoning"],
|
|
117
|
+
suggested_template: result["suggested_template"],
|
|
118
|
+
original_template: result["original_template"]
|
|
119
|
+
)
|
|
120
|
+
text_result(
|
|
121
|
+
suggestion_id: suggestion.id,
|
|
122
|
+
prompt_id: run.prompt.id,
|
|
123
|
+
reasoning: suggestion.reasoning,
|
|
124
|
+
suggested_template: suggestion.suggested_template,
|
|
125
|
+
original_template: suggestion.original_template
|
|
126
|
+
)
|
|
127
|
+
end
|
|
99
128
|
end
|
|
100
129
|
end
|
|
101
130
|
end
|