@mzhub/promptc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +224 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +111 -0
- package/dist/cli.js.map +1 -0
- package/dist/compiler/BootstrapFewShot.d.ts +13 -0
- package/dist/compiler/BootstrapFewShot.d.ts.map +1 -0
- package/dist/compiler/BootstrapFewShot.js +93 -0
- package/dist/compiler/BootstrapFewShot.js.map +1 -0
- package/dist/compiler/CandidatePool.d.ts +10 -0
- package/dist/compiler/CandidatePool.d.ts.map +1 -0
- package/dist/compiler/CandidatePool.js +29 -0
- package/dist/compiler/CandidatePool.js.map +1 -0
- package/dist/compiler/CompiledProgram.d.ts +43 -0
- package/dist/compiler/CompiledProgram.d.ts.map +1 -0
- package/dist/compiler/CompiledProgram.js +41 -0
- package/dist/compiler/CompiledProgram.js.map +1 -0
- package/dist/compiler/InstructionRewrite.d.ts +19 -0
- package/dist/compiler/InstructionRewrite.d.ts.map +1 -0
- package/dist/compiler/InstructionRewrite.js +117 -0
- package/dist/compiler/InstructionRewrite.js.map +1 -0
- package/dist/compiler/index.d.ts +8 -0
- package/dist/compiler/index.d.ts.map +1 -0
- package/dist/compiler/index.js +5 -0
- package/dist/compiler/index.js.map +1 -0
- package/dist/compiler/types.d.ts +41 -0
- package/dist/compiler/types.d.ts.map +1 -0
- package/dist/compiler/types.js +2 -0
- package/dist/compiler/types.js.map +1 -0
- package/dist/eval/exactMatch.d.ts +5 -0
- package/dist/eval/exactMatch.d.ts.map +1 -0
- package/dist/eval/exactMatch.js +58 -0
- package/dist/eval/exactMatch.js.map +1 -0
- package/dist/eval/index.d.ts +5 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +3 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/llmJudge.d.ts +9 -0
- package/dist/eval/llmJudge.d.ts.map +1 -0
- package/dist/eval/llmJudge.js +33 -0
- package/dist/eval/llmJudge.js.map +1 -0
- package/dist/eval/types.d.ts +2 -0
- package/dist/eval/types.d.ts.map +1 -0
- package/dist/eval/types.js +2 -0
- package/dist/eval/types.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -0
- package/dist/program/ChainOfThought.d.ts +6 -0
- package/dist/program/ChainOfThought.d.ts.map +1 -0
- package/dist/program/ChainOfThought.js +44 -0
- package/dist/program/ChainOfThought.js.map +1 -0
- package/dist/program/Predict.d.ts +6 -0
- package/dist/program/Predict.d.ts.map +1 -0
- package/dist/program/Predict.js +33 -0
- package/dist/program/Predict.js.map +1 -0
- package/dist/program/Program.d.ts +33 -0
- package/dist/program/Program.d.ts.map +1 -0
- package/dist/program/Program.js +28 -0
- package/dist/program/Program.js.map +1 -0
- package/dist/program/index.d.ts +5 -0
- package/dist/program/index.d.ts.map +1 -0
- package/dist/program/index.js +4 -0
- package/dist/program/index.js.map +1 -0
- package/dist/providers/anthropic.d.ts +10 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +40 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/cerebras.d.ts +10 -0
- package/dist/providers/cerebras.d.ts.map +1 -0
- package/dist/providers/cerebras.js +39 -0
- package/dist/providers/cerebras.js.map +1 -0
- package/dist/providers/google.d.ts +10 -0
- package/dist/providers/google.d.ts.map +1 -0
- package/dist/providers/google.js +42 -0
- package/dist/providers/google.js.map +1 -0
- package/dist/providers/groq.d.ts +10 -0
- package/dist/providers/groq.d.ts.map +1 -0
- package/dist/providers/groq.js +42 -0
- package/dist/providers/groq.js.map +1 -0
- package/dist/providers/index.d.ts +11 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +31 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/ollama.d.ts +9 -0
- package/dist/providers/ollama.d.ts.map +1 -0
- package/dist/providers/ollama.js +39 -0
- package/dist/providers/ollama.js.map +1 -0
- package/dist/providers/openai.d.ts +10 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +42 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/types.d.ts +25 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +2 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/runtime/cache.d.ts +18 -0
- package/dist/runtime/cache.d.ts.map +1 -0
- package/dist/runtime/cache.js +45 -0
- package/dist/runtime/cache.js.map +1 -0
- package/dist/runtime/concurrency.d.ts +7 -0
- package/dist/runtime/concurrency.d.ts.map +1 -0
- package/dist/runtime/concurrency.js +14 -0
- package/dist/runtime/concurrency.js.map +1 -0
- package/dist/runtime/costTracker.d.ts +24 -0
- package/dist/runtime/costTracker.d.ts.map +1 -0
- package/dist/runtime/costTracker.js +37 -0
- package/dist/runtime/costTracker.js.map +1 -0
- package/dist/runtime/index.d.ts +9 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +5 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/retry.d.ts +10 -0
- package/dist/runtime/retry.d.ts.map +1 -0
- package/dist/runtime/retry.js +39 -0
- package/dist/runtime/retry.js.map +1 -0
- package/dist/schema/defineSchema.d.ts +18 -0
- package/dist/schema/defineSchema.d.ts.map +1 -0
- package/dist/schema/defineSchema.js +27 -0
- package/dist/schema/defineSchema.js.map +1 -0
- package/dist/schema/index.d.ts +3 -0
- package/dist/schema/index.d.ts.map +1 -0
- package/dist/schema/index.js +2 -0
- package/dist/schema/index.js.map +1 -0
- package/examples/README.md +42 -0
- package/examples/load-compiled.ts +62 -0
- package/examples/multi-provider.ts +77 -0
- package/examples/name-extractor.ts +113 -0
- package/examples/qa-system.ts +98 -0
- package/package.json +62 -0
- package/src/cli.ts +122 -0
- package/src/compiler/BootstrapFewShot.ts +149 -0
- package/src/compiler/CandidatePool.ts +39 -0
- package/src/compiler/CompiledProgram.ts +112 -0
- package/src/compiler/InstructionRewrite.ts +200 -0
- package/src/compiler/index.ts +19 -0
- package/src/compiler/types.ts +46 -0
- package/src/eval/exactMatch.ts +65 -0
- package/src/eval/index.ts +4 -0
- package/src/eval/llmJudge.ts +45 -0
- package/src/eval/types.ts +4 -0
- package/src/index.ts +71 -0
- package/src/program/ChainOfThought.ts +59 -0
- package/src/program/Predict.ts +47 -0
- package/src/program/Program.ts +64 -0
- package/src/program/index.ts +4 -0
- package/src/providers/anthropic.ts +55 -0
- package/src/providers/cerebras.ts +53 -0
- package/src/providers/google.ts +57 -0
- package/src/providers/groq.ts +57 -0
- package/src/providers/index.ts +50 -0
- package/src/providers/ollama.ts +54 -0
- package/src/providers/openai.ts +57 -0
- package/src/providers/types.ts +27 -0
- package/src/runtime/cache.ts +65 -0
- package/src/runtime/concurrency.ts +21 -0
- package/src/runtime/costTracker.ts +58 -0
- package/src/runtime/index.ts +8 -0
- package/src/runtime/retry.ts +59 -0
- package/src/schema/defineSchema.ts +44 -0
- package/src/schema/index.ts +2 -0
- package/tests/candidatePool.test.ts +46 -0
- package/tests/evaluators.test.ts +69 -0
- package/tests/runtime.test.ts +106 -0
- package/tests/schema.test.ts +59 -0
- package/tsconfig.json +24 -0
package/src/cli.ts
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { parseArgs } from "node:util";
|
|
4
|
+
import { readFileSync, writeFileSync, existsSync } from "node:fs";
|
|
5
|
+
import { resolve } from "node:path";
|
|
6
|
+
|
|
7
|
+
// Option table handed to `parseArgs`. Every value-taking flag is declared as a
// string (including the numeric-looking `candidates` and `concurrency`), so
// their defaults are strings as well.
const cliOptions = {
  config: { type: "string", short: "c" },
  output: { type: "string", short: "o", default: "compiled-prompt.json" },
  candidates: { type: "string", default: "10" },
  concurrency: { type: "string", default: "5" },
  provider: { type: "string", short: "p", default: "openai" },
  model: { type: "string", short: "m" },
  help: { type: "boolean", short: "h" },
} as const;

// Parse everything after `node <script>`. Bare words (the command name and its
// file argument) are collected into `positionals`.
const { values, positionals } = parseArgs({
  args: process.argv.slice(2),
  options: cliOptions,
  allowPositionals: true,
});
|
|
20
|
+
|
|
21
|
+
/**
 * Print the CLI usage summary (commands, options and examples) to stdout.
 */
function printHelp() {
  // One entry per output line. The empty first and last entries reproduce the
  // blank line that opens and closes the help text.
  const helpLines = [
    "",
    "promptc - Type-safe LLM program compiler",
    "",
    "Usage:",
    "promptc compile <trainset.json> [options]",
    "promptc validate <compiled.json>",
    "",
    "Commands:",
    "compile Compile a program using a training set",
    "validate Validate a compiled prompt JSON file",
    "",
    "Options:",
    "-c, --config <file> Path to config file",
    "-o, --output <file> Output file (default: compiled-prompt.json)",
    "-p, --provider <name> LLM provider: openai, anthropic, google, groq, cerebras, ollama",
    "-m, --model <name> Model to use (overrides provider default)",
    "--candidates <n> Number of candidates to try (default: 10)",
    "--concurrency <n> Parallel evaluations (default: 5)",
    "-h, --help Show this help",
    "",
    "Examples:",
    "promptc compile trainset.json -p openai -o prompt.json",
    "promptc validate prompt.json",
    "",
  ];

  console.log(helpLines.join("\n"));
}
|
|
47
|
+
|
|
48
|
+
/**
 * Check that a file holds a compiled prompt (a JSON document with `meta` and
 * `config` fields) and print a short summary of it.
 *
 * Every failure is reported on stderr and terminates the process with exit
 * code 1; the function only returns normally for a valid file.
 *
 * @param filePath Path of the file to inspect.
 */
async function validateCommand(filePath: string) {
  if (!existsSync(filePath)) {
    console.error(`Error: File not found: ${filePath}`);
    process.exit(1);
  }

  // Read and parse in separate steps so that an unreadable file (permissions,
  // directory, ...) is not misreported as malformed JSON.
  let content: string;
  try {
    content = readFileSync(filePath, "utf-8");
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Error: Could not read file: ${reason}`);
    process.exit(1);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    console.error("Invalid JSON file");
    process.exit(1);
  }

  // `null` is valid JSON; fall back to an empty object so the field check
  // below rejects it instead of throwing on property access.
  const data = (parsed ?? {}) as {
    meta?: {
      score?: unknown;
      strategy?: unknown;
      compiledAt?: unknown;
      tokenUsage?: { totalTokens?: unknown };
    };
    config?: { fewShotExamples?: { length?: unknown } };
  };

  if (!data.meta || !data.config) {
    console.error("Invalid format: missing 'meta' or 'config' fields");
    process.exit(1);
  }

  console.log("✓ Valid compiled prompt file");
  console.log(` Score: ${data.meta.score}`);
  console.log(` Strategy: ${data.meta.strategy}`);
  console.log(` Compiled: ${data.meta.compiledAt}`);
  // `??` rather than `||` so that a real count of 0 is printed as 0.
  console.log(` Examples: ${data.config.fewShotExamples?.length ?? 0}`);
  console.log(` Tokens used: ${data.meta.tokenUsage?.totalTokens ?? "N/A"}`);
}
|
|
74
|
+
|
|
75
|
+
/**
 * CLI entry point: dispatch on the first positional argument.
 *
 * - no command or `--help`: print usage and exit 0
 * - `validate <file>`: validate a compiled prompt file
 * - `compile`: print instructions for writing a config file (this command
 *   does not perform a compilation itself)
 * - anything else: report the unknown command, print usage and exit 1
 */
async function main() {
  if (values.help || positionals.length === 0) {
    printHelp();
    process.exit(0);
  }

  const [command, fileArg] = positionals;

  switch (command) {
    case "validate":
      if (!fileArg) {
        console.error("Error: Please provide a file to validate");
        process.exit(1);
      }
      await validateCommand(resolve(fileArg));
      break;

    case "compile":
      // The example imports from the published package name (@mzhub/promptc);
      // a bare 'promptc' specifier would not resolve for users of this package.
      console.log(`
Note: The 'compile' command requires a custom config file that defines:
- Your schema (using defineSchema)
- Your program (Predict or ChainOfThought)
- Your evaluator

Example config file (promptc.config.ts):

import { defineSchema, ChainOfThought, createProvider, exactMatch, z } from '@mzhub/promptc';

export const schema = defineSchema({...});
export const provider = createProvider('openai', { apiKey: process.env.OPENAI_API_KEY });
export const program = new ChainOfThought(schema, provider);
export const evaluator = exactMatch();

Run with: promptc compile trainset.json -c promptc.config.ts
`);
      break;

    default:
      console.error(`Unknown command: ${command}`);
      printHelp();
      process.exit(1);
  }
}
|
|
118
|
+
|
|
119
|
+
// Run the CLI. Any rejection escaping main() is reported on stderr and mapped
// to exit code 1.
main().catch((error: unknown) => {
  // A rejection is not guaranteed to be an Error instance; fall back to its
  // string form instead of printing "undefined".
  const message = error instanceof Error ? error.message : String(error);
  console.error("Error:", message);
  process.exit(1);
});
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import { ZodRawShape } from "zod";
|
|
2
|
+
import { Program } from "../program/Program.js";
|
|
3
|
+
import { createConcurrencyManager } from "../runtime/concurrency.js";
|
|
4
|
+
import { CostTracker } from "../runtime/costTracker.js";
|
|
5
|
+
import { CandidatePool } from "./CandidatePool.js";
|
|
6
|
+
import type {
|
|
7
|
+
Evaluator,
|
|
8
|
+
Example,
|
|
9
|
+
CompilationResult,
|
|
10
|
+
CompileOptions,
|
|
11
|
+
} from "./types.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Few-shot compilation strategy: sample several candidate sets of few-shot
 * examples from the training data, score each set by running the program on a
 * validation sample, and keep the highest-scoring set.
 */
export class BootstrapFewShot<O> {
  /**
   * @param evaluator Scores a prediction against the expected output; higher
   *   scores win.
   */
  constructor(private evaluator: Evaluator<O>) {}

  /**
   * Evaluate candidate few-shot sets and return the best one.
   *
   * Candidates are evaluated through the concurrency limiter. A candidate that
   * starts after the token budget is exhausted is skipped (and
   * `budget.onBudgetWarning` is called). Validation cases whose run or
   * evaluation throws are ignored; a candidate with no successful case scores 0.
   *
   * @param program Program to run; its schema description becomes the
   *   compiled instructions.
   * @param trainset Examples used both as few-shot material and as the source
   *   of the validation sample.
   * @param options Compilation settings (defaults: 10 candidates, concurrency
   *   5, 3 examples per candidate, validation split 0.3, no early stop).
   * @returns The winning configuration and run metadata. When no candidate
   *   could be evaluated, the score is -1 and the example list is empty.
   */
  async compile<I extends ZodRawShape, OShape extends ZodRawShape>(
    program: Program<I, OShape>,
    trainset: Example<unknown, O>[],
    options: CompileOptions = {}
  ): Promise<CompilationResult<unknown, O>> {
    const {
      candidates: candidateCount = 10,
      concurrency = 5,
      examplesPerCandidate = 3,
      validationSplit = 0.3,
      seed,
      earlyStopThreshold = 0,
      budget,
      onProgress,
    } = options;

    // Candidates are drawn before the validation sample; with a seeded pool
    // this order determines which examples land where.
    const pool = new CandidatePool(trainset, seed);
    const candidates = pool.generateFewShotCandidates(
      candidateCount,
      examplesPerCandidate
    );
    const validationSet = pool.getValidationSet(validationSplit);
    const limiter = createConcurrencyManager(concurrency);
    const costTracker = new CostTracker();

    // Progress state, updated as each candidate finishes so that onProgress
    // reflects the run while it is still in flight.
    let candidatesEvaluated = 0;
    let currentBestScore = -1;

    const evaluationPromises = candidates.map((fewShotSet) =>
      limiter.run(async () => {
        if (budget?.maxTokens && costTracker.exceedsBudget(budget.maxTokens)) {
          budget.onBudgetWarning?.(
            costTracker.total.totalTokens,
            budget.maxTokens
          );
          // Negative score marks the candidate as skipped.
          return { score: -1, examples: fewShotSet };
        }

        let totalScore = 0;
        let validCount = 0;

        for (const testCase of validationSet) {
          try {
            const prediction = await program.run(testCase.input as any, {
              fewShotExamples: fewShotSet.map((ex) => ({
                input: ex.input as Record<string, unknown>,
                output: ex.output as Record<string, unknown>,
              })),
            });

            costTracker.record(prediction.trace.usage);
            const score = await this.evaluator(
              prediction.result as O,
              testCase.output
            );
            totalScore += score;
            validCount++;

            // Give up on a candidate once at least two cases are scored and
            // its running average is below the threshold.
            if (
              earlyStopThreshold > 0 &&
              validCount >= 2 &&
              totalScore / validCount < earlyStopThreshold
            ) {
              break;
            }
          } catch {
            // Best effort: a failing case does not count towards the average.
            continue;
          }
        }

        const score = validCount > 0 ? totalScore / validCount : 0;

        candidatesEvaluated++;
        if (score > currentBestScore) {
          currentBestScore = score;
        }
        onProgress?.({
          candidatesEvaluated,
          totalCandidates: candidateCount,
          currentBestScore,
          tokensUsed: costTracker.total.totalTokens,
        });

        return { score, examples: fewShotSet };
      })
    );

    const candidateResults = await Promise.all(evaluationPromises);

    // Pick the winner in candidate order (not completion order) so that ties
    // are always broken the same way.
    const results: Array<{ score: number; examples: Example<unknown, O>[] }> =
      candidateResults.filter((result) => result.score >= 0);

    const winner = results.reduce(
      (best, current) => (current.score > best.score ? current : best),
      { score: -1, examples: [] as Example<unknown, O>[] }
    );

    return {
      meta: {
        score: winner.score,
        compiledAt: new Date().toISOString(),
        strategy: "BootstrapFewShot",
        tokenUsage: costTracker.total,
      },
      config: {
        instructions: program["schema"].description,
        fewShotExamples: winner.examples,
      },
    };
  }

  /**
   * Rough cost estimate for a `compile` run with the given options.
   *
   * Assumes one call per candidate per validation case and a flat 500 tokens
   * per call.
   *
   * @param trainsetSize Number of examples in the training set.
   * @param options Only `candidates` and `validationSplit` are used.
   */
  estimateCost(
    trainsetSize: number,
    options: CompileOptions = {}
  ): {
    estimatedCalls: number;
    estimatedTokens: number;
  } {
    const { candidates = 10, validationSplit = 0.3 } = options;
    const validationSize = Math.max(
      1,
      Math.floor(trainsetSize * validationSplit)
    );
    const estimatedCalls = candidates * validationSize;
    const avgTokensPerCall = 500;

    return {
      estimatedCalls,
      estimatedTokens: estimatedCalls * avgTokensPerCall,
    };
  }
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import type { Example } from "./types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Draws few-shot candidate sets and a validation sample from a training set.
 *
 * Both are sampled from the same examples, so a validation case may also
 * appear among a candidate's few-shot examples. With a seed the sequence of
 * draws is reproducible; without one `Math.random` is used.
 */
export class CandidatePool<I, O> {
  private rng: () => number;

  /**
   * @param trainset Examples to sample from; never mutated.
   * @param seed Optional seed for reproducible sampling.
   */
  constructor(private trainset: Example<I, O>[], seed?: number) {
    this.rng = seed !== undefined ? this.seededRandom(seed) : Math.random;
  }

  /**
   * Build `count` candidate sets, each holding the first
   * `examplesPerCandidate` examples of a fresh shuffle of the training set
   * (fewer when the training set is smaller).
   */
  generateFewShotCandidates(
    count: number,
    examplesPerCandidate: number
  ): Array<Example<I, O>[]> {
    const candidates: Array<Example<I, O>[]> = [];

    for (let i = 0; i < count; i++) {
      const shuffled = this.shuffledTrainset();
      // Clamp to [0, length] so a negative size yields an empty set.
      const size = Math.max(0, Math.min(examplesPerCandidate, shuffled.length));
      candidates.push(shuffled.slice(0, size));
    }

    return candidates;
  }

  /**
   * Return a random sample of the training set for validation.
   *
   * @param splitRatio Fraction of the training set to sample. At least one
   *   example is requested, so the result is only empty for an empty
   *   training set.
   */
  getValidationSet(splitRatio: number = 0.3): Example<I, O>[] {
    const count = Math.max(1, Math.floor(this.trainset.length * splitRatio));
    return this.shuffledTrainset().slice(0, count);
  }

  /**
   * Return a shuffled copy of the training set (Fisher-Yates).
   *
   * Sorting with a random comparator is avoided: it yields a biased
   * permutation and its result depends on the engine's sort algorithm.
   */
  private shuffledTrainset(): Example<I, O>[] {
    const items = [...this.trainset];
    for (let i = items.length - 1; i > 0; i--) {
      const j = Math.floor(this.rng() * (i + 1));
      // Both indices are within bounds by construction.
      const picked = items[j]!;
      items[j] = items[i]!;
      items[i] = picked;
    }
    return items;
  }

  /**
   * Create a deterministic generator of numbers in [0, 1) (mulberry32).
   *
   * The seed is reduced to an unsigned 32-bit integer. Unlike a generator that
   * feeds `Math.sin` back into itself, this one does not get stuck at 0 when
   * the seed is 0.
   */
  private seededRandom(seed: number): () => number {
    let state = seed >>> 0;
    return () => {
      state = (state + 0x6d2b79f5) >>> 0;
      let t = state;
      t = Math.imul(t ^ (t >>> 15), t | 1);
      t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
      return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
    };
  }
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { ZodRawShape, ZodObject } from "zod";
|
|
2
|
+
import type { ProgramConfig, ProgramOutput } from "../program/Program.js";
|
|
3
|
+
import type { Example } from "./types.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Metadata recorded alongside a compiled program.
 */
export interface CompiledProgramMeta {
  /** Score of the winning candidate. */
  score: number;

  /** Timestamp of the compilation, as a string. */
  compiledAt: string;

  /** Name of the compilation strategy that produced the result. */
  strategy: string;

  /** Token accounting for the compilation run. */
  tokenUsage: {
    inputTokens: number;
    outputTokens: number;
    totalTokens: number;
    calls: number;
  };
}
|
|
16
|
+
|
|
17
|
+
/**
 * A program bundled with the instructions and few-shot examples chosen by a
 * compilation run.
 */
export interface CompiledProgram<I, O> {
  /** Metadata of the compilation that produced this program. */
  meta: CompiledProgramMeta;

  /** The compiled prompt configuration. */
  config: {
    instructions: string;
    fewShotExamples: Array<Example<unknown, unknown>>;
  };

  /**
   * Run the program on `input`; `overrides` may replace parts of the compiled
   * configuration for this call.
   */
  run(input: I, overrides?: Partial<ProgramConfig>): Promise<ProgramOutput<O>>;

  /** Serialize `meta` and `config` to a JSON string. */
  toJSON(): string;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Wrap a program together with a compilation result.
 *
 * The returned object's `run` calls `originalProgram.run` with the compiled
 * instructions and few-shot examples. An `instructions` or `fewShotExamples`
 * value in `overrides` replaces the compiled one; no other override field is
 * forwarded.
 *
 * Note that `toJSON` returns an already-serialized, pretty-printed string, not
 * a plain object.
 *
 * @param originalProgram Object exposing the program's `run` method.
 * @param compilationResult The `meta` and `config` produced by a compiler.
 */
export function createCompiledProgram<
  I extends ZodRawShape,
  O extends ZodRawShape,
  InputType = unknown,
  OutputType = unknown
>(
  originalProgram: {
    run(
      input: InputType,
      config?: ProgramConfig
    ): Promise<ProgramOutput<OutputType>>;
  },
  compilationResult: {
    meta: CompiledProgramMeta;
    config: {
      instructions: string;
      fewShotExamples: Array<Example<unknown, unknown>>;
    };
  }
): CompiledProgram<InputType, OutputType> {
  return {
    meta: compilationResult.meta,
    config: compilationResult.config,

    async run(
      input: InputType,
      overrides?: Partial<ProgramConfig>
    ): Promise<ProgramOutput<OutputType>> {
      const compiled = compilationResult.config;

      const instructions = overrides?.instructions ?? compiled.instructions;
      // Compiled examples are only converted when no override is supplied.
      const fewShotExamples =
        overrides?.fewShotExamples ??
        compiled.fewShotExamples.map((example) => ({
          input: example.input as Record<string, unknown>,
          output: example.output as Record<string, unknown>,
        }));

      return originalProgram.run(input, { instructions, fewShotExamples });
    },

    toJSON(): string {
      const { meta, config } = compilationResult;
      return JSON.stringify({ meta, config }, null, 2);
    },
  };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Plain-data form of a compiled program: the shape written by
 * `CompiledProgram.toJSON` and accepted by `loadCompiledProgram`.
 */
export interface SerializedCompiledProgram {
  /** Metadata of the compilation run. */
  meta: CompiledProgramMeta;

  /** The compiled prompt configuration. */
  config: {
    instructions: string;
    fewShotExamples: Array<Example<unknown, unknown>>;
  };
}
|
|
85
|
+
|
|
86
|
+
/**
 * Rebuild a compiled program from its serialized form.
 *
 * @param jsonOrPath Either the JSON text of a compiled program or the already
 *   parsed object. Despite the parameter name, a string is always parsed as
 *   JSON text; it is never treated as a file path.
 * @param program Object exposing the `run` method the compiled configuration
 *   is applied to.
 * @returns A compiled program wrapping `program`.
 * @throws Error when the string is not valid JSON, or when the data is not an
 *   object with `meta` and `config` fields.
 */
export function loadCompiledProgram<InputType = unknown, OutputType = unknown>(
  jsonOrPath: string | SerializedCompiledProgram,
  program: {
    run(
      input: InputType,
      config?: ProgramConfig
    ): Promise<ProgramOutput<OutputType>>;
  }
): CompiledProgram<InputType, OutputType> {
  let parsed: unknown;

  if (typeof jsonOrPath === "string") {
    try {
      parsed = JSON.parse(jsonOrPath);
    } catch {
      throw new Error("Invalid JSON string provided to loadCompiledProgram");
    }
  } else {
    parsed = jsonOrPath;
  }

  // JSON text such as "null" parses successfully; reject anything that is not
  // an object up front so the caller gets the format error rather than a
  // TypeError from a property access on null.
  const data =
    parsed !== null && typeof parsed === "object"
      ? (parsed as Partial<SerializedCompiledProgram>)
      : undefined;

  if (!data || !data.meta || !data.config) {
    throw new Error("Invalid compiled program format: missing meta or config");
  }

  return createCompiledProgram(program, {
    meta: data.meta,
    config: data.config,
  });
}
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import { ZodRawShape } from "zod";
|
|
2
|
+
import { Program } from "../program/Program.js";
|
|
3
|
+
import { createConcurrencyManager } from "../runtime/concurrency.js";
|
|
4
|
+
import { CostTracker } from "../runtime/costTracker.js";
|
|
5
|
+
import { CandidatePool } from "./CandidatePool.js";
|
|
6
|
+
import type { LLMProvider } from "../providers/types.js";
|
|
7
|
+
import type {
|
|
8
|
+
Evaluator,
|
|
9
|
+
Example,
|
|
10
|
+
CompilationResult,
|
|
11
|
+
CompileOptions,
|
|
12
|
+
} from "./types.js";
|
|
13
|
+
|
|
14
|
+
/**
 * Options for the InstructionRewrite strategy: the common compile options plus
 * the settings for generating instruction variants.
 */
export interface InstructionRewriteOptions extends CompileOptions {
  /** Provider asked to generate the instruction variants. */
  provider: LLMProvider;

  /** How many instruction variants to request from the provider. */
  instructionVariations?: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Compilation strategy that searches over instruction wordings as well as
 * few-shot example sets.
 *
 * An LLM provider is asked for reworded variants of the program's base
 * instruction; every variant (plus the base instruction itself) is combined
 * with every few-shot candidate, each combination is scored on a validation
 * sample, and the best combination wins.
 */
export class InstructionRewrite<O> {
  /**
   * @param evaluator Scores a prediction against the expected output; higher
   *   scores win.
   */
  constructor(private evaluator: Evaluator<O>) {}

  /**
   * Evaluate instruction / few-shot combinations and return the best one.
   *
   * A combination that starts after the token budget is exhausted is skipped
   * (and `budget.onBudgetWarning` is called). Validation cases whose run or
   * evaluation throws are ignored; a combination with no successful case
   * scores 0.
   *
   * @param program Program to run; its schema description is the base
   *   instruction.
   * @param trainset Examples used both as few-shot material and as the source
   *   of the validation sample.
   * @param options Compilation settings (defaults: 5 few-shot candidates, 5
   *   instruction variations, concurrency 5, 3 examples per candidate,
   *   validation split 0.3, no early stop). `provider` is required.
   * @returns The winning configuration and run metadata. When nothing could be
   *   evaluated, the score is -1, the base instruction is kept and the
   *   example list is empty.
   */
  async compile<I extends ZodRawShape, OShape extends ZodRawShape>(
    program: Program<I, OShape>,
    trainset: Example<unknown, O>[],
    options: InstructionRewriteOptions
  ): Promise<CompilationResult<unknown, O>> {
    const {
      candidates: candidateCount = 5,
      instructionVariations = 5,
      concurrency = 5,
      examplesPerCandidate = 3,
      validationSplit = 0.3,
      seed,
      earlyStopThreshold = 0,
      budget,
      onProgress,
      provider,
    } = options;

    const baseDescription = program["schema"].description;
    // The validation sample is drawn before the few-shot candidates; with a
    // seeded pool this order determines which examples land where.
    const pool = new CandidatePool(trainset, seed);
    const validationSet = pool.getValidationSet(validationSplit);
    const limiter = createConcurrencyManager(concurrency);
    const costTracker = new CostTracker();

    const instructionVariants = await this.generateInstructionVariants(
      provider,
      baseDescription,
      instructionVariations
    );

    const fewShotCandidates = pool.generateFewShotCandidates(
      candidateCount,
      examplesPerCandidate
    );

    // Cross product: every instruction with every few-shot set.
    const allCandidates: Array<{
      instructions: string;
      examples: Example<unknown, O>[];
    }> = [];
    for (const instruction of instructionVariants) {
      for (const examples of fewShotCandidates) {
        allCandidates.push({ instructions: instruction, examples });
      }
    }

    // Progress state, updated as each combination finishes.
    let candidatesEvaluated = 0;
    let currentBestScore = -1;

    const evaluationPromises = allCandidates.map((candidate) =>
      limiter.run(async () => {
        if (budget?.maxTokens && costTracker.exceedsBudget(budget.maxTokens)) {
          budget.onBudgetWarning?.(
            costTracker.total.totalTokens,
            budget.maxTokens
          );
          // Negative score marks the combination as skipped.
          return { score: -1, ...candidate };
        }

        let totalScore = 0;
        let validCount = 0;

        for (const testCase of validationSet) {
          try {
            const prediction = await program.run(testCase.input as any, {
              instructions: candidate.instructions,
              fewShotExamples: candidate.examples.map((ex) => ({
                input: ex.input as Record<string, unknown>,
                output: ex.output as Record<string, unknown>,
              })),
            });

            costTracker.record(prediction.trace.usage);
            const score = await this.evaluator(
              prediction.result as O,
              testCase.output
            );
            totalScore += score;
            validCount++;

            // Same early-stop rule as BootstrapFewShot: give up once at least
            // two cases are scored and the running average is below the
            // threshold.
            if (
              earlyStopThreshold > 0 &&
              validCount >= 2 &&
              totalScore / validCount < earlyStopThreshold
            ) {
              break;
            }
          } catch {
            // Best effort: a failing case does not count towards the average.
            continue;
          }
        }

        const score = validCount > 0 ? totalScore / validCount : 0;

        candidatesEvaluated++;
        if (score > currentBestScore) {
          currentBestScore = score;
        }
        onProgress?.({
          candidatesEvaluated,
          totalCandidates: allCandidates.length,
          currentBestScore,
          tokensUsed: costTracker.total.totalTokens,
        });

        return { score, ...candidate };
      })
    );

    const candidateResults = await Promise.all(evaluationPromises);

    // Pick the winner in candidate order so ties are always broken the same
    // way.
    const results: Array<{
      score: number;
      instructions: string;
      examples: Example<unknown, O>[];
    }> = candidateResults.filter((r) => r.score >= 0);

    const winner = results.reduce(
      (best, current) => (current.score > best.score ? current : best),
      {
        score: -1,
        instructions: baseDescription,
        examples: [] as Example<unknown, O>[],
      }
    );

    return {
      meta: {
        score: winner.score,
        compiledAt: new Date().toISOString(),
        strategy: "InstructionRewrite",
        tokenUsage: costTracker.total,
      },
      config: {
        instructions: winner.instructions,
        fewShotExamples: winner.examples,
      },
    };
  }

  /**
   * Ask the provider for reworded versions of `baseInstruction`.
   *
   * The result always starts with the base instruction, followed by at most
   * `count` variants. If the response holds no parsable JSON array, only the
   * base instruction is returned. A failure of the provider call itself is not
   * caught here and propagates to the caller.
   */
  private async generateInstructionVariants(
    provider: LLMProvider,
    baseInstruction: string,
    count: number
  ): Promise<string[]> {
    // Nothing to generate: skip the provider call entirely.
    if (!(count > 0)) {
      return [baseInstruction];
    }

    const prompt = `You are an expert prompt engineer. Given a base instruction for an LLM task, generate ${count} different variations that might perform better.

Base instruction: "${baseInstruction}"

Requirements:
- Each variation should convey the same task but with different wording, tone, or structure
- Try variations like: more specific, more concise, role-based ("You are an expert..."), step-by-step, formal, casual
- Output as a JSON array of strings

Respond with ONLY a JSON array, no additional text:
["variation 1", "variation 2", ...]`;

    const response = await provider.complete({
      prompt,
      temperature: 0.8,
      maxTokens: 1024,
      responseFormat: "json",
    });

    try {
      // Tolerate text around the array by extracting the outermost [...] span.
      const jsonMatch = response.content.match(/\[[\s\S]*\]/);
      if (!jsonMatch) {
        return [baseInstruction];
      }
      const parsed: unknown = JSON.parse(jsonMatch[0]);
      if (!Array.isArray(parsed)) {
        return [baseInstruction];
      }
      // The model output is untrusted: keep only non-empty strings so that no
      // non-string value is ever used as an instruction.
      const variations = parsed.filter(
        (item): item is string =>
          typeof item === "string" && item.trim().length > 0
      );
      return [baseInstruction, ...variations.slice(0, count)];
    } catch {
      return [baseInstruction];
    }
  }

  /**
   * Rough cost estimate for a `compile` run with the given options.
   *
   * Counts one call per instruction / few-shot combination per validation
   * case, plus one call for generating the variants, at a flat 500 tokens per
   * call. The base instruction is evaluated alongside the generated variants,
   * so `instructionVariations + 1` instructions are counted.
   *
   * @param trainsetSize Number of examples in the training set.
   * @param options Only `candidates`, `instructionVariations` and
   *   `validationSplit` are used.
   */
  estimateCost(
    trainsetSize: number,
    options: Partial<InstructionRewriteOptions> = {}
  ): {
    estimatedCalls: number;
    estimatedTokens: number;
  } {
    const {
      candidates = 5,
      instructionVariations = 5,
      validationSplit = 0.3,
    } = options;
    const validationSize = Math.max(
      1,
      Math.floor(trainsetSize * validationSplit)
    );
    const totalCandidates = candidates * (instructionVariations + 1);
    const estimatedCalls = totalCandidates * validationSize + 1;
    const avgTokensPerCall = 500;

    return {
      estimatedCalls,
      estimatedTokens: estimatedCalls * avgTokensPerCall,
    };
  }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export { BootstrapFewShot } from "./BootstrapFewShot.js";
|
|
2
|
+
export { InstructionRewrite } from "./InstructionRewrite.js";
|
|
3
|
+
export type { InstructionRewriteOptions } from "./InstructionRewrite.js";
|
|
4
|
+
export { CandidatePool } from "./CandidatePool.js";
|
|
5
|
+
export {
|
|
6
|
+
createCompiledProgram,
|
|
7
|
+
loadCompiledProgram,
|
|
8
|
+
} from "./CompiledProgram.js";
|
|
9
|
+
export type {
|
|
10
|
+
CompiledProgram,
|
|
11
|
+
CompiledProgramMeta,
|
|
12
|
+
SerializedCompiledProgram,
|
|
13
|
+
} from "./CompiledProgram.js";
|
|
14
|
+
export type {
|
|
15
|
+
Evaluator,
|
|
16
|
+
Example,
|
|
17
|
+
CompilationResult,
|
|
18
|
+
CompileOptions,
|
|
19
|
+
} from "./types.js";
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
 * Scoring function used by the compilers: compares a prediction with the
 * expected output and returns a numeric score, either directly or as a
 * promise. Candidates with higher scores are preferred.
 */
export type Evaluator<O> = (prediction: O, groundTruth: O) => number | Promise<number>;
|
|
5
|
+
|
|
6
|
+
/**
 * One training example: an input paired with its expected output.
 */
export interface Example<I, O> {
  /** Value passed to the program. */
  input: I;

  /** Output expected for `input`. */
  output: O;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Outcome of a compilation run: the winning prompt configuration plus
 * metadata about the run.
 */
export interface CompilationResult<I, O> {
  /** Metadata about the run. */
  meta: {
    /** Score of the winning candidate. */
    score: number;
    /** Timestamp of the compilation, as a string. */
    compiledAt: string;
    /** Name of the strategy that produced the result. */
    strategy: string;
    /** Token accounting for the run. */
    tokenUsage: {
      inputTokens: number;
      outputTokens: number;
      totalTokens: number;
      calls: number;
    };
  };

  /** The selected prompt configuration. */
  config: {
    instructions: string;
    fewShotExamples: Array<Example<I, O>>;
  };
}
|
|
28
|
+
|
|
29
|
+
/**
 * Settings shared by the compilation strategies. Every field is optional;
 * each strategy applies its own defaults.
 */
export interface CompileOptions {
  /** Number of few-shot candidate sets to generate. */
  candidates?: number;

  /** Limit passed to the concurrency manager that runs candidate evaluations. */
  concurrency?: number;

  /** Maximum number of examples in each few-shot candidate set. */
  examplesPerCandidate?: number;

  /** Fraction of the training set sampled for validation. */
  validationSplit?: number;

  /** Seed for reproducible sampling; random when omitted. */
  seed?: number;

  /**
   * Average score below which a candidate's evaluation is abandoned early
   * (used by BootstrapFewShot once two cases are scored; 0 disables it).
   */
  earlyStopThreshold?: number;

  /** Token budget for the run. */
  budget?: {
    /** Candidates that start once this many tokens are used are skipped. */
    maxTokens?: number;
    /** Called with the tokens used and the limit when a candidate is skipped. */
    onBudgetWarning?: (used: number, max: number) => void;
  };

  /** Progress callback, called once per scored candidate. */
  onProgress?: (progress: {
    candidatesEvaluated: number;
    totalCandidates: number;
    currentBestScore: number;
    tokensUsed: number;
  }) => void;
}
|