tuneprompt 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +151 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +146 -0
  5. package/dist/commands/activate.d.ts +1 -0
  6. package/dist/commands/activate.js +91 -0
  7. package/dist/commands/fix.d.ts +1 -0
  8. package/dist/commands/fix.js +187 -0
  9. package/dist/commands/history.d.ts +5 -0
  10. package/dist/commands/history.js +63 -0
  11. package/dist/commands/init.d.ts +1 -0
  12. package/dist/commands/init.js +96 -0
  13. package/dist/commands/run.d.ts +9 -0
  14. package/dist/commands/run.js +216 -0
  15. package/dist/db/migrate.d.ts +2 -0
  16. package/dist/db/migrate.js +8 -0
  17. package/dist/engine/constraintExtractor.d.ts +8 -0
  18. package/dist/engine/constraintExtractor.js +54 -0
  19. package/dist/engine/loader.d.ts +5 -0
  20. package/dist/engine/loader.js +74 -0
  21. package/dist/engine/metaPrompt.d.ts +11 -0
  22. package/dist/engine/metaPrompt.js +129 -0
  23. package/dist/engine/optimizer.d.ts +26 -0
  24. package/dist/engine/optimizer.js +246 -0
  25. package/dist/engine/reporter.d.ts +7 -0
  26. package/dist/engine/reporter.js +58 -0
  27. package/dist/engine/runner.d.ts +9 -0
  28. package/dist/engine/runner.js +169 -0
  29. package/dist/engine/shadowTester.d.ts +11 -0
  30. package/dist/engine/shadowTester.js +156 -0
  31. package/dist/index.d.ts +7 -0
  32. package/dist/index.js +26 -0
  33. package/dist/providers/anthropic.d.ts +12 -0
  34. package/dist/providers/anthropic.js +51 -0
  35. package/dist/providers/base.d.ts +15 -0
  36. package/dist/providers/base.js +10 -0
  37. package/dist/providers/openai.d.ts +12 -0
  38. package/dist/providers/openai.js +58 -0
  39. package/dist/providers/openrouter.d.ts +11 -0
  40. package/dist/providers/openrouter.js +83 -0
  41. package/dist/scoring/exact-match.d.ts +1 -0
  42. package/dist/scoring/exact-match.js +8 -0
  43. package/dist/scoring/json-validator.d.ts +4 -0
  44. package/dist/scoring/json-validator.js +29 -0
  45. package/dist/scoring/semantic.d.ts +8 -0
  46. package/dist/scoring/semantic.js +107 -0
  47. package/dist/services/cloud.service.d.ts +49 -0
  48. package/dist/services/cloud.service.js +82 -0
  49. package/dist/storage/database.d.ts +10 -0
  50. package/dist/storage/database.js +179 -0
  51. package/dist/types/fix.d.ts +28 -0
  52. package/dist/types/fix.js +2 -0
  53. package/dist/types/index.d.ts +58 -0
  54. package/dist/types/index.js +2 -0
  55. package/dist/utils/analytics.d.ts +2 -0
  56. package/dist/utils/analytics.js +22 -0
  57. package/dist/utils/config.d.ts +3 -0
  58. package/dist/utils/config.js +70 -0
  59. package/dist/utils/errorHandler.d.ts +14 -0
  60. package/dist/utils/errorHandler.js +40 -0
  61. package/dist/utils/license.d.ts +40 -0
  62. package/dist/utils/license.js +207 -0
  63. package/dist/utils/storage.d.ts +2 -0
  64. package/dist/utils/storage.js +25 -0
  65. package/package.json +76 -0
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.PromptOptimizer = void 0;
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
const openai_1 = __importDefault(require("openai"));
const metaPrompt_1 = require("./metaPrompt");
const constraintExtractor_1 = require("./constraintExtractor");
/**
 * Generates improved prompt candidates for a failed test case by
 * meta-prompting an LLM, shadow-tests each candidate, and returns the
 * highest-scoring one. Falls back to a heuristic rewrite when no LLM
 * provider is usable.
 */
class PromptOptimizer {
    anthropic;
    openai;
    constructor() {
        // Only construct SDK clients for keys that are present and are not
        // obvious placeholders ('api_key...' / 'phc_xxxxx' template values).
        const anthropicKey = process.env.ANTHROPIC_API_KEY;
        if (anthropicKey && !anthropicKey.startsWith('api_key') && anthropicKey !== 'phc_xxxxx') {
            this.anthropic = new sdk_1.default({
                apiKey: anthropicKey
            });
        }
        const openaiKey = process.env.OPENAI_API_KEY;
        if (openaiKey && !openaiKey.startsWith('api_key')) {
            this.openai = new openai_1.default({
                apiKey: openaiKey
            });
        }
    }
    /**
     * Main optimization method.
     *
     * @param failedTest - the failed test (prompt, input, expected/actual
     *   output, errorType, threshold) to repair.
     * @returns the original prompt, the best optimized prompt, the LLM's
     *   reasoning, and the shadow-test score/pass verdict.
     */
    async optimize(failedTest) {
        console.log(`\n🧠 Analyzing failure: "${failedTest.description}"`);
        // Step 1: Extract constraints and build context
        const errorContext = (0, constraintExtractor_1.generateErrorContext)(failedTest);
        // Step 2: Choose the right meta-prompt based on error type
        const metaPrompt = this.selectMetaPrompt(failedTest, errorContext);
        // Step 3: Generate fix candidates using Claude
        console.log('⚡ Generating optimized prompt candidates...');
        const candidates = await this.generateCandidates(metaPrompt, failedTest);
        // Step 4: Shadow test each candidate
        console.log('🧪 Shadow testing candidates...');
        const bestCandidate = await this.selectBestCandidate(candidates, failedTest);
        return {
            originalPrompt: failedTest.prompt,
            optimizedPrompt: bestCandidate.prompt,
            reasoning: bestCandidate.reasoning,
            confidence: bestCandidate.score,
            testResults: {
                score: bestCandidate.score,
                passed: bestCandidate.score >= failedTest.threshold,
                output: '' // Will be filled by shadow tester
            }
        };
    }
    /**
     * Select appropriate meta-prompt based on error type
     * ('json' / 'semantic' / anything else -> generic optimization prompt).
     */
    selectMetaPrompt(test, errorContext) {
        const input = {
            originalPrompt: test.prompt,
            testInput: test.input,
            expectedOutput: test.expectedOutput,
            actualOutput: test.actualOutput,
            errorType: test.errorType,
            errorMessage: errorContext
        };
        switch (test.errorType) {
            case 'json':
                return (0, metaPrompt_1.generateJSONFixPrompt)(input);
            case 'semantic':
                return (0, metaPrompt_1.generateSemanticFixPrompt)(input);
            default:
                return (0, metaPrompt_1.generateOptimizationPrompt)(input);
        }
    }
    /**
     * Generate multiple fix candidates using available LLMs with fallback.
     * Providers are tried in order; a provider that throws (bad key, bad
     * JSON, network error) is logged and skipped.
     */
    async generateCandidates(metaPrompt, failedTest) {
        // Define provider priority order for candidate generation
        const providers = ['anthropic', 'openai', 'openrouter'];
        for (const provider of providers) {
            try {
                // Check if we have the required client for this provider
                if (provider === 'anthropic' && this.anthropic) {
                    console.log(`⚡ Using Anthropic for candidate generation...`);
                    const response = await this.anthropic.messages.create({
                        model: 'claude-sonnet-4-20250514',
                        max_tokens: 4000,
                        temperature: 0.7, // Some creativity for prompt rewriting
                        messages: [{
                                role: 'user',
                                content: metaPrompt
                            }]
                    });
                    const content = response.content[0];
                    if (content.type !== 'text') {
                        throw new Error('Unexpected response type from Claude');
                    }
                    return this.parseCandidates(content.text);
                }
                else if (provider === 'openai' && this.openai) {
                    console.log(`⚡ Using OpenAI for candidate generation...`);
                    const response = await this.openai.chat.completions.create({
                        model: 'gpt-4o',
                        messages: [{
                                role: 'user',
                                content: metaPrompt
                            }],
                        response_format: { type: 'json_object' }
                    });
                    const content = response.choices[0]?.message?.content;
                    if (!content) {
                        throw new Error('No content returned from OpenAI');
                    }
                    return this.parseCandidates(content);
                }
                else if (provider === 'openrouter') {
                    // For OpenRouter, we'll use the shadowTester to get a response
                    console.log(`⚡ Using OpenRouter for candidate generation...`);
                    // Since OpenRouter is used in shadow testing, we'll use a different approach
                    // For now, we'll return a basic fallback since OpenRouter doesn't support structured outputs as well
                    return [{
                            prompt: this.createFallbackPrompt(failedTest),
                            reasoning: 'Generated using fallback method',
                            score: 0
                        }];
                }
            }
            catch (error) {
                console.log(`⚠️ ${provider} provider failed for candidate generation: ${error.message}`);
                continue; // Try next provider
            }
        }
        // All providers failed
        console.error('All providers failed for candidate generation');
        return [{
                prompt: this.createFallbackPrompt(failedTest),
                reasoning: 'Fallback prompt with basic improvements',
                score: 0
            }];
    }
    /**
     * Parse an LLM JSON reply into the two expected candidates.
     * Strips markdown code fences (models frequently wrap JSON in ```json
     * blocks) and validates the shape, so a malformed reply throws inside
     * generateCandidates' try/catch and triggers the provider fallback
     * instead of failing later with an opaque undefined-property error.
     */
    parseCandidates(raw) {
        const cleaned = raw.trim()
            .replace(/^```(?:json)?\s*/i, '')
            .replace(/\s*```$/, '');
        const parsed = JSON.parse(cleaned);
        if (!parsed.candidateA?.prompt || !parsed.candidateB?.prompt) {
            throw new Error('LLM response missing candidateA/candidateB prompts');
        }
        return [
            {
                prompt: parsed.candidateA.prompt,
                reasoning: parsed.candidateA.reasoning,
                score: 0 // Will be filled by shadow testing
            },
            {
                prompt: parsed.candidateB.prompt,
                reasoning: parsed.candidateB.reasoning,
                score: 0
            }
        ];
    }
    /**
     * Shadow test each candidate (in parallel) and return the best one.
     */
    async selectBestCandidate(candidates, originalTest) {
        const { runShadowTest } = await Promise.resolve().then(() => __importStar(require('./shadowTester')));
        const testedCandidates = await Promise.all(candidates.map(async (candidate) => {
            const result = await runShadowTest(candidate.prompt, originalTest);
            return {
                ...candidate,
                score: result.score
            };
        }));
        // Sort by score (highest first)
        testedCandidates.sort((a, b) => b.score - a.score);
        return testedCandidates[0];
    }
    /**
     * Fallback prompt improvement - generates a clean rewritten prompt
     * without calling any LLM.
     */
    createFallbackPrompt(test) {
        // Extract the core intent from the original prompt
        // Remove any existing "You must provide..." prefixes to avoid duplication
        let corePrompt = test.prompt
            .replace(/You must provide a response that includes the following key information:\n[^\n]*\n\n/g, '')
            .trim();
        // For JSON errors, create a structured prompt
        if (test.errorType === 'json') {
            return `${corePrompt}

IMPORTANT: You must respond with valid JSON only. No explanations, no markdown, just the raw JSON object.`;
        }
        // For semantic errors, be more specific about expected output
        if (test.errorType === 'semantic') {
            return `${corePrompt}

Your response must match this exactly: "${test.expectedOutput}"
Do not add any extra text, greetings, or explanations. Output only what is requested.`;
        }
        // Default: add clarity
        return `${corePrompt}

Be concise and match the expected output format exactly.`;
    }
}
exports.PromptOptimizer = PromptOptimizer;
import { TestRun } from '../types';
/**
 * Console reporter for a completed test run: prints results as JSON,
 * as a table, or both, followed by a pass/fail/cost summary.
 */
export declare class TestReporter {
    printResults(run: TestRun, format?: 'json' | 'table' | 'both'): void;
    private printJSON;
    private printTable;
    private printSummary;
}
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.TestReporter = void 0;
const chalk_1 = __importDefault(require("chalk"));
const cli_table3_1 = __importDefault(require("cli-table3"));
/**
 * Console reporter: renders a test run as JSON and/or a colorized table,
 * then prints an aggregate summary (totals, duration, cost).
 */
class TestReporter {
    printResults(run, format = 'both') {
        const wantsJSON = format === 'json' || format === 'both';
        const wantsTable = format === 'table' || format === 'both';
        if (wantsJSON) {
            this.printJSON(run);
        }
        if (wantsTable) {
            this.printTable(run);
        }
        // Summary is always printed regardless of the chosen format.
        this.printSummary(run);
    }
    printJSON(run) {
        console.log(JSON.stringify(run, null, 2));
    }
    printTable(run) {
        const table = new cli_table3_1.default({
            head: ['Status', 'Test', 'Score', 'Method', 'Duration'],
            colWidths: [10, 40, 10, 15, 12]
        });
        run.results.forEach((result) => {
            // Status badge: green pass, red fail, yellow anything else (error).
            let statusIcon;
            if (result.status === 'pass') {
                statusIcon = chalk_1.default.green('✓ PASS');
            }
            else if (result.status === 'fail') {
                statusIcon = chalk_1.default.red('✗ FAIL');
            }
            else {
                statusIcon = chalk_1.default.yellow('⚠ ERROR');
            }
            // Score color bands: >=0.8 green, >=0.5 yellow, otherwise red.
            let paint;
            if (result.score >= 0.8) {
                paint = chalk_1.default.green;
            }
            else if (result.score >= 0.5) {
                paint = chalk_1.default.yellow;
            }
            else {
                paint = chalk_1.default.red;
            }
            table.push([
                statusIcon,
                result.testCase.description,
                paint(result.score.toFixed(2)),
                result.testCase.config?.method || 'semantic',
                `${result.metadata.duration}ms`
            ]);
        });
        console.log(table.toString());
    }
    printSummary(run) {
        console.log('\n' + chalk_1.default.bold('Summary:'));
        console.log(`  Total: ${run.totalTests}`);
        console.log(chalk_1.default.green(`  Passed: ${run.passed}`));
        console.log(chalk_1.default.red(`  Failed: ${run.failed}`));
        console.log(`  Duration: ${run.duration}ms`);
        // Sum per-result cost; results without cost metadata count as $0.
        let totalCost = 0;
        for (const r of run.results) {
            totalCost += r.metadata.cost || 0;
        }
        console.log(`  Cost: $${totalCost.toFixed(4)}`);
    }
}
exports.TestReporter = TestReporter;
import { TestCase, TestRun, TunePromptConfig } from "../types";
/**
 * Executes test cases against the configured LLM providers (with
 * provider fallback) and scores the responses, producing a TestRun.
 */
export declare class TestRunner {
    private config;
    private providers;
    constructor(config: TunePromptConfig);
    private initializeProviders;
    runTests(testCases: TestCase[]): Promise<TestRun>;
    private runSingleTest;
}
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TestRunner = void 0;
const uuid_1 = require("uuid");
const openai_1 = require("../providers/openai");
const anthropic_1 = require("../providers/anthropic");
const openrouter_1 = require("../providers/openrouter");
const exact_match_1 = require("../scoring/exact-match");
const json_validator_1 = require("../scoring/json-validator");
const semantic_1 = require("../scoring/semantic");
/**
 * Runs test cases against configured providers (OpenAI / Anthropic /
 * OpenRouter), scoring each response with the test's scoring method
 * (exact, json, or semantic) and falling back across providers on error.
 */
class TestRunner {
    config;
    providers = new Map();
    constructor(config) {
        this.config = config;
        this.initializeProviders();
    }
    // Instantiate a provider entry for each provider present in config.
    initializeProviders() {
        if (this.config.providers.openai) {
            const provider = new openai_1.OpenAIProvider(this.config.providers.openai);
            this.providers.set("openai", provider);
        }
        if (this.config.providers.anthropic) {
            this.providers.set("anthropic", new anthropic_1.AnthropicProvider(this.config.providers.anthropic));
        }
        if (this.config.providers.openrouter) {
            const provider = new openrouter_1.OpenRouterProvider(this.config.providers.openrouter);
            this.providers.set("openrouter", provider);
        }
    }
    /**
     * Run all test cases sequentially and aggregate into a TestRun.
     * Results with status "error" count neither as passed nor failed.
     */
    async runTests(testCases) {
        const runId = (0, uuid_1.v4)();
        const startTime = Date.now();
        const results = [];
        for (const testCase of testCases) {
            const result = await this.runSingleTest(testCase);
            results.push(result);
        }
        const duration = Date.now() - startTime;
        const passed = results.filter((r) => r.status === "pass").length;
        const failed = results.filter((r) => r.status === "fail").length;
        return {
            id: runId,
            timestamp: new Date(),
            totalTests: testCases.length,
            passed,
            failed,
            duration,
            results,
        };
    }
    /**
     * Execute one test case, trying providers in fallback order unless the
     * test pins a provider explicitly. Returns an "error" result only when
     * every attempted provider fails.
     */
    async runSingleTest(testCase) {
        const testId = (0, uuid_1.v4)();
        const startTime = Date.now();
        // Define fallback order: Primary -> Fallbacks
        const fallbackChain = ["openai", "anthropic", "openrouter"];
        // Determine starting provider
        const initialProvider = testCase.config?.provider || "openai";
        // Build the sequence of providers to try
        let providersToTry;
        if (testCase.config?.provider) {
            // If provider is explicitly set, only try that one
            providersToTry = [testCase.config.provider];
        }
        else {
            providersToTry = [
                initialProvider,
                ...fallbackChain.filter((p) => p !== initialProvider),
            ];
        }
        let lastError;
        const errors = [];
        for (const providerName of providersToTry) {
            const provider = this.providers.get(providerName);
            if (!provider)
                continue;
            try {
                // Execute prompt
                const response = await provider.complete(testCase.prompt);
                // Score result
                const scoringMethod = testCase.config?.method || "semantic";
                // `??` (not `||`) so an explicit threshold of 0 is honored
                // rather than silently replaced by the 0.8 default.
                const threshold = testCase.config?.threshold ?? this.config.threshold ?? 0.8;
                let score;
                let error;
                if (scoringMethod === "exact") {
                    score = (0, exact_match_1.exactMatch)(String(testCase.expect), response.content);
                }
                else if (scoringMethod === "json") {
                    const result = (0, json_validator_1.validateJSON)(testCase.expect, response.content);
                    score = result.score;
                    error = result.error;
                }
                else if (scoringMethod === "semantic") {
                    let calculatedScore;
                    let lastScoringError;
                    // potential embedding providers
                    const embeddingCapable = ["openai", "openrouter"];
                    // Order: Current provider (if capable) -> OpenAI -> OpenRouter -> others
                    const scoringProvidersToTry = [
                        ...(embeddingCapable.includes(providerName) ? [providerName] : []),
                        ...embeddingCapable.filter((p) => p !== providerName),
                    ].filter((p) => this.providers.has(p));
                    if (scoringProvidersToTry.length === 0) {
                        throw new Error("No embedding-capable providers available for semantic scoring");
                    }
                    for (const scoreProviderName of scoringProvidersToTry) {
                        try {
                            const scoreProvider = this.providers.get(scoreProviderName);
                            if (!scoreProvider)
                                continue;
                            const scorer = new semantic_1.SemanticScorer(scoreProvider);
                            calculatedScore = await scorer.score(String(testCase.expect), response.content);
                            break;
                        }
                        catch (err) {
                            lastScoringError = err;
                            continue;
                        }
                    }
                    if (calculatedScore === undefined) {
                        throw new Error(`Semantic scoring failed. Last error: ${lastScoringError?.message || "Unknown error"}`);
                    }
                    score = calculatedScore;
                }
                else {
                    throw new Error(`Unknown scoring method: ${scoringMethod}`);
                }
                const status = score >= threshold ? "pass" : "fail";
                const duration = Date.now() - startTime;
                return {
                    id: testId,
                    testCase,
                    status,
                    score,
                    actualOutput: response.content,
                    expectedOutput: String(testCase.expect),
                    error,
                    metadata: {
                        duration,
                        timestamp: new Date(),
                        tokens: response.tokens,
                        cost: response.cost,
                        provider: providerName,
                    },
                };
            }
            catch (error) {
                lastError = error;
                errors.push(`${providerName.toUpperCase()}: ${error.message}`);
                continue;
            }
        }
        // If all attempts failed
        return {
            id: testId,
            testCase,
            status: "error",
            score: 0,
            actualOutput: "",
            expectedOutput: String(testCase.expect),
            error: errors.join(" | ") || lastError?.message || "All providers failed",
            metadata: {
                duration: Date.now() - startTime,
                timestamp: new Date(),
            },
        };
    }
}
exports.TestRunner = TestRunner;
import { FailedTest } from '../types/fix';
/** Outcome of shadow-testing one candidate prompt. */
export interface ShadowTestResult {
    score: number;
    output: string;
    passed: boolean;
}
/**
 * Test a candidate prompt against the original test case
 * Tries providers in sequence until one succeeds
 */
export declare function runShadowTest(candidatePrompt: string, originalTest: FailedTest): Promise<ShadowTestResult>;
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.runShadowTest = runShadowTest;
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
const openai_1 = __importDefault(require("openai"));
const semantic_1 = require("../scoring/semantic"); // From Phase 1
/**
 * Test a candidate prompt against the original test case
 * Tries providers in sequence until one succeeds
 */
async function runShadowTest(candidatePrompt, originalTest) {
    // Define provider priority order
    const providers = ['anthropic', 'openai', 'openrouter'];
    // Try each provider in order
    for (const provider of providers) {
        try {
            // Check if API key exists for this provider
            const apiKey = getApiKeyForProvider(provider);
            if (!apiKey || apiKey.startsWith('api_key') || apiKey === 'phc_xxxxx') {
                // Silently skip placeholders or missing keys
                continue;
            }
            let output;
            if (provider === 'anthropic') {
                output = await runAnthropicTest(candidatePrompt, originalTest.input);
            }
            else if (provider === 'openai') {
                output = await runOpenAITest(candidatePrompt, originalTest.input);
            }
            else if (provider === 'openrouter') {
                output = await runOpenRouterTest(candidatePrompt, originalTest.input);
            }
            else {
                continue; // Unsupported provider
            }
            // Score the output using the same method as Phase 1
            const score = await scoreOutput(output, originalTest.expectedOutput, originalTest.errorType);
            return {
                score,
                output,
                passed: score >= originalTest.threshold
            };
        }
        catch (error) {
            console.log(`⚠️ ${provider} provider failed: ${error.message}`);
            continue; // Try next provider
        }
    }
    // All providers failed
    console.error('All providers failed for shadow test');
    return {
        score: 0,
        output: '',
        passed: false
    };
}
/** Look up the environment API key for a provider name. */
function getApiKeyForProvider(provider) {
    switch (provider) {
        case 'anthropic':
            return process.env.ANTHROPIC_API_KEY;
        case 'openai':
            return process.env.OPENAI_API_KEY;
        case 'openrouter':
            return process.env.OPENROUTER_API_KEY;
        default:
            return undefined;
    }
}
/** Run the candidate prompt through Anthropic; returns the text reply. */
async function runAnthropicTest(prompt, input) {
    const anthropic = new sdk_1.default({
        apiKey: process.env.ANTHROPIC_API_KEY
    });
    // Interpolate variables if present
    const finalPrompt = interpolateVariables(prompt, input);
    const response = await anthropic.messages.create({
        model: 'claude-sonnet-4-20250514',
        max_tokens: 2000,
        messages: [{
                role: 'user',
                content: finalPrompt
            }]
    });
    const content = response.content[0];
    return content.type === 'text' ? content.text : '';
}
/** Run the candidate prompt through OpenAI; returns the text reply. */
async function runOpenAITest(prompt, input) {
    const openai = new openai_1.default({
        apiKey: process.env.OPENAI_API_KEY
    });
    const finalPrompt = interpolateVariables(prompt, input);
    const response = await openai.chat.completions.create({
        model: 'gpt-4o',
        messages: [{
                role: 'user',
                content: finalPrompt
            }]
    });
    return response.choices[0]?.message?.content || '';
}
/** Run the candidate prompt through OpenRouter (OpenAI-compatible API). */
async function runOpenRouterTest(prompt, input) {
    const key = process.env.OPENROUTER_API_KEY;
    // Pass the key explicitly: the OpenAI SDK uses an explicit `apiKey`
    // option in preference to OPENAI_API_KEY, so there is no need to
    // mutate process.env here. (The previous delete/restore of
    // OPENAI_API_KEY raced when shadow tests ran concurrently via
    // Promise.all in the optimizer.)
    const openai = new openai_1.default({
        baseURL: 'https://openrouter.ai/api/v1',
        apiKey: key
    });
    const finalPrompt = interpolateVariables(prompt, input);
    const response = await openai.chat.completions.create({
        model: 'nvidia/nemotron-3-nano-30b-a3b:free',
        messages: [{
                role: 'user',
                content: finalPrompt
            }]
    });
    return response.choices[0]?.message?.content || '';
}
/**
 * Replace every {{key}} placeholder in `prompt` with the corresponding
 * value from `variables`. Uses split/join instead of a dynamic RegExp so
 * that regex metacharacters in keys, and `$`-replacement patterns in
 * values, are treated literally.
 */
function interpolateVariables(prompt, variables) {
    if (!variables)
        return prompt;
    let result = prompt;
    for (const [key, value] of Object.entries(variables)) {
        result = result.split(`{{${key}}}`).join(String(value));
    }
    return result;
}
/**
 * Score `actual` against `expected` using the failure's scoring method:
 * semantic similarity, exact (trimmed) match, or JSON parseability.
 * Unknown methods score a neutral 0.5.
 */
async function scoreOutput(actual, expected, method) {
    switch (method) {
        case 'semantic':
            return await (0, semantic_1.calculateSemanticSimilarity)(actual, expected);
        case 'exact':
            return actual.trim() === expected.trim() ? 1.0 : 0.0;
        case 'json':
            try {
                JSON.parse(actual);
                return 1.0;
            }
            catch {
                return 0.0;
            }
        default:
            return 0.5;
    }
}
// Public package entry point: re-exports the engine, cloud service,
// shared types, and the top-level run/config helpers.
export * from './engine/runner';
export * from './engine/loader';
export * from './engine/optimizer';
export * from './services/cloud.service';
export * from './types';
export { runTests } from './commands/run';
export { loadConfig } from './utils/config';