tuneprompt 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +151 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +146 -0
- package/dist/commands/activate.d.ts +1 -0
- package/dist/commands/activate.js +91 -0
- package/dist/commands/fix.d.ts +1 -0
- package/dist/commands/fix.js +187 -0
- package/dist/commands/history.d.ts +5 -0
- package/dist/commands/history.js +63 -0
- package/dist/commands/init.d.ts +1 -0
- package/dist/commands/init.js +96 -0
- package/dist/commands/run.d.ts +9 -0
- package/dist/commands/run.js +216 -0
- package/dist/db/migrate.d.ts +2 -0
- package/dist/db/migrate.js +8 -0
- package/dist/engine/constraintExtractor.d.ts +8 -0
- package/dist/engine/constraintExtractor.js +54 -0
- package/dist/engine/loader.d.ts +5 -0
- package/dist/engine/loader.js +74 -0
- package/dist/engine/metaPrompt.d.ts +11 -0
- package/dist/engine/metaPrompt.js +129 -0
- package/dist/engine/optimizer.d.ts +26 -0
- package/dist/engine/optimizer.js +246 -0
- package/dist/engine/reporter.d.ts +7 -0
- package/dist/engine/reporter.js +58 -0
- package/dist/engine/runner.d.ts +9 -0
- package/dist/engine/runner.js +169 -0
- package/dist/engine/shadowTester.d.ts +11 -0
- package/dist/engine/shadowTester.js +156 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +26 -0
- package/dist/providers/anthropic.d.ts +12 -0
- package/dist/providers/anthropic.js +51 -0
- package/dist/providers/base.d.ts +15 -0
- package/dist/providers/base.js +10 -0
- package/dist/providers/openai.d.ts +12 -0
- package/dist/providers/openai.js +58 -0
- package/dist/providers/openrouter.d.ts +11 -0
- package/dist/providers/openrouter.js +83 -0
- package/dist/scoring/exact-match.d.ts +1 -0
- package/dist/scoring/exact-match.js +8 -0
- package/dist/scoring/json-validator.d.ts +4 -0
- package/dist/scoring/json-validator.js +29 -0
- package/dist/scoring/semantic.d.ts +8 -0
- package/dist/scoring/semantic.js +107 -0
- package/dist/services/cloud.service.d.ts +49 -0
- package/dist/services/cloud.service.js +82 -0
- package/dist/storage/database.d.ts +10 -0
- package/dist/storage/database.js +179 -0
- package/dist/types/fix.d.ts +28 -0
- package/dist/types/fix.js +2 -0
- package/dist/types/index.d.ts +58 -0
- package/dist/types/index.js +2 -0
- package/dist/utils/analytics.d.ts +2 -0
- package/dist/utils/analytics.js +22 -0
- package/dist/utils/config.d.ts +3 -0
- package/dist/utils/config.js +70 -0
- package/dist/utils/errorHandler.d.ts +14 -0
- package/dist/utils/errorHandler.js +40 -0
- package/dist/utils/license.d.ts +40 -0
- package/dist/utils/license.js +207 -0
- package/dist/utils/storage.d.ts +2 -0
- package/dist/utils/storage.js +25 -0
- package/package.json +76 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.initCommand = initCommand;
|
|
40
|
+
const fs = __importStar(require("fs"));
|
|
41
|
+
const path = __importStar(require("path"));
|
|
42
|
+
const inquirer_1 = __importDefault(require("inquirer"));
|
|
43
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
44
|
+
const config_1 = require("../utils/config");
|
|
45
|
+
/**
 * Create `filePath` with `content` unless something already exists there.
 *
 * The `wx` flag makes the write fail with EEXIST instead of truncating an
 * existing file, so re-running `init` cannot clobber a `.env` that already
 * holds real API keys or a config the user has customised.
 *
 * @param {string} filePath Absolute path of the file to create.
 * @param {string} content Text to write.
 * @param {string} label Short name shown in the console message.
 * @returns {boolean} `true` when the file was created, `false` when it was skipped.
 * @throws Any filesystem error other than EEXIST.
 */
function writeFileIfAbsent(filePath, content, label) {
    try {
        fs.writeFileSync(filePath, content, { flag: 'wx' });
    }
    catch (error) {
        if (error && error.code === 'EEXIST') {
            console.log(chalk_1.default.yellow(`⚠ Skipped ${label} (already exists)`));
            return false;
        }
        throw error;
    }
    console.log(chalk_1.default.green(`✓ Created ${label}`));
    return true;
}
/**
 * Interactive project scaffolding.
 *
 * Asks three yes/no questions and, for each accepted one, creates the matching
 * file in the current working directory: `tuneprompt.config.js`,
 * `tests/sample.json` and `.env`. Existing files are left untouched.
 *
 * @returns {Promise<void>}
 */
async function initCommand() {
    console.log(chalk_1.default.bold('\n🎛️ TunePrompt Initialization\n'));
    const answers = await inquirer_1.default.prompt([
        {
            type: 'confirm',
            name: 'createConfig',
            message: 'Create tuneprompt.config.js?',
            default: true
        },
        {
            type: 'confirm',
            name: 'createTests',
            message: 'Create sample test file?',
            default: true
        },
        {
            type: 'confirm',
            name: 'createEnv',
            message: 'Create .env file for API keys?',
            default: true
        }
    ]);
    const projectRoot = process.cwd();
    if (answers.createConfig) {
        writeFileIfAbsent(path.join(projectRoot, 'tuneprompt.config.js'), (0, config_1.getDefaultConfigTemplate)(), 'tuneprompt.config.js');
    }
    if (answers.createTests) {
        const testsDir = path.join(projectRoot, 'tests');
        // `recursive: true` is a no-op when the directory already exists, which
        // removes the check-then-create race of existsSync + mkdirSync.
        fs.mkdirSync(testsDir, { recursive: true });
        const sampleTest = {
            description: 'User onboarding welcome message',
            prompt: 'Generate a friendly welcome message for a user named Alice.',
            expect: 'Welcome, Alice! We are glad you are here.',
            config: {
                threshold: 0.85,
                method: 'semantic'
            }
        };
        writeFileIfAbsent(path.join(testsDir, 'sample.json'), JSON.stringify(sampleTest, null, 2), 'tests/sample.json');
    }
    if (answers.createEnv) {
        const envContent = `OPENAI_API_KEY=your_key_here\nANTHROPIC_API_KEY=your_key_here\nOPENROUTER_API_KEY=your_key_here\n`;
        writeFileIfAbsent(path.join(projectRoot, '.env'), envContent, '.env');
    }
    console.log(chalk_1.default.bold('\n✨ Setup complete! Run "tuneprompt run" to test your prompts.\n'));
}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.runCommand = void 0;
|
|
7
|
+
exports.runTests = runTests;
|
|
8
|
+
const commander_1 = require("commander");
|
|
9
|
+
const cloud_service_1 = require("../services/cloud.service");
|
|
10
|
+
const chalk_1 = __importDefault(require("chalk"));
|
|
11
|
+
const child_process_1 = require("child_process");
|
|
12
|
+
const ora_1 = __importDefault(require("ora"));
|
|
13
|
+
const config_1 = require("../utils/config");
|
|
14
|
+
const loader_1 = require("../engine/loader");
|
|
15
|
+
const runner_1 = require("../engine/runner");
|
|
16
|
+
const reporter_1 = require("../engine/reporter");
|
|
17
|
+
const database_1 = require("../storage/database");
|
|
18
|
+
const license_1 = require("../utils/license");
|
|
19
|
+
// At the end of your test run reporter
|
|
20
|
+
/**
 * Print the pass/fail totals and, when something failed, a hint about
 * `tuneprompt fix` plus (for users without the `fix` feature) pricing and
 * activation pointers.
 *
 * The license lookup is asynchronous. The returned promise settles once the
 * optional license hint has been printed, so a caller that is about to call
 * `process.exit` can await it first; callers that ignore the return value
 * behave as before. The promise never rejects.
 *
 * @param {Array<{status: string}>} results Individual test results.
 * @returns {Promise<void>}
 */
function displayRunSummary(results) {
    const failed = results.filter(r => r.status === 'fail');
    const passed = results.filter(r => r.status === 'pass');
    const rule = '='.repeat(60);
    console.log(chalk_1.default.bold.white('\n' + rule));
    console.log(chalk_1.default.bold.white('Test Summary'));
    console.log(chalk_1.default.bold.white(rule));
    console.log(chalk_1.default.green(`✓ Passed: ${passed.length}`));
    console.log(chalk_1.default.red(`✗ Failed: ${failed.length}`));
    console.log(chalk_1.default.gray(`Total: ${results.length}`));
    console.log(chalk_1.default.bold.white(rule + '\n'));
    if (failed.length === 0) {
        return Promise.resolve();
    }
    console.log(chalk_1.default.yellow('⚠️ ' + failed.length + ' test(s) failed'));
    console.log(chalk_1.default.gray('\nDon\'t waste time debugging manually.'));
    console.log(chalk_1.default.cyan('Run ') + chalk_1.default.bold.white('tuneprompt fix') + chalk_1.default.cyan(' to let AI repair these prompts instantly.\n'));
    // Check license status
    const licenseManager = new license_1.LicenseManager();
    return licenseManager.hasFeature('fix')
        .then((hasAccess) => {
        if (!hasAccess) {
            console.log(chalk_1.default.gray('Unlock fix with: ') + chalk_1.default.white('https://tuneprompt.com/pricing'));
            console.log(chalk_1.default.gray('Already have a key? ') + chalk_1.default.white('tuneprompt activate <key>\n'));
        }
    })
        .catch(() => {
        // The hint is purely informational: a failed license lookup must not
        // surface as an unhandled rejection and abort the test run.
    });
}
|
|
45
|
+
// Extract the core run functionality to a separate function
|
|
46
|
+
/**
 * Run every test case found in the configured test directory.
 *
 * Steps: load the config, load the test cases, execute them, persist the run
 * to the local database, print the report and summary, optionally upload the
 * run to the cloud, and finally exit with code 1 when any test failed.
 *
 * The process also exits with code 1 when no test cases are found or when any
 * step throws.
 *
 * @param {{config?: string, cloud?: boolean, ci?: boolean}} [options]
 *   `config` is forwarded to `loadConfig`; `cloud` forces an upload; `ci`
 *   marks the run as CI (which also triggers an upload).
 * @returns {Promise<void>}
 */
async function runTests(options = {}) {
    const startTime = Date.now();
    const spinner = (0, ora_1.default)('Loading configuration...').start();
    try {
        // Load config
        const config = await (0, config_1.loadConfig)(options.config);
        spinner.succeed('Configuration loaded');
        // Load tests
        spinner.start('Loading test cases...');
        const loader = new loader_1.TestLoader();
        const testCases = loader.loadTestDir(config.testDir || './tests');
        if (testCases.length === 0) {
            spinner.fail('No test cases found');
            process.exit(1);
        }
        spinner.succeed(`Loaded ${testCases.length} test case(s)`);
        // Run tests
        spinner.start('Running tests...');
        const runner = new runner_1.TestRunner(config);
        const results = await runner.runTests(testCases);
        spinner.stop();
        // Save to database; always release the handle, even if the save throws.
        const db = new database_1.TestDatabase();
        try {
            db.saveRun(results);
        }
        finally {
            db.close();
        }
        // Report results
        const reporter = new reporter_1.TestReporter();
        reporter.printResults(results, config.outputFormat);
        // Map each internal result to the shape sent to the cloud service.
        const testResults = results.results.map((result) => ({
            test_name: result.testCase.description,
            test_description: result.testCase.description,
            prompt: typeof result.testCase.prompt === 'string'
                ? result.testCase.prompt
                : JSON.stringify(result.testCase.prompt),
            input_data: result.testCase.variables,
            expected_output: result.expectedOutput,
            actual_output: result.actualOutput,
            score: result.score,
            method: result.testCase.config?.method || 'exact',
            status: result.status,
            model: result.metadata.provider || '',
            tokens_used: result.metadata.tokens,
            latency_ms: result.metadata.duration,
            cost_usd: result.metadata.cost,
            error_message: result.error,
            error_type: undefined, // No error type in current TestResult interface
        }));
        // Total cost across all results; a missing per-test cost counts as 0.
        const totalCost = results.results.reduce((sum, result) => sum + (result.metadata.cost || 0), 0);
        const resultsSummary = {
            totalTests: results.results.length,
            passedTests: results.passed,
            failedTests: results.failed,
            durationMs: Date.now() - startTime,
            // NOTE(review): a zero/unknown total is reported as 0.05 USD. Kept
            // as-is because the consumer of this value is not visible here;
            // confirm whether the cloud side really needs a non-zero cost.
            totalCost: totalCost || 0.05,
            tests: testResults,
        };
        console.log(chalk_1.default.green(`\n✅ ${resultsSummary.passedTests} passed`));
        console.log(chalk_1.default.red(`❌ ${resultsSummary.failedTests} failed\n`));
        // Awaited so the asynchronous license hint is printed before a possible
        // process.exit(1) below. Awaiting is harmless if the summary function
        // returns nothing.
        await displayRunSummary(results.results);
        // Cloud upload: explicit --cloud, explicit --ci, or a detected CI environment.
        const isCI = options.ci ||
            process.env.CI === 'true' ||
            !!process.env.GITHUB_ACTIONS ||
            !!process.env.GITLAB_CI;
        if (options.cloud || isCI) {
            await uploadToCloud(resultsSummary, options);
        }
        // Exit with error code if tests failed
        if (resultsSummary.failedTests > 0) {
            process.exit(1);
        }
    }
    catch (error) {
        spinner.fail('Test run failed');
        // Thrown values are not guaranteed to be Error instances.
        const message = error && error.message ? error.message : String(error);
        console.error(chalk_1.default.red(message));
        process.exit(1);
    }
}
|
|
135
|
+
// `run` sub-command: declares the --cloud and --ci flags and hands the parsed
// options straight to runTests.
const runCommandDefinition = new commander_1.Command('run');
runCommandDefinition.description('Run prompt tests');
runCommandDefinition.option('--cloud', 'Upload results to Tuneprompt Cloud');
runCommandDefinition.option('--ci', 'Run in CI mode (auto-enables --cloud)');
runCommandDefinition.action(async (cliOptions) => {
    await runTests(cliOptions);
});
exports.runCommand = runCommandDefinition;
|
|
142
|
+
/**
 * Best-effort upload of a finished run to the cloud service.
 *
 * Every failure (service initialisation, authentication check, project
 * lookup, upload) is reported as a warning and swallowed: cloud sync problems
 * must never turn a finished test run into a failed one.
 *
 * @param {{totalTests: number, passedTests: number, failedTests: number,
 *          durationMs: number, totalCost: number, tests: Array<object>}} results
 *   Run summary built by the caller.
 * @param {{ci?: boolean}} options CLI options; `ci` forces the `ci` environment label.
 * @returns {Promise<void>}
 */
async function uploadToCloud(results, options) {
    // Captured first so the timestamps describe the test run itself, not the
    // network round-trips performed below.
    const completedAtMs = Date.now();
    const cloudService = new cloud_service_1.CloudService();
    let isAuth;
    try {
        await cloudService.init();
        isAuth = await cloudService.isAuthenticated();
    }
    catch (error) {
        console.log(chalk_1.default.yellow('⚠️ Failed to upload results:'), error);
        return;
    }
    if (!isAuth) {
        console.log(chalk_1.default.yellow('\n⚠️ Not authenticated with Cloud.'));
        console.log(chalk_1.default.gray('Results saved locally. Run `tuneprompt activate` to enable cloud sync\n'));
        return;
    }
    // Get or create project
    let projectId;
    try {
        const projects = await cloudService.getProjects();
        if (projects.length === 0) {
            console.log(chalk_1.default.blue('📁 Creating default project...'));
            const project = await cloudService.createProject('Default Project');
            projectId = project.id;
            console.log(chalk_1.default.green(`✅ Project created: ${projectId}`));
        }
        else {
            projectId = projects[0].id; // Use first project
            console.log(chalk_1.default.gray(`📋 Using existing project: ${projectId}`));
        }
    }
    catch (error) {
        console.log(chalk_1.default.yellow('⚠️ Failed to get project'), error);
        return;
    }
    // Git context. stderr is discarded so that running outside a repository
    // does not print "fatal: not a git repository" into the test output.
    const runGit = (args) => (0, child_process_1.execSync)(`git ${args}`, {
        encoding: 'utf-8',
        stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
    let gitContext = {};
    try {
        gitContext = {
            commit_hash: runGit('rev-parse HEAD'),
            branch_name: runGit('rev-parse --abbrev-ref HEAD'),
            commit_message: runGit('log -1 --pretty=%B'),
        };
    }
    catch {
        // Not a git repo (or git is unavailable): upload without git metadata.
    }
    // Detect CI provider
    let ciProvider;
    if (process.env.GITHUB_ACTIONS)
        ciProvider = 'github';
    else if (process.env.GITLAB_CI)
        ciProvider = 'gitlab';
    else if (process.env.JENKINS_HOME)
        ciProvider = 'jenkins';
    else if (process.env.CIRCLECI)
        ciProvider = 'circleci';
    // An explicitly disabled CI variable ("false" / "0") must not be labelled
    // as CI; a detected provider always is.
    const ciVar = process.env.CI;
    const ciVarEnabled = Boolean(ciVar) && ciVar !== 'false' && ciVar !== '0';
    const isCiRun = Boolean(options.ci) || ciVarEnabled || ciProvider !== undefined;
    // Prepare run data
    const runData = {
        project_id: projectId,
        environment: isCiRun ? 'ci' : 'local',
        ci_provider: ciProvider,
        total_tests: results.totalTests,
        passed_tests: results.passedTests,
        failed_tests: results.failedTests,
        duration_ms: results.durationMs,
        cost_usd: results.totalCost,
        started_at: new Date(completedAtMs - results.durationMs).toISOString(),
        completed_at: new Date(completedAtMs).toISOString(),
        test_results: results.tests,
        ...gitContext,
    };
    console.log(chalk_1.default.blue('\n☁️ Uploading results to Cloud...'));
    let uploadResult;
    try {
        uploadResult = await cloudService.uploadRun(runData);
    }
    catch (error) {
        console.log(chalk_1.default.yellow('⚠️ Failed to upload results:'), error);
        return;
    }
    if (uploadResult.success) {
        console.log(chalk_1.default.green('✅ Results uploaded successfully'));
        console.log(chalk_1.default.gray(`View at: ${uploadResult.url}\n`));
    }
    else {
        console.log(chalk_1.default.yellow('⚠️ Failed to upload results:'), uploadResult.error);
    }
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runMigrations = runMigrations;
|
|
4
|
+
/**
 * Placeholder hook for future schema updates.
 *
 * Does nothing today: current migrations are handled in the TestDatabase
 * constructor. The `db` argument is accepted but not used.
 *
 * @param {*} db Database handle (currently unused).
 * @returns {Promise<void>}
 */
async function runMigrations(db) {
    // Intentionally empty — see the function comment.
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { FailedTest } from '../types/fix';
|
|
2
|
+
/**
 * Diagnosis produced for a failed test: what went wrong and how the prompt
 * might be changed to address it.
 */
export interface ExtractedConstraints {
    /** Failure category, copied from the failed test. */
    errorType: 'semantic' | 'json' | 'exact' | 'length' | 'format';
    /** Human-readable descriptions of the detected problems. */
    issues: string[];
    /** Prompt-writing suggestions that correspond to the issues. */
    suggestions: string[];
}
/**
 * Build the diagnosis for a failed test from its error type, score,
 * threshold and actual output.
 */
export declare function extractConstraints(test: FailedTest): ExtractedConstraints;
/**
 * Render the diagnosis of a failed test as plain text with an "Error Type"
 * line followed by numbered "Issues Detected" and "Recommended Fixes" lists.
 */
export declare function generateErrorContext(test: FailedTest): string;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractConstraints = extractConstraints;
|
|
4
|
+
exports.generateErrorContext = generateErrorContext;
|
|
5
|
+
/**
 * Derive a list of issues and prompt-writing suggestions from a failed test.
 *
 * Every declared error type (`json`, `semantic`, `length`, `exact`, `format`)
 * yields at least one issue and one suggestion; an unrecognised type yields
 * empty lists. Missing `score` or `actualOutput` values are tolerated instead
 * of throwing.
 *
 * @param {{errorType: string, actualOutput?: string, score?: number, threshold?: number}} test
 *   The failed test to analyse.
 * @returns {{errorType: string, issues: string[], suggestions: string[]}}
 */
function extractConstraints(test) {
    const constraints = {
        errorType: test.errorType,
        issues: [],
        suggestions: []
    };
    switch (test.errorType) {
        case 'json': {
            constraints.issues.push('Output was not valid JSON');
            try {
                JSON.parse(test.actualOutput);
            }
            catch (e) {
                // Surface the parser's own message so the fix can target it.
                constraints.issues.push(`JSON Error: ${e.message}`);
            }
            constraints.suggestions.push('Add explicit JSON formatting instructions', 'Provide a JSON schema example', 'Use delimiters like <json_output></json_output>');
            break;
        }
        case 'semantic': {
            const hasScore = typeof test.score === 'number' && Number.isFinite(test.score);
            const shownScore = hasScore ? test.score.toFixed(2) : 'n/a';
            constraints.issues.push(`Semantic similarity too low (${shownScore} < ${test.threshold})`);
            // With a missing score the gap is NaN, which falls through to the
            // milder "close but missing details" advice.
            const scoreDiff = hasScore ? test.threshold - test.score : NaN;
            if (scoreDiff > 0.3) {
                constraints.issues.push('Output is significantly off-topic');
                constraints.suggestions.push('Add more specific instructions', 'Include key phrases that must appear', 'Provide examples of correct outputs');
            }
            else {
                constraints.issues.push('Output is close but missing key details');
                constraints.suggestions.push('Emphasize critical information', 'Add constraint checklist', 'Request step-by-step reasoning');
            }
            break;
        }
        case 'length': {
            const actualLength = String(test.actualOutput ?? '').length;
            constraints.issues.push(`Output length mismatch: ${actualLength} characters`);
            constraints.suggestions.push('Specify exact character/word limits', 'Add "Be concise" or "Be detailed" instructions', 'Provide length reference examples');
            break;
        }
        case 'exact': {
            constraints.issues.push('Output did not exactly match the expected output');
            constraints.suggestions.push('State the exact expected wording or value in the prompt', 'Instruct the model to return only the answer with no extra text', 'Specify casing, punctuation, and whitespace requirements');
            break;
        }
        case 'format': {
            constraints.issues.push('Output did not follow the required format');
            constraints.suggestions.push('Describe the required output format explicitly', 'Provide a correctly formatted example', 'Forbid commentary outside the required format');
            break;
        }
        default:
            // Unknown error type: nothing specific to report.
            break;
    }
    return constraints;
}
|
|
43
|
+
/**
 * Format the diagnosis of a failed test as a plain-text report.
 *
 * The report contains the upper-cased error type, then a numbered
 * "Issues Detected" list and a numbered "Recommended Fixes" list, with
 * surrounding whitespace trimmed.
 *
 * @param {object} test The failed test; passed unchanged to extractConstraints.
 * @returns {string}
 */
function generateErrorContext(test) {
    const diagnosis = extractConstraints(test);
    const header = `Error Type: ${diagnosis.errorType.toUpperCase()}`;
    // Turn a list of strings into "1. first\n2. second\n…".
    const numbered = (entries) => entries.map((entry, index) => `${index + 1}. ${entry}`).join('\n');
    const issueList = numbered(diagnosis.issues);
    const fixList = numbered(diagnosis.suggestions);
    const report = [
        header,
        '',
        'Issues Detected:',
        issueList,
        '',
        'Recommended Fixes:',
        fixList,
    ].join('\n');
    return report.trim();
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.TestLoader = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
const yaml = __importStar(require("js-yaml"));
|
|
40
|
+
/**
 * Loads prompt test cases from JSON and YAML files.
 */
class TestLoader {
    /**
     * Read one test file and return its test cases.
     *
     * A file may hold a single test object or an array of them. Each returned
     * case is a shallow copy tagged with the absolute `filePath` it came from.
     * Empty documents (an empty YAML file, JSON `null`) and null/undefined
     * array entries are skipped rather than turned into a test case that
     * consists of nothing but a `filePath`.
     *
     * @param {string} filePath Path to a `.json`, `.yaml` or `.yml` file
     *   (extension matched case-insensitively).
     * @returns {Array<object>} The test cases found in the file; possibly empty.
     * @throws {Error} If the extension is not supported, the file cannot be
     *   read, or its content cannot be parsed.
     */
    loadTestFile(filePath) {
        const rawExt = path.extname(filePath);
        const ext = rawExt.toLowerCase();
        const content = fs.readFileSync(filePath, 'utf-8');
        let data;
        if (ext === '.json') {
            data = JSON.parse(content);
        }
        else if (ext === '.yaml' || ext === '.yml') {
            data = yaml.load(content);
        }
        else {
            throw new Error(`Unsupported file format: ${rawExt}`);
        }
        const absolutePath = path.resolve(filePath);
        const entries = Array.isArray(data) ? data : [data];
        return entries
            .filter(entry => entry !== null && entry !== undefined)
            .map(entry => ({ ...entry, filePath: absolutePath }));
    }
    /**
     * Recursively collect the test cases of every JSON/YAML file under a
     * directory, in `readdirSync` order. Files with other extensions are
     * ignored.
     *
     * @param {string} dirPath Directory to scan.
     * @returns {Array<object>} All test cases found.
     * @throws If the directory or any matching file cannot be read or parsed.
     */
    loadTestDir(dirPath) {
        const tests = [];
        for (const file of fs.readdirSync(dirPath)) {
            const filePath = path.join(dirPath, file);
            if (fs.statSync(filePath).isDirectory()) {
                tests.push(...this.loadTestDir(filePath));
            }
            else if (/\.(json|ya?ml)$/i.test(file)) {
                tests.push(...this.loadTestFile(filePath));
            }
        }
        return tests;
    }
}
|
|
74
|
+
exports.TestLoader = TestLoader;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Everything a meta-prompt generator needs to describe one failed test. */
export interface MetaPromptInput {
    /** The prompt text that failed the test. */
    originalPrompt: string;
    /** Optional test input variables; rendered as pretty-printed JSON when present. */
    testInput?: Record<string, any>;
    /** The output the test expected. */
    expectedOutput: string;
    /** The output the model actually produced. */
    actualOutput: string;
    /** Failure category, inserted verbatim into the general optimization prompt. */
    errorType: string;
    /** Failure details, inserted verbatim into the general optimization prompt. */
    errorMessage: string;
}
/**
 * Build the general-purpose meta-prompt that asks an LLM for a failure
 * analysis and two candidate rewrites, returned as a JSON object.
 */
export declare function generateOptimizationPrompt(input: MetaPromptInput): string;
/**
 * Build the meta-prompt for prompts whose output was not valid JSON; asks for
 * the improved prompt as plain text.
 */
export declare function generateJSONFixPrompt(input: MetaPromptInput): string;
/**
 * Build the meta-prompt for prompts that failed semantic similarity testing;
 * asks for the improved prompt as plain text.
 */
export declare function generateSemanticFixPrompt(input: MetaPromptInput): string;
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.generateOptimizationPrompt = generateOptimizationPrompt;
|
|
4
|
+
exports.generateJSONFixPrompt = generateJSONFixPrompt;
|
|
5
|
+
exports.generateSemanticFixPrompt = generateSemanticFixPrompt;
|
|
6
|
+
/**
 * Build the general-purpose meta-prompt for repairing a failed prompt.
 *
 * The text embeds the original prompt, the optional test input variables
 * (as pretty-printed JSON), the expected and actual outputs and the error
 * type/details, then instructs the model to answer with a JSON object holding
 * an analysis and two candidate prompts.
 *
 * @param {object} input Fields: originalPrompt, testInput (optional),
 *   expectedOutput, actualOutput, errorType, errorMessage.
 * @returns {string} The complete meta-prompt.
 */
function generateOptimizationPrompt(input) {
    const originalPrompt = input.originalPrompt;
    const testInput = input.testInput;
    const expectedOutput = input.expectedOutput;
    const actualOutput = input.actualOutput;
    const errorType = input.errorType;
    const errorMessage = input.errorMessage;
    // Optional section; carries its own trailing newline when present.
    const variablesSection = testInput
        ? `[Test Input Variables]:\n${JSON.stringify(testInput, null, 2)}\n`
        : '';
    // Interpolated values go through template literals so that null/undefined
    // are rendered as text, exactly as a single template literal would.
    const promptLines = [
        'You are an elite LLM Prompt Engineer with expertise in Claude, GPT-4, and advanced prompting techniques.',
        '',
        'A prompt has failed a critical test case. Your mission is to rewrite it to pass the test while maintaining the original intent.',
        '',
        '=== FAILURE ANALYSIS ===',
        '',
        '[Original Prompt]:',
        '"""',
        `${originalPrompt}`,
        '"""',
        '',
        variablesSection,
        '',
        '[Expected Output]:',
        '"""',
        `${expectedOutput}`,
        '"""',
        '',
        '[Actual Output (FAILED)]:',
        '"""',
        `${actualOutput}`,
        '"""',
        '',
        `[Error Type]: ${errorType}`,
        `[Error Details]: ${errorMessage}`,
        '',
        '=== YOUR TASK ===',
        '',
        '1. **Root Cause Analysis**: Identify WHY the prompt failed',
        '- Missing instructions?',
        '- Ambiguous wording?',
        '- Wrong output format specified?',
        '- Tone mismatch?',
        '- Missing constraints?',
        '',
        '2. **Prompt Engineering Fixes**: Apply advanced techniques:',
        '- ✅ Chain-of-Thought reasoning (if logic is needed)',
        '- ✅ XML tags for structure (<instructions>, <output_format>)',
        '- ✅ Few-shot examples (if pattern recognition helps)',
        '- ✅ Explicit constraints (length, format, tone)',
        '- ✅ Role assignment ("You are a [expert]...")',
        '- ✅ Output format specifications (JSON schema, markdown, etc.)',
        '',
        '3. **Generate TWO Candidate Prompts**:',
        '- Candidate A: Conservative fix (minimal changes)',
        '- Candidate B: Aggressive rewrite (best practices applied)',
        '',
        '=== OUTPUT FORMAT ===',
        '',
        'Return ONLY valid JSON (no markdown, no explanations):',
        '',
        '{',
        '"analysis": "Brief explanation of why it failed (2-3 sentences)",',
        '"candidateA": {',
        '"prompt": "Your rewritten prompt here",',
        '"reasoning": "Why this approach works"',
        '},',
        '"candidateB": {',
        '"prompt": "Your alternative rewritten prompt here",',
        '"reasoning": "Why this approach works"',
        '}',
        '}',
        '',
        'CRITICAL: Return ONLY the JSON object. No preamble, no markdown backticks.',
    ];
    return promptLines.join('\n');
}
|
|
75
|
+
// Specialized prompts for different error types
|
|
76
|
+
/**
 * Build the meta-prompt used when a prompt failed to produce valid JSON.
 *
 * Embeds the original prompt, the expected JSON structure and the invalid
 * output, then asks for a rewritten prompt returned as plain text.
 *
 * @param {object} input Fields read: originalPrompt, expectedOutput, actualOutput.
 * @returns {string} The complete meta-prompt.
 */
function generateJSONFixPrompt(input) {
    // Wrap a value in triple-quote delimiters; the template literal keeps
    // null/undefined rendered as text.
    const quoted = (value) => `"""\n${value}\n"""`;
    const sections = [
        'You are a JSON Schema expert. The following prompt failed to produce valid JSON.',
        '',
        'Original Prompt:',
        quoted(input.originalPrompt),
        '',
        'Expected JSON Structure:',
        quoted(input.expectedOutput),
        '',
        'Actual Output (Invalid JSON):',
        quoted(input.actualOutput),
        '',
        'Rewrite the prompt to GUARANTEE valid JSON output. Use these techniques:',
        '1. Explicitly state: "Return ONLY valid JSON, no markdown, no explanations"',
        '2. Provide the exact schema structure',
        '3. Add output format examples',
        '4. Use XML tags like <json_output> to delimit the response area',
        '',
        'Return your improved prompt as plain text (not JSON).',
    ];
    return sections.join('\n');
}
|
|
102
|
+
/**
 * Build the meta-prompt used when a prompt failed semantic similarity testing.
 *
 * Embeds the original prompt, the expected content and what the model
 * actually said, then asks for a rewritten prompt returned as plain text.
 *
 * @param {object} input Fields read: originalPrompt, expectedOutput, actualOutput.
 * @returns {string} The complete meta-prompt.
 */
function generateSemanticFixPrompt(input) {
    // Wrap a value in triple-quote delimiters; the template literal keeps
    // null/undefined rendered as text.
    const quoted = (value) => `"""\n${value}\n"""`;
    const sections = [
        'The prompt failed semantic similarity testing (score: too low).',
        '',
        'Original Prompt:',
        quoted(input.originalPrompt),
        '',
        'Expected Meaning/Content:',
        quoted(input.expectedOutput),
        '',
        'What the Model Actually Said:',
        quoted(input.actualOutput),
        '',
        "The model's response was off-topic or missed key information.",
        '',
        'Rewrite the prompt to ensure the model:',
        '1. Stays on topic',
        '2. Includes all required information from the expected output',
        '3. Uses clear, specific instructions',
        '4. Avoids ambiguity',
        '',
        'Return your improved prompt as plain text.',
    ];
    return sections.join('\n');
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { FailedTest, OptimizationResult } from '../types/fix';
|
|
2
|
+
/**
 * Produces an improved prompt for a failed test.
 *
 * Only the public surface is declared here; the notes on private members
 * restate the declaration file's own comments.
 */
export declare class PromptOptimizer {
    /** Optional Anthropic client (untyped in this declaration). */
    private anthropic?;
    /** Optional OpenAI client (untyped in this declaration). */
    private openai?;
    constructor();
    /**
     * Entry point: optimize the prompt of a failed test.
     *
     * @param failedTest The failed test to repair.
     * @returns The optimization result.
     */
    optimize(failedTest: FailedTest): Promise<OptimizationResult>;
    /** Picks the meta-prompt that matches the test's error type. */
    private selectMetaPrompt;
    /** Generates several fix candidates with the available LLMs, with fallback. */
    private generateCandidates;
    /** Shadow-tests every candidate and returns the best one. */
    private selectBestCandidate;
    /** Fallback improvement: generates a clean rewritten prompt. */
    private createFallbackPrompt;
}
|