tuneprompt 1.0.7 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -9
- package/dist/cli.js +5 -2
- package/dist/commands/fix.d.ts +3 -1
- package/dist/commands/fix.js +45 -25
- package/dist/commands/generate.d.ts +2 -0
- package/dist/commands/generate.js +11 -0
- package/dist/engine/__tests__/optimizer.test.d.ts +1 -0
- package/dist/engine/__tests__/optimizer.test.js +9 -0
- package/dist/engine/loader.js +6 -2
- package/dist/engine/metaPrompt.d.ts +5 -0
- package/dist/engine/metaPrompt.js +55 -55
- package/dist/engine/optimizer.d.ts +7 -21
- package/dist/engine/optimizer.js +141 -252
- package/dist/engine/runner.d.ts +2 -0
- package/dist/engine/runner.js +56 -69
- package/dist/engine/shadowTester.d.ts +17 -2
- package/dist/engine/shadowTester.js +86 -128
- package/dist/providers/__tests__/custom.test.d.ts +1 -0
- package/dist/providers/__tests__/custom.test.js +9 -0
- package/dist/providers/custom.d.ts +6 -0
- package/dist/providers/custom.js +10 -0
- package/dist/providers/factory.d.ts +6 -0
- package/dist/providers/factory.js +38 -0
- package/dist/providers/gemini.d.ts +11 -0
- package/dist/providers/gemini.js +46 -0
- package/dist/scoring/__tests__/rag.test.d.ts +1 -0
- package/dist/scoring/__tests__/rag.test.js +10 -0
- package/dist/scoring/rag.d.ts +9 -0
- package/dist/scoring/rag.js +9 -0
- package/dist/services/cloud.service.js +1 -1
- package/dist/storage/database.js +1 -1
- package/dist/types/fix.d.ts +11 -0
- package/dist/types/index.d.ts +2 -1
- package/dist/types/test.d.ts +8 -0
- package/dist/types/test.js +2 -0
- package/dist/utils/config.js +11 -5
- package/dist/utils/interpolation.d.ts +4 -0
- package/dist/utils/interpolation.js +16 -0
- package/dist/utils/storage.d.ts +4 -0
- package/dist/utils/storage.js +26 -5
- package/dist/utils/validator.d.ts +2 -0
- package/dist/utils/validator.js +10 -0
- package/package.json +3 -2
|
@@ -1,156 +1,114 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
-
};
|
|
5
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
3
|
exports.runShadowTest = runShadowTest;
|
|
7
|
-
|
|
8
|
-
const
|
|
9
|
-
const
|
|
4
|
+
exports.runSuiteShadowTest = runSuiteShadowTest;
|
|
5
|
+
const semantic_1 = require("../scoring/semantic");
|
|
6
|
+
const factory_1 = require("../providers/factory");
|
|
7
|
+
const interpolation_1 = require("../utils/interpolation");
|
|
10
8
|
/**
 * Test a candidate prompt against the original test case.
 *
 * Providers are tried in order until one returns an output: first the
 * provider named in `test.config` (if any), then the built-in fallback queue.
 * A provider with no API key is skipped; a provider that throws is recorded
 * and the next one is tried.
 *
 * @param {string} candidatePrompt - Prompt template; placeholders are filled from `test.input`.
 * @param {object} test - Test record; reads `input`, `expectedOutput`, `errorType`,
 *   `threshold` and the optional `config.provider` / `config.model`.
 * @returns {Promise<{score: number, output: string, passed: boolean, failureReason?: string}>}
 *   Result from the first provider that answered.
 * @throws {Error} When no provider produced an output; the message lists why each one was skipped or failed.
 */
async function runShadowTest(candidatePrompt, test) {
    // For integration tests: bypass real API calls if mock mode is on
    if (process.env.TUNEPROMPT_MOCK_OPTIMIZER === 'true') {
        return {
            score: 0.95,
            output: 'Mock satisfied output',
            passed: true
        };
    }
    const providerName = test.config?.provider;
    const model = test.config?.model;
    // Fallback queue, in priority order
    const fallbackQueue = [
        { name: 'anthropic', model: 'claude-3-5-sonnet-latest' },
        { name: 'openai', model: 'gpt-4o' },
        { name: 'gemini', model: 'gemini-2.0-flash' },
        { name: 'openrouter', model: 'nvidia/nemotron-3-nano-30b-a3b:free' }
    ];
    // Compare names case-insensitively so the requested provider is not queued twice.
    const requested = typeof providerName === 'string' && providerName !== ''
        ? providerName.toLowerCase()
        : undefined;
    // Determine providers to try
    const providersToTry = [];
    if (requested) {
        // A provider given without a model must still be tried first; borrow the
        // queue's default model for it instead of dropping the provider entirely.
        const queued = fallbackQueue.find((entry) => entry.name === requested);
        providersToTry.push({ name: providerName, model: model || queued?.model });
    }
    for (const entry of fallbackQueue) {
        if (entry.name !== requested) {
            providersToTry.push(entry);
        }
    }
    const errors = [];
    for (const target of providersToTry) {
        try {
            const apiKey = factory_1.ProviderFactory.getApiKey(target.name);
            if (!apiKey) {
                // Remember the skip so the final error is not an empty list.
                errors.push(`${target.name}: no API key configured`);
                continue;
            }
            const provider = factory_1.ProviderFactory.create(target.name, {
                apiKey,
                model: target.model || 'latest',
                maxTokens: 2000
            });
            const finalPrompt = (0, interpolation_1.interpolateVariables)(candidatePrompt, test.input);
            const response = await provider.complete(finalPrompt);
            const output = response.content;
            const { score, failureReason } = await scoreOutput(output, test.expectedOutput, test.errorType);
            return {
                score,
                output,
                passed: score >= test.threshold,
                failureReason
            };
        }
        catch (error) {
            // Fall through to the next provider; keep the reason for the final report.
            errors.push(`${target.name}: ${error?.message ?? String(error)}`);
            continue;
        }
    }
    throw new Error(`Shadow test failed for all providers: ${errors.join(' | ')}`);
}
|
|
69
|
+
/**
 * Run a candidate prompt against multiple tests and return aggregate results.
 *
 * All tests are started together via Promise.all, so a single test for which
 * runShadowTest rejects makes the whole suite reject.
 *
 * @param {string} candidatePrompt - Prompt template passed to every test.
 * @param {Array<object>} tests - Test records; each needs an `id` plus whatever runShadowTest reads.
 * @returns {Promise<{aggregateScore: number, results: Array<object>}>}
 *   Per-test results and their mean score (0 for an empty suite).
 */
async function runSuiteShadowTest(candidatePrompt, tests) {
    const results = await Promise.all(tests.map(async (test) => {
        const result = await runShadowTest(candidatePrompt, test);
        return {
            testId: test.id,
            score: result.score,
            passed: result.passed,
            output: result.output,
            failureReason: result.failureReason
        };
    }));
    // An empty suite would otherwise divide 0 by 0 and report NaN.
    const aggregateScore = results.length === 0
        ? 0
        : results.reduce((sum, r) => sum + r.score, 0) / results.length;
    return {
        aggregateScore,
        results
    };
}
|
|
60
|
-
function getApiKeyForProvider(provider) {
|
|
61
|
-
switch (provider) {
|
|
62
|
-
case 'anthropic':
|
|
63
|
-
return process.env.ANTHROPIC_API_KEY;
|
|
64
|
-
case 'openai':
|
|
65
|
-
return process.env.OPENAI_API_KEY;
|
|
66
|
-
case 'openrouter':
|
|
67
|
-
return process.env.OPENROUTER_API_KEY;
|
|
68
|
-
default:
|
|
69
|
-
return undefined;
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
async function runAnthropicTest(prompt, input) {
|
|
73
|
-
const anthropic = new sdk_1.default({
|
|
74
|
-
apiKey: process.env.ANTHROPIC_API_KEY
|
|
75
|
-
});
|
|
76
|
-
// Interpolate variables if present
|
|
77
|
-
const finalPrompt = interpolateVariables(prompt, input);
|
|
78
|
-
const response = await anthropic.messages.create({
|
|
79
|
-
model: 'claude-sonnet-4-20250514',
|
|
80
|
-
max_tokens: 2000,
|
|
81
|
-
messages: [{
|
|
82
|
-
role: 'user',
|
|
83
|
-
content: finalPrompt
|
|
84
|
-
}]
|
|
85
|
-
});
|
|
86
|
-
const content = response.content[0];
|
|
87
|
-
return content.type === 'text' ? content.text : '';
|
|
88
|
-
}
|
|
89
|
-
async function runOpenAITest(prompt, input) {
|
|
90
|
-
const openai = new openai_1.default({
|
|
91
|
-
apiKey: process.env.OPENAI_API_KEY
|
|
92
|
-
});
|
|
93
|
-
const finalPrompt = interpolateVariables(prompt, input);
|
|
94
|
-
const response = await openai.chat.completions.create({
|
|
95
|
-
model: 'gpt-4o',
|
|
96
|
-
messages: [{
|
|
97
|
-
role: 'user',
|
|
98
|
-
content: finalPrompt
|
|
99
|
-
}]
|
|
100
|
-
});
|
|
101
|
-
return response.choices[0]?.message?.content || '';
|
|
102
|
-
}
|
|
103
|
-
async function runOpenRouterTest(prompt, input) {
|
|
104
|
-
const key = process.env.OPENROUTER_API_KEY;
|
|
105
|
-
// Save original key and temporarily remove it to prevent OpenAI client confusion
|
|
106
|
-
const originalOpenAIKey = process.env.OPENAI_API_KEY;
|
|
107
|
-
delete process.env.OPENAI_API_KEY;
|
|
108
|
-
try {
|
|
109
|
-
const openai = new openai_1.default({
|
|
110
|
-
baseURL: 'https://openrouter.ai/api/v1',
|
|
111
|
-
apiKey: key
|
|
112
|
-
});
|
|
113
|
-
const finalPrompt = interpolateVariables(prompt, input);
|
|
114
|
-
const response = await openai.chat.completions.create({
|
|
115
|
-
model: 'nvidia/nemotron-3-nano-30b-a3b:free',
|
|
116
|
-
messages: [{
|
|
117
|
-
role: 'user',
|
|
118
|
-
content: finalPrompt
|
|
119
|
-
}]
|
|
120
|
-
});
|
|
121
|
-
return response.choices[0]?.message?.content || '';
|
|
122
|
-
}
|
|
123
|
-
finally {
|
|
124
|
-
// Restore original key
|
|
125
|
-
if (originalOpenAIKey) {
|
|
126
|
-
process.env.OPENAI_API_KEY = originalOpenAIKey;
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
function interpolateVariables(prompt, variables) {
|
|
131
|
-
if (!variables)
|
|
132
|
-
return prompt;
|
|
133
|
-
let result = prompt;
|
|
134
|
-
for (const [key, value] of Object.entries(variables)) {
|
|
135
|
-
result = result.replace(new RegExp(`{{${key}}}`, 'g'), String(value));
|
|
136
|
-
}
|
|
137
|
-
return result;
|
|
138
|
-
}
|
|
139
89
|
/**
 * Score a model output against the expected output.
 *
 * @param {string} actual - Output produced by the provider.
 * @param {string} expected - Expected output from the test case.
 * @param {string} method - Scoring method: 'semantic', 'exact' or 'json'.
 * @returns {Promise<{score: number, failureReason?: string}>}
 *   'semantic' returns the similarity score, 'exact' returns 1 or 0 after
 *   trimming both sides, 'json' returns 1 when `actual` parses as JSON and 0
 *   otherwise; any other method returns 0.5 with an explanatory reason.
 */
async function scoreOutput(actual, expected, method) {
    switch (method) {
        case 'semantic': {
            const score = await (0, semantic_1.calculateSemanticSimilarity)(actual, expected);
            return { score, failureReason: score < 0.9 ? `Semantic similarity (${score.toFixed(2)}) is low. Output did not capture expected meaning.` : undefined };
        }
        case 'exact': {
            // A provider may return no content at all; treat that as an empty
            // string so the comparison reports a mismatch instead of throwing.
            const exactMatch = String(actual ?? '').trim() === String(expected ?? '').trim();
            return {
                score: exactMatch ? 1.0 : 0.0,
                failureReason: exactMatch ? undefined : `Expected exact match but output differed.`
            };
        }
        case 'json': {
            // Only validity is checked here; `expected` is not compared.
            try {
                JSON.parse(actual);
                return { score: 1.0 };
            }
            catch (e) {
                return { score: 0.0, failureReason: `Did not output valid JSON. Parse error: ${e.message}` };
            }
        }
        default:
            return { score: 0.5, failureReason: `Unknown scoring method: ${method}` };
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const custom_1 = require("../custom");
|
|
4
|
+
// Smoke test: the provider exposes the endpoint it was constructed with.
describe('CustomProvider', () => {
    it('should instantiate custom provider with arbitrary endpoint', () => {
        const endpoint = 'http://localhost:11434/api/generate';
        const instance = new custom_1.CustomProvider({ endpoint });
        expect(instance.endpoint).toBe(endpoint);
    });
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CustomProvider = void 0;
|
|
4
|
+
/**
 * Provider bound to a caller-supplied endpoint.
 * Only stores the endpoint; no request logic is defined in this class.
 */
class CustomProvider {
    /** Endpoint taken from the constructor config. */
    endpoint;
    /**
     * @param {{endpoint: string}} config - Must be an object; its `endpoint` is stored as-is.
     */
    constructor(config) {
        const { endpoint } = config;
        this.endpoint = endpoint;
    }
}
|
|
10
|
+
exports.CustomProvider = CustomProvider;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ProviderFactory = void 0;
|
|
4
|
+
const openai_1 = require("./openai");
|
|
5
|
+
const anthropic_1 = require("./anthropic");
|
|
6
|
+
const gemini_1 = require("./gemini");
|
|
7
|
+
const openrouter_1 = require("./openrouter");
|
|
8
|
+
/**
 * Maps a provider name to a provider instance or to its API key.
 * Names are matched case-insensitively.
 */
class ProviderFactory {
    /**
     * Build the provider implementation for `provider`.
     *
     * @param {string} provider - 'openai', 'anthropic', 'gemini' or 'openrouter' (any case).
     * @param {object} config - Passed unchanged to the provider constructor.
     * @returns {object} The constructed provider.
     * @throws {Error} "Unsupported provider: …" for any other value, including non-strings.
     */
    static create(provider, config) {
        // Non-string input falls into the default branch instead of crashing
        // with a TypeError from `.toLowerCase()`.
        const name = typeof provider === 'string' ? provider.toLowerCase() : '';
        switch (name) {
            case 'openai':
                return new openai_1.OpenAIProvider(config);
            case 'anthropic':
                return new anthropic_1.AnthropicProvider(config);
            case 'gemini':
                return new gemini_1.GeminiProvider(config);
            case 'openrouter':
                return new openrouter_1.OpenRouterProvider(config);
            default:
                throw new Error(`Unsupported provider: ${provider}`);
        }
    }
    /**
     * Read the API key for `provider` from the environment.
     *
     * @param {string} provider - Provider name (any case).
     * @returns {string|undefined} The matching `*_API_KEY` environment value,
     *   or undefined for an unknown or non-string provider.
     */
    static getApiKey(provider) {
        const name = typeof provider === 'string' ? provider.toLowerCase() : '';
        switch (name) {
            case 'openai':
                return process.env.OPENAI_API_KEY;
            case 'anthropic':
                return process.env.ANTHROPIC_API_KEY;
            case 'gemini':
                return process.env.GEMINI_API_KEY;
            case 'openrouter':
                return process.env.OPENROUTER_API_KEY;
            default:
                return undefined;
        }
    }
}
|
|
38
|
+
exports.ProviderFactory = ProviderFactory;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { BaseProvider, ProviderResponse } from './base';
|
|
2
|
+
import { ProviderConfig } from '../types';
|
|
3
|
+
/**
 * Type declaration for the Gemini-backed provider.
 */
export declare class GeminiProvider extends BaseProvider {
    private ai;
    constructor(config: ProviderConfig);
    /** Accepts either a plain prompt string or a `{ system?, user }` pair. */
    complete(prompt: string | { system?: string; user: string; }): Promise<ProviderResponse>;
    /** Resolves to the embedding vector for `text`. */
    getEmbedding(text: string): Promise<number[]>;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GeminiProvider = void 0;
|
|
4
|
+
const genai_1 = require("@google/genai");
|
|
5
|
+
const base_1 = require("./base");
|
|
6
|
+
/**
 * Provider backed by the `@google/genai` client.
 */
class GeminiProvider extends base_1.BaseProvider {
    /** GoogleGenAI client created from the configured API key. */
    ai;
    /**
     * @param {object} config - Provider config; `apiKey` is used here, while `model`,
     *   `maxTokens` and `temperature` are read later through `this.config`
     *   (presumably stored by BaseProvider — confirm against base class).
     */
    constructor(config) {
        super(config);
        this.ai = new genai_1.GoogleGenAI({ apiKey: config.apiKey });
    }
    /**
     * Generate a completion.
     *
     * @param {string|{system?: string, user: string}} prompt - Plain text, or a
     *   user message with an optional system instruction.
     * @returns {Promise<{content: string, tokens: number, cost: number}>}
     *   `content` is the response text ('' when absent), `tokens` is the total
     *   token count reported by the API (0 when not reported), `cost` is always 0.
     */
    async complete(prompt) {
        const isPlainText = typeof prompt === 'string';
        const textContent = isPlainText ? prompt : prompt.user;
        const systemInstruction = isPlainText ? undefined : prompt.system;
        const response = await this.ai.models.generateContent({
            model: this.config.model,
            contents: textContent,
            config: {
                systemInstruction: systemInstruction,
                maxOutputTokens: this.config.maxTokens,
                temperature: this.config.temperature
            }
        });
        return {
            content: response.text || '',
            // Report real usage when the API includes it instead of a constant 0.
            tokens: response.usageMetadata?.totalTokenCount ?? 0,
            cost: 0
        };
    }
    /**
     * Embed `text` with the 'text-embedding-004' model.
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} Values of the first embedding, or [] when none is returned.
     */
    async getEmbedding(text) {
        const response = await this.ai.models.embedContent({
            model: 'text-embedding-004',
            contents: text
        });
        return response.embeddings?.[0]?.values || [];
    }
}
|
|
46
|
+
exports.GeminiProvider = GeminiProvider;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const rag_1 = require("../rag");
|
|
4
|
+
// Smoke test: scoring resolves to an object that carries a `score`.
describe('RAGScorer', () => {
    it('should instantiate and evaluate', async () => {
        const ragScorer = new rag_1.RAGScorer();
        const outcome = await ragScorer.score({ expected: '', actual: 'Test' });
        expect(outcome.score).toBeDefined();
    });
});
|
|
@@ -10,7 +10,7 @@ class CloudService {
|
|
|
10
10
|
backendUrl;
|
|
11
11
|
subscriptionId;
|
|
12
12
|
constructor() {
|
|
13
|
-
this.backendUrl = process.env.TUNEPROMPT_API_URL ||
|
|
13
|
+
this.backendUrl = process.env.TUNEPROMPT_API_URL || 'https://api.tuneprompt.com';
|
|
14
14
|
}
|
|
15
15
|
async init() {
|
|
16
16
|
// Load subscription ID from local storage (Phase 2 activation)
|
package/dist/storage/database.js
CHANGED
|
@@ -118,7 +118,7 @@ class TestDatabase {
|
|
|
118
118
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
119
119
|
`);
|
|
120
120
|
for (const result of run.results) {
|
|
121
|
-
insertResult.run(result.id, run.id, result.testCase.description, typeof result.testCase.prompt === 'string' ? result.testCase.prompt : JSON.stringify(result.testCase.prompt), result.testCase.variables ? JSON.stringify(result.testCase.variables) : null, typeof result.testCase.expect === 'string' ? result.testCase.expect : JSON.stringify(result.testCase.expect), result.testCase.config ? JSON.stringify(result.testCase.config) : null, result.testCase.filePath || null, result.status, result.score, result.actualOutput, result.expectedOutput, result.error || null, result.metadata.duration, result.metadata.tokens || null, result.metadata.cost || null, result.metadata.provider || null);
|
|
121
|
+
insertResult.run(result.id, run.id, result.testCase.description || 'No description', typeof result.testCase.prompt === 'string' ? result.testCase.prompt : JSON.stringify(result.testCase.prompt), result.testCase.variables ? JSON.stringify(result.testCase.variables) : null, typeof result.testCase.expect === 'string' ? result.testCase.expect : JSON.stringify(result.testCase.expect), result.testCase.config ? JSON.stringify(result.testCase.config) : null, result.testCase.filePath || null, result.status, result.score, result.actualOutput, result.expectedOutput, result.error || null, result.metadata.duration, result.metadata.tokens || null, result.metadata.cost || null, result.metadata.provider || null);
|
|
122
122
|
}
|
|
123
123
|
}
|
|
124
124
|
getRecentRuns(limit = 10) {
|
package/dist/types/fix.d.ts
CHANGED
|
@@ -9,6 +9,10 @@ export interface FailedTest {
|
|
|
9
9
|
threshold: number;
|
|
10
10
|
errorType: 'semantic' | 'json' | 'exact' | 'length';
|
|
11
11
|
errorMessage: string;
|
|
12
|
+
config?: {
|
|
13
|
+
provider?: string;
|
|
14
|
+
model?: string;
|
|
15
|
+
};
|
|
12
16
|
}
|
|
13
17
|
export interface OptimizationResult {
|
|
14
18
|
originalPrompt: string;
|
|
@@ -19,10 +23,17 @@ export interface OptimizationResult {
|
|
|
19
23
|
score: number;
|
|
20
24
|
passed: boolean;
|
|
21
25
|
output: string;
|
|
26
|
+
aggregateScore?: number;
|
|
22
27
|
};
|
|
28
|
+
iterations?: number;
|
|
23
29
|
}
|
|
24
30
|
export interface FixCandidate {
|
|
25
31
|
prompt: string;
|
|
26
32
|
score: number;
|
|
27
33
|
reasoning: string;
|
|
34
|
+
testResults?: {
|
|
35
|
+
testId: string;
|
|
36
|
+
score: number;
|
|
37
|
+
passed: boolean;
|
|
38
|
+
}[];
|
|
28
39
|
}
|
package/dist/types/index.d.ts
CHANGED
|
@@ -10,7 +10,7 @@ export interface TestCase {
|
|
|
10
10
|
threshold?: number;
|
|
11
11
|
method?: 'exact' | 'semantic' | 'json' | 'llm-judge';
|
|
12
12
|
model?: string;
|
|
13
|
-
provider?: 'openai' | 'anthropic' | 'openrouter';
|
|
13
|
+
provider?: 'openai' | 'anthropic' | 'gemini' | 'openrouter';
|
|
14
14
|
};
|
|
15
15
|
filePath?: string;
|
|
16
16
|
}
|
|
@@ -50,6 +50,7 @@ export interface TunePromptConfig {
|
|
|
50
50
|
providers: {
|
|
51
51
|
openai?: ProviderConfig;
|
|
52
52
|
anthropic?: ProviderConfig;
|
|
53
|
+
gemini?: ProviderConfig;
|
|
53
54
|
openrouter?: ProviderConfig;
|
|
54
55
|
};
|
|
55
56
|
threshold?: number;
|
package/dist/utils/config.js
CHANGED
|
@@ -21,11 +21,11 @@ function validateConfig(config) {
|
|
|
21
21
|
if (!config.providers || Object.keys(config.providers).length === 0) {
|
|
22
22
|
throw new Error('At least one provider must be configured');
|
|
23
23
|
}
|
|
24
|
-
// Validate API keys
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
24
|
+
// Validate API keys - ensure at least one provider is valid
|
|
25
|
+
const validProviders = Object.entries(config.providers)
|
|
26
|
+
.filter(([_, cfg]) => !!cfg.apiKey);
|
|
27
|
+
if (validProviders.length === 0) {
|
|
28
|
+
throw new Error('No valid API keys found. Please provide at least one API key in your .env file.');
|
|
29
29
|
}
|
|
30
30
|
return {
|
|
31
31
|
threshold: config.threshold || 0.8,
|
|
@@ -49,6 +49,12 @@ function getDefaultConfigTemplate() {
|
|
|
49
49
|
maxTokens: 1000,
|
|
50
50
|
temperature: 0.7
|
|
51
51
|
},
|
|
52
|
+
gemini: {
|
|
53
|
+
apiKey: process.env.GEMINI_API_KEY,
|
|
54
|
+
model: 'gemini-2.5-flash',
|
|
55
|
+
maxTokens: 1000,
|
|
56
|
+
temperature: 0.7
|
|
57
|
+
},
|
|
52
58
|
openrouter: {
|
|
53
59
|
apiKey: process.env.OPENROUTER_API_KEY,
|
|
54
60
|
model: 'nvidia/nemotron-3-nano-30b-a3b:free',
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.interpolateVariables = interpolateVariables;
|
|
4
|
+
/**
 * Interpolate variables in a prompt string using {{variableName}} syntax.
 *
 * Every occurrence of `{{key}}` is replaced by `String(value)` for each entry
 * of `variables`. Keys and values are treated as literal text.
 *
 * @param {string} prompt - Template text.
 * @param {Object<string, *>} [variables] - Placeholder values; when falsy the prompt is returned unchanged.
 * @returns {string} The prompt with known placeholders filled in; unknown placeholders are left as-is.
 */
function interpolateVariables(prompt, variables) {
    if (!variables)
        return prompt;
    let result = prompt;
    for (const [key, value] of Object.entries(variables)) {
        // split/join replaces all occurrences literally: the key is not parsed
        // as a regular expression (so `a.b`, `a+b` or `1` match only themselves)
        // and `$&` / `$1` in the value are not expanded as replacement patterns.
        result = result.split(`{{${key}}}`).join(String(value));
    }
    return result;
}
|
package/dist/utils/storage.d.ts
CHANGED
|
@@ -1,2 +1,6 @@
|
|
|
1
1
|
import { FailedTest } from '../types/fix';
|
|
2
2
|
export declare function getFailedTests(): Promise<FailedTest[]>;
|
|
3
|
+
/**
|
|
4
|
+
* Get the full suite of tests (passing and failing) for a specific prompt file
|
|
5
|
+
*/
|
|
6
|
+
export declare function getSuiteTests(filePath: string): Promise<FailedTest[]>;
|
package/dist/utils/storage.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.getFailedTests = getFailedTests;
|
|
4
|
+
exports.getSuiteTests = getSuiteTests;
|
|
4
5
|
const database_1 = require("../storage/database");
|
|
5
6
|
async function getFailedTests() {
|
|
6
7
|
const db = new database_1.TestDatabase();
|
|
@@ -10,16 +11,36 @@ async function getFailedTests() {
|
|
|
10
11
|
}
|
|
11
12
|
const latestRun = recentRuns[0];
|
|
12
13
|
const failures = latestRun.results.filter(r => r.status === 'fail' || r.status === 'error');
|
|
13
|
-
return failures.map(
|
|
14
|
-
|
|
14
|
+
return failures.map(mapResultToFailedTest);
|
|
15
|
+
}
|
|
16
|
+
/**
 * Collect every test result (passing or failing) recorded for one prompt file
 * in the most recent run.
 *
 * @param {string} filePath - Compared with strict equality against each result's `testCase.filePath`.
 * @returns {Promise<Array<object>>} Mapped test records; empty when no run has been stored.
 */
async function getSuiteTests(filePath) {
    const database = new database_1.TestDatabase();
    // Only the newest run is relevant, so ask for a single entry.
    const runs = database.getRecentRuns(1);
    if (runs.length === 0) {
        return [];
    }
    const [newestRun] = runs;
    return newestRun.results
        .filter((entry) => entry.testCase.filePath === filePath)
        .map(mapResultToFailedTest);
}
|
|
29
|
+
/**
 * Convert a stored test result into the record shape used by the fix flow.
 *
 * @param {object} r - Stored result; reads `id`, `testCase`, `actualOutput`, `score`, `error` and `metadata`.
 * @returns {object} Record with id, description, prompt, input, expectedOutput,
 *   actualOutput, score, threshold, errorType, errorMessage and config.
 */
function mapResultToFailedTest(r) {
    return {
        // The prompt file path doubles as the id when it is known.
        id: r.testCase.filePath || r.id,
        description: r.testCase.description,
        // For structured prompts only the user part is carried over.
        prompt: !r.testCase.prompt ? '' : (typeof r.testCase.prompt === 'string' ? r.testCase.prompt : r.testCase.prompt.user),
        input: r.testCase.variables,
        expectedOutput: typeof r.testCase.expect === 'string' ? r.testCase.expect : JSON.stringify(r.testCase.expect),
        actualOutput: r.actualOutput || '',
        score: r.score,
        // `??` keeps an explicit threshold of 0; only a missing value defaults to 0.8.
        threshold: r.testCase.config?.threshold ?? 0.8,
        errorType: r.testCase.config?.method || 'semantic',
        errorMessage: r.error || '',
        config: {
            provider: r.metadata?.provider,
            model: r.testCase.config?.model || r.testCase.config?.modelId
        }
    };
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.validateTestFile = validateTestFile;
|
|
4
|
+
/**
 * Check that every test config has something to run.
 *
 * @param {Iterable<object>} configs - Test configs to validate.
 * @throws {Error} For the first config that has neither a truthy `prompt` nor a non-empty `steps` list.
 */
function validateTestFile(configs) {
    for (const entry of configs) {
        if (entry.prompt) {
            continue;
        }
        // Without a prompt, a non-empty steps list is the only valid alternative.
        if (entry.steps && entry.steps.length !== 0) {
            continue;
        }
        throw new Error("Invalid configuration: missing required 'prompt' field");
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "tuneprompt",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.2",
|
|
4
4
|
"description": "Industrial-grade testing framework for LLM prompts",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"main": "dist/index.js",
|
|
14
14
|
"types": "dist/index.d.ts",
|
|
15
15
|
"bin": {
|
|
16
|
-
"tuneprompt": "
|
|
16
|
+
"tuneprompt": "dist/cli.js"
|
|
17
17
|
},
|
|
18
18
|
"files": [
|
|
19
19
|
"dist",
|
|
@@ -58,6 +58,7 @@
|
|
|
58
58
|
},
|
|
59
59
|
"dependencies": {
|
|
60
60
|
"@anthropic-ai/sdk": "^0.71.2",
|
|
61
|
+
"@google/genai": "^1.44.0",
|
|
61
62
|
"@types/chokidar": "^1.7.5",
|
|
62
63
|
"axios": "^1.13.2",
|
|
63
64
|
"better-sqlite3": "^12.5.0",
|