tuneprompt 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,17 +10,26 @@ Industrial-grade testing framework for LLM prompts
10
10
 
11
11
  TunePrompt is a comprehensive testing framework designed specifically for Large Language Model (LLM) prompts. It helps developers validate, test, and optimize their prompts with industrial-grade reliability and accuracy.
12
12
 
13
+ ## šŸš€ What's New in v1.1.3
14
+
15
+ The first production-ready release of **TunePrompt**, the industrial-grade testing framework for the modern LLM stack.
16
+
17
+ - **Multi-Provider Support**: Seamlessly test across **OpenAI**, **Anthropic**, **Gemini**, and **OpenRouter**.
18
+ - **Semantic Evaluation**: Advanced vector-based scoring to detect logic drift and nuance shifts.
19
+ - **Auto-Fix Engine (Premium)**: AI-powered prompt optimization for failing tests.
20
+ - **Cloud Orchestration**: Unified synchronization with the [TunePrompt Dashboard](https://www.tuneprompt.xyz).
21
+ - **Industrial CLI**: Built-in watch mode, CI/CD integration, and historical analytics.
22
+
13
23
  ## Features
14
24
 
15
- - **Multi-provider Support**: Test prompts across OpenAI, Anthropic, OpenRouter, and other LLM providers
16
- - **Semantic Testing**: Compare outputs using semantic similarity rather than exact matches
17
- - **JSON Validation**: Validate structured JSON outputs
18
- - **LLM-based Judging**: Use advanced LLMs to evaluate prompt quality
19
- - **Watch Mode**: Automatically re-run tests when files change
20
- - **CI/CD Integration**: Seamlessly integrate with your CI/CD pipeline
21
- - **Cloud Sync**: Upload results to the TunePrompt Cloud dashboard
22
- - **Auto-fix Engine**: Premium feature to automatically fix failing prompts using AI
23
- - **Detailed Reporting**: Comprehensive test reports with scores, methods, and durations
25
+ - **Multi-Provider Support**: Native integration with Google Gemini, OpenAI, Anthropic, and OpenRouter.
26
+ - **Semantic Testing**: Compare outputs using high-precision embedding similarity.
27
+ - **JSON Validation**: Validate structured outputs with schema-aware checks.
28
+ - **LLM-based Judging**: Utilize advanced providers as evaluators for qualitative metrics.
29
+ - **Watch Mode**: Immediate feedback loop with automatic re-runs on file changes.
30
+ - **CI/CD Ready**: Native integration patterns for industrial deployment pipelines.
31
+ - **Cloud Sync**: Global telemetry and result storage via the dashboard.
32
+ - **Auto-fix Engine**: Iterative refinement loop for intelligent prompt repair.
24
33
 
25
34
  ## Installation
26
35
 
@@ -48,8 +48,7 @@ const fs = __importStar(require("fs"));
48
48
  const errorHandler_1 = require("../utils/errorHandler");
49
49
  async function fixCommand(options = {}) {
50
50
  try {
51
- console.log(chalk_1.default.bold.cyan('\nšŸ”§ TunePrompt Fix\n'));
52
- // License check with better error
51
+ console.log('');
53
52
  const spinner = (0, ora_1.default)('Checking license...').start();
54
53
  const licenseValid = await (0, license_1.checkLicense)();
55
54
  if (!licenseValid) {
@@ -58,36 +57,29 @@ async function fixCommand(options = {}) {
58
57
  throw errorHandler_1.Errors.NO_LICENSE;
59
58
  }
60
59
  spinner.succeed('License validated');
61
- // Load failed tests with error handling
62
60
  const failedTests = await (0, storage_1.getFailedTests)();
63
61
  if (failedTests.length === 0) {
64
62
  throw errorHandler_1.Errors.NO_FAILED_TESTS;
65
63
  }
66
- console.log(chalk_1.default.yellow(`\nFound ${failedTests.length} failed test(s):\n`));
64
+ console.log(chalk_1.default.yellow(`\n${failedTests.length} failed test(s):`));
67
65
  failedTests.forEach((test, index) => {
68
- const modelInfo = test.config?.model ? ` [Target: ${test.config.provider || 'unknown'}/${test.config.model}]` : '';
69
- console.log(`${index + 1}. ${chalk_1.default.bold(test.description)}${chalk_1.default.cyan(modelInfo)}`);
70
- console.log(` Score: ${chalk_1.default.red(test.score.toFixed(2))} (threshold: ${test.threshold})`);
66
+ console.log(chalk_1.default.gray(` ${index + 1}. ${test.description} — score: ${chalk_1.default.red(test.score.toFixed(2))} / ${test.threshold}`));
71
67
  });
72
68
  // Step 3: Ask which tests to fix
73
69
  let selectedIndexes = [];
74
70
  if (options.yes) {
75
71
  selectedIndexes = failedTests.map((_, i) => i);
76
- console.log(chalk_1.default.gray(`\nNon-interactive mode: Automatic selection of all ${failedTests.length} tests.`));
77
72
  }
78
73
  else {
79
74
  const response = await inquirer_1.default.prompt([{
80
75
  type: 'checkbox',
81
76
  name: 'selectedIndexes',
82
77
  message: 'Which tests would you like to fix?',
83
- choices: failedTests.map((test, index) => {
84
- const modelInfo = test.config?.model ? ` [${test.config.provider || 'unknown'}/${test.config.model}]` : '';
85
- return {
86
- name: `${test.description} (score: ${test.score.toFixed(2)})${modelInfo}`,
87
- value: index,
88
- checked: true
89
- };
90
- })
78
+ choices: failedTests.map((test, index) => ({
79
+ name: `${test.description} (${test.score.toFixed(2)})`,
80
+ value: index,
81
+ checked: true
82
+ }))
91
83
  }]);
92
84
  selectedIndexes = response.selectedIndexes;
93
85
  }
@@ -102,8 +94,8 @@ async function fixCommand(options = {}) {
102
94
  for (const index of selectedIndexes) {
103
95
  const test = failedTests[index];
104
96
  const suite = await getSuiteTests(test.id);
105
- const modelInfo = test.config?.model ? ` (Target: ${test.config.model})` : '';
106
- console.log(chalk_1.default.bold(`\n\n━━━ Fixing: ${test.description}${modelInfo} ━━━\n`));
97
+ const modelInfo = test.config?.model ? ` (${test.config.model})` : '';
98
+ console.log(chalk_1.default.bold(`\n━━━ ${test.description}${modelInfo} ━━━\n`));
107
99
  try {
108
100
  const result = await optimizer.optimize(test, suite);
109
101
  await showDiff(result.originalPrompt, result.optimizedPrompt, result.reasoning);
@@ -125,8 +117,7 @@ async function fixCommand(options = {}) {
125
117
  }
126
118
  if (action === 'apply') {
127
119
  await applyFix(test, result.optimizedPrompt);
128
- console.log(`\n${chalk_1.default.bgGreen.black(' DONE ')} ${chalk_1.default.green('Prompt updated in:')} ${chalk_1.default.bold(test.id)}`);
129
- console.log(chalk_1.default.gray('The next run will use this new prompt.\n'));
120
+ console.log(chalk_1.default.green(` āœ“ Updated: ${test.id}`));
130
121
  }
131
122
  else if (action === 'edit') {
132
123
  console.log(chalk_1.default.gray('\nOpening editor... (Save and close to apply)\n'));
@@ -149,8 +140,7 @@ async function fixCommand(options = {}) {
149
140
  continue; // Skip to next test
150
141
  }
151
142
  }
152
- console.log(chalk_1.default.bold.green('\n\n✨ Fix session complete!\n'));
153
- console.log(chalk_1.default.gray('Run `tuneprompt run` to verify your fixes.\n'));
143
+ console.log(chalk_1.default.bold.green('\n✨ Done. Run `tuneprompt run` to verify.\n'));
154
144
  // After fix completes
155
145
  const license = (0, license_1.getLicenseInfo)();
156
146
  if (license) {
@@ -162,19 +152,9 @@ async function fixCommand(options = {}) {
162
152
  }
163
153
  }
164
154
  function showUpgradePrompt() {
165
- console.log(chalk_1.default.yellow('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'));
166
- console.log(chalk_1.default.bold('šŸ”’ Premium Feature: Auto-Fix Engine\n'));
167
- console.log('The ' + chalk_1.default.cyan('fix') + ' command uses advanced AI to automatically');
168
- console.log('repair your failing prompts.\n');
169
- console.log(chalk_1.default.bold('What you get:'));
170
- console.log(' āœ… AI-powered prompt optimization');
171
- console.log(' āœ… Shadow testing before applying fixes');
172
- console.log(' āœ… Interactive diff viewer');
173
- console.log(' āœ… Unlimited fix attempts\n');
174
- console.log(chalk_1.default.bold('Get Premium:'));
175
- console.log(` 1. Buy a license: ${chalk_1.default.blue.underline('https://www.tuneprompt.xyz/pricing')}`);
176
- console.log(` 2. Activate: ${chalk_1.default.gray('tuneprompt activate <your-key>')}\n`);
177
- console.log(chalk_1.default.yellow('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n'));
155
+ console.log(chalk_1.default.yellow('\nšŸ”’ Premium feature. Get access:'));
156
+ console.log(chalk_1.default.gray(` ${chalk_1.default.blue.underline('https://www.tuneprompt.xyz/pricing')}`));
157
+ console.log(chalk_1.default.gray(` Then: ${chalk_1.default.white('tuneprompt activate <key>')}\n`));
178
158
  }
179
159
  async function showDiff(original, optimized, reasoning) {
180
160
  const diffLib = await Promise.resolve().then(() => __importStar(require('diff')));
@@ -15,62 +15,29 @@ const loader_1 = require("../engine/loader");
15
15
  const runner_1 = require("../engine/runner");
16
16
  const reporter_1 = require("../engine/reporter");
17
17
  const database_1 = require("../storage/database");
18
- const license_1 = require("../utils/license");
19
- // At the end of your test run reporter
20
- function displayRunSummary(results) {
21
- const failed = results.filter(r => r.status === 'fail');
22
- const passed = results.filter(r => r.status === 'pass');
23
- console.log(chalk_1.default.bold.white('\n' + '='.repeat(60)));
24
- console.log(chalk_1.default.bold.white('Test Summary'));
25
- console.log(chalk_1.default.bold.white('='.repeat(60)));
26
- console.log(chalk_1.default.green(`āœ“ Passed: ${passed.length}`));
27
- console.log(chalk_1.default.red(`āœ— Failed: ${failed.length}`));
28
- console.log(chalk_1.default.gray(`Total: ${results.length}`));
29
- console.log(chalk_1.default.bold.white('='.repeat(60) + '\n'));
30
- // UPSELL MESSAGE (NEW)
31
- if (failed.length > 0) {
32
- console.log(chalk_1.default.yellow('āš ļø ' + failed.length + ' test(s) failed'));
33
- console.log(chalk_1.default.gray('\nDon\'t waste time debugging manually.'));
34
- console.log(chalk_1.default.cyan('Run ') + chalk_1.default.bold.white('tuneprompt fix') + chalk_1.default.cyan(' to let AI repair these prompts instantly.\n'));
35
- // Check license status
36
- const licenseManager = new license_1.LicenseManager();
37
- licenseManager.hasFeature('fix').then((hasAccess) => {
38
- if (!hasAccess) {
39
- console.log(chalk_1.default.gray('Unlock fix with: ') + chalk_1.default.white('https://www.tuneprompt.xyz/pricing'));
40
- console.log(chalk_1.default.gray('Already have a key? ') + chalk_1.default.white('tuneprompt activate <key>\n'));
41
- }
42
- });
43
- }
44
- }
45
18
  // Extract the core run functionality to a separate function
46
19
  async function runTests(options = {}) {
47
20
  const startTime = Date.now();
48
- const spinner = (0, ora_1.default)('Loading configuration...').start();
21
+ const spinner = (0, ora_1.default)('Loading...').start();
49
22
  try {
50
- // Load config
51
23
  const config = await (0, config_1.loadConfig)(options.config);
52
24
  spinner.succeed('Configuration loaded');
53
- // Load tests
54
- spinner.start('Loading test cases...');
25
+ spinner.start('Loading tests...');
55
26
  const loader = new loader_1.TestLoader();
56
27
  const testCases = loader.loadTestDir(config.testDir || './tests');
57
28
  if (testCases.length === 0) {
58
29
  spinner.fail('No test cases found');
59
30
  process.exit(1);
60
31
  }
61
- spinner.succeed(`Loaded ${testCases.length} test case(s)`);
62
- // Run tests
32
+ spinner.succeed(`Loaded ${testCases.length} test(s)`);
63
33
  spinner.start('Running tests...');
64
34
  const runner = new runner_1.TestRunner(config);
65
35
  const results = await runner.runTests(testCases);
66
36
  spinner.stop();
67
- // Save to database
68
- // Save to database
69
37
  const db = new database_1.TestDatabase();
70
38
  db.saveRun(results);
71
- // Calculate results for cloud upload (and for sync logic)
72
- const currentRunId = results.id; // Assuming results has ID
73
- // Report results
39
+ // Calculate results for cloud upload
40
+ const currentRunId = results.id;
74
41
  const reporter = new reporter_1.TestReporter();
75
42
  reporter.printResults(results, config.outputFormat);
76
43
  const isCI = options.ci ||
@@ -104,11 +71,11 @@ async function syncPendingRuns(db, options) {
104
71
  const pendingRuns = db.getPendingUploads();
105
72
  if (pendingRuns.length === 0)
106
73
  return;
107
- console.log(chalk_1.default.blue(`\nā˜ļø Syncing ${pendingRuns.length} pending run(s) to Cloud...`));
74
+ const syncSpinner = (0, ora_1.default)(`Syncing ${pendingRuns.length} run(s) to Cloud...`).start();
108
75
  const cloudService = new cloud_service_1.CloudService();
109
76
  await cloudService.init();
110
77
  if (!(await cloudService.isAuthenticated())) {
111
- console.log(chalk_1.default.yellow('āš ļø Not authenticated. Run `tuneprompt activate` first.'));
78
+ syncSpinner.warn('Not authenticated. Run `tuneprompt activate` first.');
112
79
  return;
113
80
  }
114
81
  // Get project ID once
@@ -124,7 +91,7 @@ async function syncPendingRuns(db, options) {
124
91
  }
125
92
  }
126
93
  catch (err) {
127
- console.log(chalk_1.default.yellow('āš ļø Failed to get project info'));
94
+ syncSpinner.warn('Failed to get project info');
128
95
  return;
129
96
  }
130
97
  // Common Git/Env context
@@ -179,10 +146,7 @@ async function syncPendingRuns(db, options) {
179
146
  const uploadResult = await cloudService.uploadRun(runData);
180
147
  if (uploadResult.success) {
181
148
  db.markAsUploaded(run.id);
182
- console.log(chalk_1.default.green(` āœ“ Uploaded run from ${run.timestamp.toLocaleTimeString()}`));
183
- }
184
- else {
185
- console.log(chalk_1.default.red(` āœ— Failed to upload run ${run.id}: ${uploadResult.error}`));
186
149
  }
187
150
  }
151
+ syncSpinner.succeed(`Synced ${pendingRuns.length} run(s) to Cloud`);
188
152
  }
@@ -44,12 +44,16 @@ class TestLoader {
44
44
  if (ext === '.json') {
45
45
  const data = JSON.parse(content);
46
46
  const tests = Array.isArray(data) ? data : [data];
47
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
47
+ return tests
48
+ .filter((t) => t && typeof t === 'object' && t.prompt)
49
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
48
50
  }
49
51
  else if (ext === '.yaml' || ext === '.yml') {
50
52
  const data = yaml.load(content);
51
53
  const tests = Array.isArray(data) ? data : [data];
52
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
54
+ return tests
55
+ .filter((t) => t && typeof t === 'object' && t.prompt)
56
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
53
57
  }
54
58
  else {
55
59
  throw new Error(`Unsupported file format: ${ext}`);
@@ -1,6 +1,10 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
3
6
  exports.PromptOptimizer = void 0;
7
+ const ora_1 = __importDefault(require("ora"));
4
8
  const metaPrompt_1 = require("./metaPrompt");
5
9
  const constraintExtractor_1 = require("./constraintExtractor");
6
10
  const shadowTester_1 = require("./shadowTester");
@@ -14,10 +18,8 @@ class PromptOptimizer {
14
18
  * Main optimization method with Anti-Regression and Iterative Refinement
15
19
  */
16
20
  async optimize(failedTest, suite) {
17
- console.log(`\n🧠 Analyzing failure: "${failedTest.description}"`);
18
- console.log(`šŸ“ˆ Full test suite size: ${suite.length}`);
21
+ const spinner = (0, ora_1.default)(`Analyzing failure: "${failedTest.description}"`).start();
19
22
  const initialAggregateScore = suite.reduce((sum, t) => sum + t.score, 0) / suite.length;
20
- console.log(`šŸ“Š Current aggregate score: ${initialAggregateScore.toFixed(2)}`);
21
23
  const errorContext = (0, constraintExtractor_1.generateErrorContext)(failedTest);
22
24
  const passingExamples = suite
23
25
  .filter(t => t.score >= t.threshold)
@@ -30,7 +32,7 @@ class PromptOptimizer {
30
32
  let conversation = [];
31
33
  while (iterations < this.maxIterations) {
32
34
  iterations++;
33
- console.log(`šŸš€ Optimization Attempt #${iterations}...`);
35
+ spinner.text = `Optimization Attempt #${iterations}/${this.maxIterations}...`;
34
36
  if (iterations === 1) {
35
37
  const input = {
36
38
  originalPrompt: failedTest.prompt,
@@ -53,17 +55,15 @@ class PromptOptimizer {
53
55
  }
54
56
  for (const candidate of candidates) {
55
57
  try {
56
- console.log(`🧪 Testing candidate...`);
58
+ spinner.text = `Attempt #${iterations}: Testing candidate...`;
57
59
  const primaryResult = await (0, shadowTester_1.runShadowTest)(candidate.prompt, failedTest);
58
60
  if (primaryResult.score < failedTest.threshold) {
59
- console.log(` āŒ Candidate failed to resolve primary error (score: ${primaryResult.score.toFixed(2)})`);
60
- const specificReason = primaryResult.failureReason || `the output was: "${primaryResult.output.substring(0, 100)}..."`;
61
- lastFailureReason = `Candidate failed. Reason: ${specificReason}. Previous reasoning: ${candidate.reasoning}`;
61
+ const specificReason = primaryResult.failureReason || `the output was: "${primaryResult.output.substring(0, 50)}..."`;
62
+ lastFailureReason = `Candidate failed. Reason: ${specificReason}.`;
62
63
  continue;
63
64
  }
64
- console.log(` āœ… Resolved primary error. Running anti-regression...`);
65
+ spinner.text = `Attempt #${iterations}: Verifying anti-regression...`;
65
66
  const suiteResult = await (0, shadowTester_1.runSuiteShadowTest)(candidate.prompt, suite);
66
- console.log(` šŸ“Š Suite aggregate score: ${suiteResult.aggregateScore.toFixed(2)}`);
67
67
  if (suiteResult.aggregateScore > bestAggregateScore) {
68
68
  bestAggregateScore = suiteResult.aggregateScore;
69
69
  bestResult = {
@@ -81,23 +81,24 @@ class PromptOptimizer {
81
81
  };
82
82
  }
83
83
  else if (suiteResult.aggregateScore <= bestAggregateScore) {
84
- console.log(` šŸ“‰ Candidate regression: aggregate score dropped (Current: ${bestAggregateScore.toFixed(2)} VS New: ${suiteResult.aggregateScore.toFixed(2)})`);
85
84
  const regressions = suiteResult.results.filter(r => !r.passed).map(r => r.failureReason).filter(Boolean);
86
85
  const regressionText = regressions.length > 0 ? ` Required features broke: ${regressions.slice(0, 2).join('; ')}.` : '';
87
- lastFailureReason = `The fix resolved the failure but introduced regressions in other cases.${regressionText} Maintain all successful patterns while fixing the failure.`;
86
+ lastFailureReason = `Fix introduced regressions.${regressionText}`;
88
87
  }
89
88
  }
90
89
  catch (error) {
91
- console.error(` āš ļø Validation error for candidate: ${error.message}`);
90
+ spinner.text = `Attempt #${iterations}: āš ļø ${error.message?.substring(0, 80)}`;
91
+ lastFailureReason = error.message;
92
92
  }
93
93
  }
94
94
  if (bestResult)
95
95
  break;
96
- console.log(`ā™»ļø No candidate was net-positive. Retrying with refinement feedback...`);
97
96
  }
98
97
  if (!bestResult) {
99
- throw new Error(`All fix attempts failed to resolve the regression or improve the aggregate score after ${this.maxIterations} iterations.`);
98
+ spinner.fail(`Optimization failed`);
99
+ throw new Error(`Failed to improve score after ${this.maxIterations} attempts. ${lastFailureReason || ''}`);
100
100
  }
101
+ spinner.succeed(`Optimization successful!`);
101
102
  return bestResult;
102
103
  }
103
104
  getMetaPrompt(input) {
@@ -130,7 +131,7 @@ class PromptOptimizer {
130
131
  // Pick a strong model for optimization if not defined
131
132
  const model = providerName === 'anthropic' ? 'claude-3-5-sonnet-latest' :
132
133
  providerName === 'openai' ? 'gpt-4o' :
133
- providerName === 'gemini' ? 'gemini-1.5-pro' : undefined;
134
+ providerName === 'gemini' ? 'gemini-2.0-flash' : undefined;
134
135
  if (!model)
135
136
  continue;
136
137
  const provider = factory_1.ProviderFactory.create(providerName, {
@@ -7,7 +7,7 @@ exports.TestReporter = void 0;
7
7
  const chalk_1 = __importDefault(require("chalk"));
8
8
  const cli_table3_1 = __importDefault(require("cli-table3"));
9
9
  class TestReporter {
10
- printResults(run, format = 'both') {
10
+ printResults(run, format = 'table') {
11
11
  if (format === 'json' || format === 'both') {
12
12
  this.printJSON(run);
13
13
  }
@@ -18,7 +18,7 @@ class TestRunner {
18
18
  const providerNames = ["openai", "anthropic", "openrouter", "gemini"];
19
19
  for (const name of providerNames) {
20
20
  const providerConfig = this.config.providers[name];
21
- if (providerConfig) {
21
+ if (providerConfig && providerConfig.apiKey) {
22
22
  this.providers.set(name, factory_1.ProviderFactory.create(name, providerConfig));
23
23
  }
24
24
  }
@@ -20,16 +20,34 @@ async function runShadowTest(candidatePrompt, test) {
20
20
  }
21
21
  const providerName = test.config?.provider;
22
22
  const model = test.config?.model;
23
- // If specific provider/model is requested, use it directly (Strict Mode)
23
+ // Determine providers to try
24
+ let providersToTry = [];
24
25
  if (providerName && model) {
26
+ providersToTry.push({ name: providerName, model });
27
+ }
28
+ // Fallback queue
29
+ const fallbackQueue = [
30
+ { name: 'anthropic', model: 'claude-3-5-sonnet-latest' },
31
+ { name: 'openai', model: 'gpt-4o' },
32
+ { name: 'gemini', model: 'gemini-2.0-flash' },
33
+ { name: 'openrouter', model: 'nvidia/nemotron-3-nano-30b-a3b:free' }
34
+ ];
35
+ for (const entry of fallbackQueue) {
36
+ if (entry.name !== providerName) {
37
+ providersToTry.push(entry);
38
+ }
39
+ }
40
+ let errors = [];
41
+ for (const target of providersToTry) {
25
42
  try {
26
- const apiKey = factory_1.ProviderFactory.getApiKey(providerName);
43
+ const apiKey = factory_1.ProviderFactory.getApiKey(target.name);
27
44
  if (!apiKey) {
28
- throw new Error(`No API key found for provider: ${providerName}`);
45
+ errors.push(`${target.name}: no API key (set ${target.name.toUpperCase()}_API_KEY)`);
46
+ continue;
29
47
  }
30
- const provider = factory_1.ProviderFactory.create(providerName, {
48
+ const provider = factory_1.ProviderFactory.create(target.name, {
31
49
  apiKey,
32
- model,
50
+ model: target.model || 'latest',
33
51
  maxTokens: 2000
34
52
  });
35
53
  const finalPrompt = (0, interpolation_1.interpolateVariables)(candidatePrompt, test.input);
@@ -44,12 +62,11 @@ async function runShadowTest(candidatePrompt, test) {
44
62
  };
45
63
  }
46
64
  catch (error) {
47
- console.log(`āš ļø Specified provider ${providerName} failed: ${error.message}`);
48
- throw new Error(`Failed to validate on target model: ${error.message}`);
65
+ errors.push(`${target.name}: ${error.message}`);
66
+ continue;
49
67
  }
50
68
  }
51
- // Phase 2 Decision: Fail fast if no provider/model is defined (Strict Awareness)
52
- throw new Error(`Test "${test.description}" lacks provider/model configuration. Validation aborted.`);
69
+ throw new Error(`Shadow test failed for all providers: ${errors.join(' | ')}`);
53
70
  }
54
71
  /**
55
72
  * Run a candidate prompt against multiple tests and return aggregate results
@@ -118,7 +118,7 @@ class TestDatabase {
118
118
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
119
119
  `);
120
120
  for (const result of run.results) {
121
- insertResult.run(result.id, run.id, result.testCase.description, typeof result.testCase.prompt === 'string' ? result.testCase.prompt : JSON.stringify(result.testCase.prompt), result.testCase.variables ? JSON.stringify(result.testCase.variables) : null, typeof result.testCase.expect === 'string' ? result.testCase.expect : JSON.stringify(result.testCase.expect), result.testCase.config ? JSON.stringify(result.testCase.config) : null, result.testCase.filePath || null, result.status, result.score, result.actualOutput, result.expectedOutput, result.error || null, result.metadata.duration, result.metadata.tokens || null, result.metadata.cost || null, result.metadata.provider || null);
121
+ insertResult.run(result.id, run.id, result.testCase.description || 'No description', typeof result.testCase.prompt === 'string' ? result.testCase.prompt : JSON.stringify(result.testCase.prompt), result.testCase.variables ? JSON.stringify(result.testCase.variables) : null, typeof result.testCase.expect === 'string' ? result.testCase.expect : JSON.stringify(result.testCase.expect), result.testCase.config ? JSON.stringify(result.testCase.config) : null, result.testCase.filePath || null, result.status, result.score, result.actualOutput, result.expectedOutput, result.error || null, result.metadata.duration, result.metadata.tokens || null, result.metadata.cost || null, result.metadata.provider || null);
122
122
  }
123
123
  }
124
124
  getRecentRuns(limit = 10) {
@@ -21,16 +21,16 @@ function validateConfig(config) {
21
21
  if (!config.providers || Object.keys(config.providers).length === 0) {
22
22
  throw new Error('At least one provider must be configured');
23
23
  }
24
- // Validate API keys
25
- for (const [provider, cfg] of Object.entries(config.providers)) {
26
- if (!cfg.apiKey) {
27
- throw new Error(`API key missing for provider: ${provider}`);
28
- }
24
+ // Validate API keys - ensure at least one provider is valid
25
+ const validProviders = Object.entries(config.providers)
26
+ .filter(([_, cfg]) => !!cfg.apiKey);
27
+ if (validProviders.length === 0) {
28
+ throw new Error('No valid API keys found. Please provide at least one API key in your .env file.');
29
29
  }
30
30
  return {
31
31
  threshold: config.threshold || 0.8,
32
32
  testDir: config.testDir || './tests',
33
- outputFormat: config.outputFormat || 'both',
33
+ outputFormat: config.outputFormat || 'table',
34
34
  ...config
35
35
  };
36
36
  }
@@ -70,7 +70,7 @@ function getDefaultConfigTemplate() {
70
70
  testDir: './tests',
71
71
 
72
72
  // Output format: 'json', 'table', or 'both'
73
- outputFormat: 'both'
73
+ outputFormat: 'table'
74
74
  };
75
75
  `;
76
76
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tuneprompt",
3
- "version": "1.1.1",
3
+ "version": "1.1.3",
4
4
  "description": "Industrial-grade testing framework for LLM prompts",
5
5
  "repository": {
6
6
  "type": "git",