tuneprompt 1.0.7 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +18 -9
  2. package/dist/cli.js +5 -2
  3. package/dist/commands/fix.d.ts +3 -1
  4. package/dist/commands/fix.js +45 -25
  5. package/dist/commands/generate.d.ts +2 -0
  6. package/dist/commands/generate.js +11 -0
  7. package/dist/engine/__tests__/optimizer.test.d.ts +1 -0
  8. package/dist/engine/__tests__/optimizer.test.js +9 -0
  9. package/dist/engine/loader.js +6 -2
  10. package/dist/engine/metaPrompt.d.ts +5 -0
  11. package/dist/engine/metaPrompt.js +55 -55
  12. package/dist/engine/optimizer.d.ts +7 -21
  13. package/dist/engine/optimizer.js +141 -252
  14. package/dist/engine/runner.d.ts +2 -0
  15. package/dist/engine/runner.js +56 -69
  16. package/dist/engine/shadowTester.d.ts +17 -2
  17. package/dist/engine/shadowTester.js +86 -128
  18. package/dist/providers/__tests__/custom.test.d.ts +1 -0
  19. package/dist/providers/__tests__/custom.test.js +9 -0
  20. package/dist/providers/custom.d.ts +6 -0
  21. package/dist/providers/custom.js +10 -0
  22. package/dist/providers/factory.d.ts +6 -0
  23. package/dist/providers/factory.js +38 -0
  24. package/dist/providers/gemini.d.ts +11 -0
  25. package/dist/providers/gemini.js +46 -0
  26. package/dist/scoring/__tests__/rag.test.d.ts +1 -0
  27. package/dist/scoring/__tests__/rag.test.js +10 -0
  28. package/dist/scoring/rag.d.ts +9 -0
  29. package/dist/scoring/rag.js +9 -0
  30. package/dist/services/cloud.service.js +1 -1
  31. package/dist/storage/database.js +1 -1
  32. package/dist/types/fix.d.ts +11 -0
  33. package/dist/types/index.d.ts +2 -1
  34. package/dist/types/test.d.ts +8 -0
  35. package/dist/types/test.js +2 -0
  36. package/dist/utils/config.js +11 -5
  37. package/dist/utils/interpolation.d.ts +4 -0
  38. package/dist/utils/interpolation.js +16 -0
  39. package/dist/utils/storage.d.ts +4 -0
  40. package/dist/utils/storage.js +26 -5
  41. package/dist/utils/validator.d.ts +2 -0
  42. package/dist/utils/validator.js +10 -0
  43. package/package.json +3 -2
package/README.md CHANGED
@@ -10,17 +10,26 @@ Industrial-grade testing framework for LLM prompts
10
10
 
11
11
  TunePrompt is a comprehensive testing framework designed specifically for Large Language Model (LLM) prompts. It helps developers validate, test, and optimize their prompts with industrial-grade reliability and accuracy.
12
12
 
13
+ ## 🚀 What's New in v1.1.1
14
+
15
+ The first production-ready release of **TunePrompt**, the industrial-grade testing framework for the modern LLM stack.
16
+
17
+ - **Multi-Provider Support**: Seamlessly test across **OpenAI**, **Anthropic**, **Gemini**, and **OpenRouter**.
18
+ - **Semantic Evaluation**: Advanced vector-based scoring to detect logic drift and nuance shifts.
19
+ - **Auto-Fix Engine (Premium)**: AI-powered prompt optimization for failing tests.
20
+ - **Cloud Orchestration**: Unified synchronization with the [TunePrompt Dashboard](https://www.tuneprompt.xyz).
21
+ - **Industrial CLI**: Built-in watch mode, CI/CD integration, and historical analytics.
22
+
13
23
  ## Features
14
24
 
15
- - **Multi-provider Support**: Test prompts across OpenAI, Anthropic, OpenRouter, and other LLM providers
16
- - **Semantic Testing**: Compare outputs using semantic similarity rather than exact matches
17
- - **JSON Validation**: Validate structured JSON outputs
18
- - **LLM-based Judging**: Use advanced LLMs to evaluate prompt quality
19
- - **Watch Mode**: Automatically re-run tests when files change
20
- - **CI/CD Integration**: Seamlessly integrate with your CI/CD pipeline
21
- - **Cloud Sync**: Upload results to the TunePrompt Cloud dashboard
22
- - **Auto-fix Engine**: Premium feature to automatically fix failing prompts using AI
23
- - **Detailed Reporting**: Comprehensive test reports with scores, methods, and durations
25
+ - **Multi-provider Support**: Native integration with Google Gemini, OpenAI, Anthropic, and OpenRouter.
26
+ - **Semantic Testing**: Compare outputs using high-precision embedding similarity.
27
+ - **JSON Validation**: Validate structured outputs with schema-aware checks.
28
+ - **LLM-based Judging**: Utilize advanced providers as evaluators for qualitative metrics.
29
+ - **Watch Mode**: Immediate feedback loop with automatic re-runs on file changes.
30
+ - **CI/CD Ready**: Native integration patterns for industrial deployment pipelines.
31
+ - **Cloud Sync**: Global telemetry and result storage via the dashboard.
32
+ - **Auto-fix Engine**: Iterative refinement loop for intelligent prompt repair.
24
33
 
25
34
  ## Installation
26
35
 
package/dist/cli.js CHANGED
@@ -45,6 +45,7 @@ const run_1 = require("./commands/run");
45
45
  const history_1 = require("./commands/history");
46
46
  const fix_1 = require("./commands/fix");
47
47
  const activate_1 = require("./commands/activate");
48
+ const generate_1 = require("./commands/generate");
48
49
  // Read version from package.json
49
50
  const packageJson = require('../package.json');
50
51
  // Load environment variables
@@ -78,8 +79,9 @@ program
78
79
  program
79
80
  .command('fix')
80
81
  .description('Auto-fix failing prompts using AI')
81
- .action(async () => {
82
- await (0, fix_1.fixCommand)();
82
+ .option('-y, --yes', 'Automatically apply the best fix without prompting')
83
+ .action(async (options) => {
84
+ await (0, fix_1.fixCommand)(options);
83
85
  });
84
86
  program
85
87
  .command('history')
@@ -123,6 +125,7 @@ program
123
125
  console.log(chalk_1.default.gray(`Last Verified: ${new Date(license.lastVerified).toLocaleDateString()}\n`));
124
126
  }
125
127
  });
128
+ (0, generate_1.registerGenerateCommand)(program);
126
129
  // Watch mode implementation
127
130
  async function runWatchMode(options) {
128
131
  const chokidar = require('chokidar');
@@ -1 +1,3 @@
1
- export declare function fixCommand(): Promise<void>;
1
+ export declare function fixCommand(options?: {
2
+ yes?: boolean;
3
+ }): Promise<void>;
@@ -46,7 +46,7 @@ const analytics_1 = require("../utils/analytics");
46
46
  const storage_1 = require("../utils/storage");
47
47
  const fs = __importStar(require("fs"));
48
48
  const errorHandler_1 = require("../utils/errorHandler");
49
- async function fixCommand() {
49
+ async function fixCommand(options = {}) {
50
50
  try {
51
51
  console.log(chalk_1.default.bold.cyan('\n🔧 TunePrompt Fix\n'));
52
52
  // License check with better error
@@ -65,44 +65,64 @@ async function fixCommand() {
65
65
  }
66
66
  console.log(chalk_1.default.yellow(`\nFound ${failedTests.length} failed test(s):\n`));
67
67
  failedTests.forEach((test, index) => {
68
- console.log(`${index + 1}. ${chalk_1.default.bold(test.description)}`);
68
+ const modelInfo = test.config?.model ? ` [Target: ${test.config.provider || 'unknown'}/${test.config.model}]` : '';
69
+ console.log(`${index + 1}. ${chalk_1.default.bold(test.description)}${chalk_1.default.cyan(modelInfo)}`);
69
70
  console.log(` Score: ${chalk_1.default.red(test.score.toFixed(2))} (threshold: ${test.threshold})`);
70
71
  });
71
72
  // Step 3: Ask which tests to fix
72
- const { selectedIndexes } = await inquirer_1.default.prompt([{
73
- type: 'checkbox',
74
- name: 'selectedIndexes',
75
- message: 'Which tests would you like to fix?',
76
- choices: failedTests.map((test, index) => ({
77
- name: `${test.description} (score: ${test.score.toFixed(2)})`,
78
- value: index,
79
- checked: true
80
- }))
81
- }]);
73
+ let selectedIndexes = [];
74
+ if (options.yes) {
75
+ selectedIndexes = failedTests.map((_, i) => i);
76
+ console.log(chalk_1.default.gray(`\nNon-interactive mode: Automatic selection of all ${failedTests.length} tests.`));
77
+ }
78
+ else {
79
+ const response = await inquirer_1.default.prompt([{
80
+ type: 'checkbox',
81
+ name: 'selectedIndexes',
82
+ message: 'Which tests would you like to fix?',
83
+ choices: failedTests.map((test, index) => {
84
+ const modelInfo = test.config?.model ? ` [${test.config.provider || 'unknown'}/${test.config.model}]` : '';
85
+ return {
86
+ name: `${test.description} (score: ${test.score.toFixed(2)})${modelInfo}`,
87
+ value: index,
88
+ checked: true
89
+ };
90
+ })
91
+ }]);
92
+ selectedIndexes = response.selectedIndexes;
93
+ }
82
94
  if (selectedIndexes.length === 0) {
83
95
  console.log(chalk_1.default.gray('\nNo tests selected. Exiting.'));
84
96
  return;
85
97
  }
86
98
  // Step 4: Optimize each selected test
87
99
  const optimizer = new optimizer_1.PromptOptimizer();
100
+ // Load suite tests for each failing test to support anti-regression
101
+ const { getSuiteTests } = await Promise.resolve().then(() => __importStar(require('../utils/storage')));
88
102
  for (const index of selectedIndexes) {
89
103
  const test = failedTests[index];
90
- console.log(chalk_1.default.bold(`\n\n━━━ Fixing: ${test.description} ━━━\n`));
104
+ const suite = await getSuiteTests(test.id);
105
+ const modelInfo = test.config?.model ? ` (Target: ${test.config.model})` : '';
106
+ console.log(chalk_1.default.bold(`\n\n━━━ Fixing: ${test.description}${modelInfo} ━━━\n`));
91
107
  try {
92
- const result = await optimizer.optimize(test);
108
+ const result = await optimizer.optimize(test, suite);
93
109
  await showDiff(result.originalPrompt, result.optimizedPrompt, result.reasoning);
94
110
  // Ask if user wants to apply
95
- const { action } = await inquirer_1.default.prompt([{
96
- type: 'rawlist',
97
- name: 'action',
98
- message: 'What would you like to do?',
99
- choices: [
100
- { name: 'Apply this fix (Updates your test file)', value: 'apply' },
101
- { name: 'Edit before applying', value: 'edit' },
102
- { name: 'Skip this fix', value: 'skip' }
103
- ],
104
- default: 0
105
- }]);
111
+ let action = 'apply';
112
+ if (!options.yes) {
113
+ const response = await inquirer_1.default.prompt([{
114
+ type: 'rawlist',
115
+ name: 'action',
116
+ message: 'What would you like to do?',
117
+ choices: [
118
+ { name: 'Apply this fix (Updates your test file)', value: 'apply' },
119
+ { name: 'Edit before applying', value: 'edit' },
120
+ { name: 'Skip this fix', value: 'skip' }
121
+ ],
122
+ default: 0
123
+ }]);
124
+ action = response.action;
125
+ }
106
126
  if (action === 'apply') {
107
127
  await applyFix(test, result.optimizedPrompt);
108
128
  console.log(`\n${chalk_1.default.bgGreen.black(' DONE ')} ${chalk_1.default.green('Prompt updated in:')} ${chalk_1.default.bold(test.id)}`);
@@ -0,0 +1,2 @@
1
+ import { Command } from 'commander';
2
+ export declare function registerGenerateCommand(program: Command): void;
@@ -0,0 +1,11 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.registerGenerateCommand = registerGenerateCommand;
4
+ function registerGenerateCommand(program) {
5
+ program
6
+ .command('generate')
7
+ .description('Generate a new test file interactively')
8
+ .action(async () => {
9
+ console.log('Wizard started');
10
+ });
11
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,9 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const optimizer_1 = require("../optimizer");
4
+ describe('Optimizer Refinement Loop', () => {
5
+ it('should support tracking Max Iterations', () => {
6
+ const opt = new optimizer_1.PromptOptimizer({ maxIterations: 3 });
7
+ expect(opt.maxIterations).toBe(3);
8
+ });
9
+ });
@@ -44,12 +44,16 @@ class TestLoader {
44
44
  if (ext === '.json') {
45
45
  const data = JSON.parse(content);
46
46
  const tests = Array.isArray(data) ? data : [data];
47
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
47
+ return tests
48
+ .filter((t) => t && typeof t === 'object' && t.prompt)
49
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
48
50
  }
49
51
  else if (ext === '.yaml' || ext === '.yml') {
50
52
  const data = yaml.load(content);
51
53
  const tests = Array.isArray(data) ? data : [data];
52
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
54
+ return tests
55
+ .filter((t) => t && typeof t === 'object' && t.prompt)
56
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
53
57
  }
54
58
  else {
55
59
  throw new Error(`Unsupported file format: ${ext}`);
@@ -5,6 +5,11 @@ export interface MetaPromptInput {
5
5
  actualOutput: string;
6
6
  errorType: string;
7
7
  errorMessage: string;
8
+ passingExamples?: {
9
+ input?: Record<string, any>;
10
+ output: string;
11
+ }[];
12
+ failureFeedback?: string;
8
13
  }
9
14
  export declare function generateOptimizationPrompt(input: MetaPromptInput): string;
10
15
  export declare function generateJSONFixPrompt(input: MetaPromptInput): string;
@@ -3,9 +3,27 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.generateOptimizationPrompt = generateOptimizationPrompt;
4
4
  exports.generateJSONFixPrompt = generateJSONFixPrompt;
5
5
  exports.generateSemanticFixPrompt = generateSemanticFixPrompt;
6
+ const COMMON_JSON_FOOTER = `
7
+ === OUTPUT FORMAT ===
8
+
9
+ Return ONLY valid JSON (no markdown, no explanations):
10
+
11
+ {
12
+ "analysis": "Brief explanation of why it failed (2-3 sentences)",
13
+ "candidateA": {
14
+ "prompt": "Your rewritten prompt here",
15
+ "reasoning": "Why this approach works"
16
+ },
17
+ "candidateB": {
18
+ "prompt": "Your alternative rewritten prompt here",
19
+ "reasoning": "Why this approach works"
20
+ }
21
+ }
22
+
23
+ CRITICAL: Return ONLY the JSON object. No preamble, no markdown backticks.`;
6
24
  function generateOptimizationPrompt(input) {
7
- const { originalPrompt, testInput, expectedOutput, actualOutput, errorType, errorMessage } = input;
8
- return `You are an elite LLM Prompt Engineer with expertise in Claude, GPT-4, and advanced prompting techniques.
25
+ const { originalPrompt, testInput, expectedOutput, actualOutput, errorType, errorMessage, passingExamples, failureFeedback } = input;
26
+ let prompt = `You are an elite LLM Prompt Engineer with expertise in Claude, GPT-4, and advanced prompting techniques.
9
27
 
10
28
  A prompt has failed a critical test case. Your mission is to rewrite it to pass the test while maintaining the original intent.
11
29
 
@@ -33,48 +51,28 @@ ${actualOutput}
33
51
  [Error Type]: ${errorType}
34
52
  [Error Details]: ${errorMessage}
35
53
 
36
- === YOUR TASK ===
37
-
38
- 1. **Root Cause Analysis**: Identify WHY the prompt failed
39
- - Missing instructions?
40
- - Ambiguous wording?
41
- - Wrong output format specified?
42
- - Tone mismatch?
43
- - Missing constraints?
44
-
45
- 2. **Prompt Engineering Fixes**: Apply advanced techniques:
46
- - ✅ Chain-of-Thought reasoning (if logic is needed)
47
- - ✅ XML tags for structure (<instructions>, <output_format>)
48
- - ✅ Few-shot examples (if pattern recognition helps)
49
- - ✅ Explicit constraints (length, format, tone)
50
- - ✅ Role assignment ("You are a [expert]...")
51
- - ✅ Output format specifications (JSON schema, markdown, etc.)
52
-
53
- 3. **Generate TWO Candidate Prompts**:
54
- - Candidate A: Conservative fix (minimal changes)
55
- - Candidate B: Aggressive rewrite (best practices applied)
56
-
57
- === OUTPUT FORMAT ===
54
+ ${failureFeedback ? `[ITERATIVE FEEDBACK]:
55
+ The previous fix failed because: ${failureFeedback}
56
+ PLEASE ANALYZE THIS FAILURE AND ADJUST YOUR STRATEGY.
57
+ ` : ''}
58
58
 
59
- Return ONLY valid JSON (no markdown, no explanations):
59
+ ${passingExamples && passingExamples.length > 0 ? `### Successful Performance Examples
60
+ Here are examples of inputs/outputs that work well. Inject these as a few-shot examples into the new prompt.
61
+ ${JSON.stringify(passingExamples, null, 2)}
62
+ ` : ''}
60
63
 
61
- {
62
- "analysis": "Brief explanation of why it failed (2-3 sentences)",
63
- "candidateA": {
64
- "prompt": "Your rewritten prompt here",
65
- "reasoning": "Why this approach works"
66
- },
67
- "candidateB": {
68
- "prompt": "Your alternative rewritten prompt here",
69
- "reasoning": "Why this approach works"
70
- }
71
- }
64
+ === YOUR TASK ===
72
65
 
73
- CRITICAL: Return ONLY the JSON object. No preamble, no markdown backticks.`;
66
+ 1. **Root Cause Analysis**: Identify WHY the prompt failed.
67
+ 2. **Prompt Engineering Fixes**: Apply advanced techniques like XML tags, Chain-of-Thought, and explicit JSON schemas.
68
+ 3. **Generate TWO Candidate Prompts**:
69
+ - Candidate A: Conservative fix (minimal changes).
70
+ - Candidate B: Aggressive rewrite (best practices applied).
71
+ `;
72
+ return prompt + COMMON_JSON_FOOTER;
74
73
  }
75
- // Specialized prompts for different error types
76
74
  function generateJSONFixPrompt(input) {
77
- return `You are a JSON Schema expert. The following prompt failed to produce valid JSON.
75
+ let prompt = `You are a JSON Schema expert. The following prompt failed to produce valid JSON.
78
76
 
79
77
  Original Prompt:
80
78
  """
@@ -86,21 +84,22 @@ Expected JSON Structure:
86
84
  ${input.expectedOutput}
87
85
  """
88
86
 
89
- Actual Output (Invalid JSON):
90
- """
91
- ${input.actualOutput}
92
- """
87
+ ${input.failureFeedback ? `[FAILURE FEEDBACK]: ${input.failureFeedback}` : ''}
88
+
89
+ ${input.passingExamples && input.passingExamples.length > 0 ? `### Successful Performance Examples
90
+ Here are examples of inputs/outputs that work well. Inject these as a few-shot examples into the new prompt.
91
+ ${JSON.stringify(input.passingExamples, null, 2)}
92
+ ` : ''}
93
93
 
94
94
  Rewrite the prompt to GUARANTEE valid JSON output. Use these techniques:
95
95
  1. Explicitly state: "Return ONLY valid JSON, no markdown, no explanations"
96
96
  2. Provide the exact schema structure
97
97
  3. Add output format examples
98
- 4. Use XML tags like <json_output> to delimit the response area
99
-
100
- Return your improved prompt as plain text (not JSON).`;
98
+ `;
99
+ return prompt + COMMON_JSON_FOOTER;
101
100
  }
102
101
  function generateSemanticFixPrompt(input) {
103
- return `The prompt failed semantic similarity testing (score: too low).
102
+ let prompt = `The prompt failed semantic similarity testing.
104
103
 
105
104
  Original Prompt:
106
105
  """
@@ -112,18 +111,19 @@ Expected Meaning/Content:
112
111
  ${input.expectedOutput}
113
112
  """
114
113
 
115
- What the Model Actually Said:
114
+ Actual Output:
116
115
  """
117
116
  ${input.actualOutput}
118
117
  """
119
118
 
120
- The model's response was off-topic or missed key information.
119
+ ${input.failureFeedback ? `[FAILURE FEEDBACK]: ${input.failureFeedback}` : ''}
121
120
 
122
- Rewrite the prompt to ensure the model:
123
- 1. Stays on topic
124
- 2. Includes all required information from the expected output
125
- 3. Uses clear, specific instructions
126
- 4. Avoids ambiguity
121
+ ${input.passingExamples && input.passingExamples.length > 0 ? `### Successful Performance Examples
122
+ Here are examples of inputs/outputs that work well. Inject these as a few-shot examples into the new prompt.
123
+ ${JSON.stringify(input.passingExamples, null, 2)}
124
+ ` : ''}
127
125
 
128
- Return your improved prompt as plain text.`;
126
+ Rewrite to ensure the model stays on topic and includes all required information.
127
+ `;
128
+ return prompt + COMMON_JSON_FOOTER;
129
129
  }
@@ -1,27 +1,13 @@
1
1
  import { FailedTest, OptimizationResult } from '../types/fix';
2
2
  export declare class PromptOptimizer {
3
- private anthropic?;
4
- private openai?;
5
- private openrouter?;
6
- constructor();
3
+ maxIterations: number;
4
+ constructor(options?: {
5
+ maxIterations?: number;
6
+ });
7
7
  /**
8
- * Main optimization method
9
- */
10
- optimize(failedTest: FailedTest): Promise<OptimizationResult>;
11
- /**
12
- * Select appropriate meta-prompt based on error type
13
- */
14
- private selectMetaPrompt;
15
- /**
16
- * Generate multiple fix candidates using available LLMs with fallback
8
+ * Main optimization method with Anti-Regression and Iterative Refinement
17
9
  */
10
+ optimize(failedTest: FailedTest, suite: FailedTest[]): Promise<OptimizationResult>;
11
+ private getMetaPrompt;
18
12
  private generateCandidates;
19
- /**
20
- * Shadow test each candidate and return the best one
21
- */
22
- private selectBestCandidate;
23
- /**
24
- * Fallback prompt improvement - generates a clean rewritten prompt
25
- */
26
- private createFallbackPrompt;
27
13
  }