tuneprompt 1.0.7 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +18 -9
  2. package/dist/cli.js +5 -2
  3. package/dist/commands/fix.d.ts +3 -1
  4. package/dist/commands/fix.js +45 -25
  5. package/dist/commands/generate.d.ts +2 -0
  6. package/dist/commands/generate.js +11 -0
  7. package/dist/engine/__tests__/optimizer.test.d.ts +1 -0
  8. package/dist/engine/__tests__/optimizer.test.js +9 -0
  9. package/dist/engine/loader.js +6 -2
  10. package/dist/engine/metaPrompt.d.ts +5 -0
  11. package/dist/engine/metaPrompt.js +55 -55
  12. package/dist/engine/optimizer.d.ts +7 -21
  13. package/dist/engine/optimizer.js +141 -252
  14. package/dist/engine/runner.d.ts +2 -0
  15. package/dist/engine/runner.js +56 -69
  16. package/dist/engine/shadowTester.d.ts +17 -2
  17. package/dist/engine/shadowTester.js +86 -128
  18. package/dist/providers/__tests__/custom.test.d.ts +1 -0
  19. package/dist/providers/__tests__/custom.test.js +9 -0
  20. package/dist/providers/custom.d.ts +6 -0
  21. package/dist/providers/custom.js +10 -0
  22. package/dist/providers/factory.d.ts +6 -0
  23. package/dist/providers/factory.js +38 -0
  24. package/dist/providers/gemini.d.ts +11 -0
  25. package/dist/providers/gemini.js +46 -0
  26. package/dist/scoring/__tests__/rag.test.d.ts +1 -0
  27. package/dist/scoring/__tests__/rag.test.js +10 -0
  28. package/dist/scoring/rag.d.ts +9 -0
  29. package/dist/scoring/rag.js +9 -0
  30. package/dist/services/cloud.service.js +1 -1
  31. package/dist/storage/database.js +1 -1
  32. package/dist/types/fix.d.ts +11 -0
  33. package/dist/types/index.d.ts +2 -1
  34. package/dist/types/test.d.ts +8 -0
  35. package/dist/types/test.js +2 -0
  36. package/dist/utils/config.js +11 -5
  37. package/dist/utils/interpolation.d.ts +4 -0
  38. package/dist/utils/interpolation.js +16 -0
  39. package/dist/utils/storage.d.ts +4 -0
  40. package/dist/utils/storage.js +26 -5
  41. package/dist/utils/validator.d.ts +2 -0
  42. package/dist/utils/validator.js +10 -0
  43. package/package.json +3 -2
package/README.md CHANGED
@@ -10,17 +10,26 @@ Industrial-grade testing framework for LLM prompts
10
10
 
11
11
  TunePrompt is a comprehensive testing framework designed specifically for Large Language Model (LLM) prompts. It helps developers validate, test, and optimize their prompts with industrial-grade reliability and accuracy.
12
12
 
13
+ ## 🚀 What's New in v1.1.1
14
+
15
+ The first production-ready release of **TunePrompt**, the industrial-grade testing framework for the modern LLM stack.
16
+
17
+ - **Multi-Provider Support**: Seamlessly test across **OpenAI**, **Anthropic**, **Gemini**, and **OpenRouter**.
18
+ - **Semantic Evaluation**: Advanced vector-based scoring to detect logic drift and nuance shifts.
19
+ - **Auto-Fix Engine (Premium)**: AI-powered prompt optimization for failing tests.
20
+ - **Cloud Orchestration**: Unified synchronization with the [TunePrompt Dashboard](https://www.tuneprompt.xyz).
21
+ - **Industrial CLI**: Built-in watch mode, CI/CD integration, and historical analytics.
22
+
13
23
  ## Features
14
24
 
15
- - **Multi-provider Support**: Test prompts across OpenAI, Anthropic, OpenRouter, and other LLM providers
16
- - **Semantic Testing**: Compare outputs using semantic similarity rather than exact matches
17
- - **JSON Validation**: Validate structured JSON outputs
18
- - **LLM-based Judging**: Use advanced LLMs to evaluate prompt quality
19
- - **Watch Mode**: Automatically re-run tests when files change
20
- - **CI/CD Integration**: Seamlessly integrate with your CI/CD pipeline
21
- - **Cloud Sync**: Upload results to the TunePrompt Cloud dashboard
22
- - **Auto-fix Engine**: Premium feature to automatically fix failing prompts using AI
23
- - **Detailed Reporting**: Comprehensive test reports with scores, methods, and durations
25
+ - **Multi-provider Support**: Native integration with Google Gemini, OpenAI, Anthropic, and OpenRouter.
26
+ - **Semantic Testing**: Compare outputs using high-precision embedding similarity.
27
+ - **JSON Validation**: Validate structured outputs with schema-aware checks.
28
+ - **LLM-based Judging**: Utilize advanced providers as evaluators for qualitative metrics.
29
+ - **Watch Mode**: Immediate feedback loop with automatic re-runs on file changes.
30
+ - **CI/CD Ready**: Native integration patterns for industrial deployment pipelines.
31
+ - **Cloud Sync**: Global telemetry and result storage via the dashboard.
32
+ - **Auto-fix Engine**: Iterative refinement loop for intelligent prompt repair.
24
33
 
25
34
  ## Installation
26
35
 
package/dist/cli.js CHANGED
@@ -45,6 +45,7 @@ const run_1 = require("./commands/run");
45
45
  const history_1 = require("./commands/history");
46
46
  const fix_1 = require("./commands/fix");
47
47
  const activate_1 = require("./commands/activate");
48
+ const generate_1 = require("./commands/generate");
48
49
  // Read version from package.json
49
50
  const packageJson = require('../package.json');
50
51
  // Load environment variables
@@ -78,8 +79,9 @@ program
78
79
  program
79
80
  .command('fix')
80
81
  .description('Auto-fix failing prompts using AI')
81
- .action(async () => {
82
- await (0, fix_1.fixCommand)();
82
+ .option('-y, --yes', 'Automatically apply the best fix without prompting')
83
+ .action(async (options) => {
84
+ await (0, fix_1.fixCommand)(options);
83
85
  });
84
86
  program
85
87
  .command('history')
@@ -123,6 +125,7 @@ program
123
125
  console.log(chalk_1.default.gray(`Last Verified: ${new Date(license.lastVerified).toLocaleDateString()}\n`));
124
126
  }
125
127
  });
128
+ (0, generate_1.registerGenerateCommand)(program);
126
129
  // Watch mode implementation
127
130
  async function runWatchMode(options) {
128
131
  const chokidar = require('chokidar');
@@ -1 +1,3 @@
1
- export declare function fixCommand(): Promise<void>;
1
+ export declare function fixCommand(options?: {
2
+ yes?: boolean;
3
+ }): Promise<void>;
@@ -46,7 +46,7 @@ const analytics_1 = require("../utils/analytics");
46
46
  const storage_1 = require("../utils/storage");
47
47
  const fs = __importStar(require("fs"));
48
48
  const errorHandler_1 = require("../utils/errorHandler");
49
- async function fixCommand() {
49
+ async function fixCommand(options = {}) {
50
50
  try {
51
51
  console.log(chalk_1.default.bold.cyan('\n🔧 TunePrompt Fix\n'));
52
52
  // License check with better error
@@ -65,44 +65,64 @@ async function fixCommand() {
65
65
  }
66
66
  console.log(chalk_1.default.yellow(`\nFound ${failedTests.length} failed test(s):\n`));
67
67
  failedTests.forEach((test, index) => {
68
- console.log(`${index + 1}. ${chalk_1.default.bold(test.description)}`);
68
+ const modelInfo = test.config?.model ? ` [Target: ${test.config.provider || 'unknown'}/${test.config.model}]` : '';
69
+ console.log(`${index + 1}. ${chalk_1.default.bold(test.description)}${chalk_1.default.cyan(modelInfo)}`);
69
70
  console.log(` Score: ${chalk_1.default.red(test.score.toFixed(2))} (threshold: ${test.threshold})`);
70
71
  });
71
72
  // Step 3: Ask which tests to fix
72
- const { selectedIndexes } = await inquirer_1.default.prompt([{
73
- type: 'checkbox',
74
- name: 'selectedIndexes',
75
- message: 'Which tests would you like to fix?',
76
- choices: failedTests.map((test, index) => ({
77
- name: `${test.description} (score: ${test.score.toFixed(2)})`,
78
- value: index,
79
- checked: true
80
- }))
81
- }]);
73
+ let selectedIndexes = [];
74
+ if (options.yes) {
75
+ selectedIndexes = failedTests.map((_, i) => i);
76
+ console.log(chalk_1.default.gray(`\nNon-interactive mode: Automatic selection of all ${failedTests.length} tests.`));
77
+ }
78
+ else {
79
+ const response = await inquirer_1.default.prompt([{
80
+ type: 'checkbox',
81
+ name: 'selectedIndexes',
82
+ message: 'Which tests would you like to fix?',
83
+ choices: failedTests.map((test, index) => {
84
+ const modelInfo = test.config?.model ? ` [${test.config.provider || 'unknown'}/${test.config.model}]` : '';
85
+ return {
86
+ name: `${test.description} (score: ${test.score.toFixed(2)})${modelInfo}`,
87
+ value: index,
88
+ checked: true
89
+ };
90
+ })
91
+ }]);
92
+ selectedIndexes = response.selectedIndexes;
93
+ }
82
94
  if (selectedIndexes.length === 0) {
83
95
  console.log(chalk_1.default.gray('\nNo tests selected. Exiting.'));
84
96
  return;
85
97
  }
86
98
  // Step 4: Optimize each selected test
87
99
  const optimizer = new optimizer_1.PromptOptimizer();
100
+ // Load suite tests for each failing test to support anti-regression
101
+ const { getSuiteTests } = await Promise.resolve().then(() => __importStar(require('../utils/storage')));
88
102
  for (const index of selectedIndexes) {
89
103
  const test = failedTests[index];
90
- console.log(chalk_1.default.bold(`\n\n━━━ Fixing: ${test.description} ━━━\n`));
104
+ const suite = await getSuiteTests(test.id);
105
+ const modelInfo = test.config?.model ? ` (Target: ${test.config.model})` : '';
106
+ console.log(chalk_1.default.bold(`\n\n━━━ Fixing: ${test.description}${modelInfo} ━━━\n`));
91
107
  try {
92
- const result = await optimizer.optimize(test);
108
+ const result = await optimizer.optimize(test, suite);
93
109
  await showDiff(result.originalPrompt, result.optimizedPrompt, result.reasoning);
94
110
  // Ask if user wants to apply
95
- const { action } = await inquirer_1.default.prompt([{
96
- type: 'rawlist',
97
- name: 'action',
98
- message: 'What would you like to do?',
99
- choices: [
100
- { name: 'Apply this fix (Updates your test file)', value: 'apply' },
101
- { name: 'Edit before applying', value: 'edit' },
102
- { name: 'Skip this fix', value: 'skip' }
103
- ],
104
- default: 0
105
- }]);
111
+ let action = 'apply';
112
+ if (!options.yes) {
113
+ const response = await inquirer_1.default.prompt([{
114
+ type: 'rawlist',
115
+ name: 'action',
116
+ message: 'What would you like to do?',
117
+ choices: [
118
+ { name: 'Apply this fix (Updates your test file)', value: 'apply' },
119
+ { name: 'Edit before applying', value: 'edit' },
120
+ { name: 'Skip this fix', value: 'skip' }
121
+ ],
122
+ default: 0
123
+ }]);
124
+ action = response.action;
125
+ }
106
126
  if (action === 'apply') {
107
127
  await applyFix(test, result.optimizedPrompt);
108
128
  console.log(`\n${chalk_1.default.bgGreen.black(' DONE ')} ${chalk_1.default.green('Prompt updated in:')} ${chalk_1.default.bold(test.id)}`);
@@ -0,0 +1,2 @@
1
+ import { Command } from 'commander';
2
+ export declare function registerGenerateCommand(program: Command): void;
@@ -0,0 +1,11 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.registerGenerateCommand = registerGenerateCommand;
4
+ function registerGenerateCommand(program) {
5
+ program
6
+ .command('generate')
7
+ .description('Generate a new test file interactively')
8
+ .action(async () => {
9
+ console.log('Wizard started');
10
+ });
11
+ }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,9 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const optimizer_1 = require("../optimizer");
4
+ describe('Optimizer Refinement Loop', () => {
5
+ it('should support tracking Max Iterations', () => {
6
+ const opt = new optimizer_1.PromptOptimizer({ maxIterations: 3 });
7
+ expect(opt.maxIterations).toBe(3);
8
+ });
9
+ });
@@ -44,12 +44,16 @@ class TestLoader {
44
44
  if (ext === '.json') {
45
45
  const data = JSON.parse(content);
46
46
  const tests = Array.isArray(data) ? data : [data];
47
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
47
+ return tests
48
+ .filter((t) => t && typeof t === 'object' && t.prompt)
49
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
48
50
  }
49
51
  else if (ext === '.yaml' || ext === '.yml') {
50
52
  const data = yaml.load(content);
51
53
  const tests = Array.isArray(data) ? data : [data];
52
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
54
+ return tests
55
+ .filter((t) => t && typeof t === 'object' && t.prompt)
56
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
53
57
  }
54
58
  else {
55
59
  throw new Error(`Unsupported file format: ${ext}`);
@@ -5,6 +5,11 @@ export interface MetaPromptInput {
5
5
  actualOutput: string;
6
6
  errorType: string;
7
7
  errorMessage: string;
8
+ passingExamples?: {
9
+ input?: Record<string, any>;
10
+ output: string;
11
+ }[];
12
+ failureFeedback?: string;
8
13
  }
9
14
  export declare function generateOptimizationPrompt(input: MetaPromptInput): string;
10
15
  export declare function generateJSONFixPrompt(input: MetaPromptInput): string;
@@ -3,9 +3,27 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.generateOptimizationPrompt = generateOptimizationPrompt;
4
4
  exports.generateJSONFixPrompt = generateJSONFixPrompt;
5
5
  exports.generateSemanticFixPrompt = generateSemanticFixPrompt;
6
+ const COMMON_JSON_FOOTER = `
7
+ === OUTPUT FORMAT ===
8
+
9
+ Return ONLY valid JSON (no markdown, no explanations):
10
+
11
+ {
12
+ "analysis": "Brief explanation of why it failed (2-3 sentences)",
13
+ "candidateA": {
14
+ "prompt": "Your rewritten prompt here",
15
+ "reasoning": "Why this approach works"
16
+ },
17
+ "candidateB": {
18
+ "prompt": "Your alternative rewritten prompt here",
19
+ "reasoning": "Why this approach works"
20
+ }
21
+ }
22
+
23
+ CRITICAL: Return ONLY the JSON object. No preamble, no markdown backticks.`;
6
24
  function generateOptimizationPrompt(input) {
7
- const { originalPrompt, testInput, expectedOutput, actualOutput, errorType, errorMessage } = input;
8
- return `You are an elite LLM Prompt Engineer with expertise in Claude, GPT-4, and advanced prompting techniques.
25
+ const { originalPrompt, testInput, expectedOutput, actualOutput, errorType, errorMessage, passingExamples, failureFeedback } = input;
26
+ let prompt = `You are an elite LLM Prompt Engineer with expertise in Claude, GPT-4, and advanced prompting techniques.
9
27
 
10
28
  A prompt has failed a critical test case. Your mission is to rewrite it to pass the test while maintaining the original intent.
11
29
 
@@ -33,48 +51,28 @@ ${actualOutput}
33
51
  [Error Type]: ${errorType}
34
52
  [Error Details]: ${errorMessage}
35
53
 
36
- === YOUR TASK ===
37
-
38
- 1. **Root Cause Analysis**: Identify WHY the prompt failed
39
- - Missing instructions?
40
- - Ambiguous wording?
41
- - Wrong output format specified?
42
- - Tone mismatch?
43
- - Missing constraints?
44
-
45
- 2. **Prompt Engineering Fixes**: Apply advanced techniques:
46
- - ✅ Chain-of-Thought reasoning (if logic is needed)
47
- - ✅ XML tags for structure (<instructions>, <output_format>)
48
- - ✅ Few-shot examples (if pattern recognition helps)
49
- - ✅ Explicit constraints (length, format, tone)
50
- - ✅ Role assignment ("You are a [expert]...")
51
- - ✅ Output format specifications (JSON schema, markdown, etc.)
52
-
53
- 3. **Generate TWO Candidate Prompts**:
54
- - Candidate A: Conservative fix (minimal changes)
55
- - Candidate B: Aggressive rewrite (best practices applied)
56
-
57
- === OUTPUT FORMAT ===
54
+ ${failureFeedback ? `[ITERATIVE FEEDBACK]:
55
+ The previous fix failed because: ${failureFeedback}
56
+ PLEASE ANALYZE THIS FAILURE AND ADJUST YOUR STRATEGY.
57
+ ` : ''}
58
58
 
59
- Return ONLY valid JSON (no markdown, no explanations):
59
+ ${passingExamples && passingExamples.length > 0 ? `### Successful Performance Examples
60
+ Here are examples of inputs/outputs that work well. Inject these as a few-shot examples into the new prompt.
61
+ ${JSON.stringify(passingExamples, null, 2)}
62
+ ` : ''}
60
63
 
61
- {
62
- "analysis": "Brief explanation of why it failed (2-3 sentences)",
63
- "candidateA": {
64
- "prompt": "Your rewritten prompt here",
65
- "reasoning": "Why this approach works"
66
- },
67
- "candidateB": {
68
- "prompt": "Your alternative rewritten prompt here",
69
- "reasoning": "Why this approach works"
70
- }
71
- }
64
+ === YOUR TASK ===
72
65
 
73
- CRITICAL: Return ONLY the JSON object. No preamble, no markdown backticks.`;
66
+ 1. **Root Cause Analysis**: Identify WHY the prompt failed.
67
+ 2. **Prompt Engineering Fixes**: Apply advanced techniques like XML tags, Chain-of-Thought, and explicit JSON schemas.
68
+ 3. **Generate TWO Candidate Prompts**:
69
+ - Candidate A: Conservative fix (minimal changes).
70
+ - Candidate B: Aggressive rewrite (best practices applied).
71
+ `;
72
+ return prompt + COMMON_JSON_FOOTER;
74
73
  }
75
- // Specialized prompts for different error types
76
74
  function generateJSONFixPrompt(input) {
77
- return `You are a JSON Schema expert. The following prompt failed to produce valid JSON.
75
+ let prompt = `You are a JSON Schema expert. The following prompt failed to produce valid JSON.
78
76
 
79
77
  Original Prompt:
80
78
  """
@@ -86,21 +84,22 @@ Expected JSON Structure:
86
84
  ${input.expectedOutput}
87
85
  """
88
86
 
89
- Actual Output (Invalid JSON):
90
- """
91
- ${input.actualOutput}
92
- """
87
+ ${input.failureFeedback ? `[FAILURE FEEDBACK]: ${input.failureFeedback}` : ''}
88
+
89
+ ${input.passingExamples && input.passingExamples.length > 0 ? `### Successful Performance Examples
90
+ Here are examples of inputs/outputs that work well. Inject these as a few-shot examples into the new prompt.
91
+ ${JSON.stringify(input.passingExamples, null, 2)}
92
+ ` : ''}
93
93
 
94
94
  Rewrite the prompt to GUARANTEE valid JSON output. Use these techniques:
95
95
  1. Explicitly state: "Return ONLY valid JSON, no markdown, no explanations"
96
96
  2. Provide the exact schema structure
97
97
  3. Add output format examples
98
- 4. Use XML tags like <json_output> to delimit the response area
99
-
100
- Return your improved prompt as plain text (not JSON).`;
98
+ `;
99
+ return prompt + COMMON_JSON_FOOTER;
101
100
  }
102
101
  function generateSemanticFixPrompt(input) {
103
- return `The prompt failed semantic similarity testing (score: too low).
102
+ let prompt = `The prompt failed semantic similarity testing.
104
103
 
105
104
  Original Prompt:
106
105
  """
@@ -112,18 +111,19 @@ Expected Meaning/Content:
112
111
  ${input.expectedOutput}
113
112
  """
114
113
 
115
- What the Model Actually Said:
114
+ Actual Output:
116
115
  """
117
116
  ${input.actualOutput}
118
117
  """
119
118
 
120
- The model's response was off-topic or missed key information.
119
+ ${input.failureFeedback ? `[FAILURE FEEDBACK]: ${input.failureFeedback}` : ''}
121
120
 
122
- Rewrite the prompt to ensure the model:
123
- 1. Stays on topic
124
- 2. Includes all required information from the expected output
125
- 3. Uses clear, specific instructions
126
- 4. Avoids ambiguity
121
+ ${input.passingExamples && input.passingExamples.length > 0 ? `### Successful Performance Examples
122
+ Here are examples of inputs/outputs that work well. Inject these as a few-shot examples into the new prompt.
123
+ ${JSON.stringify(input.passingExamples, null, 2)}
124
+ ` : ''}
127
125
 
128
- Return your improved prompt as plain text.`;
126
+ Rewrite to ensure the model stays on topic and includes all required information.
127
+ `;
128
+ return prompt + COMMON_JSON_FOOTER;
129
129
  }
@@ -1,27 +1,13 @@
1
1
  import { FailedTest, OptimizationResult } from '../types/fix';
2
2
  export declare class PromptOptimizer {
3
- private anthropic?;
4
- private openai?;
5
- private openrouter?;
6
- constructor();
3
+ maxIterations: number;
4
+ constructor(options?: {
5
+ maxIterations?: number;
6
+ });
7
7
  /**
8
- * Main optimization method
9
- */
10
- optimize(failedTest: FailedTest): Promise<OptimizationResult>;
11
- /**
12
- * Select appropriate meta-prompt based on error type
13
- */
14
- private selectMetaPrompt;
15
- /**
16
- * Generate multiple fix candidates using available LLMs with fallback
8
+ * Main optimization method with Anti-Regression and Iterative Refinement
17
9
  */
10
+ optimize(failedTest: FailedTest, suite: FailedTest[]): Promise<OptimizationResult>;
11
+ private getMetaPrompt;
18
12
  private generateCandidates;
19
- /**
20
- * Shadow test each candidate and return the best one
21
- */
22
- private selectBestCandidate;
23
- /**
24
- * Fallback prompt improvement - generates a clean rewritten prompt
25
- */
26
- private createFallbackPrompt;
27
13
  }