tuneprompt 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,17 +10,26 @@ Industrial-grade testing framework for LLM prompts
10
10
 
11
11
  TunePrompt is a comprehensive testing framework designed specifically for Large Language Model (LLM) prompts. It helps developers validate, test, and optimize their prompts with industrial-grade reliability and accuracy.
12
12
 
13
+ ## 🚀 What's New in v1.1.2
14
+
15
+ The first production-ready release of **TunePrompt**, the industrial-grade testing framework for the modern LLM stack.
16
+
17
+ - **Multi-Provider Support**: Seamlessly test across **OpenAI**, **Anthropic**, **Gemini**, and **OpenRouter**.
18
+ - **Semantic Evaluation**: Advanced vector-based scoring to detect logic drift and nuance shifts.
19
+ - **Auto-Fix Engine (Premium)**: AI-powered prompt optimization for failing tests.
20
+ - **Cloud Orchestration**: Unified synchronization with the [TunePrompt Dashboard](https://www.tuneprompt.xyz).
21
+ - **Industrial CLI**: Built-in watch mode, CI/CD integration, and historical analytics.
22
+
13
23
  ## Features
14
24
 
15
- - **Multi-provider Support**: Test prompts across OpenAI, Anthropic, OpenRouter, and other LLM providers
16
- - **Semantic Testing**: Compare outputs using semantic similarity rather than exact matches
17
- - **JSON Validation**: Validate structured JSON outputs
18
- - **LLM-based Judging**: Use advanced LLMs to evaluate prompt quality
19
- - **Watch Mode**: Automatically re-run tests when files change
20
- - **CI/CD Integration**: Seamlessly integrate with your CI/CD pipeline
21
- - **Cloud Sync**: Upload results to the TunePrompt Cloud dashboard
22
- - **Auto-fix Engine**: Premium feature to automatically fix failing prompts using AI
23
- - **Detailed Reporting**: Comprehensive test reports with scores, methods, and durations
25
+ - **Multi-provider Support**: Native integration with Google Gemini, OpenAI, Anthropic, and OpenRouter.
26
+ - **Semantic Testing**: Compare outputs using high-precision embedding similarity.
27
+ - **JSON Validation**: Validate structured outputs with schema-aware checks.
28
+ - **LLM-based Judging**: Utilize advanced providers as evaluators for qualitative metrics.
29
+ - **Watch Mode**: Immediate feedback loop with automatic re-runs on file changes.
30
+ - **CI/CD Ready**: Native integration patterns for industrial deployment pipelines.
31
+ - **Cloud Sync**: Global telemetry and result storage via the dashboard.
32
+ - **Auto-fix Engine**: Iterative refinement loop for intelligent prompt repair.
24
33
 
25
34
  ## Installation
26
35
 
@@ -44,12 +44,16 @@ class TestLoader {
44
44
  if (ext === '.json') {
45
45
  const data = JSON.parse(content);
46
46
  const tests = Array.isArray(data) ? data : [data];
47
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
47
+ return tests
48
+ .filter((t) => t && typeof t === 'object' && t.prompt)
49
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
48
50
  }
49
51
  else if (ext === '.yaml' || ext === '.yml') {
50
52
  const data = yaml.load(content);
51
53
  const tests = Array.isArray(data) ? data : [data];
52
- return tests.map(t => ({ ...t, filePath: path.resolve(filePath) }));
54
+ return tests
55
+ .filter((t) => t && typeof t === 'object' && t.prompt)
56
+ .map(t => ({ ...t, filePath: path.resolve(filePath) }));
53
57
  }
54
58
  else {
55
59
  throw new Error(`Unsupported file format: ${ext}`);
@@ -130,7 +130,7 @@ class PromptOptimizer {
130
130
  // Pick a strong model for optimization if not defined
131
131
  const model = providerName === 'anthropic' ? 'claude-3-5-sonnet-latest' :
132
132
  providerName === 'openai' ? 'gpt-4o' :
133
- providerName === 'gemini' ? 'gemini-1.5-pro' : undefined;
133
+ providerName === 'gemini' ? 'gemini-2.0-flash' : undefined;
134
134
  if (!model)
135
135
  continue;
136
136
  const provider = factory_1.ProviderFactory.create(providerName, {
@@ -18,7 +18,7 @@ class TestRunner {
18
18
  const providerNames = ["openai", "anthropic", "openrouter", "gemini"];
19
19
  for (const name of providerNames) {
20
20
  const providerConfig = this.config.providers[name];
21
- if (providerConfig) {
21
+ if (providerConfig && providerConfig.apiKey) {
22
22
  this.providers.set(name, factory_1.ProviderFactory.create(name, providerConfig));
23
23
  }
24
24
  }
@@ -20,16 +20,32 @@ async function runShadowTest(candidatePrompt, test) {
20
20
  }
21
21
  const providerName = test.config?.provider;
22
22
  const model = test.config?.model;
23
- // If specific provider/model is requested, use it directly (Strict Mode)
23
+ // Determine providers to try
24
+ let providersToTry = [];
24
25
  if (providerName && model) {
26
+ providersToTry.push({ name: providerName, model });
27
+ }
28
+ // Fallback queue
29
+ const fallbackQueue = [
30
+ { name: 'anthropic', model: 'claude-3-5-sonnet-latest' },
31
+ { name: 'openai', model: 'gpt-4o' },
32
+ { name: 'gemini', model: 'gemini-2.0-flash' },
33
+ { name: 'openrouter', model: 'nvidia/nemotron-3-nano-30b-a3b:free' }
34
+ ];
35
+ for (const entry of fallbackQueue) {
36
+ if (entry.name !== providerName) {
37
+ providersToTry.push(entry);
38
+ }
39
+ }
40
+ let errors = [];
41
+ for (const target of providersToTry) {
25
42
  try {
26
- const apiKey = factory_1.ProviderFactory.getApiKey(providerName);
27
- if (!apiKey) {
28
- throw new Error(`No API key found for provider: ${providerName}`);
29
- }
30
- const provider = factory_1.ProviderFactory.create(providerName, {
43
+ const apiKey = factory_1.ProviderFactory.getApiKey(target.name);
44
+ if (!apiKey)
45
+ continue;
46
+ const provider = factory_1.ProviderFactory.create(target.name, {
31
47
  apiKey,
32
- model,
48
+ model: target.model || 'latest',
33
49
  maxTokens: 2000
34
50
  });
35
51
  const finalPrompt = (0, interpolation_1.interpolateVariables)(candidatePrompt, test.input);
@@ -44,12 +60,11 @@ async function runShadowTest(candidatePrompt, test) {
44
60
  };
45
61
  }
46
62
  catch (error) {
47
- console.log(`⚠️ Specified provider ${providerName} failed: ${error.message}`);
48
- throw new Error(`Failed to validate on target model: ${error.message}`);
63
+ errors.push(`${target.name}: ${error.message}`);
64
+ continue;
49
65
  }
50
66
  }
51
- // Phase 2 Decision: Fail fast if no provider/model is defined (Strict Awareness)
52
- throw new Error(`Test "${test.description}" lacks provider/model configuration. Validation aborted.`);
67
+ throw new Error(`Shadow test failed for all providers: ${errors.join(' | ')}`);
53
68
  }
54
69
  /**
55
70
  * Run a candidate prompt against multiple tests and return aggregate results
@@ -118,7 +118,7 @@ class TestDatabase {
118
118
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
119
119
  `);
120
120
  for (const result of run.results) {
121
- insertResult.run(result.id, run.id, result.testCase.description, typeof result.testCase.prompt === 'string' ? result.testCase.prompt : JSON.stringify(result.testCase.prompt), result.testCase.variables ? JSON.stringify(result.testCase.variables) : null, typeof result.testCase.expect === 'string' ? result.testCase.expect : JSON.stringify(result.testCase.expect), result.testCase.config ? JSON.stringify(result.testCase.config) : null, result.testCase.filePath || null, result.status, result.score, result.actualOutput, result.expectedOutput, result.error || null, result.metadata.duration, result.metadata.tokens || null, result.metadata.cost || null, result.metadata.provider || null);
121
+ insertResult.run(result.id, run.id, result.testCase.description || 'No description', typeof result.testCase.prompt === 'string' ? result.testCase.prompt : JSON.stringify(result.testCase.prompt), result.testCase.variables ? JSON.stringify(result.testCase.variables) : null, typeof result.testCase.expect === 'string' ? result.testCase.expect : JSON.stringify(result.testCase.expect), result.testCase.config ? JSON.stringify(result.testCase.config) : null, result.testCase.filePath || null, result.status, result.score, result.actualOutput, result.expectedOutput, result.error || null, result.metadata.duration, result.metadata.tokens || null, result.metadata.cost || null, result.metadata.provider || null);
122
122
  }
123
123
  }
124
124
  getRecentRuns(limit = 10) {
@@ -21,11 +21,11 @@ function validateConfig(config) {
21
21
  if (!config.providers || Object.keys(config.providers).length === 0) {
22
22
  throw new Error('At least one provider must be configured');
23
23
  }
24
- // Validate API keys
25
- for (const [provider, cfg] of Object.entries(config.providers)) {
26
- if (!cfg.apiKey) {
27
- throw new Error(`API key missing for provider: ${provider}`);
28
- }
24
+ // Validate API keys - ensure at least one provider is valid
25
+ const validProviders = Object.entries(config.providers)
26
+ .filter(([_, cfg]) => !!cfg.apiKey);
27
+ if (validProviders.length === 0) {
28
+ throw new Error('No valid API keys found. Please provide at least one API key in your .env file.');
29
29
  }
30
30
  return {
31
31
  threshold: config.threshold || 0.8,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tuneprompt",
3
- "version": "1.1.1",
3
+ "version": "1.1.2",
4
4
  "description": "Industrial-grade testing framework for LLM prompts",
5
5
  "repository": {
6
6
  "type": "git",