npm - @dotsetlabs/bellwether - Versions diffs - 1.0.2 → 1.0.3 - Mend

@dotsetlabs/bellwether 1.0.2 → 1.0.3

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (31)

package/CHANGELOG.md +23 -0
package/README.md +3 -2
package/dist/cache/response-cache.d.ts +4 -2
package/dist/cache/response-cache.js +68 -30
package/dist/cli/commands/check.js +78 -49
package/dist/cli/index.js +5 -3
package/dist/interview/interviewer.js +70 -50
package/dist/interview/orchestrator.js +49 -22
package/dist/llm/anthropic.js +49 -16
package/dist/llm/client.d.ts +2 -0
package/dist/llm/client.js +61 -0
package/dist/llm/ollama.js +9 -4
package/dist/llm/openai.js +34 -23
package/dist/transport/base-transport.d.ts +1 -1
package/dist/transport/http-transport.d.ts +2 -2
package/dist/transport/http-transport.js +26 -6
package/dist/transport/mcp-client.d.ts +18 -6
package/dist/transport/mcp-client.js +49 -19
package/dist/transport/sse-transport.d.ts +1 -1
package/dist/transport/sse-transport.js +4 -2
package/dist/transport/stdio-transport.d.ts +1 -1
package/dist/transport/stdio-transport.js +1 -1
package/dist/utils/timeout.d.ts +10 -2
package/dist/utils/timeout.js +9 -5
package/dist/version.js +1 -1
package/dist/workflow/executor.js +18 -13
package/dist/workflow/loader.js +4 -1
package/dist/workflow/state-tracker.js +22 -18
package/man/bellwether.1 +204 -0
package/man/bellwether.1.md +148 -0
package/package.json +6 -7

package/dist/interview/interviewer.js CHANGED Viewed

@@ -4,7 +4,7 @@ import { DEFAULT_PERSONA } from '../persona/builtins.js';
 import { getLogger, startTiming } from '../logging/logger.js';
 import { evaluateAssertions } from '../scenarios/evaluator.js';
 import { withTimeout, DEFAULT_TIMEOUTS, parallelLimit, createMutex } from '../utils/index.js';
-import { INTERVIEW, WORKFLOW, DISPLAY_LIMITS, SCHEMA_TESTING, OUTCOME_ASSESSMENT } from '../constants.js';
+import { INTERVIEW, WORKFLOW, DISPLAY_LIMITS, SCHEMA_TESTING, OUTCOME_ASSESSMENT, } from '../constants.js';
 import { generateSchemaTests } from './schema-test-generator.js';
 import { WorkflowDiscoverer } from '../workflow/discovery.js';
 import { WorkflowExecutor } from '../workflow/executor.js';
@@ -62,7 +62,8 @@ export class Interviewer {
         // Use multiple personas by default for better coverage
         // Fall back to DEFAULT_PERSONAS if no personas provided or empty array
         const providedPersonas = config?.personas;
-        this.personas = (providedPersonas && providedPersonas.length > 0) ? providedPersonas : DEFAULT_PERSONAS;
+        this.personas =
+            providedPersonas && providedPersonas.length > 0 ? providedPersonas : DEFAULT_PERSONAS;
         // Store cache reference for tool response and analysis caching
         this.cache = config?.cache;
         if (this.config.rateLimit?.enabled) {
@@ -270,12 +271,12 @@ export class Interviewer {
         };
         // Look for tools that reveal server constraints
         for (const toolName of INTERVIEW.CONSTRAINT_DISCOVERY_TOOLS) {
-            const tool = discovery.tools.find(t => t.name === toolName);
+            const tool = discovery.tools.find((t) => t.name === toolName);
             if (tool) {
                 try {
                     const result = await client.callTool(toolName, {});
                     if (result?.content) {
-                        const textContent = result.content.find(c => c.type === 'text');
+                        const textContent = result.content.find((c) => c.type === 'text');
                         if (textContent && 'text' in textContent) {
                             const text = String(textContent.text);
                             // Parse allowed directories from response
@@ -344,7 +345,7 @@ export class Interviewer {
         try {
             const parsed = JSON.parse(text);
             if (Array.isArray(parsed)) {
-                return parsed.filter(d => typeof d === 'string' && d.startsWith('/'));
+                return parsed.filter((d) => typeof d === 'string' && d.startsWith('/'));
             }
         }
         catch (error) {
@@ -443,7 +444,7 @@ export class Interviewer {
                 concurrency,
             }, 'Running persona interviews in parallel');
             // Create tasks for each persona
-            const personaTasks = this.personas.map(persona => async () => {
+            const personaTasks = this.personas.map((persona) => async () => {
                 progress.currentPersona = persona.name;
                 onProgress?.(progress);
                 const result = await this.interviewPersona(client, discovery, persona, toolCallMutex);
@@ -501,7 +502,10 @@ export class Interviewer {
             if (statefulEnabled) {
                 this.logger.info({ toolCount: orderedTools.length }, 'Stateful testing enabled');
             }
-            this.logger.info({ parallel: this.config.parallelTools && !statefulEnabled, concurrency: effectiveConcurrency }, 'Using check mode tool testing');
+            this.logger.info({
+                parallel: this.config.parallelTools && !statefulEnabled,
+                concurrency: effectiveConcurrency,
+            }, 'Using check mode tool testing');
             const statefulRunner = statefulEnabled
                 ? new StatefulTestRunner({ shareOutputs: statefulConfig?.shareOutputsBetweenTools ?? true })
                 : undefined;
@@ -516,13 +520,15 @@ export class Interviewer {
                 const toolData = toolInteractionsMap.get(profile.name);
                 if (toolData) {
                     toolData.interactions = profile.interactions;
-                    toolData.findingsByPersona = [{
+                    toolData.findingsByPersona = [
+                        {
                             personaId: 'check_mode',
                             personaName: 'Check Mode',
                             behavioralNotes: [],
                             limitations: [],
                             securityNotes: [],
-                        }];
+                        },
+                    ];
                 }
             }
             // Update persona stats with aggregated counts
@@ -557,7 +563,7 @@ export class Interviewer {
                         const scenarioResults = await this.executeToolScenarios(client, tool.name, customScenarios);
                         allScenarioResults.push(...scenarioResults);
                         // Convert scenarios to interview questions for integration with profiling
-                        questions = customScenarios.map(s => this.scenarioToQuestion(s));
+                        questions = customScenarios.map((s) => this.scenarioToQuestion(s));
                         // If not custom-only mode, also generate LLM questions (skip in fast CI mode)
                         if (!this.config.customScenariosOnly && !this.config.checkMode) {
                             const llmQuestions = await orchestrator.generateQuestions(tool, this.config.maxQuestionsPerTool, this.config.skipErrorTests);
@@ -568,7 +574,8 @@ export class Interviewer {
                         // No custom scenarios - generate questions
                         if (this.config.checkMode) {
                             // Fast CI mode: use fallback questions (no LLM call)
-                            questions = orchestrator.getFallbackQuestions(tool, this.config.skipErrorTests)
+                            questions = orchestrator
+                                .getFallbackQuestions(tool, this.config.skipErrorTests)
                                 .slice(0, this.config.maxQuestionsPerTool);
                         }
                         else {
@@ -589,8 +596,10 @@ export class Interviewer {
                             });
                             // If we have multiple failures, regenerate remaining questions with error context
                             // Skip in scenarios-only mode and fast CI mode
-                            if (!this.config.customScenariosOnly && !this.config.checkMode &&
-                                previousErrors.length >= 2 && personaInteractions.length < questions.length) {
+                            if (!this.config.customScenariosOnly &&
+                                !this.config.checkMode &&
+                                previousErrors.length >= 2 &&
+                                personaInteractions.length < questions.length) {
                                 const remaining = this.config.maxQuestionsPerTool - personaInteractions.length;
                                 if (remaining > 0) {
                                     this.logger.debug({ tool: tool.name, errors: previousErrors.length }, 'Regenerating questions after errors');
@@ -616,7 +625,7 @@ export class Interviewer {
                         };
                     }
                     else {
-                        personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map(i => ({
+                        personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map((i) => ({
                             question: i.question,
                             response: i.response,
                             error: i.error,
@@ -664,7 +673,9 @@ export class Interviewer {
             progress.promptsCompleted = 0;
             onProgress?.(progress);
             // Only create orchestrator if NOT in check mode (requires LLM)
-            const primaryOrchestrator = this.isCheckMode() ? null : this.createOrchestrator(this.personas[0]);
+            const primaryOrchestrator = this.isCheckMode()
+                ? null
+                : this.createOrchestrator(this.personas[0]);
             for (const prompt of discovery.prompts) {
                 progress.currentTool = `prompt:${prompt.name}`;
                 onProgress?.(progress);
@@ -678,7 +689,7 @@ export class Interviewer {
                     const scenarioResults = await this.executePromptScenarios(client, prompt.name, customScenarios);
                     allScenarioResults.push(...scenarioResults);
                     // Convert scenarios to prompt questions for profiling
-                    questions = customScenarios.map(s => ({
+                    questions = customScenarios.map((s) => ({
                         description: s.description,
                         args: s.args,
                     }));
@@ -688,7 +699,9 @@ export class Interviewer {
                         questions = [...questions, ...llmQuestions];
                     }
                 }
-                else if (!this.config.customScenariosOnly && !this.config.checkMode && primaryOrchestrator) {
+                else if (!this.config.customScenariosOnly &&
+                    !this.config.checkMode &&
+                    primaryOrchestrator) {
                     // No custom scenarios - generate LLM questions as usual
                     questions = await primaryOrchestrator.generatePromptQuestions(prompt, 2);
                 }
@@ -740,7 +753,7 @@ export class Interviewer {
                     };
                 }
                 else {
-                    profile = await primaryOrchestrator.synthesizePromptProfile(prompt, promptInteractions.map(i => ({
+                    profile = await primaryOrchestrator.synthesizePromptProfile(prompt, promptInteractions.map((i) => ({
                         question: i.question,
                         response: i.response,
                         error: i.error,
@@ -768,7 +781,9 @@ export class Interviewer {
             progress.resourcesCompleted = 0;
             onProgress?.(progress);
             // Only create orchestrator if NOT in check mode (requires LLM)
-            const primaryOrchestrator = this.isCheckMode() ? null : this.createOrchestrator(this.personas[0]);
+            const primaryOrchestrator = this.isCheckMode()
+                ? null
+                : this.createOrchestrator(this.personas[0]);
             for (const resource of discoveredResources) {
                 progress.currentTool = `resource:${resource.name}`;
                 onProgress?.(progress);
@@ -777,7 +792,9 @@ export class Interviewer {
                 let questions;
                 if (this.config.checkMode || !primaryOrchestrator) {
                     // Fast CI mode: use simple fallback question
-                    questions = [{ description: 'Basic resource read test', category: 'happy_path' }];
+                    questions = [
+                        { description: 'Basic resource read test', category: 'happy_path' },
+                    ];
                 }
                 else {
                     questions = await primaryOrchestrator.generateResourceQuestions(resource, 2);
@@ -787,8 +804,9 @@ export class Interviewer {
                     let response = null;
                     let error = null;
                     try {
+                        const abortController = new AbortController();
                         // Apply timeout to resource read to prevent indefinite hangs
-                        response = await withTimeout(client.readResource(resource.uri), this.config.resourceTimeout ?? DEFAULT_TIMEOUTS.resourceRead, `Resource read: ${resource.uri}`);
+                        response = await withTimeout(client.readResource(resource.uri, { signal: abortController.signal }), this.config.resourceTimeout ?? DEFAULT_TIMEOUTS.resourceRead, `Resource read: ${resource.uri}`, { abortController });
                         resourceReadCount++;
                     }
                     catch (e) {
@@ -829,7 +847,7 @@ export class Interviewer {
                     };
                 }
                 else {
-                    profile = await primaryOrchestrator.synthesizeResourceProfile(resource, resourceInteractions.map(i => ({
+                    profile = await primaryOrchestrator.synthesizeResourceProfile(resource, resourceInteractions.map((i) => ({
                         question: i.question,
                         response: i.response,
                         error: i.error,
@@ -838,13 +856,14 @@ export class Interviewer {
                 }
                 // Extract content preview from first successful read
                 let contentPreview;
-                const successfulRead = resourceInteractions.find(i => i.response && !i.error);
+                const successfulRead = resourceInteractions.find((i) => i.response && !i.error);
                 if (successfulRead?.response?.contents?.[0]) {
                     const content = successfulRead.response.contents[0];
                     if (content.text) {
-                        contentPreview = content.text.length > DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW
-                            ? `${content.text.substring(0, DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW)}...`
-                            : content.text;
+                        contentPreview =
+                            content.text.length > DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW
+                                ? `${content.text.substring(0, DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW)}...`
+                                : content.text;
                     }
                     else if (content.blob) {
                         contentPreview = `[Binary data: ${content.blob.length} bytes base64]`;
@@ -1058,7 +1077,7 @@ export class Interviewer {
                 if (response.isError) {
                     stats.errorCount++;
                     hadError = true;
-                    const errorContent = response.content?.find(c => c.type === 'text');
+                    const errorContent = response.content?.find((c) => c.type === 'text');
                     if (errorContent && 'text' in errorContent) {
                         error = String(errorContent.text);
                     }
@@ -1143,7 +1162,7 @@ export class Interviewer {
         // Extract allowed directories explicitly mentioned
         const allowedMatch = error.match(/allowed director(?:y|ies)[:\s]+([^\n]+)/i);
         if (allowedMatch) {
-            const dirs = allowedMatch[1].split(/[,\s]+/).filter(d => d.startsWith('/'));
+            const dirs = allowedMatch[1].split(/[,\s]+/).filter((d) => d.startsWith('/'));
             if (dirs.length > 0) {
                 const currentContext = orchestrator.getServerContext() ?? { allowedDirectories: [] };
                 const existingDirs = currentContext.allowedDirectories ?? [];
@@ -1197,7 +1216,7 @@ export class Interviewer {
                     toolCallMutex.release();
                 }
                 // Convert scenarios to interview questions
-                questions = customScenarios.map(s => this.scenarioToQuestion(s));
+                questions = customScenarios.map((s) => this.scenarioToQuestion(s));
                 // If not custom-only mode, also generate LLM questions
                 if (!this.config.customScenariosOnly) {
                     const llmQuestions = await orchestrator.generateQuestions(tool, this.config.maxQuestionsPerTool, this.config.skipErrorTests);
@@ -1231,7 +1250,8 @@ export class Interviewer {
                     });
                     // If we have multiple failures, regenerate remaining questions
                     if (!this.config.customScenariosOnly &&
-                        previousErrors.length >= 2 && personaInteractions.length < questions.length) {
+                        previousErrors.length >= 2 &&
+                        personaInteractions.length < questions.length) {
                         const remaining = this.config.maxQuestionsPerTool - personaInteractions.length;
                         if (remaining > 0) {
                             this.logger.debug({ tool: tool.name, errors: previousErrors.length }, 'Regenerating questions after errors');
@@ -1253,7 +1273,7 @@ export class Interviewer {
                 };
             }
             else {
-                personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map(i => ({
+                personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map((i) => ({
                     question: i.question,
                     response: i.response,
                     error: i.error,
@@ -1361,20 +1381,19 @@ export class Interviewer {
                 const results = await this.executeToolScenarios(client, tool.name, customScenarios);
                 scenarioResults.push(...results);
                 toolCallCount += results.length;
-                errorCount += results.filter(r => !r.passed).length;
+                errorCount += results.filter((r) => !r.passed).length;
             }
             finally {
                 toolCallMutex.release();
             }
             // Convert scenarios to interview questions
-            questions = customScenarios.map(s => this.scenarioToQuestion(s));
+            questions = customScenarios.map((s) => this.scenarioToQuestion(s));
         }
         else {
             // No custom scenarios - use fallback questions (check mode, no LLM)
             // We need an orchestrator for fallback questions, but we won't use LLM
             // Get fallback questions directly
-            questions = this.getFallbackQuestionsForTool(tool, this.config.skipErrorTests)
-                .slice(0, this.config.maxQuestionsPerTool);
+            questions = this.getFallbackQuestionsForTool(tool, this.config.skipErrorTests).slice(0, this.config.maxQuestionsPerTool);
         }
         // Execute warmup runs if configured (helps reduce cold-start timing variance)
         // Warmup runs are not recorded in interactions
@@ -1444,7 +1463,10 @@ export class Interviewer {
             // Generate simple analysis (no LLM in check mode)
             const analysis = this.generateSimpleAnalysis(error, !!response, 'Tool call succeeded.');
             const outcomeAssessment = this.assessOutcome(resolvedQuestion, response, error);
-            if (this.config.assertions?.enabled && outcomeAssessment.expected === 'success' && response && !response.isError) {
+            if (this.config.assertions?.enabled &&
+                outcomeAssessment.expected === 'success' &&
+                response &&
+                !response.isError) {
                 let schema = this.responseSchemas.get(tool.name);
                 if (!schema && this.config.assertions?.infer) {
                     const inferred = inferResponseSchema(response);
@@ -1546,7 +1568,7 @@ export class Interviewer {
             parallel: this.config.parallelTools,
         }, 'Running check mode tool testing');
         // Create tasks for each tool
-        const toolTasks = tools.map(tool => async () => {
+        const toolTasks = tools.map((tool) => async () => {
             progress.currentTool = tool.name;
             onProgress?.(progress);
             const result = await this.interviewToolInCheckMode(client, tool, toolCallMutex, options?.statefulRunner, options?.dependencyMap?.get(tool.name), options?.statefulConfig);
@@ -1575,7 +1597,7 @@ export class Interviewer {
         let totalErrorCount = 0;
         let totalQuestionsAsked = 0;
         for (const result of successfulResults) {
-            const tool = tools.find(t => t.name === result.toolName);
+            const tool = tools.find((t) => t.name === result.toolName);
             if (!tool)
                 continue;
             // Classify errors to separate tool correctness from environment issues
@@ -1618,7 +1640,7 @@ export class Interviewer {
         };
     }
     buildToolProgressSummary(result) {
-        const interactions = result.interactions.filter(i => !i.mocked);
+        const interactions = result.interactions.filter((i) => !i.mocked);
         const totalTests = interactions.length;
         let passedTests = 0;
         let validationTotal = 0;
@@ -1674,14 +1696,14 @@ export class Interviewer {
      */
     getScenariosForTool(toolName) {
         const scenarios = this.config.customScenarios?.toolScenarios ?? [];
-        return scenarios.filter(s => s.tool === toolName && !s.skip);
+        return scenarios.filter((s) => s.tool === toolName && !s.skip);
     }
     /**
      * Get custom scenarios for a specific prompt.
      */
     getScenariosForPrompt(promptName) {
         const scenarios = this.config.customScenarios?.promptScenarios ?? [];
-        return scenarios.filter(s => s.prompt === promptName && !s.skip);
+        return scenarios.filter((s) => s.prompt === promptName && !s.skip);
     }
     /**
      * Execute custom test scenarios for a tool.
@@ -1708,7 +1730,7 @@ export class Interviewer {
                     response = result.response;
                     isError = response?.isError ?? false;
                     if (isError) {
-                        const errorContent = response?.content?.find(c => c.type === 'text');
+                        const errorContent = response?.content?.find((c) => c.type === 'text');
                         if (errorContent && 'text' in errorContent) {
                             error = String(errorContent.text);
                         }
@@ -1728,7 +1750,7 @@ export class Interviewer {
                 ? evaluateAssertions(scenario.assertions, response, isError)
                 : [];
             // Scenario passes if no error (or expected error) and all assertions pass
-            const allAssertionsPassed = assertionResults.every(r => r.passed);
+            const allAssertionsPassed = assertionResults.every((r) => r.passed);
             const passed = allAssertionsPassed && (!isError || scenario.category === 'error_handling');
             const result = {
                 scenario,
@@ -1771,9 +1793,9 @@ export class Interviewer {
             const assertionResults = scenario.assertions
                 ? evaluateAssertions(scenario.assertions, response, !!error)
                 : [];
-            const allAssertionsPassed = assertionResults.every(r => r.passed);
+            const allAssertionsPassed = assertionResults.every((r) => r.passed);
             // Check if this scenario expects an error (has an assertion checking for 'error' to exist)
-            const expectsError = scenario.assertions?.some(a => a.path === 'error' && a.condition === 'exists') ?? false;
+            const expectsError = scenario.assertions?.some((a) => a.path === 'error' && a.condition === 'exists') ?? false;
             // Scenario passes if assertions pass AND (no error OR scenario expects error)
             const passed = allAssertionsPassed && (!error || expectsError);
             const result = {
@@ -1824,7 +1846,7 @@ export class Interviewer {
                     discoveredCount = discovered.length;
                     this.logger.info({
                         count: discoveredCount,
-                        workflows: discovered.map(w => w.name),
+                        workflows: discovered.map((w) => w.name),
                     }, 'Discovered workflows');
                 }
                 else {
@@ -1904,7 +1926,7 @@ export class Interviewer {
             }
         }
         // Build summary
-        const successfulCount = results.filter(r => r.success).length;
+        const successfulCount = results.filter((r) => r.success).length;
         const summary = {
             workflowCount: results.length,
             successfulCount,
@@ -1923,9 +1945,7 @@ export class Interviewer {
     }
 }
 function summarizeAssertions(interactions) {
-    const allResults = interactions
-        .filter((i) => !i.mocked)
-        .flatMap((i) => i.assertionResults ?? []);
+    const allResults = interactions.filter((i) => !i.mocked).flatMap((i) => i.assertionResults ?? []);
     if (allResults.length === 0)
         return undefined;
     const passed = allResults.filter((r) => r.passed).length;

package/dist/interview/orchestrator.js CHANGED Viewed

@@ -44,7 +44,11 @@ function categorizeLLMError(error) {
     if (message.includes('empty or whitespace') ||
         message.includes('token exhaustion') ||
         message.includes('unexpected end of json')) {
-        return { category: 'format_error', isRetryable: true, message: 'LLM returned empty response (possible token exhaustion)' };
+        return {
+            category: 'format_error',
+            isRetryable: true,
+            message: 'LLM returned empty response (possible token exhaustion)',
+        };
     }
     // Check for format errors (LLM returned wrong format) - retryable once
     if (message.includes('invalid question format') ||
@@ -53,7 +57,11 @@ function categorizeLLMError(error) {
         message.includes('not valid json')) {
         return { category: 'format_error', isRetryable: true, message: 'LLM returned invalid format' };
     }
-    return { category: 'unknown', isRetryable: false, message: error instanceof Error ? error.message : String(error) };
+    return {
+        category: 'unknown',
+        isRetryable: false,
+        message: error instanceof Error ? error.message : String(error),
+    };
 }
 /**
  * Orchestrator uses an LLM to generate interview questions and synthesize findings.
@@ -248,11 +256,13 @@ export class Orchestrator {
         for (let attempt = 0; attempt <= maxRetries; attempt++) {
             let rawResponse;
             try {
+                const abortController = new AbortController();
                 // Apply timeout to LLM call - use streaming if enabled
                 const response = await withTimeout(this.completeWithStreaming(prompt, {
                     ...COMPLETION_OPTIONS.questionGeneration,
                     systemPrompt: this.getSystemPrompt(),
-                }, `generate-questions:${tool.name}`), DEFAULT_TIMEOUTS.questionGeneration, `Question generation for ${tool.name}`);
+                    signal: abortController.signal,
+                }, `generate-questions:${tool.name}`), DEFAULT_TIMEOUTS.questionGeneration, `Question generation for ${tool.name}`, { abortController });
                 rawResponse = response;
                 // Check for empty/whitespace-only responses (common with token exhaustion)
                 const trimmed = response.trim();
@@ -304,7 +314,7 @@ export class Orchestrator {
                 // Wait before retry with exponential backoff
                 if (attempt < maxRetries) {
                     const delay = Math.min(RETRY.INITIAL_DELAY * Math.pow(2, attempt), RETRY.MAX_DELAY);
-                    await new Promise(resolve => setTimeout(resolve, delay));
+                    await new Promise((resolve) => setTimeout(resolve, delay));
                 }
             }
         }
@@ -361,7 +371,7 @@ export class Orchestrator {
                 return `Tool returned an error: ${error}`;
             }
             if (response?.content) {
-                const textContent = response.content.find(c => c.type === 'text');
+                const textContent = response.content.find((c) => c.type === 'text');
                 if (textContent && 'text' in textContent) {
                     return `Tool returned: ${String(textContent.text).substring(0, DISPLAY_LIMITS.TOOL_RESPONSE_PREVIEW)}`;
                 }
@@ -397,7 +407,7 @@ export class Orchestrator {
             return {
                 name: tool.name,
                 description: tool.description ?? 'No description provided',
-                behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
+                behavioralNotes: interactions.map((i) => i.analysis).filter((a) => a),
                 limitations: [],
                 securityNotes: [],
             };
@@ -694,7 +704,9 @@ export class Orchestrator {
         // Check name-based hints
         if (lowerName.includes('path') || lowerName.includes('file')) {
             const baseDir = this.serverContext?.allowedDirectories?.[0] ?? '/tmp';
-            if (lowerName.includes('dir') || lowerName.includes('directory') || lowerName.includes('folder')) {
+            if (lowerName.includes('dir') ||
+                lowerName.includes('directory') ||
+                lowerName.includes('folder')) {
                 return baseDir;
             }
             return `${baseDir}/test.txt`;
@@ -717,12 +729,16 @@ export class Orchestrator {
             }
             return 'test-name';
         }
-        if (lowerName.includes('query') || lowerName.includes('search') || lowerName.includes('filter')) {
+        if (lowerName.includes('query') ||
+            lowerName.includes('search') ||
+            lowerName.includes('filter')) {
             // Use a more realistic search term based on description
             if (description.includes('movie') || description.includes('film')) {
                 return 'The Matrix';
             }
-            if (description.includes('music') || description.includes('song') || description.includes('artist')) {
+            if (description.includes('music') ||
+                description.includes('song') ||
+                description.includes('artist')) {
                 return 'Beatles';
             }
             if (description.includes('book') || description.includes('author')) {
@@ -733,10 +749,14 @@ export class Orchestrator {
         if (lowerName.includes('title')) {
             return 'Test Title';
         }
-        if (lowerName.includes('description') || lowerName.includes('summary') || lowerName.includes('text')) {
+        if (lowerName.includes('description') ||
+            lowerName.includes('summary') ||
+            lowerName.includes('text')) {
             return 'This is a test description for validation purposes.';
         }
-        if (lowerName.includes('content') || lowerName.includes('body') || lowerName.includes('message')) {
+        if (lowerName.includes('content') ||
+            lowerName.includes('body') ||
+            lowerName.includes('message')) {
             return 'Test content for the operation.';
         }
         if (lowerName.includes('comment')) {
@@ -745,7 +765,9 @@ export class Orchestrator {
         if (lowerName.includes('code') || lowerName.includes('snippet')) {
             return 'function example() { return "Hello"; }';
         }
-        if (lowerName.includes('pattern') || lowerName.includes('glob') || lowerName.includes('regex')) {
+        if (lowerName.includes('pattern') ||
+            lowerName.includes('glob') ||
+            lowerName.includes('regex')) {
             return '*.txt';
         }
         if (lowerName.includes('format') || lowerName.includes('type')) {
@@ -830,13 +852,17 @@ export class Orchestrator {
         if (lowerName.includes('count') || lowerName.includes('limit') || lowerName.includes('num')) {
             return 10;
         }
-        if (lowerName.includes('enabled') || lowerName.includes('active') || lowerName.includes('flag')) {
+        if (lowerName.includes('enabled') ||
+            lowerName.includes('active') ||
+            lowerName.includes('flag')) {
             return true;
         }
         if (lowerName.includes('list') || lowerName.includes('items') || lowerName.includes('array')) {
             return [];
         }
-        if (lowerName.includes('config') || lowerName.includes('options') || lowerName.includes('settings')) {
+        if (lowerName.includes('config') ||
+            lowerName.includes('options') ||
+            lowerName.includes('settings')) {
             return {};
         }
         return 'test';
@@ -919,8 +945,7 @@ export class Orchestrator {
         if (!schema?.properties)
             return tests;
         const required = new Set(schema.required ?? []);
-        const optionalParams = Object.entries(schema.properties)
-            .filter(([name]) => !required.has(name));
+        const optionalParams = Object.entries(schema.properties).filter(([name]) => !required.has(name));
         if (optionalParams.length === 0)
             return tests;
         const allArgs = {};
@@ -1053,7 +1078,7 @@ export class Orchestrator {
             const result = this.llm.parseJSON(response);
             // Extract example output from first successful interaction
             let exampleOutput;
-            const successful = interactions.find(i => !i.error && i.response?.messages?.length);
+            const successful = interactions.find((i) => !i.error && i.response?.messages?.length);
             if (successful?.response) {
                 const firstMsg = successful.response.messages[0];
                 if (firstMsg?.content?.type === 'text' && firstMsg.content.text) {
@@ -1078,7 +1103,7 @@ export class Orchestrator {
                 name: prompt.name,
                 description: prompt.description ?? 'No description provided',
                 arguments: prompt.arguments ?? [],
-                behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
+                behavioralNotes: interactions.map((i) => i.analysis).filter((a) => a),
                 limitations: [],
             };
         }
@@ -1102,7 +1127,7 @@ export class Orchestrator {
             args,
         });
         // If there are optional args, add a test with all args
-        const optionalArgs = prompt.arguments?.filter(a => !a.required) ?? [];
+        const optionalArgs = prompt.arguments?.filter((a) => !a.required) ?? [];
         if (optionalArgs.length > 0) {
             const allArgs = { ...args };
             for (const arg of optionalArgs) {
@@ -1210,10 +1235,12 @@ Description: ${resource.description ?? 'No description'}
 MIME Type: ${resource.mimeType ?? 'Not specified'}
 Test interactions:
-${interactions.map((i, idx) => `
+${interactions
+            .map((i, idx) => `
 ${idx + 1}. ${i.question.description}
    ${i.error ? `Error: ${i.error}` : `Analysis: ${i.analysis}`}
-`).join('')}
+`)
+            .join('')}
 Generate a JSON object with:
 {
@@ -1247,7 +1274,7 @@ Return ONLY valid JSON, no explanation.`;
                 name: resource.name,
                 description: resource.description ?? 'No description provided',
                 mimeType: resource.mimeType,
-                behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
+                behavioralNotes: interactions.map((i) => i.analysis).filter((a) => a),
                 limitations: [],
             };
         }