@dotsetlabs/bellwether 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { DEFAULT_PERSONA } from '../persona/builtins.js';
4
4
  import { getLogger, startTiming } from '../logging/logger.js';
5
5
  import { evaluateAssertions } from '../scenarios/evaluator.js';
6
6
  import { withTimeout, DEFAULT_TIMEOUTS, parallelLimit, createMutex } from '../utils/index.js';
7
- import { INTERVIEW, WORKFLOW, DISPLAY_LIMITS, SCHEMA_TESTING, OUTCOME_ASSESSMENT } from '../constants.js';
7
+ import { INTERVIEW, WORKFLOW, DISPLAY_LIMITS, SCHEMA_TESTING, OUTCOME_ASSESSMENT, } from '../constants.js';
8
8
  import { generateSchemaTests } from './schema-test-generator.js';
9
9
  import { WorkflowDiscoverer } from '../workflow/discovery.js';
10
10
  import { WorkflowExecutor } from '../workflow/executor.js';
@@ -62,7 +62,8 @@ export class Interviewer {
62
62
  // Use multiple personas by default for better coverage
63
63
  // Fall back to DEFAULT_PERSONAS if no personas provided or empty array
64
64
  const providedPersonas = config?.personas;
65
- this.personas = (providedPersonas && providedPersonas.length > 0) ? providedPersonas : DEFAULT_PERSONAS;
65
+ this.personas =
66
+ providedPersonas && providedPersonas.length > 0 ? providedPersonas : DEFAULT_PERSONAS;
66
67
  // Store cache reference for tool response and analysis caching
67
68
  this.cache = config?.cache;
68
69
  if (this.config.rateLimit?.enabled) {
@@ -270,12 +271,12 @@ export class Interviewer {
270
271
  };
271
272
  // Look for tools that reveal server constraints
272
273
  for (const toolName of INTERVIEW.CONSTRAINT_DISCOVERY_TOOLS) {
273
- const tool = discovery.tools.find(t => t.name === toolName);
274
+ const tool = discovery.tools.find((t) => t.name === toolName);
274
275
  if (tool) {
275
276
  try {
276
277
  const result = await client.callTool(toolName, {});
277
278
  if (result?.content) {
278
- const textContent = result.content.find(c => c.type === 'text');
279
+ const textContent = result.content.find((c) => c.type === 'text');
279
280
  if (textContent && 'text' in textContent) {
280
281
  const text = String(textContent.text);
281
282
  // Parse allowed directories from response
@@ -344,7 +345,7 @@ export class Interviewer {
344
345
  try {
345
346
  const parsed = JSON.parse(text);
346
347
  if (Array.isArray(parsed)) {
347
- return parsed.filter(d => typeof d === 'string' && d.startsWith('/'));
348
+ return parsed.filter((d) => typeof d === 'string' && d.startsWith('/'));
348
349
  }
349
350
  }
350
351
  catch (error) {
@@ -443,7 +444,7 @@ export class Interviewer {
443
444
  concurrency,
444
445
  }, 'Running persona interviews in parallel');
445
446
  // Create tasks for each persona
446
- const personaTasks = this.personas.map(persona => async () => {
447
+ const personaTasks = this.personas.map((persona) => async () => {
447
448
  progress.currentPersona = persona.name;
448
449
  onProgress?.(progress);
449
450
  const result = await this.interviewPersona(client, discovery, persona, toolCallMutex);
@@ -501,7 +502,10 @@ export class Interviewer {
501
502
  if (statefulEnabled) {
502
503
  this.logger.info({ toolCount: orderedTools.length }, 'Stateful testing enabled');
503
504
  }
504
- this.logger.info({ parallel: this.config.parallelTools && !statefulEnabled, concurrency: effectiveConcurrency }, 'Using check mode tool testing');
505
+ this.logger.info({
506
+ parallel: this.config.parallelTools && !statefulEnabled,
507
+ concurrency: effectiveConcurrency,
508
+ }, 'Using check mode tool testing');
505
509
  const statefulRunner = statefulEnabled
506
510
  ? new StatefulTestRunner({ shareOutputs: statefulConfig?.shareOutputsBetweenTools ?? true })
507
511
  : undefined;
@@ -516,13 +520,15 @@ export class Interviewer {
516
520
  const toolData = toolInteractionsMap.get(profile.name);
517
521
  if (toolData) {
518
522
  toolData.interactions = profile.interactions;
519
- toolData.findingsByPersona = [{
523
+ toolData.findingsByPersona = [
524
+ {
520
525
  personaId: 'check_mode',
521
526
  personaName: 'Check Mode',
522
527
  behavioralNotes: [],
523
528
  limitations: [],
524
529
  securityNotes: [],
525
- }];
530
+ },
531
+ ];
526
532
  }
527
533
  }
528
534
  // Update persona stats with aggregated counts
@@ -557,7 +563,7 @@ export class Interviewer {
557
563
  const scenarioResults = await this.executeToolScenarios(client, tool.name, customScenarios);
558
564
  allScenarioResults.push(...scenarioResults);
559
565
  // Convert scenarios to interview questions for integration with profiling
560
- questions = customScenarios.map(s => this.scenarioToQuestion(s));
566
+ questions = customScenarios.map((s) => this.scenarioToQuestion(s));
561
567
  // If not custom-only mode, also generate LLM questions (skip in fast CI mode)
562
568
  if (!this.config.customScenariosOnly && !this.config.checkMode) {
563
569
  const llmQuestions = await orchestrator.generateQuestions(tool, this.config.maxQuestionsPerTool, this.config.skipErrorTests);
@@ -568,7 +574,8 @@ export class Interviewer {
568
574
  // No custom scenarios - generate questions
569
575
  if (this.config.checkMode) {
570
576
  // Fast CI mode: use fallback questions (no LLM call)
571
- questions = orchestrator.getFallbackQuestions(tool, this.config.skipErrorTests)
577
+ questions = orchestrator
578
+ .getFallbackQuestions(tool, this.config.skipErrorTests)
572
579
  .slice(0, this.config.maxQuestionsPerTool);
573
580
  }
574
581
  else {
@@ -589,8 +596,10 @@ export class Interviewer {
589
596
  });
590
597
  // If we have multiple failures, regenerate remaining questions with error context
591
598
  // Skip in scenarios-only mode and fast CI mode
592
- if (!this.config.customScenariosOnly && !this.config.checkMode &&
593
- previousErrors.length >= 2 && personaInteractions.length < questions.length) {
599
+ if (!this.config.customScenariosOnly &&
600
+ !this.config.checkMode &&
601
+ previousErrors.length >= 2 &&
602
+ personaInteractions.length < questions.length) {
594
603
  const remaining = this.config.maxQuestionsPerTool - personaInteractions.length;
595
604
  if (remaining > 0) {
596
605
  this.logger.debug({ tool: tool.name, errors: previousErrors.length }, 'Regenerating questions after errors');
@@ -616,7 +625,7 @@ export class Interviewer {
616
625
  };
617
626
  }
618
627
  else {
619
- personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map(i => ({
628
+ personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map((i) => ({
620
629
  question: i.question,
621
630
  response: i.response,
622
631
  error: i.error,
@@ -664,7 +673,9 @@ export class Interviewer {
664
673
  progress.promptsCompleted = 0;
665
674
  onProgress?.(progress);
666
675
  // Only create orchestrator if NOT in check mode (requires LLM)
667
- const primaryOrchestrator = this.isCheckMode() ? null : this.createOrchestrator(this.personas[0]);
676
+ const primaryOrchestrator = this.isCheckMode()
677
+ ? null
678
+ : this.createOrchestrator(this.personas[0]);
668
679
  for (const prompt of discovery.prompts) {
669
680
  progress.currentTool = `prompt:${prompt.name}`;
670
681
  onProgress?.(progress);
@@ -678,7 +689,7 @@ export class Interviewer {
678
689
  const scenarioResults = await this.executePromptScenarios(client, prompt.name, customScenarios);
679
690
  allScenarioResults.push(...scenarioResults);
680
691
  // Convert scenarios to prompt questions for profiling
681
- questions = customScenarios.map(s => ({
692
+ questions = customScenarios.map((s) => ({
682
693
  description: s.description,
683
694
  args: s.args,
684
695
  }));
@@ -688,7 +699,9 @@ export class Interviewer {
688
699
  questions = [...questions, ...llmQuestions];
689
700
  }
690
701
  }
691
- else if (!this.config.customScenariosOnly && !this.config.checkMode && primaryOrchestrator) {
702
+ else if (!this.config.customScenariosOnly &&
703
+ !this.config.checkMode &&
704
+ primaryOrchestrator) {
692
705
  // No custom scenarios - generate LLM questions as usual
693
706
  questions = await primaryOrchestrator.generatePromptQuestions(prompt, 2);
694
707
  }
@@ -740,7 +753,7 @@ export class Interviewer {
740
753
  };
741
754
  }
742
755
  else {
743
- profile = await primaryOrchestrator.synthesizePromptProfile(prompt, promptInteractions.map(i => ({
756
+ profile = await primaryOrchestrator.synthesizePromptProfile(prompt, promptInteractions.map((i) => ({
744
757
  question: i.question,
745
758
  response: i.response,
746
759
  error: i.error,
@@ -768,7 +781,9 @@ export class Interviewer {
768
781
  progress.resourcesCompleted = 0;
769
782
  onProgress?.(progress);
770
783
  // Only create orchestrator if NOT in check mode (requires LLM)
771
- const primaryOrchestrator = this.isCheckMode() ? null : this.createOrchestrator(this.personas[0]);
784
+ const primaryOrchestrator = this.isCheckMode()
785
+ ? null
786
+ : this.createOrchestrator(this.personas[0]);
772
787
  for (const resource of discoveredResources) {
773
788
  progress.currentTool = `resource:${resource.name}`;
774
789
  onProgress?.(progress);
@@ -777,7 +792,9 @@ export class Interviewer {
777
792
  let questions;
778
793
  if (this.config.checkMode || !primaryOrchestrator) {
779
794
  // Fast CI mode: use simple fallback question
780
- questions = [{ description: 'Basic resource read test', category: 'happy_path' }];
795
+ questions = [
796
+ { description: 'Basic resource read test', category: 'happy_path' },
797
+ ];
781
798
  }
782
799
  else {
783
800
  questions = await primaryOrchestrator.generateResourceQuestions(resource, 2);
@@ -787,8 +804,9 @@ export class Interviewer {
787
804
  let response = null;
788
805
  let error = null;
789
806
  try {
807
+ const abortController = new AbortController();
790
808
  // Apply timeout to resource read to prevent indefinite hangs
791
- response = await withTimeout(client.readResource(resource.uri), this.config.resourceTimeout ?? DEFAULT_TIMEOUTS.resourceRead, `Resource read: ${resource.uri}`);
809
+ response = await withTimeout(client.readResource(resource.uri, { signal: abortController.signal }), this.config.resourceTimeout ?? DEFAULT_TIMEOUTS.resourceRead, `Resource read: ${resource.uri}`, { abortController });
792
810
  resourceReadCount++;
793
811
  }
794
812
  catch (e) {
@@ -829,7 +847,7 @@ export class Interviewer {
829
847
  };
830
848
  }
831
849
  else {
832
- profile = await primaryOrchestrator.synthesizeResourceProfile(resource, resourceInteractions.map(i => ({
850
+ profile = await primaryOrchestrator.synthesizeResourceProfile(resource, resourceInteractions.map((i) => ({
833
851
  question: i.question,
834
852
  response: i.response,
835
853
  error: i.error,
@@ -838,13 +856,14 @@ export class Interviewer {
838
856
  }
839
857
  // Extract content preview from first successful read
840
858
  let contentPreview;
841
- const successfulRead = resourceInteractions.find(i => i.response && !i.error);
859
+ const successfulRead = resourceInteractions.find((i) => i.response && !i.error);
842
860
  if (successfulRead?.response?.contents?.[0]) {
843
861
  const content = successfulRead.response.contents[0];
844
862
  if (content.text) {
845
- contentPreview = content.text.length > DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW
846
- ? `${content.text.substring(0, DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW)}...`
847
- : content.text;
863
+ contentPreview =
864
+ content.text.length > DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW
865
+ ? `${content.text.substring(0, DISPLAY_LIMITS.CONTENT_TEXT_PREVIEW)}...`
866
+ : content.text;
848
867
  }
849
868
  else if (content.blob) {
850
869
  contentPreview = `[Binary data: ${content.blob.length} bytes base64]`;
@@ -1058,7 +1077,7 @@ export class Interviewer {
1058
1077
  if (response.isError) {
1059
1078
  stats.errorCount++;
1060
1079
  hadError = true;
1061
- const errorContent = response.content?.find(c => c.type === 'text');
1080
+ const errorContent = response.content?.find((c) => c.type === 'text');
1062
1081
  if (errorContent && 'text' in errorContent) {
1063
1082
  error = String(errorContent.text);
1064
1083
  }
@@ -1143,7 +1162,7 @@ export class Interviewer {
1143
1162
  // Extract allowed directories explicitly mentioned
1144
1163
  const allowedMatch = error.match(/allowed director(?:y|ies)[:\s]+([^\n]+)/i);
1145
1164
  if (allowedMatch) {
1146
- const dirs = allowedMatch[1].split(/[,\s]+/).filter(d => d.startsWith('/'));
1165
+ const dirs = allowedMatch[1].split(/[,\s]+/).filter((d) => d.startsWith('/'));
1147
1166
  if (dirs.length > 0) {
1148
1167
  const currentContext = orchestrator.getServerContext() ?? { allowedDirectories: [] };
1149
1168
  const existingDirs = currentContext.allowedDirectories ?? [];
@@ -1197,7 +1216,7 @@ export class Interviewer {
1197
1216
  toolCallMutex.release();
1198
1217
  }
1199
1218
  // Convert scenarios to interview questions
1200
- questions = customScenarios.map(s => this.scenarioToQuestion(s));
1219
+ questions = customScenarios.map((s) => this.scenarioToQuestion(s));
1201
1220
  // If not custom-only mode, also generate LLM questions
1202
1221
  if (!this.config.customScenariosOnly) {
1203
1222
  const llmQuestions = await orchestrator.generateQuestions(tool, this.config.maxQuestionsPerTool, this.config.skipErrorTests);
@@ -1231,7 +1250,8 @@ export class Interviewer {
1231
1250
  });
1232
1251
  // If we have multiple failures, regenerate remaining questions
1233
1252
  if (!this.config.customScenariosOnly &&
1234
- previousErrors.length >= 2 && personaInteractions.length < questions.length) {
1253
+ previousErrors.length >= 2 &&
1254
+ personaInteractions.length < questions.length) {
1235
1255
  const remaining = this.config.maxQuestionsPerTool - personaInteractions.length;
1236
1256
  if (remaining > 0) {
1237
1257
  this.logger.debug({ tool: tool.name, errors: previousErrors.length }, 'Regenerating questions after errors');
@@ -1253,7 +1273,7 @@ export class Interviewer {
1253
1273
  };
1254
1274
  }
1255
1275
  else {
1256
- personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map(i => ({
1276
+ personaProfile = await orchestrator.synthesizeToolProfile(tool, personaInteractions.map((i) => ({
1257
1277
  question: i.question,
1258
1278
  response: i.response,
1259
1279
  error: i.error,
@@ -1361,20 +1381,19 @@ export class Interviewer {
1361
1381
  const results = await this.executeToolScenarios(client, tool.name, customScenarios);
1362
1382
  scenarioResults.push(...results);
1363
1383
  toolCallCount += results.length;
1364
- errorCount += results.filter(r => !r.passed).length;
1384
+ errorCount += results.filter((r) => !r.passed).length;
1365
1385
  }
1366
1386
  finally {
1367
1387
  toolCallMutex.release();
1368
1388
  }
1369
1389
  // Convert scenarios to interview questions
1370
- questions = customScenarios.map(s => this.scenarioToQuestion(s));
1390
+ questions = customScenarios.map((s) => this.scenarioToQuestion(s));
1371
1391
  }
1372
1392
  else {
1373
1393
  // No custom scenarios - use fallback questions (check mode, no LLM)
1374
1394
  // We need an orchestrator for fallback questions, but we won't use LLM
1375
1395
  // Get fallback questions directly
1376
- questions = this.getFallbackQuestionsForTool(tool, this.config.skipErrorTests)
1377
- .slice(0, this.config.maxQuestionsPerTool);
1396
+ questions = this.getFallbackQuestionsForTool(tool, this.config.skipErrorTests).slice(0, this.config.maxQuestionsPerTool);
1378
1397
  }
1379
1398
  // Execute warmup runs if configured (helps reduce cold-start timing variance)
1380
1399
  // Warmup runs are not recorded in interactions
@@ -1444,7 +1463,10 @@ export class Interviewer {
1444
1463
  // Generate simple analysis (no LLM in check mode)
1445
1464
  const analysis = this.generateSimpleAnalysis(error, !!response, 'Tool call succeeded.');
1446
1465
  const outcomeAssessment = this.assessOutcome(resolvedQuestion, response, error);
1447
- if (this.config.assertions?.enabled && outcomeAssessment.expected === 'success' && response && !response.isError) {
1466
+ if (this.config.assertions?.enabled &&
1467
+ outcomeAssessment.expected === 'success' &&
1468
+ response &&
1469
+ !response.isError) {
1448
1470
  let schema = this.responseSchemas.get(tool.name);
1449
1471
  if (!schema && this.config.assertions?.infer) {
1450
1472
  const inferred = inferResponseSchema(response);
@@ -1546,7 +1568,7 @@ export class Interviewer {
1546
1568
  parallel: this.config.parallelTools,
1547
1569
  }, 'Running check mode tool testing');
1548
1570
  // Create tasks for each tool
1549
- const toolTasks = tools.map(tool => async () => {
1571
+ const toolTasks = tools.map((tool) => async () => {
1550
1572
  progress.currentTool = tool.name;
1551
1573
  onProgress?.(progress);
1552
1574
  const result = await this.interviewToolInCheckMode(client, tool, toolCallMutex, options?.statefulRunner, options?.dependencyMap?.get(tool.name), options?.statefulConfig);
@@ -1575,7 +1597,7 @@ export class Interviewer {
1575
1597
  let totalErrorCount = 0;
1576
1598
  let totalQuestionsAsked = 0;
1577
1599
  for (const result of successfulResults) {
1578
- const tool = tools.find(t => t.name === result.toolName);
1600
+ const tool = tools.find((t) => t.name === result.toolName);
1579
1601
  if (!tool)
1580
1602
  continue;
1581
1603
  // Classify errors to separate tool correctness from environment issues
@@ -1618,7 +1640,7 @@ export class Interviewer {
1618
1640
  };
1619
1641
  }
1620
1642
  buildToolProgressSummary(result) {
1621
- const interactions = result.interactions.filter(i => !i.mocked);
1643
+ const interactions = result.interactions.filter((i) => !i.mocked);
1622
1644
  const totalTests = interactions.length;
1623
1645
  let passedTests = 0;
1624
1646
  let validationTotal = 0;
@@ -1674,14 +1696,14 @@ export class Interviewer {
1674
1696
  */
1675
1697
  getScenariosForTool(toolName) {
1676
1698
  const scenarios = this.config.customScenarios?.toolScenarios ?? [];
1677
- return scenarios.filter(s => s.tool === toolName && !s.skip);
1699
+ return scenarios.filter((s) => s.tool === toolName && !s.skip);
1678
1700
  }
1679
1701
  /**
1680
1702
  * Get custom scenarios for a specific prompt.
1681
1703
  */
1682
1704
  getScenariosForPrompt(promptName) {
1683
1705
  const scenarios = this.config.customScenarios?.promptScenarios ?? [];
1684
- return scenarios.filter(s => s.prompt === promptName && !s.skip);
1706
+ return scenarios.filter((s) => s.prompt === promptName && !s.skip);
1685
1707
  }
1686
1708
  /**
1687
1709
  * Execute custom test scenarios for a tool.
@@ -1708,7 +1730,7 @@ export class Interviewer {
1708
1730
  response = result.response;
1709
1731
  isError = response?.isError ?? false;
1710
1732
  if (isError) {
1711
- const errorContent = response?.content?.find(c => c.type === 'text');
1733
+ const errorContent = response?.content?.find((c) => c.type === 'text');
1712
1734
  if (errorContent && 'text' in errorContent) {
1713
1735
  error = String(errorContent.text);
1714
1736
  }
@@ -1728,7 +1750,7 @@ export class Interviewer {
1728
1750
  ? evaluateAssertions(scenario.assertions, response, isError)
1729
1751
  : [];
1730
1752
  // Scenario passes if no error (or expected error) and all assertions pass
1731
- const allAssertionsPassed = assertionResults.every(r => r.passed);
1753
+ const allAssertionsPassed = assertionResults.every((r) => r.passed);
1732
1754
  const passed = allAssertionsPassed && (!isError || scenario.category === 'error_handling');
1733
1755
  const result = {
1734
1756
  scenario,
@@ -1771,9 +1793,9 @@ export class Interviewer {
1771
1793
  const assertionResults = scenario.assertions
1772
1794
  ? evaluateAssertions(scenario.assertions, response, !!error)
1773
1795
  : [];
1774
- const allAssertionsPassed = assertionResults.every(r => r.passed);
1796
+ const allAssertionsPassed = assertionResults.every((r) => r.passed);
1775
1797
  // Check if this scenario expects an error (has an assertion checking for 'error' to exist)
1776
- const expectsError = scenario.assertions?.some(a => a.path === 'error' && a.condition === 'exists') ?? false;
1798
+ const expectsError = scenario.assertions?.some((a) => a.path === 'error' && a.condition === 'exists') ?? false;
1777
1799
  // Scenario passes if assertions pass AND (no error OR scenario expects error)
1778
1800
  const passed = allAssertionsPassed && (!error || expectsError);
1779
1801
  const result = {
@@ -1824,7 +1846,7 @@ export class Interviewer {
1824
1846
  discoveredCount = discovered.length;
1825
1847
  this.logger.info({
1826
1848
  count: discoveredCount,
1827
- workflows: discovered.map(w => w.name),
1849
+ workflows: discovered.map((w) => w.name),
1828
1850
  }, 'Discovered workflows');
1829
1851
  }
1830
1852
  else {
@@ -1904,7 +1926,7 @@ export class Interviewer {
1904
1926
  }
1905
1927
  }
1906
1928
  // Build summary
1907
- const successfulCount = results.filter(r => r.success).length;
1929
+ const successfulCount = results.filter((r) => r.success).length;
1908
1930
  const summary = {
1909
1931
  workflowCount: results.length,
1910
1932
  successfulCount,
@@ -1923,9 +1945,7 @@ export class Interviewer {
1923
1945
  }
1924
1946
  }
1925
1947
  function summarizeAssertions(interactions) {
1926
- const allResults = interactions
1927
- .filter((i) => !i.mocked)
1928
- .flatMap((i) => i.assertionResults ?? []);
1948
+ const allResults = interactions.filter((i) => !i.mocked).flatMap((i) => i.assertionResults ?? []);
1929
1949
  if (allResults.length === 0)
1930
1950
  return undefined;
1931
1951
  const passed = allResults.filter((r) => r.passed).length;
@@ -44,7 +44,11 @@ function categorizeLLMError(error) {
44
44
  if (message.includes('empty or whitespace') ||
45
45
  message.includes('token exhaustion') ||
46
46
  message.includes('unexpected end of json')) {
47
- return { category: 'format_error', isRetryable: true, message: 'LLM returned empty response (possible token exhaustion)' };
47
+ return {
48
+ category: 'format_error',
49
+ isRetryable: true,
50
+ message: 'LLM returned empty response (possible token exhaustion)',
51
+ };
48
52
  }
49
53
  // Check for format errors (LLM returned wrong format) - retryable once
50
54
  if (message.includes('invalid question format') ||
@@ -53,7 +57,11 @@ function categorizeLLMError(error) {
53
57
  message.includes('not valid json')) {
54
58
  return { category: 'format_error', isRetryable: true, message: 'LLM returned invalid format' };
55
59
  }
56
- return { category: 'unknown', isRetryable: false, message: error instanceof Error ? error.message : String(error) };
60
+ return {
61
+ category: 'unknown',
62
+ isRetryable: false,
63
+ message: error instanceof Error ? error.message : String(error),
64
+ };
57
65
  }
58
66
  /**
59
67
  * Orchestrator uses an LLM to generate interview questions and synthesize findings.
@@ -248,11 +256,13 @@ export class Orchestrator {
248
256
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
249
257
  let rawResponse;
250
258
  try {
259
+ const abortController = new AbortController();
251
260
  // Apply timeout to LLM call - use streaming if enabled
252
261
  const response = await withTimeout(this.completeWithStreaming(prompt, {
253
262
  ...COMPLETION_OPTIONS.questionGeneration,
254
263
  systemPrompt: this.getSystemPrompt(),
255
- }, `generate-questions:${tool.name}`), DEFAULT_TIMEOUTS.questionGeneration, `Question generation for ${tool.name}`);
264
+ signal: abortController.signal,
265
+ }, `generate-questions:${tool.name}`), DEFAULT_TIMEOUTS.questionGeneration, `Question generation for ${tool.name}`, { abortController });
256
266
  rawResponse = response;
257
267
  // Check for empty/whitespace-only responses (common with token exhaustion)
258
268
  const trimmed = response.trim();
@@ -304,7 +314,7 @@ export class Orchestrator {
304
314
  // Wait before retry with exponential backoff
305
315
  if (attempt < maxRetries) {
306
316
  const delay = Math.min(RETRY.INITIAL_DELAY * Math.pow(2, attempt), RETRY.MAX_DELAY);
307
- await new Promise(resolve => setTimeout(resolve, delay));
317
+ await new Promise((resolve) => setTimeout(resolve, delay));
308
318
  }
309
319
  }
310
320
  }
@@ -361,7 +371,7 @@ export class Orchestrator {
361
371
  return `Tool returned an error: ${error}`;
362
372
  }
363
373
  if (response?.content) {
364
- const textContent = response.content.find(c => c.type === 'text');
374
+ const textContent = response.content.find((c) => c.type === 'text');
365
375
  if (textContent && 'text' in textContent) {
366
376
  return `Tool returned: ${String(textContent.text).substring(0, DISPLAY_LIMITS.TOOL_RESPONSE_PREVIEW)}`;
367
377
  }
@@ -397,7 +407,7 @@ export class Orchestrator {
397
407
  return {
398
408
  name: tool.name,
399
409
  description: tool.description ?? 'No description provided',
400
- behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
410
+ behavioralNotes: interactions.map((i) => i.analysis).filter((a) => a),
401
411
  limitations: [],
402
412
  securityNotes: [],
403
413
  };
@@ -694,7 +704,9 @@ export class Orchestrator {
694
704
  // Check name-based hints
695
705
  if (lowerName.includes('path') || lowerName.includes('file')) {
696
706
  const baseDir = this.serverContext?.allowedDirectories?.[0] ?? '/tmp';
697
- if (lowerName.includes('dir') || lowerName.includes('directory') || lowerName.includes('folder')) {
707
+ if (lowerName.includes('dir') ||
708
+ lowerName.includes('directory') ||
709
+ lowerName.includes('folder')) {
698
710
  return baseDir;
699
711
  }
700
712
  return `${baseDir}/test.txt`;
@@ -717,12 +729,16 @@ export class Orchestrator {
717
729
  }
718
730
  return 'test-name';
719
731
  }
720
- if (lowerName.includes('query') || lowerName.includes('search') || lowerName.includes('filter')) {
732
+ if (lowerName.includes('query') ||
733
+ lowerName.includes('search') ||
734
+ lowerName.includes('filter')) {
721
735
  // Use a more realistic search term based on description
722
736
  if (description.includes('movie') || description.includes('film')) {
723
737
  return 'The Matrix';
724
738
  }
725
- if (description.includes('music') || description.includes('song') || description.includes('artist')) {
739
+ if (description.includes('music') ||
740
+ description.includes('song') ||
741
+ description.includes('artist')) {
726
742
  return 'Beatles';
727
743
  }
728
744
  if (description.includes('book') || description.includes('author')) {
@@ -733,10 +749,14 @@ export class Orchestrator {
733
749
  if (lowerName.includes('title')) {
734
750
  return 'Test Title';
735
751
  }
736
- if (lowerName.includes('description') || lowerName.includes('summary') || lowerName.includes('text')) {
752
+ if (lowerName.includes('description') ||
753
+ lowerName.includes('summary') ||
754
+ lowerName.includes('text')) {
737
755
  return 'This is a test description for validation purposes.';
738
756
  }
739
- if (lowerName.includes('content') || lowerName.includes('body') || lowerName.includes('message')) {
757
+ if (lowerName.includes('content') ||
758
+ lowerName.includes('body') ||
759
+ lowerName.includes('message')) {
740
760
  return 'Test content for the operation.';
741
761
  }
742
762
  if (lowerName.includes('comment')) {
@@ -745,7 +765,9 @@ export class Orchestrator {
745
765
  if (lowerName.includes('code') || lowerName.includes('snippet')) {
746
766
  return 'function example() { return "Hello"; }';
747
767
  }
748
- if (lowerName.includes('pattern') || lowerName.includes('glob') || lowerName.includes('regex')) {
768
+ if (lowerName.includes('pattern') ||
769
+ lowerName.includes('glob') ||
770
+ lowerName.includes('regex')) {
749
771
  return '*.txt';
750
772
  }
751
773
  if (lowerName.includes('format') || lowerName.includes('type')) {
@@ -830,13 +852,17 @@ export class Orchestrator {
830
852
  if (lowerName.includes('count') || lowerName.includes('limit') || lowerName.includes('num')) {
831
853
  return 10;
832
854
  }
833
- if (lowerName.includes('enabled') || lowerName.includes('active') || lowerName.includes('flag')) {
855
+ if (lowerName.includes('enabled') ||
856
+ lowerName.includes('active') ||
857
+ lowerName.includes('flag')) {
834
858
  return true;
835
859
  }
836
860
  if (lowerName.includes('list') || lowerName.includes('items') || lowerName.includes('array')) {
837
861
  return [];
838
862
  }
839
- if (lowerName.includes('config') || lowerName.includes('options') || lowerName.includes('settings')) {
863
+ if (lowerName.includes('config') ||
864
+ lowerName.includes('options') ||
865
+ lowerName.includes('settings')) {
840
866
  return {};
841
867
  }
842
868
  return 'test';
@@ -919,8 +945,7 @@ export class Orchestrator {
919
945
  if (!schema?.properties)
920
946
  return tests;
921
947
  const required = new Set(schema.required ?? []);
922
- const optionalParams = Object.entries(schema.properties)
923
- .filter(([name]) => !required.has(name));
948
+ const optionalParams = Object.entries(schema.properties).filter(([name]) => !required.has(name));
924
949
  if (optionalParams.length === 0)
925
950
  return tests;
926
951
  const allArgs = {};
@@ -1053,7 +1078,7 @@ export class Orchestrator {
1053
1078
  const result = this.llm.parseJSON(response);
1054
1079
  // Extract example output from first successful interaction
1055
1080
  let exampleOutput;
1056
- const successful = interactions.find(i => !i.error && i.response?.messages?.length);
1081
+ const successful = interactions.find((i) => !i.error && i.response?.messages?.length);
1057
1082
  if (successful?.response) {
1058
1083
  const firstMsg = successful.response.messages[0];
1059
1084
  if (firstMsg?.content?.type === 'text' && firstMsg.content.text) {
@@ -1078,7 +1103,7 @@ export class Orchestrator {
1078
1103
  name: prompt.name,
1079
1104
  description: prompt.description ?? 'No description provided',
1080
1105
  arguments: prompt.arguments ?? [],
1081
- behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
1106
+ behavioralNotes: interactions.map((i) => i.analysis).filter((a) => a),
1082
1107
  limitations: [],
1083
1108
  };
1084
1109
  }
@@ -1102,7 +1127,7 @@ export class Orchestrator {
1102
1127
  args,
1103
1128
  });
1104
1129
  // If there are optional args, add a test with all args
1105
- const optionalArgs = prompt.arguments?.filter(a => !a.required) ?? [];
1130
+ const optionalArgs = prompt.arguments?.filter((a) => !a.required) ?? [];
1106
1131
  if (optionalArgs.length > 0) {
1107
1132
  const allArgs = { ...args };
1108
1133
  for (const arg of optionalArgs) {
@@ -1210,10 +1235,12 @@ Description: ${resource.description ?? 'No description'}
1210
1235
  MIME Type: ${resource.mimeType ?? 'Not specified'}
1211
1236
 
1212
1237
  Test interactions:
1213
- ${interactions.map((i, idx) => `
1238
+ ${interactions
1239
+ .map((i, idx) => `
1214
1240
  ${idx + 1}. ${i.question.description}
1215
1241
  ${i.error ? `Error: ${i.error}` : `Analysis: ${i.analysis}`}
1216
- `).join('')}
1242
+ `)
1243
+ .join('')}
1217
1244
 
1218
1245
  Generate a JSON object with:
1219
1246
  {
@@ -1247,7 +1274,7 @@ Return ONLY valid JSON, no explanation.`;
1247
1274
  name: resource.name,
1248
1275
  description: resource.description ?? 'No description provided',
1249
1276
  mimeType: resource.mimeType,
1250
- behavioralNotes: interactions.map(i => i.analysis).filter(a => a),
1277
+ behavioralNotes: interactions.map((i) => i.analysis).filter((a) => a),
1251
1278
  limitations: [],
1252
1279
  };
1253
1280
  }