foundry_mcp-0.8.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (153)
  1. foundry_mcp/__init__.py +13 -0
  2. foundry_mcp/cli/__init__.py +67 -0
  3. foundry_mcp/cli/__main__.py +9 -0
  4. foundry_mcp/cli/agent.py +96 -0
  5. foundry_mcp/cli/commands/__init__.py +37 -0
  6. foundry_mcp/cli/commands/cache.py +137 -0
  7. foundry_mcp/cli/commands/dashboard.py +148 -0
  8. foundry_mcp/cli/commands/dev.py +446 -0
  9. foundry_mcp/cli/commands/journal.py +377 -0
  10. foundry_mcp/cli/commands/lifecycle.py +274 -0
  11. foundry_mcp/cli/commands/modify.py +824 -0
  12. foundry_mcp/cli/commands/plan.py +640 -0
  13. foundry_mcp/cli/commands/pr.py +393 -0
  14. foundry_mcp/cli/commands/review.py +667 -0
  15. foundry_mcp/cli/commands/session.py +472 -0
  16. foundry_mcp/cli/commands/specs.py +686 -0
  17. foundry_mcp/cli/commands/tasks.py +807 -0
  18. foundry_mcp/cli/commands/testing.py +676 -0
  19. foundry_mcp/cli/commands/validate.py +982 -0
  20. foundry_mcp/cli/config.py +98 -0
  21. foundry_mcp/cli/context.py +298 -0
  22. foundry_mcp/cli/logging.py +212 -0
  23. foundry_mcp/cli/main.py +44 -0
  24. foundry_mcp/cli/output.py +122 -0
  25. foundry_mcp/cli/registry.py +110 -0
  26. foundry_mcp/cli/resilience.py +178 -0
  27. foundry_mcp/cli/transcript.py +217 -0
  28. foundry_mcp/config.py +1454 -0
  29. foundry_mcp/core/__init__.py +144 -0
  30. foundry_mcp/core/ai_consultation.py +1773 -0
  31. foundry_mcp/core/batch_operations.py +1202 -0
  32. foundry_mcp/core/cache.py +195 -0
  33. foundry_mcp/core/capabilities.py +446 -0
  34. foundry_mcp/core/concurrency.py +898 -0
  35. foundry_mcp/core/context.py +540 -0
  36. foundry_mcp/core/discovery.py +1603 -0
  37. foundry_mcp/core/error_collection.py +728 -0
  38. foundry_mcp/core/error_store.py +592 -0
  39. foundry_mcp/core/health.py +749 -0
  40. foundry_mcp/core/intake.py +933 -0
  41. foundry_mcp/core/journal.py +700 -0
  42. foundry_mcp/core/lifecycle.py +412 -0
  43. foundry_mcp/core/llm_config.py +1376 -0
  44. foundry_mcp/core/llm_patterns.py +510 -0
  45. foundry_mcp/core/llm_provider.py +1569 -0
  46. foundry_mcp/core/logging_config.py +374 -0
  47. foundry_mcp/core/metrics_persistence.py +584 -0
  48. foundry_mcp/core/metrics_registry.py +327 -0
  49. foundry_mcp/core/metrics_store.py +641 -0
  50. foundry_mcp/core/modifications.py +224 -0
  51. foundry_mcp/core/naming.py +146 -0
  52. foundry_mcp/core/observability.py +1216 -0
  53. foundry_mcp/core/otel.py +452 -0
  54. foundry_mcp/core/otel_stubs.py +264 -0
  55. foundry_mcp/core/pagination.py +255 -0
  56. foundry_mcp/core/progress.py +387 -0
  57. foundry_mcp/core/prometheus.py +564 -0
  58. foundry_mcp/core/prompts/__init__.py +464 -0
  59. foundry_mcp/core/prompts/fidelity_review.py +691 -0
  60. foundry_mcp/core/prompts/markdown_plan_review.py +515 -0
  61. foundry_mcp/core/prompts/plan_review.py +627 -0
  62. foundry_mcp/core/providers/__init__.py +237 -0
  63. foundry_mcp/core/providers/base.py +515 -0
  64. foundry_mcp/core/providers/claude.py +472 -0
  65. foundry_mcp/core/providers/codex.py +637 -0
  66. foundry_mcp/core/providers/cursor_agent.py +630 -0
  67. foundry_mcp/core/providers/detectors.py +515 -0
  68. foundry_mcp/core/providers/gemini.py +426 -0
  69. foundry_mcp/core/providers/opencode.py +718 -0
  70. foundry_mcp/core/providers/opencode_wrapper.js +308 -0
  71. foundry_mcp/core/providers/package-lock.json +24 -0
  72. foundry_mcp/core/providers/package.json +25 -0
  73. foundry_mcp/core/providers/registry.py +607 -0
  74. foundry_mcp/core/providers/test_provider.py +171 -0
  75. foundry_mcp/core/providers/validation.py +857 -0
  76. foundry_mcp/core/rate_limit.py +427 -0
  77. foundry_mcp/core/research/__init__.py +68 -0
  78. foundry_mcp/core/research/memory.py +528 -0
  79. foundry_mcp/core/research/models.py +1234 -0
  80. foundry_mcp/core/research/providers/__init__.py +40 -0
  81. foundry_mcp/core/research/providers/base.py +242 -0
  82. foundry_mcp/core/research/providers/google.py +507 -0
  83. foundry_mcp/core/research/providers/perplexity.py +442 -0
  84. foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
  85. foundry_mcp/core/research/providers/tavily.py +383 -0
  86. foundry_mcp/core/research/workflows/__init__.py +25 -0
  87. foundry_mcp/core/research/workflows/base.py +298 -0
  88. foundry_mcp/core/research/workflows/chat.py +271 -0
  89. foundry_mcp/core/research/workflows/consensus.py +539 -0
  90. foundry_mcp/core/research/workflows/deep_research.py +4142 -0
  91. foundry_mcp/core/research/workflows/ideate.py +682 -0
  92. foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
  93. foundry_mcp/core/resilience.py +600 -0
  94. foundry_mcp/core/responses.py +1624 -0
  95. foundry_mcp/core/review.py +366 -0
  96. foundry_mcp/core/security.py +438 -0
  97. foundry_mcp/core/spec.py +4119 -0
  98. foundry_mcp/core/task.py +2463 -0
  99. foundry_mcp/core/testing.py +839 -0
  100. foundry_mcp/core/validation.py +2357 -0
  101. foundry_mcp/dashboard/__init__.py +32 -0
  102. foundry_mcp/dashboard/app.py +119 -0
  103. foundry_mcp/dashboard/components/__init__.py +17 -0
  104. foundry_mcp/dashboard/components/cards.py +88 -0
  105. foundry_mcp/dashboard/components/charts.py +177 -0
  106. foundry_mcp/dashboard/components/filters.py +136 -0
  107. foundry_mcp/dashboard/components/tables.py +195 -0
  108. foundry_mcp/dashboard/data/__init__.py +11 -0
  109. foundry_mcp/dashboard/data/stores.py +433 -0
  110. foundry_mcp/dashboard/launcher.py +300 -0
  111. foundry_mcp/dashboard/views/__init__.py +12 -0
  112. foundry_mcp/dashboard/views/errors.py +217 -0
  113. foundry_mcp/dashboard/views/metrics.py +164 -0
  114. foundry_mcp/dashboard/views/overview.py +96 -0
  115. foundry_mcp/dashboard/views/providers.py +83 -0
  116. foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
  117. foundry_mcp/dashboard/views/tool_usage.py +139 -0
  118. foundry_mcp/prompts/__init__.py +9 -0
  119. foundry_mcp/prompts/workflows.py +525 -0
  120. foundry_mcp/resources/__init__.py +9 -0
  121. foundry_mcp/resources/specs.py +591 -0
  122. foundry_mcp/schemas/__init__.py +38 -0
  123. foundry_mcp/schemas/intake-schema.json +89 -0
  124. foundry_mcp/schemas/sdd-spec-schema.json +414 -0
  125. foundry_mcp/server.py +150 -0
  126. foundry_mcp/tools/__init__.py +10 -0
  127. foundry_mcp/tools/unified/__init__.py +92 -0
  128. foundry_mcp/tools/unified/authoring.py +3620 -0
  129. foundry_mcp/tools/unified/context_helpers.py +98 -0
  130. foundry_mcp/tools/unified/documentation_helpers.py +268 -0
  131. foundry_mcp/tools/unified/environment.py +1341 -0
  132. foundry_mcp/tools/unified/error.py +479 -0
  133. foundry_mcp/tools/unified/health.py +225 -0
  134. foundry_mcp/tools/unified/journal.py +841 -0
  135. foundry_mcp/tools/unified/lifecycle.py +640 -0
  136. foundry_mcp/tools/unified/metrics.py +777 -0
  137. foundry_mcp/tools/unified/plan.py +876 -0
  138. foundry_mcp/tools/unified/pr.py +294 -0
  139. foundry_mcp/tools/unified/provider.py +589 -0
  140. foundry_mcp/tools/unified/research.py +1283 -0
  141. foundry_mcp/tools/unified/review.py +1042 -0
  142. foundry_mcp/tools/unified/review_helpers.py +314 -0
  143. foundry_mcp/tools/unified/router.py +102 -0
  144. foundry_mcp/tools/unified/server.py +565 -0
  145. foundry_mcp/tools/unified/spec.py +1283 -0
  146. foundry_mcp/tools/unified/task.py +3846 -0
  147. foundry_mcp/tools/unified/test.py +431 -0
  148. foundry_mcp/tools/unified/verification.py +520 -0
  149. foundry_mcp-0.8.22.dist-info/METADATA +344 -0
  150. foundry_mcp-0.8.22.dist-info/RECORD +153 -0
  151. foundry_mcp-0.8.22.dist-info/WHEEL +4 -0
  152. foundry_mcp-0.8.22.dist-info/entry_points.txt +3 -0
  153. foundry_mcp-0.8.22.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1773 @@
"""
AI Consultation Layer for foundry-mcp.

This module provides a unified interface for AI-assisted operations including
plan review and fidelity checking. It integrates with the provider registry
to support multiple LLM backends while providing caching, timeout handling,
and consistent result structures.

Design Principles:
- Workflow-specific prompt templates (plan_review, fidelity_review)
- Provider-agnostic orchestration via the provider registry
- Filesystem-based caching for consultation results
- Consistent result structures across all workflows
- Graceful degradation when providers are unavailable

Example Usage:
    from foundry_mcp.core.ai_consultation import (
        ConsultationOrchestrator,
        ConsultationRequest,
        ConsultationWorkflow,
    )
    from foundry_mcp.core.providers import ProviderHooks

    orchestrator = ConsultationOrchestrator()

    # Check availability
    if orchestrator.is_available():
        request = ConsultationRequest(
            workflow=ConsultationWorkflow.PLAN_REVIEW,
            prompt_id="spec_review",
            context={"spec_content": "..."},
            provider_id="gemini",
        )
        result = orchestrator.consult(request)
        if result.content:
            print(result.content)
"""

from __future__ import annotations

import asyncio
import concurrent.futures
import hashlib
import json
import logging
import time
from dataclasses import dataclass, field, replace
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union

from foundry_mcp.core.providers import (
    ProviderHooks,
    ProviderRequest,
    ProviderResult,
    ProviderStatus,
    ProviderUnavailableError,
    available_providers,
    check_provider_available,
    resolve_provider,
)
from foundry_mcp.core.llm_config import ProviderSpec

logger = logging.getLogger(__name__)


def _collect_provider_error(
    provider_id: str,
    error: Exception,
    request_context: dict,
) -> None:
    """
    Collect AI provider error data for later introspection.

    Uses lazy import to avoid circular dependencies and only
    collects if error collection is enabled.

    Args:
        provider_id: The provider that raised the error
        error: The exception that was raised
        request_context: Context about the request (workflow, prompt_id, etc.)
    """
    try:
        # Lazy import to avoid circular dependencies
        from foundry_mcp.config import get_config

        config = get_config()
        if not config.error_collection.enabled:
            return

        from foundry_mcp.core.error_collection import get_error_collector

        collector = get_error_collector()
        collector.collect_provider_error(
            provider_id=provider_id,
            error=error,
            request_context=request_context,
        )
    except Exception as collect_error:
        # Never let error collection failures affect consultation execution
        logger.debug(
            f"Error collection failed for provider {provider_id}: {collect_error}"
        )


# =============================================================================
# Workflow Types
# =============================================================================


class ConsultationWorkflow(str, Enum):
    """
    Supported AI consultation workflows.

    Each workflow corresponds to a category of prompt templates and
    determines cache partitioning and result handling.

    Values:
        PLAN_REVIEW: Review and critique SDD specifications
        FIDELITY_REVIEW: Compare implementation against specifications
        MARKDOWN_PLAN_REVIEW: Review markdown plans before spec creation
    """

    PLAN_REVIEW = "plan_review"
    FIDELITY_REVIEW = "fidelity_review"
    MARKDOWN_PLAN_REVIEW = "markdown_plan_review"


# =============================================================================
# Request/Response Dataclasses
# =============================================================================


@dataclass
class ResolvedProvider:
    """
    Resolved provider information from a ProviderSpec.

    Contains the provider ID to use for registry lookup, along with
    model and override settings from the priority configuration.

    Attributes:
        provider_id: Provider ID for registry lookup (e.g., "gemini", "opencode")
        model: Model identifier to use (may include backend routing for CLI)
        overrides: Per-provider setting overrides from config
        spec_str: Original spec string for logging/debugging
    """

    provider_id: str
    model: Optional[str] = None
    overrides: Dict[str, Any] = field(default_factory=dict)
    spec_str: str = ""


@dataclass(frozen=True)
class ConsultationRequest:
    """
    Request payload for AI consultation.

    Encapsulates all parameters needed to execute a consultation workflow,
    including prompt selection, context data, and provider preferences.

    Attributes:
        workflow: The consultation workflow type
        prompt_id: Identifier for the prompt template within the workflow
        context: Structured context data to inject into the prompt
        provider_id: Optional preferred provider (uses first available if None)
        model: Optional model override for the provider
        cache_key: Optional explicit cache key (auto-generated if None)
        timeout: Request timeout in seconds (default: 120)
        temperature: Sampling temperature (default: provider default)
        max_tokens: Maximum output tokens (default: provider default)
        system_prompt_override: Optional system prompt override
    """

    workflow: ConsultationWorkflow
    prompt_id: str
    context: Dict[str, Any] = field(default_factory=dict)
    provider_id: Optional[str] = None
    model: Optional[str] = None
    cache_key: Optional[str] = None
    timeout: float = 120.0
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    system_prompt_override: Optional[str] = None


@dataclass
class ConsultationResult:
    """
    Result of an AI consultation.

    Provides a consistent structure for consultation outcomes across all
    workflows and providers, including metadata for debugging and analytics.

    Attributes:
        workflow: The workflow that produced this result
        content: The generated content (may be empty on failure)
        provider_id: Provider that handled the request
        model_used: Fully-qualified model identifier
        tokens: Token usage if reported by provider
        duration_ms: Total consultation duration in milliseconds
        cache_hit: Whether result was served from cache
        raw_payload: Provider-specific metadata and debug info
        warnings: Non-fatal issues encountered during consultation
        error: Error message if consultation failed
    """

    workflow: ConsultationWorkflow
    content: str
    provider_id: str
    model_used: str
    tokens: Dict[str, int] = field(default_factory=dict)
    duration_ms: float = 0.0
    cache_hit: bool = False
    raw_payload: Dict[str, Any] = field(default_factory=dict)
    warnings: List[str] = field(default_factory=list)
    error: Optional[str] = None

    @property
    def success(self) -> bool:
        """Return True if consultation succeeded (has content, no error)."""
        return bool(self.content) and self.error is None

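# Editor's illustrative sketch, not part of the published wheel: per the
# success property above, a result counts as successful only when it has
# non-empty content and no error. Provider/model names are hypothetical;
# replace() is the dataclasses helper imported at the top of this module.
#
#     ok = ConsultationResult(
#         workflow=ConsultationWorkflow.PLAN_REVIEW,
#         content="Review text",
#         provider_id="gemini",
#         model_used="gemini-2.5-pro",
#     )
#     ok.success                          # True
#     replace(ok, content="").success     # False: empty content
#     replace(ok, error="boom").success   # False: error set
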
@dataclass
class ProviderResponse:
    """
    Response from a single provider in a multi-model consultation.

    Encapsulates the result from one provider when executing parallel
    consultations across multiple models. Used as building blocks for
    ConsensusResult aggregation.

    Attributes:
        provider_id: Identifier of the provider that handled this request
        model_used: Fully-qualified model identifier used for generation
        content: Generated content (empty string on failure)
        success: Whether this provider's request succeeded
        error: Error message if the request failed
        tokens: Total token usage (prompt + completion) if available
        duration_ms: Request duration in milliseconds
        cache_hit: Whether result was served from cache
    """

    provider_id: str
    model_used: str
    content: str
    success: bool
    error: Optional[str] = None
    tokens: Optional[int] = None
    duration_ms: Optional[int] = None
    cache_hit: bool = False

    @classmethod
    def from_result(
        cls,
        result: ConsultationResult,
    ) -> "ProviderResponse":
        """
        Create a ProviderResponse from a ConsultationResult.

        Convenience factory for converting single-provider results to the
        multi-provider response format.

        Args:
            result: ConsultationResult to convert

        Returns:
            ProviderResponse with fields mapped from the result
        """
        total_tokens = sum(result.tokens.values()) if result.tokens else None
        return cls(
            provider_id=result.provider_id,
            model_used=result.model_used,
            content=result.content,
            success=result.success,
            error=result.error,
            tokens=total_tokens,
            duration_ms=int(result.duration_ms) if result.duration_ms else None,
            cache_hit=result.cache_hit,
        )


@dataclass
class AgreementMetadata:
    """
    Metadata about provider agreement in a multi-model consultation.

    Tracks how many providers were consulted, how many succeeded, and how
    many failed. Used to assess consensus quality and reliability.

    Attributes:
        total_providers: Total number of providers that were consulted
        successful_providers: Number of providers that returned successful responses
        failed_providers: Number of providers that failed (timeout, error, etc.)
    """

    total_providers: int
    successful_providers: int
    failed_providers: int

    @property
    def success_rate(self) -> float:
        """Calculate the success rate as a fraction (0.0 - 1.0)."""
        if self.total_providers == 0:
            return 0.0
        return self.successful_providers / self.total_providers

    @property
    def has_consensus(self) -> bool:
        """Return True if at least 2 providers succeeded."""
        return self.successful_providers >= 2

    @classmethod
    def from_responses(
        cls, responses: Sequence["ProviderResponse"]
    ) -> "AgreementMetadata":
        """
        Create AgreementMetadata from a list of provider responses.

        Args:
            responses: Sequence of ProviderResponse objects

        Returns:
            AgreementMetadata with computed counts
        """
        total = len(responses)
        successful = sum(1 for r in responses if r.success)
        failed = total - successful
        return cls(
            total_providers=total,
            successful_providers=successful,
            failed_providers=failed,
        )

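# Editor's illustrative sketch, not part of the published wheel: the agreement
# math above for a hypothetical three-provider run (provider and model names
# invented for the example).
#
#     responses = [
#         ProviderResponse("gemini", "gemini-2.5-pro", "Looks good", success=True),
#         ProviderResponse("codex", "gpt-5-codex", "", success=False, error="timeout"),
#         ProviderResponse("claude", "claude-sonnet", "Approve", success=True),
#     ]
#     meta = AgreementMetadata.from_responses(responses)
#     meta.success_rate   # 2 / 3 = 0.66...
#     meta.has_consensus  # True: at least 2 providers succeeded
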
@dataclass
class ConsensusResult:
    """
    Aggregated result from multi-model consensus consultation.

    Collects responses from multiple providers along with metadata about
    agreement levels and overall success. Used when min_models > 1 in
    workflow configuration.

    Attributes:
        workflow: The consultation workflow that produced this result
        responses: List of individual provider responses
        agreement: Metadata about provider agreement and success rates
        duration_ms: Total consultation duration in milliseconds
        warnings: Non-fatal issues encountered during consultation

    Properties:
        success: True if at least one provider succeeded
        primary_content: Content from the first successful response (for compatibility)
    """

    workflow: ConsultationWorkflow
    responses: List[ProviderResponse] = field(default_factory=list)
    agreement: Optional[AgreementMetadata] = None
    duration_ms: float = 0.0
    warnings: List[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Auto-compute agreement metadata if not provided."""
        if self.agreement is None and self.responses:
            self.agreement = AgreementMetadata.from_responses(self.responses)

    @property
    def success(self) -> bool:
        """Return True if at least one provider returned a successful response."""
        return any(r.success for r in self.responses)

    @property
    def primary_content(self) -> str:
        """
        Return content from the first successful response.

        For backward compatibility with code expecting a single response.
        Returns empty string if no successful responses.
        """
        for response in self.responses:
            if response.success and response.content:
                return response.content
        return ""

    @property
    def successful_responses(self) -> List[ProviderResponse]:
        """Return list of successful responses only."""
        return [r for r in self.responses if r.success]

    @property
    def failed_responses(self) -> List[ProviderResponse]:
        """Return list of failed responses only."""
        return [r for r in self.responses if not r.success]


# Type alias for backward-compatible result handling
ConsultationOutcome = Union[ConsultationResult, ConsensusResult]
"""
Type alias for consultation results supporting both single and multi-model modes.

When min_models == 1 (default): Returns ConsultationResult (single provider)
When min_models > 1: Returns ConsensusResult (multiple providers with agreement)

Use isinstance() to differentiate:
    if isinstance(outcome, ConsensusResult):
        # Handle multi-model result with agreement metadata
    else:
        # Handle single-model ConsultationResult
"""

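# Editor's illustrative sketch, not part of the published wheel: one way a
# caller can branch on a ConsultationOutcome, as the alias docstring suggests.
#
#     outcome = orchestrator.consult(request)
#     if isinstance(outcome, ConsensusResult):
#         text = outcome.primary_content
#         rate = outcome.agreement.success_rate if outcome.agreement else 0.0
#     else:  # ConsultationResult
#         text = outcome.content if outcome.success else ""
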
# =============================================================================
# Cache Implementation
# =============================================================================


class ResultCache:
    """
    Filesystem-based cache for consultation results.

    Provides persistent caching of AI consultation results to reduce
    redundant API calls and improve response times for repeated queries.

    Cache Structure:
        .cache/foundry-mcp/consultations/{workflow}/{key}.json

    Each cached entry contains:
    - content: The consultation result
    - provider_id: Provider that generated the result
    - model_used: Model identifier
    - tokens: Token usage
    - timestamp: Cache entry creation time
    - ttl: Time-to-live in seconds

    Attributes:
        base_dir: Root directory for cache storage
        default_ttl: Default time-to-live in seconds (default: 3600 = 1 hour)
    """

    def __init__(
        self,
        base_dir: Optional[Path] = None,
        default_ttl: int = 3600,
    ):
        """
        Initialize the result cache.

        Args:
            base_dir: Root directory for cache (default: .cache/foundry-mcp/consultations)
            default_ttl: Default TTL in seconds (default: 3600)
        """
        if base_dir is None:
            base_dir = Path.cwd() / ".cache" / "foundry-mcp" / "consultations"
        self.base_dir = base_dir
        self.default_ttl = default_ttl

    def _get_cache_path(self, workflow: ConsultationWorkflow, key: str) -> Path:
        """Return the cache file path for a workflow and key."""
        # Sanitize key to be filesystem-safe
        safe_key = "".join(c if c.isalnum() or c in "-_" else "_" for c in key)
        return self.base_dir / workflow.value / f"{safe_key}.json"

    def get(
        self,
        workflow: ConsultationWorkflow,
        key: str,
    ) -> Optional[Dict[str, Any]]:
        """
        Retrieve a cached result.

        Args:
            workflow: The consultation workflow
            key: The cache key

        Returns:
            Cached data dict if found and not expired, None otherwise
        """
        cache_path = self._get_cache_path(workflow, key)
        if not cache_path.exists():
            return None

        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Check TTL
            timestamp = data.get("timestamp", 0)
            ttl = data.get("ttl", self.default_ttl)
            if time.time() - timestamp > ttl:
                # Expired - remove file
                cache_path.unlink(missing_ok=True)
                return None

            return data
        except (json.JSONDecodeError, OSError) as exc:
            logger.warning("Failed to read cache entry %s: %s", cache_path, exc)
            return None

    def set(
        self,
        workflow: ConsultationWorkflow,
        key: str,
        result: ConsultationResult,
        ttl: Optional[int] = None,
    ) -> None:
        """
        Store a consultation result in the cache.

        Args:
            workflow: The consultation workflow
            key: The cache key
            result: The consultation result to cache
            ttl: Time-to-live in seconds (default: default_ttl)
        """
        cache_path = self._get_cache_path(workflow, key)
        cache_path.parent.mkdir(parents=True, exist_ok=True)

        data = {
            "content": result.content,
            "provider_id": result.provider_id,
            "model_used": result.model_used,
            "tokens": result.tokens,
            "timestamp": time.time(),
            "ttl": ttl if ttl is not None else self.default_ttl,
        }

        try:
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
        except OSError as exc:
            logger.warning("Failed to write cache entry %s: %s", cache_path, exc)

    def invalidate(
        self,
        workflow: Optional[ConsultationWorkflow] = None,
        key: Optional[str] = None,
    ) -> int:
        """
        Invalidate cache entries.

        Args:
            workflow: If provided, only invalidate entries for this workflow
            key: If provided (with workflow), only invalidate this specific entry

        Returns:
            Number of entries invalidated
        """
        count = 0

        if workflow is not None and key is not None:
            # Invalidate specific entry
            cache_path = self._get_cache_path(workflow, key)
            if cache_path.exists():
                cache_path.unlink()
                count = 1
        elif workflow is not None:
            # Invalidate all entries for workflow
            workflow_dir = self.base_dir / workflow.value
            if workflow_dir.exists():
                for cache_file in workflow_dir.glob("*.json"):
                    cache_file.unlink()
                    count += 1
        else:
            # Invalidate all entries
            for workflow_enum in ConsultationWorkflow:
                workflow_dir = self.base_dir / workflow_enum.value
                if workflow_dir.exists():
                    for cache_file in workflow_dir.glob("*.json"):
                        cache_file.unlink()
                        count += 1

        return count

    def stats(self) -> Dict[str, Any]:
        """
        Return cache statistics.

        Returns:
            Dict with entry counts per workflow and total size
        """
        stats: Dict[str, Any] = {
            "total_entries": 0,
            "total_size_bytes": 0,
            "by_workflow": {},
        }

        for workflow in ConsultationWorkflow:
            workflow_dir = self.base_dir / workflow.value
            if workflow_dir.exists():
                entries = list(workflow_dir.glob("*.json"))
                size = sum(f.stat().st_size for f in entries if f.exists())
                stats["by_workflow"][workflow.value] = {
                    "entries": len(entries),
                    "size_bytes": size,
                }
                stats["total_entries"] += len(entries)
                stats["total_size_bytes"] += size
            else:
                stats["by_workflow"][workflow.value] = {
                    "entries": 0,
                    "size_bytes": 0,
                }

        return stats

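# Editor's illustrative sketch, not part of the published wheel: exercising
# the cache above. The entry layout and ConsultationResult fields come from
# this file; the base directory and model name are hypothetical.
#
#     cache = ResultCache(base_dir=Path("/tmp/consult-cache"), default_ttl=60)
#     cache.set(
#         ConsultationWorkflow.PLAN_REVIEW,
#         "demo-key",
#         ConsultationResult(
#             workflow=ConsultationWorkflow.PLAN_REVIEW,
#             content="...",
#             provider_id="gemini",
#             model_used="gemini-2.5-pro",
#         ),
#     )
#     cache.get(ConsultationWorkflow.PLAN_REVIEW, "demo-key")  # dict until TTL lapses
#     cache.invalidate(ConsultationWorkflow.PLAN_REVIEW)       # returns count removed
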
# =============================================================================
# Consultation Orchestrator
# =============================================================================


class ConsultationOrchestrator:
    """
    Central orchestrator for AI consultation workflows.

    Coordinates between prompt templates, the provider registry, and
    the result cache to execute consultation requests. Handles provider
    selection, timeout management, and error handling.

    Attributes:
        cache: ResultCache instance for caching results
        preferred_providers: Ordered list of preferred provider IDs
        default_timeout: Default timeout in seconds

    Example:
        orchestrator = ConsultationOrchestrator()

        if orchestrator.is_available():
            request = ConsultationRequest(
                workflow=ConsultationWorkflow.PLAN_REVIEW,
                prompt_id="spec_review",
                context={"spec_content": "..."},
            )
            result = orchestrator.consult(request)
    """

    def __init__(
        self,
        cache: Optional[ResultCache] = None,
        default_timeout: Optional[float] = None,
        config: Optional["ConsultationConfig"] = None,
    ):
        """
        Initialize the consultation orchestrator.

        Args:
            cache: ResultCache instance (creates default if None)
            default_timeout: Default timeout in seconds (uses config if None)
            config: ConsultationConfig instance (uses global config if None)
        """
        # Lazy import to avoid circular dependency
        from foundry_mcp.core.llm_config import (
            ConsultationConfig,
            get_consultation_config,
        )

        self._config: ConsultationConfig = config or get_consultation_config()
        self.cache = cache or ResultCache(default_ttl=self._config.cache_ttl)
        self.default_timeout = (
            default_timeout
            if default_timeout is not None
            else self._config.default_timeout
        )

        # Parse priority list from config into ProviderSpec objects
        # Priority: 1) config.priority specs
        self._priority_specs: List[ProviderSpec] = []
        if self._config.priority:
            for spec_str in self._config.priority:
                try:
                    self._priority_specs.append(ProviderSpec.parse(spec_str))
                except ValueError as e:
                    logger.warning(
                        f"Invalid provider spec in priority list: {spec_str}: {e}"
                    )

    def is_available(self, provider_id: Optional[str] = None) -> bool:
        """
        Check if consultation services are available.

        Args:
            provider_id: Check specific provider, or any available if None

        Returns:
            True if at least one provider is available
        """
        if provider_id:
            return check_provider_available(provider_id)

        # Check priority providers first
        for spec in self._priority_specs:
            if check_provider_available(spec.provider):
                return True

        # Fall back to any available provider
        return len(available_providers()) > 0

    def get_available_providers(self) -> List[str]:
        """
        Return list of available provider IDs.

        Returns:
            List of available provider IDs
        """
        return sorted(available_providers())

    def _select_provider(self, request: ConsultationRequest) -> str:
        """
        Select the provider to use for a request.

        Args:
            request: The consultation request

        Returns:
            Provider ID to use

        Raises:
            ProviderUnavailableError: If no providers are available
        """
        # Explicit provider requested
        if request.provider_id:
            if check_provider_available(request.provider_id):
                return request.provider_id
            raise ProviderUnavailableError(
                f"Requested provider '{request.provider_id}' is not available",
                provider=request.provider_id,
            )

        # Try priority providers
        for spec in self._priority_specs:
            if check_provider_available(spec.provider):
                return spec.provider

        # Fall back to first available
        providers = available_providers()
        if providers:
            return providers[0]

        raise ProviderUnavailableError(
            "No AI providers are currently available",
            provider=None,
        )

    def _generate_cache_key(self, request: ConsultationRequest) -> str:
        """
        Generate a cache key for a consultation request.

        Args:
            request: The consultation request

        Returns:
            Cache key string
        """
        if request.cache_key:
            return request.cache_key

        # Build a deterministic key from request parameters
        key_parts = [
            request.prompt_id,
            json.dumps(request.context, sort_keys=True),
            request.model or "default",
        ]
        key_string = "|".join(key_parts)
        return hashlib.sha256(key_string.encode()).hexdigest()[:32]

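    # Editor's illustrative sketch, not part of the published wheel: the key
    # derivation above is deterministic, so requests differing only in dict
    # insertion order hash identically (json.dumps uses sort_keys=True).
    #
    #     a = ConsultationRequest(
    #         workflow=ConsultationWorkflow.PLAN_REVIEW,
    #         prompt_id="spec_review",
    #         context={"x": 1, "y": 2},
    #     )
    #     b = replace(a, context={"y": 2, "x": 1})
    #     orchestrator._generate_cache_key(a) == orchestrator._generate_cache_key(b)  # True
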
    def _build_prompt(self, request: ConsultationRequest) -> str:
        """
        Build the full prompt from template and context.

        This method delegates to workflow-specific prompt builders.

        Args:
            request: The consultation request

        Returns:
            The rendered prompt string
        """
        # Import prompt builders lazily to avoid circular imports
        from foundry_mcp.core.prompts import get_prompt_builder

        builder = get_prompt_builder(request.workflow)
        return builder.build(request.prompt_id, request.context)

    def _resolve_spec_to_provider(
        self, spec: ProviderSpec
    ) -> Optional[ResolvedProvider]:
        """
        Resolve a ProviderSpec to a ResolvedProvider if available.

        For CLI providers, checks registry availability.
        For API providers, logs a warning (not yet implemented).

        Args:
            spec: The provider specification to resolve

        Returns:
            ResolvedProvider if available, None otherwise
        """
        if spec.type == "api":
            # API providers not yet integrated into registry
            # TODO: Register API providers (openai, anthropic, local) in registry
            logger.debug(
                f"API provider spec '{spec}' skipped - API providers not yet "
                "integrated into consultation registry"
            )
            return None

        # CLI provider - check registry availability
        if not check_provider_available(spec.provider):
            return None

        # Build model string - include backend routing if specified
        model = None
        if spec.backend and spec.model:
            # Backend routing: "openai/gpt-5.1-codex"
            model = f"{spec.backend}/{spec.model}"
        elif spec.model:
            model = spec.model

        # Get overrides from config
        overrides = self._config.get_override(str(spec))

        return ResolvedProvider(
            provider_id=spec.provider,
            model=model,
            overrides=overrides,
            spec_str=str(spec),
        )

    def _get_providers_to_try(
        self, request: ConsultationRequest
    ) -> List[ResolvedProvider]:
        """
        Get ordered list of providers to try for a request.

        Provider selection priority:
        1. Explicit provider_id in request (wraps to ResolvedProvider)
        2. Priority specs from config (parsed ProviderSpec list)
        3. Legacy preferred_providers (for backwards compatibility)
        4. Available providers from registry (fallback)

        Args:
            request: The consultation request

        Returns:
            Ordered list of ResolvedProvider instances to try
        """
        result: List[ResolvedProvider] = []
        seen_providers: set = set()

        # 1. Explicit provider requested - only try that one
        if request.provider_id:
            return [
                ResolvedProvider(
                    provider_id=request.provider_id,
                    model=request.model,
                    spec_str=f"explicit:{request.provider_id}",
                )
            ]

        # 2. Priority specs from config
        for spec in self._priority_specs:
            resolved = self._resolve_spec_to_provider(spec)
            if resolved and resolved.provider_id not in seen_providers:
                result.append(resolved)
                seen_providers.add(resolved.provider_id)

        # 3. Fallback to available providers from registry
        for prov_id in available_providers():
            if prov_id not in seen_providers:
                result.append(
                    ResolvedProvider(
                        provider_id=prov_id,
                        spec_str=f"fallback:{prov_id}",
                    )
                )
                seen_providers.add(prov_id)

        return result

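    # Editor's illustrative note, not part of the published wheel: with a
    # hypothetical config priority of ["gemini", "codex"] and "claude" also
    # registered, the ordering above yields gemini and codex first (priority
    # specs), then claude (registry fallback); setting request.provider_id
    # short-circuits to that single provider only.
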
    def _is_retryable_error(self, error: Exception) -> bool:
        """
        Determine if an error warrants a retry.

        Retryable errors include timeouts and rate limits.
        Non-retryable errors include authentication failures and invalid prompts.

        Args:
            error: The exception that occurred

        Returns:
            True if the error is transient and retry may succeed
        """
        error_str = str(error).lower()

        # Timeout errors are retryable
        if "timeout" in error_str or "timed out" in error_str:
            return True

        # Rate limit errors are retryable
        if "rate limit" in error_str or "rate_limit" in error_str or "429" in error_str:
            return True

        # Connection errors may be transient
        if "connection" in error_str and (
            "reset" in error_str or "refused" in error_str
        ):
            return True

        # Server errors (5xx) are potentially retryable
        if any(code in error_str for code in ["500", "502", "503", "504"]):
            return True

        return False

    def _should_try_next_provider(self, error: Exception) -> bool:
        """
        Determine if we should try the next provider after an error.

        Args:
            error: The exception that occurred

        Returns:
            True if fallback to next provider is appropriate
        """
        # Don't fallback if disabled
        if not self._config.fallback_enabled:
            return False

        error_str = str(error).lower()

        # Don't fallback for prompt-level errors (these will fail with any provider)
        if "prompt" in error_str and (
            "too long" in error_str or "invalid" in error_str
        ):
            return False

        # Authentication errors are usually provider-specific (one provider's
        # credentials can be missing while another's work), so allow fallback
        if "api key" in error_str or "authentication" in error_str:
            return True

        # Fallback for most other errors
        return True

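    # Editor's illustrative sketch, not part of the published wheel: how the
    # string heuristics above classify a few representative failures.
    #
    #     orch._is_retryable_error(Exception("request timed out"))    # True: retry
    #     orch._is_retryable_error(Exception("HTTP 429 rate limit"))  # True: retry
    #     orch._is_retryable_error(Exception("invalid api key"))      # False: no retry,
    #     # though _should_try_next_provider() may still move to another provider
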
949
+ def _try_provider_with_retries(
950
+ self,
951
+ request: ConsultationRequest,
952
+ prompt: str,
953
+ resolved: ResolvedProvider,
954
+ warnings: List[str],
955
+ ) -> Optional[ProviderResult]:
956
+ """
957
+ Try a single provider with retry logic.
958
+
959
+ Args:
960
+ request: The consultation request
961
+ prompt: The rendered prompt
962
+ resolved: Resolved provider information (includes model and overrides)
963
+ warnings: List to append warnings to
964
+
965
+ Returns:
966
+ ProviderResult on success, None on failure
967
+ """
968
+ hooks = ProviderHooks()
969
+ last_error: Optional[Exception] = None
970
+ provider_id = resolved.provider_id
971
+
972
+ max_attempts = self._config.max_retries + 1 # +1 for initial attempt
973
+
974
+ # Determine model: request.model > resolved.model > None
975
+ effective_model = request.model or resolved.model
976
+
977
+ # Apply overrides from config
978
+ effective_timeout = (
979
+ resolved.overrides.get("timeout", request.timeout) or self.default_timeout
980
+ )
981
+ effective_temperature = resolved.overrides.get(
982
+ "temperature", request.temperature
983
+ )
984
+ effective_max_tokens = resolved.overrides.get("max_tokens", request.max_tokens)
985
+
986
+ for attempt in range(max_attempts):
987
+ try:
988
+ provider = resolve_provider(
989
+ provider_id, hooks=hooks, model=effective_model
990
+ )
991
+ provider_request = ProviderRequest(
992
+ prompt=prompt,
993
+ system_prompt=request.system_prompt_override,
994
+ model=effective_model,
995
+ timeout=effective_timeout,
996
+ temperature=effective_temperature,
997
+ max_tokens=effective_max_tokens,
998
+ metadata={
999
+ "workflow": request.workflow.value,
1000
+ "prompt_id": request.prompt_id,
1001
+ },
1002
+ )
1003
+ result = provider.generate(provider_request)
1004
+
1005
+ # Success
1006
+ if result.status == ProviderStatus.SUCCESS:
1007
+ if attempt > 0:
1008
+ warnings.append(
1009
+ f"Provider {provider_id} succeeded on attempt {attempt + 1}"
1010
+ )
1011
+ return result
1012
+
1013
+ # Non-success status from provider
1014
+ error_msg = (
1015
+ f"Provider {provider_id} returned status: {result.status.value}"
1016
+ )
1017
+ if result.stderr:
1018
+ error_msg += f" - {result.stderr}"
1019
+ last_error = Exception(error_msg)
1020
+
1021
+ # Check if this error type is retryable
1022
+ if not self._is_retryable_error(last_error):
1023
+ break
1024
+
1025
+ except ProviderUnavailableError as exc:
1026
+ last_error = exc
1027
+ # Provider unavailable - don't retry, move to fallback
1028
+ break
1029
+
1030
+ except Exception as exc: # noqa: BLE001
1031
+ last_error = exc
1032
+ if not self._is_retryable_error(exc):
1033
+ break
1034
+
1035
+ # Retry delay
1036
+ if attempt < max_attempts - 1:
1037
+ warnings.append(
1038
+ f"Provider {provider_id} attempt {attempt + 1} failed: {last_error}, "
1039
+ f"retrying in {self._config.retry_delay}s..."
1040
+ )
1041
+ time.sleep(self._config.retry_delay)
1042
+
1043
+ # All retries exhausted - collect error for introspection
1044
+ if last_error:
1045
+ warnings.append(
1046
+ f"Provider {provider_id} failed after {max_attempts} attempt(s): {last_error}"
1047
+ )
1048
+ # Collect provider error for future introspection
1049
+ _collect_provider_error(
1050
+ provider_id=provider_id,
1051
+ error=last_error,
1052
+ request_context={
1053
+ "workflow": request.workflow.value,
1054
+ "prompt_id": request.prompt_id,
1055
+ "model": effective_model,
1056
+ "attempts": max_attempts,
1057
+ },
1058
+ )
1059
+ return None
1060
+
1061
+ async def _try_provider_with_retries_async(
1062
+ self,
1063
+ request: ConsultationRequest,
1064
+ prompt: str,
1065
+ resolved: ResolvedProvider,
1066
+ warnings: List[str],
1067
+ ) -> Optional[ProviderResult]:
1068
+ """
1069
+ Async version of provider execution with retry logic.
1070
+
1071
+ Uses asyncio.sleep() for non-blocking retry delays and runs the
1072
+ synchronous provider.generate() in a thread pool executor to avoid
1073
+ blocking the event loop.
1074
+
1075
+ Args:
1076
+ request: The consultation request
1077
+ prompt: The rendered prompt
1078
+ resolved: Resolved provider information (includes model and overrides)
1079
+ warnings: List to append warnings to
1080
+
1081
+ Returns:
1082
+ ProviderResult on success, None on failure
1083
+ """
1084
+ hooks = ProviderHooks()
1085
+ last_error: Optional[Exception] = None
1086
+ provider_id = resolved.provider_id
1087
+
1088
+ max_attempts = self._config.max_retries + 1 # +1 for initial attempt
1089
+
1090
+ # Determine model: request.model > resolved.model > None
1091
+ effective_model = request.model or resolved.model
1092
+
1093
+ # Apply overrides from config
1094
+ effective_timeout = (
1095
+ resolved.overrides.get("timeout", request.timeout) or self.default_timeout
1096
+ )
1097
+ effective_temperature = resolved.overrides.get(
1098
+ "temperature", request.temperature
1099
+ )
1100
+ effective_max_tokens = resolved.overrides.get("max_tokens", request.max_tokens)
1101
+
1102
+ for attempt in range(max_attempts):
1103
+ try:
1104
+ provider = resolve_provider(
1105
+ provider_id, hooks=hooks, model=effective_model
1106
+ )
1107
+ provider_request = ProviderRequest(
1108
+ prompt=prompt,
1109
+ system_prompt=request.system_prompt_override,
1110
+ model=effective_model,
1111
+ timeout=effective_timeout,
1112
+ temperature=effective_temperature,
1113
+ max_tokens=effective_max_tokens,
1114
+ metadata={
1115
+ "workflow": request.workflow.value,
1116
+ "prompt_id": request.prompt_id,
1117
+ },
1118
+ )
1119
+
1120
+ # Run sync provider.generate() in executor to avoid blocking
1121
+ loop = asyncio.get_running_loop()
1122
+ result = await loop.run_in_executor(
1123
+ None, provider.generate, provider_request
1124
+ )
1125
+
1126
+ # Success
1127
+ if result.status == ProviderStatus.SUCCESS:
1128
+ if attempt > 0:
1129
+ warnings.append(
1130
+ f"Provider {provider_id} succeeded on attempt {attempt + 1}"
1131
+ )
1132
+ return result
1133
+
1134
+ # Non-success status from provider
1135
+ error_msg = (
1136
+ f"Provider {provider_id} returned status: {result.status.value}"
1137
+ )
1138
+ if result.stderr:
1139
+ error_msg += f" - {result.stderr}"
1140
+ last_error = Exception(error_msg)
1141
+
1142
+ # Check if this error type is retryable
1143
+ if not self._is_retryable_error(last_error):
1144
+ break
1145
+
1146
+ except ProviderUnavailableError as exc:
1147
+ last_error = exc
1148
+ # Provider unavailable - don't retry, move to fallback
1149
+ break
1150
+
1151
+ except Exception as exc: # noqa: BLE001
1152
+ last_error = exc
1153
+ if not self._is_retryable_error(exc):
1154
+ break
1155
+
1156
+ # Async retry delay (non-blocking)
1157
+ if attempt < max_attempts - 1:
1158
+ warnings.append(
1159
+ f"Provider {provider_id} attempt {attempt + 1} failed: {last_error}, "
1160
+ f"retrying in {self._config.retry_delay}s..."
1161
+ )
1162
+ await asyncio.sleep(self._config.retry_delay)
1163
+
1164
+ # All retries exhausted - collect error for introspection
1165
+ if last_error:
1166
+ warnings.append(
1167
+ f"Provider {provider_id} failed after {max_attempts} attempt(s): {last_error}"
1168
+ )
1169
+ # Collect provider error for future introspection
1170
+ _collect_provider_error(
1171
+ provider_id=provider_id,
1172
+ error=last_error,
1173
+ request_context={
1174
+ "workflow": request.workflow.value,
1175
+ "prompt_id": request.prompt_id,
1176
+ "model": effective_model,
1177
+ "attempts": max_attempts,
1178
+ },
1179
+ )
1180
+ return None
1181
+
1182
+ async def _execute_single_provider_async(
1183
+ self,
1184
+ request: ConsultationRequest,
1185
+ prompt: str,
1186
+ resolved: ResolvedProvider,
1187
+ ) -> ProviderResponse:
1188
+ """
1189
+ Execute a single provider asynchronously and return a ProviderResponse.
1190
+
1191
+ Wraps _try_provider_with_retries_async and converts the result to
1192
+ a ProviderResponse for use in multi-model consensus workflows.
1193
+
1194
+ Args:
1195
+ request: The consultation request
1196
+ prompt: The rendered prompt
1197
+ resolved: Resolved provider information
1198
+
1199
+ Returns:
1200
+ ProviderResponse with success/failure status and content
1201
+ """
1202
+ warnings: List[str] = []
1203
+ start_time = time.time()
1204
+
1205
+ result = await self._try_provider_with_retries_async(
1206
+ request, prompt, resolved, warnings
1207
+ )
1208
+
1209
+ duration_ms = int((time.time() - start_time) * 1000)
1210
+
1211
+ if result is None:
1212
+ # Provider failed after all retries
1213
+ error_msg = (
1214
+ warnings[-1] if warnings else f"Provider {resolved.provider_id} failed"
1215
+ )
1216
+ return ProviderResponse(
1217
+ provider_id=resolved.provider_id,
1218
+ model_used=resolved.model or "unknown",
1219
+ content="",
1220
+ success=False,
1221
+ error=error_msg,
1222
+ duration_ms=duration_ms,
1223
+ cache_hit=False,
1224
+ )
1225
+
1226
+ # Success - convert ProviderResult to ProviderResponse
1227
+ total_tokens = None
1228
+ if result.tokens:
1229
+ total_tokens = result.tokens.total_tokens
1230
+
1231
+ return ProviderResponse(
1232
+ provider_id=result.provider_id,
1233
+ model_used=result.model_used,
1234
+ content=result.content,
1235
+ success=True,
1236
+ error=None,
1237
+ tokens=total_tokens,
1238
+ duration_ms=duration_ms,
1239
+ cache_hit=False,
1240
+ )
1241
+
1242
+ async def _execute_parallel_providers_async(
1243
+ self,
1244
+ request: ConsultationRequest,
1245
+ prompt: str,
1246
+ providers: List[ResolvedProvider],
1247
+ min_models: int = 1,
1248
+ ) -> ConsensusResult:
1249
+ """
1250
+ Execute multiple providers in parallel and return a ConsensusResult.
1251
+
1252
+ Uses asyncio.gather to run all provider executions concurrently,
1253
+ then aggregates the results into a ConsensusResult with agreement
1254
+ metadata.
1255
+
1256
+ Args:
1257
+ request: The consultation request
1258
+ prompt: The rendered prompt
1259
+ providers: List of resolved providers to execute
1260
+ min_models: Minimum successful models required (for warnings)
1261
+
1262
+ Returns:
1263
+ ConsensusResult with all provider responses and agreement metadata
1264
+ """
1265
+ start_time = time.time()
1266
+ warnings: List[str] = []
1267
+
1268
+ if not providers:
1269
+ return ConsensusResult(
1270
+ workflow=request.workflow,
1271
+ responses=[],
1272
+ duration_ms=0.0,
1273
+ warnings=["No providers available for parallel execution"],
1274
+ )
1275
+
1276
+ # Create tasks for all providers
1277
+ tasks = [
1278
+ self._execute_single_provider_async(request, prompt, resolved)
1279
+ for resolved in providers
1280
+ ]
1281
+
1282
+ # Execute all providers in parallel
1283
+ responses: List[ProviderResponse] = await asyncio.gather(*tasks)
1284
+
1285
+ duration_ms = (time.time() - start_time) * 1000
1286
+
1287
+ # Check if we met the minimum model requirement
1288
+ successful_count = sum(1 for r in responses if r.success)
1289
+ if successful_count < min_models:
1290
+ warnings.append(
1291
+ f"Only {successful_count} of {min_models} required models succeeded"
1292
+ )
1293
+
1294
+ # Log failed providers
1295
+ for response in responses:
1296
+ if not response.success:
1297
+ warnings.append(
1298
+ f"Provider {response.provider_id} failed: {response.error}"
1299
+ )
1300
+
1301
+ return ConsensusResult(
1302
+ workflow=request.workflow,
1303
+ responses=responses,
1304
+ duration_ms=duration_ms,
1305
+ warnings=warnings,
1306
+ )
1307
+
1308
+ async def _execute_parallel_providers_with_fallback_async(
1309
+ self,
1310
+ request: ConsultationRequest,
1311
+ prompt: str,
1312
+ all_providers: List[ResolvedProvider],
1313
+ min_models: int = 1,
1314
+ ) -> ConsensusResult:
1315
+ """
1316
+ Execute providers in parallel with sequential fallback on failures.
1317
+
1318
+ Uses a two-phase approach:
1319
+ 1. Execute first min_models providers in parallel
1320
+ 2. If any fail and fallback_enabled, try remaining providers sequentially
1321
+ until min_models succeed or providers exhausted
1322
+
1323
+ Args:
1324
+ request: The consultation request
1325
+ prompt: The rendered prompt
1326
+ all_providers: Complete priority list of providers to try
1327
+ min_models: Minimum successful models required
1328
+
1329
+ Returns:
1330
+ ConsensusResult with all attempted provider responses
1331
+ """
1332
+ start_time = time.time()
1333
+ warnings: List[str] = []
1334
+ all_responses: List[ProviderResponse] = []
1335
+
1336
+ if not all_providers:
1337
+ return ConsensusResult(
1338
+ workflow=request.workflow,
1339
+ responses=[],
1340
+ duration_ms=0.0,
1341
+ warnings=["No providers available for parallel execution"],
1342
+ )
1343
+
1344
+ # Phase 1: Initial parallel execution of first min_models providers
1345
+ initial_providers = all_providers[:min_models]
1346
+ logger.debug(
1347
+ f"Phase 1: Executing {len(initial_providers)} providers in parallel"
1348
+ )
1349
+
1350
+ tasks = [
1351
+ self._execute_single_provider_async(request, prompt, resolved)
1352
+ for resolved in initial_providers
1353
+ ]
1354
+ initial_responses: List[ProviderResponse] = await asyncio.gather(*tasks)
1355
+ all_responses.extend(initial_responses)
1356
+
1357
+ # Count successes and log failures
1358
+ # A response is only truly successful if it has non-empty content
1359
+ successful_count = sum(
1360
+ 1 for r in initial_responses if r.success and r.content.strip()
1361
+ )
1362
+ for response in initial_responses:
1363
+ if not response.success:
1364
+ warnings.append(
1365
+ f"Provider {response.provider_id} failed: {response.error}"
1366
+ )
1367
+ elif not response.content.strip():
1368
+ warnings.append(
1369
+ f"Provider {response.provider_id} returned empty content"
1370
+ )
1371
+
1372
+ # Phase 2: Sequential fallback if needed and enabled
1373
+ if successful_count < min_models and self._config.fallback_enabled:
1374
+ needed = min_models - successful_count
1375
+ remaining_providers = all_providers[min_models:]
1376
+
1377
+ if remaining_providers:
1378
+ warnings.append(
1379
+ f"Initial parallel execution yielded {successful_count}/{min_models} "
1380
+ f"successes, attempting fallback for {needed} more"
1381
+ )
1382
+
1383
+ for fallback_provider in remaining_providers:
1384
+ # Skip if already tried (shouldn't happen, but safety check)
1385
+ if any(
1386
+ r.provider_id == fallback_provider.provider_id
1387
+ for r in all_responses
1388
+ ):
1389
+ continue
1390
+
1391
+ # Check if provider is available
1392
+ if not check_provider_available(fallback_provider.provider_id):
1393
+ warnings.append(
1394
+ f"Fallback provider {fallback_provider.provider_id} "
1395
+ "is not available, skipping"
1396
+ )
1397
+ continue
1398
+
1399
+ logger.debug(
1400
+ f"Fallback attempt: trying provider {fallback_provider.provider_id}"
1401
+ )
1402
+
1403
+ response = await self._execute_single_provider_async(
1404
+ request, prompt, fallback_provider
1405
+ )
1406
+ all_responses.append(response)
1407
+
1408
+ if response.success and response.content.strip():
1409
+ successful_count += 1
1410
+ warnings.append(
1411
+ f"Fallback provider {fallback_provider.provider_id} succeeded"
1412
+ )
1413
+ if successful_count >= min_models:
1414
+ logger.debug(
1415
+ f"Reached {min_models} successful providers via fallback"
1416
+ )
1417
+ break
1418
+ elif response.success and not response.content.strip():
1419
+ warnings.append(
1420
+ f"Fallback provider {fallback_provider.provider_id} "
1421
+ "returned empty content"
1422
+ )
1423
+ else:
1424
+ warnings.append(
1425
+ f"Fallback provider {fallback_provider.provider_id} "
1426
+ f"failed: {response.error}"
1427
+ )
1428
+
1429
+ duration_ms = (time.time() - start_time) * 1000
1430
+
1431
+ # Final warning if still insufficient
1432
+ if successful_count < min_models:
1433
+ warnings.append(
1434
+ f"Only {successful_count} of {min_models} required models succeeded "
1435
+ f"after trying {len(all_responses)} provider(s)"
1436
+ )
1437
+
1438
+ return ConsensusResult(
1439
+ workflow=request.workflow,
1440
+ responses=all_responses,
1441
+ duration_ms=duration_ms,
1442
+ warnings=warnings,
1443
+ )
1444
+
1445
+    def _execute_with_fallback(
+        self,
+        request: ConsultationRequest,
+        prompt: str,
+        providers: List[ResolvedProvider],
+        warnings: List[str],
+    ) -> tuple[Optional[ProviderResult], str, Optional[str]]:
+        """
+        Execute request with fallback across providers.
+
+        Args:
+            request: The consultation request
+            prompt: The rendered prompt
+            providers: Ordered list of ResolvedProvider instances to try
+            warnings: List to append warnings to
+
+        Returns:
+            Tuple of (result, provider_id, error_message)
+        """
+        if not providers:
+            return None, "none", "No AI providers are currently available"
+
+        last_error: Optional[str] = None
+        last_provider_id = providers[0].provider_id
+
+        for i, resolved in enumerate(providers):
+            provider_id = resolved.provider_id
+            last_provider_id = provider_id
+
+            # Check if provider is available (may have changed since _get_providers_to_try)
+            if not check_provider_available(provider_id):
+                warnings.append(f"Provider {provider_id} is not available, skipping")
+                continue
+
+            logger.debug(
+                f"Trying provider {provider_id} (spec: {resolved.spec_str}, "
+                f"model: {resolved.model})"
+            )
+            result = self._try_provider_with_retries(
+                request, prompt, resolved, warnings
+            )
+
+            if result is not None:
+                return result, provider_id, None
+
+            # Determine if we should try next provider
+            if i < len(providers) - 1:
+                # Check the last warning for the error
+                last_warning = warnings[-1] if warnings else ""
+                # Create a pseudo-error from the warning to check fallback eligibility
+                pseudo_error = Exception(last_warning)
+                if self._should_try_next_provider(pseudo_error):
+                    warnings.append("Falling back to next provider...")
+                else:
+                    last_error = (
+                        f"Provider {provider_id} failed and fallback is not appropriate"
+                    )
+                    break
+            else:
+                last_error = f"All {len(providers)} provider(s) failed"
+
+        return None, last_provider_id, last_error or "All providers failed"
+
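The fallback-eligibility gate (`_should_try_next_provider`, defined earlier in this file) distinguishes failures worth retrying elsewhere from failures that would repeat on any provider. A hedged sketch of that kind of classifier, assuming simple substring matching on the error text; the marker lists below are illustrative, not the library's actual heuristics:

```python
PROMPT_LEVEL_MARKERS = ("prompt too long", "invalid prompt", "context length")
TRANSIENT_MARKERS = ("timeout", "rate limit", "429", "503", "connection reset")


def should_try_next_provider(error: Exception) -> bool:
    """Decide whether a different provider is worth trying.

    Prompt-level failures would repeat identically on every provider,
    so fallback is skipped; transient or provider-specific failures
    justify moving on to the next provider in the list.
    """
    text = str(error).lower()
    if any(marker in text for marker in PROMPT_LEVEL_MARKERS):
        return False  # the same prompt would fail everywhere
    if any(marker in text for marker in TRANSIENT_MARKERS):
        return True  # another provider may not share the outage or limit
    return True  # default: give the next provider a chance
```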
+    def consult(
+        self,
+        request: ConsultationRequest,
+        *,
+        use_cache: bool = True,
+        cache_ttl: Optional[int] = None,
+        workflow_name: Optional[str] = None,
+    ) -> ConsultationOutcome:
+        """
+        Execute a consultation request with retry, fallback, and multi-model support.
+
+        This is the synchronous wrapper for consult_async(). It routes to either
+        single-provider or multi-model parallel execution based on the workflow
+        configuration's min_models setting.
+
+        The consultation process:
+        1. Check cache for existing result (single-model mode only)
+        2. Build prompt from template and context
+        3. Get ordered list of providers to try
+        4. Execute based on min_models:
+           - min_models=1: Sequential with fallback, returns ConsultationResult
+           - min_models>1: Parallel execution, returns ConsensusResult
+        5. Cache successful results (single-model mode only)
+
+        Retry behavior (configurable via ConsultationConfig):
+        - max_retries: Number of retry attempts per provider (default: 2)
+        - retry_delay: Delay between retries in seconds (default: 5.0)
+        - Retries occur for transient errors (timeouts, rate limits, 5xx errors)
+
+        Fallback behavior (configurable via ConsultationConfig):
+        - fallback_enabled: Whether to try next provider on failure (default: True)
+        - Fallback skipped for prompt-level errors that would fail with any provider
+
+        Args:
+            request: The consultation request
+            use_cache: Whether to use cached results (default: True)
+            cache_ttl: Cache TTL override in seconds
+            workflow_name: Override workflow name for config lookup
+                (defaults to request.workflow.value)
+
+        Returns:
+            ConsultationOutcome: Either ConsultationResult (min_models=1) or
+            ConsensusResult (min_models>1)
+        """
+        # Delegate to async implementation
+        # Check if we're already in an async context
+        try:
+            asyncio.get_running_loop()
+            # Already in async context - use thread pool to avoid nested asyncio.run()
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(
+                    asyncio.run,
+                    self.consult_async(
+                        request,
+                        use_cache=use_cache,
+                        cache_ttl=cache_ttl,
+                        workflow_name=workflow_name,
+                    ),
+                )
+                return future.result()
+        except RuntimeError:
+            # No running loop - safe to use asyncio.run()
+            return asyncio.run(
+                self.consult_async(
+                    request,
+                    use_cache=use_cache,
+                    cache_ttl=cache_ttl,
+                    workflow_name=workflow_name,
+                )
+            )
+
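The wrapper uses a common sync-over-async bridging idiom: `asyncio.run()` raises if a loop is already running on the current thread, so in that case the coroutine is shipped to a fresh loop on a worker thread. A standalone demonstration of the same pattern, with a stub coroutine in place of `consult_async`:

```python
import asyncio
import concurrent.futures


async def _work() -> str:
    await asyncio.sleep(0.01)
    return "done"


def run_sync(coro):
    """Run a coroutine from sync code, whether or not a loop is running."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop on this thread: asyncio.run() is safe.
        return asyncio.run(coro)
    # A loop is already running (e.g. called from async framework code):
    # a nested asyncio.run() would raise, so run it on a separate thread.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


if __name__ == "__main__":
    print(run_sync(_work()))  # prints "done"
```

The worker-thread path blocks the caller until the coroutine completes, which matches the synchronous contract of `consult` at the cost of one extra thread per call.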
+    def consult_multiple(
+        self,
+        requests: Sequence[ConsultationRequest],
+        *,
+        use_cache: bool = True,
+    ) -> List[ConsultationResult]:
+        """
+        Execute multiple consultation requests sequentially.
+
+        Args:
+            requests: Sequence of consultation requests
+            use_cache: Whether to use cached results
+
+        Returns:
+            List of ConsultationResult objects in the same order as requests
+        """
+        return [self.consult(req, use_cache=use_cache) for req in requests]
+
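`consult_multiple` is deliberately sequential, since each request may itself fan out to several providers. Where the requests are independent and rate limits allow, a caller could instead run them concurrently over `consult_async`. A hypothetical sketch, not an API the module offers:

```python
import asyncio
from typing import Any, List, Sequence


async def consult_concurrently(orchestrator: Any, requests: Sequence) -> List:
    """Run independent consultation requests concurrently.

    Assumes orchestrator.consult_async from this module; the semaphore
    caps in-flight requests so provider rate limits are respected.
    """
    sem = asyncio.Semaphore(4)  # illustrative concurrency cap

    async def one(req):
        async with sem:
            return await orchestrator.consult_async(req)

    # gather preserves input order, matching consult_multiple's contract
    return await asyncio.gather(*(one(r) for r in requests))
```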
+    async def consult_async(
+        self,
+        request: ConsultationRequest,
+        *,
+        use_cache: bool = True,
+        cache_ttl: Optional[int] = None,
+        workflow_name: Optional[str] = None,
+    ) -> ConsultationOutcome:
+        """
+        Execute a consultation request asynchronously with multi-model support.
+
+        Routes to single-provider or parallel execution based on the workflow
+        configuration's min_models setting. Returns ConsultationResult for
+        single-provider mode or ConsensusResult for multi-model mode.
+
+        Args:
+            request: The consultation request
+            use_cache: Whether to use cached results (default: True)
+            cache_ttl: Cache TTL override in seconds
+            workflow_name: Override workflow name for config lookup
+                (defaults to request.workflow.value)
+
+        Returns:
+            ConsultationOutcome: Either ConsultationResult (min_models=1) or
+            ConsensusResult (min_models>1)
+        """
+        start_time = time.time()
+
+        # Get workflow config (determines single vs multi-model mode)
+        effective_workflow = workflow_name or request.workflow.value
+        workflow_config = self._config.get_workflow_config(effective_workflow)
+        min_models = workflow_config.min_models
+
+        # Apply workflow-specific timeout override if configured
+        if workflow_config.timeout_override is not None:
+            request = replace(request, timeout=workflow_config.timeout_override)
+
+        # Generate cache key
+        cache_key = self._generate_cache_key(request)
+
+        # Check cache (only for single-model mode for now)
+        if use_cache and min_models == 1:
+            cached = self.cache.get(request.workflow, cache_key)
+            if cached:
+                duration_ms = (time.time() - start_time) * 1000
+                return ConsultationResult(
+                    workflow=request.workflow,
+                    content=cached.get("content", ""),
+                    provider_id=cached.get("provider_id", "cached"),
+                    model_used=cached.get("model_used", "cached"),
+                    tokens=cached.get("tokens", {}),
+                    duration_ms=duration_ms,
+                    cache_hit=True,
+                )
+
+        # Build prompt
+        try:
+            prompt = self._build_prompt(request)
+        except Exception as exc:  # noqa: BLE001 - wrap prompt build errors
+            duration_ms = (time.time() - start_time) * 1000
+            if min_models > 1:
+                return ConsensusResult(
+                    workflow=request.workflow,
+                    responses=[],
+                    duration_ms=duration_ms,
+                    warnings=[f"Failed to build prompt: {exc}"],
+                )
+            return ConsultationResult(
+                workflow=request.workflow,
+                content="",
+                provider_id="none",
+                model_used="none",
+                duration_ms=duration_ms,
+                error=f"Failed to build prompt: {exc}",
+            )
+
+        # Get providers to try
+        providers = self._get_providers_to_try(request)
+
+        if min_models > 1:
+            # Multi-model mode: execute providers in parallel with fallback support
+            # Pass full provider list - fallback will try additional providers if needed
+            result = await self._execute_parallel_providers_with_fallback_async(
+                request, prompt, providers, min_models
+            )
+            return result
+        else:
+            # Single-model mode: execute with fallback (using first success)
+            if not providers:
+                duration_ms = (time.time() - start_time) * 1000
+                return ConsultationResult(
+                    workflow=request.workflow,
+                    content="",
+                    provider_id="none",
+                    model_used="none",
+                    duration_ms=duration_ms,
+                    error="No AI providers are currently available",
+                )
+
+            # Try providers in order until one succeeds
+            warnings: List[str] = []
+            for resolved in providers:
+                if not check_provider_available(resolved.provider_id):
+                    warnings.append(
+                        f"Provider {resolved.provider_id} is not available, skipping"
+                    )
+                    continue
+
+                response = await self._execute_single_provider_async(
+                    request, prompt, resolved
+                )
+
+                if response.success:
+                    duration_ms = (time.time() - start_time) * 1000
+                    result = ConsultationResult(
+                        workflow=request.workflow,
+                        content=response.content,
+                        provider_id=response.provider_id,
+                        model_used=response.model_used,
+                        tokens={"total_tokens": response.tokens}
+                        if response.tokens
+                        else {},
+                        duration_ms=duration_ms,
+                        cache_hit=False,
+                        warnings=warnings,
+                        error=None,
+                    )
+
+                    # Cache successful results
+                    if use_cache:
+                        self.cache.set(
+                            request.workflow, cache_key, result, ttl=cache_ttl
+                        )
+
+                    return result
+
+                # Provider failed, try next
+                warnings.append(
+                    f"Provider {resolved.provider_id} failed: {response.error}"
+                )
+
+                if not self._config.fallback_enabled:
+                    break
+
+            # All providers failed
+            duration_ms = (time.time() - start_time) * 1000
+            return ConsultationResult(
+                workflow=request.workflow,
+                content="",
+                provider_id=providers[0].provider_id if providers else "none",
+                model_used="none",
+                duration_ms=duration_ms,
+                warnings=warnings,
+                error="All providers failed",
+            )
+
+
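The cache interaction in `consult_async` is a read-through shape: check by key, return the cached result on a hit, otherwise compute and store on success, honoring an optional TTL override and skipping the cache entirely when `min_models > 1`. A minimal sketch of that shape, assuming a dict-backed store with per-entry expiry; the real `ResultCache` is defined earlier in this file and persists more fields:

```python
import hashlib
import time
from typing import Any, Dict, Optional, Tuple


class TTLCache:
    """Toy read-through cache with per-entry TTL (illustrative only)."""

    def __init__(self) -> None:
        self._store: Dict[str, Tuple[float, Any]] = {}

    def get(self, key: str) -> Optional[Any]:
        entry = self._store.get(key)
        if entry is None:
            return None
        expires_at, value = entry
        if time.time() >= expires_at:
            del self._store[key]  # lazily evict stale entries on read
            return None
        return value

    def set(self, key: str, value: Any, ttl: int = 300) -> None:
        self._store[key] = (time.time() + ttl, value)


def cache_key(workflow: str, prompt: str) -> str:
    """Stable key derived from the request's identifying fields."""
    return hashlib.sha256(f"{workflow}:{prompt}".encode()).hexdigest()
```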
+# =============================================================================
+# Module Exports
+# =============================================================================
+
+
+__all__ = [
+    # Workflow types
+    "ConsultationWorkflow",
+    # Request/Response
+    "ConsultationRequest",
+    "ConsultationResult",
+    "ProviderResponse",
+    "AgreementMetadata",
+    "ConsensusResult",
+    "ConsultationOutcome",
+    # Cache
+    "ResultCache",
+    # Orchestrator
+    "ConsultationOrchestrator",
+]
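For orientation, a hedged end-to-end usage sketch against the exported names. The constructor and request arguments below are assumptions for illustration; the actual signatures are defined earlier in `ai_consultation.py`:

```python
# Hypothetical usage: argument names below are illustrative, not verified
from foundry_mcp.core.ai_consultation import (
    ConsultationOrchestrator,
    ConsultationRequest,
    ConsultationWorkflow,
)

orchestrator = ConsultationOrchestrator()  # assumed default construction

request = ConsultationRequest(
    workflow=ConsultationWorkflow.PLAN_REVIEW,  # assumed enum member
    context={"plan": "..."},                    # assumed field name
)

# Synchronous entry point; returns ConsultationResult when the workflow's
# min_models is 1, or ConsensusResult when min_models > 1.
outcome = orchestrator.consult(request, use_cache=True)
print(outcome.workflow, outcome.duration_ms)
```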