foundry-mcp 0.8.22__py3-none-any.whl
This diff represents the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
Potentially problematic release.
This version of foundry-mcp might be problematic.
- foundry_mcp/__init__.py +13 -0
- foundry_mcp/cli/__init__.py +67 -0
- foundry_mcp/cli/__main__.py +9 -0
- foundry_mcp/cli/agent.py +96 -0
- foundry_mcp/cli/commands/__init__.py +37 -0
- foundry_mcp/cli/commands/cache.py +137 -0
- foundry_mcp/cli/commands/dashboard.py +148 -0
- foundry_mcp/cli/commands/dev.py +446 -0
- foundry_mcp/cli/commands/journal.py +377 -0
- foundry_mcp/cli/commands/lifecycle.py +274 -0
- foundry_mcp/cli/commands/modify.py +824 -0
- foundry_mcp/cli/commands/plan.py +640 -0
- foundry_mcp/cli/commands/pr.py +393 -0
- foundry_mcp/cli/commands/review.py +667 -0
- foundry_mcp/cli/commands/session.py +472 -0
- foundry_mcp/cli/commands/specs.py +686 -0
- foundry_mcp/cli/commands/tasks.py +807 -0
- foundry_mcp/cli/commands/testing.py +676 -0
- foundry_mcp/cli/commands/validate.py +982 -0
- foundry_mcp/cli/config.py +98 -0
- foundry_mcp/cli/context.py +298 -0
- foundry_mcp/cli/logging.py +212 -0
- foundry_mcp/cli/main.py +44 -0
- foundry_mcp/cli/output.py +122 -0
- foundry_mcp/cli/registry.py +110 -0
- foundry_mcp/cli/resilience.py +178 -0
- foundry_mcp/cli/transcript.py +217 -0
- foundry_mcp/config.py +1454 -0
- foundry_mcp/core/__init__.py +144 -0
- foundry_mcp/core/ai_consultation.py +1773 -0
- foundry_mcp/core/batch_operations.py +1202 -0
- foundry_mcp/core/cache.py +195 -0
- foundry_mcp/core/capabilities.py +446 -0
- foundry_mcp/core/concurrency.py +898 -0
- foundry_mcp/core/context.py +540 -0
- foundry_mcp/core/discovery.py +1603 -0
- foundry_mcp/core/error_collection.py +728 -0
- foundry_mcp/core/error_store.py +592 -0
- foundry_mcp/core/health.py +749 -0
- foundry_mcp/core/intake.py +933 -0
- foundry_mcp/core/journal.py +700 -0
- foundry_mcp/core/lifecycle.py +412 -0
- foundry_mcp/core/llm_config.py +1376 -0
- foundry_mcp/core/llm_patterns.py +510 -0
- foundry_mcp/core/llm_provider.py +1569 -0
- foundry_mcp/core/logging_config.py +374 -0
- foundry_mcp/core/metrics_persistence.py +584 -0
- foundry_mcp/core/metrics_registry.py +327 -0
- foundry_mcp/core/metrics_store.py +641 -0
- foundry_mcp/core/modifications.py +224 -0
- foundry_mcp/core/naming.py +146 -0
- foundry_mcp/core/observability.py +1216 -0
- foundry_mcp/core/otel.py +452 -0
- foundry_mcp/core/otel_stubs.py +264 -0
- foundry_mcp/core/pagination.py +255 -0
- foundry_mcp/core/progress.py +387 -0
- foundry_mcp/core/prometheus.py +564 -0
- foundry_mcp/core/prompts/__init__.py +464 -0
- foundry_mcp/core/prompts/fidelity_review.py +691 -0
- foundry_mcp/core/prompts/markdown_plan_review.py +515 -0
- foundry_mcp/core/prompts/plan_review.py +627 -0
- foundry_mcp/core/providers/__init__.py +237 -0
- foundry_mcp/core/providers/base.py +515 -0
- foundry_mcp/core/providers/claude.py +472 -0
- foundry_mcp/core/providers/codex.py +637 -0
- foundry_mcp/core/providers/cursor_agent.py +630 -0
- foundry_mcp/core/providers/detectors.py +515 -0
- foundry_mcp/core/providers/gemini.py +426 -0
- foundry_mcp/core/providers/opencode.py +718 -0
- foundry_mcp/core/providers/opencode_wrapper.js +308 -0
- foundry_mcp/core/providers/package-lock.json +24 -0
- foundry_mcp/core/providers/package.json +25 -0
- foundry_mcp/core/providers/registry.py +607 -0
- foundry_mcp/core/providers/test_provider.py +171 -0
- foundry_mcp/core/providers/validation.py +857 -0
- foundry_mcp/core/rate_limit.py +427 -0
- foundry_mcp/core/research/__init__.py +68 -0
- foundry_mcp/core/research/memory.py +528 -0
- foundry_mcp/core/research/models.py +1234 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +25 -0
- foundry_mcp/core/research/workflows/base.py +298 -0
- foundry_mcp/core/research/workflows/chat.py +271 -0
- foundry_mcp/core/research/workflows/consensus.py +539 -0
- foundry_mcp/core/research/workflows/deep_research.py +4142 -0
- foundry_mcp/core/research/workflows/ideate.py +682 -0
- foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
- foundry_mcp/core/resilience.py +600 -0
- foundry_mcp/core/responses.py +1624 -0
- foundry_mcp/core/review.py +366 -0
- foundry_mcp/core/security.py +438 -0
- foundry_mcp/core/spec.py +4119 -0
- foundry_mcp/core/task.py +2463 -0
- foundry_mcp/core/testing.py +839 -0
- foundry_mcp/core/validation.py +2357 -0
- foundry_mcp/dashboard/__init__.py +32 -0
- foundry_mcp/dashboard/app.py +119 -0
- foundry_mcp/dashboard/components/__init__.py +17 -0
- foundry_mcp/dashboard/components/cards.py +88 -0
- foundry_mcp/dashboard/components/charts.py +177 -0
- foundry_mcp/dashboard/components/filters.py +136 -0
- foundry_mcp/dashboard/components/tables.py +195 -0
- foundry_mcp/dashboard/data/__init__.py +11 -0
- foundry_mcp/dashboard/data/stores.py +433 -0
- foundry_mcp/dashboard/launcher.py +300 -0
- foundry_mcp/dashboard/views/__init__.py +12 -0
- foundry_mcp/dashboard/views/errors.py +217 -0
- foundry_mcp/dashboard/views/metrics.py +164 -0
- foundry_mcp/dashboard/views/overview.py +96 -0
- foundry_mcp/dashboard/views/providers.py +83 -0
- foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
- foundry_mcp/dashboard/views/tool_usage.py +139 -0
- foundry_mcp/prompts/__init__.py +9 -0
- foundry_mcp/prompts/workflows.py +525 -0
- foundry_mcp/resources/__init__.py +9 -0
- foundry_mcp/resources/specs.py +591 -0
- foundry_mcp/schemas/__init__.py +38 -0
- foundry_mcp/schemas/intake-schema.json +89 -0
- foundry_mcp/schemas/sdd-spec-schema.json +414 -0
- foundry_mcp/server.py +150 -0
- foundry_mcp/tools/__init__.py +10 -0
- foundry_mcp/tools/unified/__init__.py +92 -0
- foundry_mcp/tools/unified/authoring.py +3620 -0
- foundry_mcp/tools/unified/context_helpers.py +98 -0
- foundry_mcp/tools/unified/documentation_helpers.py +268 -0
- foundry_mcp/tools/unified/environment.py +1341 -0
- foundry_mcp/tools/unified/error.py +479 -0
- foundry_mcp/tools/unified/health.py +225 -0
- foundry_mcp/tools/unified/journal.py +841 -0
- foundry_mcp/tools/unified/lifecycle.py +640 -0
- foundry_mcp/tools/unified/metrics.py +777 -0
- foundry_mcp/tools/unified/plan.py +876 -0
- foundry_mcp/tools/unified/pr.py +294 -0
- foundry_mcp/tools/unified/provider.py +589 -0
- foundry_mcp/tools/unified/research.py +1283 -0
- foundry_mcp/tools/unified/review.py +1042 -0
- foundry_mcp/tools/unified/review_helpers.py +314 -0
- foundry_mcp/tools/unified/router.py +102 -0
- foundry_mcp/tools/unified/server.py +565 -0
- foundry_mcp/tools/unified/spec.py +1283 -0
- foundry_mcp/tools/unified/task.py +3846 -0
- foundry_mcp/tools/unified/test.py +431 -0
- foundry_mcp/tools/unified/verification.py +520 -0
- foundry_mcp-0.8.22.dist-info/METADATA +344 -0
- foundry_mcp-0.8.22.dist-info/RECORD +153 -0
- foundry_mcp-0.8.22.dist-info/WHEEL +4 -0
- foundry_mcp-0.8.22.dist-info/entry_points.txt +3 -0
- foundry_mcp-0.8.22.dist-info/licenses/LICENSE +21 -0
foundry_mcp/core/ai_consultation.py
@@ -0,0 +1,1773 @@
"""
AI Consultation Layer for foundry-mcp.

This module provides a unified interface for AI-assisted operations including
plan review and fidelity checking. It integrates with the provider registry
to support multiple LLM backends while providing caching, timeout handling,
and consistent result structures.

Design Principles:
- Workflow-specific prompt templates (plan_review, fidelity_review)
- Provider-agnostic orchestration via the provider registry
- Filesystem-based caching for consultation results
- Consistent result structures across all workflows
- Graceful degradation when providers are unavailable

Example Usage:
    from foundry_mcp.core.ai_consultation import (
        ConsultationOrchestrator,
        ConsultationRequest,
        ConsultationWorkflow,
    )
    from foundry_mcp.core.providers import ProviderHooks

    orchestrator = ConsultationOrchestrator()

    # Check availability
    if orchestrator.is_available():
        request = ConsultationRequest(
            workflow=ConsultationWorkflow.PLAN_REVIEW,
            prompt_id="spec_review",
            context={"spec_content": "..."},
            provider_id="gemini",
        )
        result = orchestrator.consult(request)
        if result.content:
            print(result.content)
"""

from __future__ import annotations

import asyncio
import concurrent.futures
import hashlib
import json
import logging
import time
from dataclasses import dataclass, field, replace
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Union

from foundry_mcp.core.providers import (
    ProviderHooks,
    ProviderRequest,
    ProviderResult,
    ProviderStatus,
    ProviderUnavailableError,
    available_providers,
    check_provider_available,
    resolve_provider,
)
from foundry_mcp.core.llm_config import ProviderSpec

logger = logging.getLogger(__name__)


def _collect_provider_error(
    provider_id: str,
    error: Exception,
    request_context: dict,
) -> None:
    """
    Collect AI provider error data for later introspection.

    Uses lazy import to avoid circular dependencies and only
    collects if error collection is enabled.

    Args:
        provider_id: The provider that raised the error
        error: The exception that was raised
        request_context: Context about the request (workflow, prompt_id, etc.)
    """
    try:
        # Lazy import to avoid circular dependencies
        from foundry_mcp.config import get_config

        config = get_config()
        if not config.error_collection.enabled:
            return

        from foundry_mcp.core.error_collection import get_error_collector

        collector = get_error_collector()
        collector.collect_provider_error(
            provider_id=provider_id,
            error=error,
            request_context=request_context,
        )
    except Exception as collect_error:
        # Never let error collection failures affect consultation execution
        logger.debug(
            f"Error collection failed for provider {provider_id}: {collect_error}"
        )


# =============================================================================
# Workflow Types
# =============================================================================


class ConsultationWorkflow(str, Enum):
    """
    Supported AI consultation workflows.

    Each workflow corresponds to a category of prompt templates and
    determines cache partitioning and result handling.

    Values:
        PLAN_REVIEW: Review and critique SDD specifications
        FIDELITY_REVIEW: Compare implementation against specifications
        MARKDOWN_PLAN_REVIEW: Review markdown plans before spec creation
    """

    PLAN_REVIEW = "plan_review"
    FIDELITY_REVIEW = "fidelity_review"
    MARKDOWN_PLAN_REVIEW = "markdown_plan_review"


# =============================================================================
# Request/Response Dataclasses
# =============================================================================


@dataclass
class ResolvedProvider:
    """
    Resolved provider information from a ProviderSpec.

    Contains the provider ID to use for registry lookup, along with
    model and override settings from the priority configuration.

    Attributes:
        provider_id: Provider ID for registry lookup (e.g., "gemini", "opencode")
        model: Model identifier to use (may include backend routing for CLI)
        overrides: Per-provider setting overrides from config
        spec_str: Original spec string for logging/debugging
    """

    provider_id: str
    model: Optional[str] = None
    overrides: Dict[str, Any] = field(default_factory=dict)
    spec_str: str = ""


@dataclass(frozen=True)
class ConsultationRequest:
    """
    Request payload for AI consultation.

    Encapsulates all parameters needed to execute a consultation workflow,
    including prompt selection, context data, and provider preferences.

    Attributes:
        workflow: The consultation workflow type
        prompt_id: Identifier for the prompt template within the workflow
        context: Structured context data to inject into the prompt
        provider_id: Optional preferred provider (uses first available if None)
        model: Optional model override for the provider
        cache_key: Optional explicit cache key (auto-generated if None)
        timeout: Request timeout in seconds (default: 120)
        temperature: Sampling temperature (default: provider default)
        max_tokens: Maximum output tokens (default: provider default)
        system_prompt_override: Optional system prompt override
    """

    workflow: ConsultationWorkflow
    prompt_id: str
    context: Dict[str, Any] = field(default_factory=dict)
    provider_id: Optional[str] = None
    model: Optional[str] = None
    cache_key: Optional[str] = None
    timeout: float = 120.0
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None
    system_prompt_override: Optional[str] = None


@dataclass
class ConsultationResult:
    """
    Result of an AI consultation.

    Provides a consistent structure for consultation outcomes across all
    workflows and providers, including metadata for debugging and analytics.

    Attributes:
        workflow: The workflow that produced this result
        content: The generated content (may be empty on failure)
        provider_id: Provider that handled the request
        model_used: Fully-qualified model identifier
        tokens: Token usage if reported by provider
        duration_ms: Total consultation duration in milliseconds
        cache_hit: Whether result was served from cache
        raw_payload: Provider-specific metadata and debug info
        warnings: Non-fatal issues encountered during consultation
        error: Error message if consultation failed
    """

    workflow: ConsultationWorkflow
    content: str
    provider_id: str
    model_used: str
    tokens: Dict[str, int] = field(default_factory=dict)
    duration_ms: float = 0.0
    cache_hit: bool = False
    raw_payload: Dict[str, Any] = field(default_factory=dict)
    warnings: List[str] = field(default_factory=list)
    error: Optional[str] = None

    @property
    def success(self) -> bool:
        """Return True if consultation succeeded (has content, no error)."""
        return bool(self.content) and self.error is None


@dataclass
class ProviderResponse:
    """
    Response from a single provider in a multi-model consultation.

    Encapsulates the result from one provider when executing parallel
    consultations across multiple models. Used as building blocks for
    ConsensusResult aggregation.

    Attributes:
        provider_id: Identifier of the provider that handled this request
        model_used: Fully-qualified model identifier used for generation
        content: Generated content (empty string on failure)
        success: Whether this provider's request succeeded
        error: Error message if the request failed
        tokens: Total token usage (prompt + completion) if available
        duration_ms: Request duration in milliseconds
        cache_hit: Whether result was served from cache
    """

    provider_id: str
    model_used: str
    content: str
    success: bool
    error: Optional[str] = None
    tokens: Optional[int] = None
    duration_ms: Optional[int] = None
    cache_hit: bool = False

    @classmethod
    def from_result(
        cls,
        result: ConsultationResult,
    ) -> "ProviderResponse":
        """
        Create a ProviderResponse from a ConsultationResult.

        Convenience factory for converting single-provider results to the
        multi-provider response format.

        Args:
            result: ConsultationResult to convert

        Returns:
            ProviderResponse with fields mapped from the result
        """
        total_tokens = sum(result.tokens.values()) if result.tokens else None
        return cls(
            provider_id=result.provider_id,
            model_used=result.model_used,
            content=result.content,
            success=result.success,
            error=result.error,
            tokens=total_tokens,
            duration_ms=int(result.duration_ms) if result.duration_ms else None,
            cache_hit=result.cache_hit,
        )


@dataclass
class AgreementMetadata:
    """
    Metadata about provider agreement in a multi-model consultation.

    Tracks how many providers were consulted, how many succeeded, and how
    many failed. Used to assess consensus quality and reliability.

    Attributes:
        total_providers: Total number of providers that were consulted
        successful_providers: Number of providers that returned successful responses
        failed_providers: Number of providers that failed (timeout, error, etc.)
    """

    total_providers: int
    successful_providers: int
    failed_providers: int

    @property
    def success_rate(self) -> float:
        """Calculate the success rate as a fraction (0.0 - 1.0)."""
        if self.total_providers == 0:
            return 0.0
        return self.successful_providers / self.total_providers

    @property
    def has_consensus(self) -> bool:
        """Return True if at least 2 providers succeeded."""
        return self.successful_providers >= 2

    @classmethod
    def from_responses(
        cls, responses: Sequence["ProviderResponse"]
    ) -> "AgreementMetadata":
        """
        Create AgreementMetadata from a list of provider responses.

        Args:
            responses: Sequence of ProviderResponse objects

        Returns:
            AgreementMetadata with computed counts
        """
        total = len(responses)
        successful = sum(1 for r in responses if r.success)
        failed = total - successful
        return cls(
            total_providers=total,
            successful_providers=successful,
            failed_providers=failed,
        )


@dataclass
class ConsensusResult:
    """
    Aggregated result from multi-model consensus consultation.

    Collects responses from multiple providers along with metadata about
    agreement levels and overall success. Used when min_models > 1 in
    workflow configuration.

    Attributes:
        workflow: The consultation workflow that produced this result
        responses: List of individual provider responses
        agreement: Metadata about provider agreement and success rates
        duration_ms: Total consultation duration in milliseconds
        warnings: Non-fatal issues encountered during consultation

    Properties:
        success: True if at least one provider succeeded
        primary_content: Content from the first successful response (for compatibility)
    """

    workflow: ConsultationWorkflow
    responses: List[ProviderResponse] = field(default_factory=list)
    agreement: Optional[AgreementMetadata] = None
    duration_ms: float = 0.0
    warnings: List[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Auto-compute agreement metadata if not provided."""
        if self.agreement is None and self.responses:
            self.agreement = AgreementMetadata.from_responses(self.responses)

    @property
    def success(self) -> bool:
        """Return True if at least one provider returned a successful response."""
        return any(r.success for r in self.responses)

    @property
    def primary_content(self) -> str:
        """
        Return content from the first successful response.

        For backward compatibility with code expecting a single response.
        Returns empty string if no successful responses.
        """
        for response in self.responses:
            if response.success and response.content:
                return response.content
        return ""

    @property
    def successful_responses(self) -> List[ProviderResponse]:
        """Return list of successful responses only."""
        return [r for r in self.responses if r.success]

    @property
    def failed_responses(self) -> List[ProviderResponse]:
        """Return list of failed responses only."""
        return [r for r in self.responses if not r.success]


# Type alias for backward-compatible result handling
ConsultationOutcome = Union[ConsultationResult, ConsensusResult]
"""
Type alias for consultation results supporting both single and multi-model modes.

When min_models == 1 (default): Returns ConsultationResult (single provider)
When min_models > 1: Returns ConsensusResult (multiple providers with agreement)

Use isinstance() to differentiate:
    if isinstance(outcome, ConsensusResult):
        # Handle multi-model result with agreement metadata
    else:
        # Handle single-model ConsultationResult
"""


# =============================================================================
# Cache Implementation
# =============================================================================


class ResultCache:
    """
    Filesystem-based cache for consultation results.

    Provides persistent caching of AI consultation results to reduce
    redundant API calls and improve response times for repeated queries.

    Cache Structure:
        .cache/foundry-mcp/consultations/{workflow}/{key}.json

    Each cached entry contains:
    - content: The consultation result
    - provider_id: Provider that generated the result
    - model_used: Model identifier
    - tokens: Token usage
    - timestamp: Cache entry creation time
    - ttl: Time-to-live in seconds

    Attributes:
        base_dir: Root directory for cache storage
        default_ttl: Default time-to-live in seconds (default: 3600 = 1 hour)
    """

    def __init__(
        self,
        base_dir: Optional[Path] = None,
        default_ttl: int = 3600,
    ):
        """
        Initialize the result cache.

        Args:
            base_dir: Root directory for cache (default: .cache/foundry-mcp/consultations)
            default_ttl: Default TTL in seconds (default: 3600)
        """
        if base_dir is None:
            base_dir = Path.cwd() / ".cache" / "foundry-mcp" / "consultations"
        self.base_dir = base_dir
        self.default_ttl = default_ttl

    def _get_cache_path(self, workflow: ConsultationWorkflow, key: str) -> Path:
        """Return the cache file path for a workflow and key."""
        # Sanitize key to be filesystem-safe
        safe_key = "".join(c if c.isalnum() or c in "-_" else "_" for c in key)
        return self.base_dir / workflow.value / f"{safe_key}.json"

    def get(
        self,
        workflow: ConsultationWorkflow,
        key: str,
    ) -> Optional[Dict[str, Any]]:
        """
        Retrieve a cached result.

        Args:
            workflow: The consultation workflow
            key: The cache key

        Returns:
            Cached data dict if found and not expired, None otherwise
        """
        cache_path = self._get_cache_path(workflow, key)
        if not cache_path.exists():
            return None

        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Check TTL
            timestamp = data.get("timestamp", 0)
            ttl = data.get("ttl", self.default_ttl)
            if time.time() - timestamp > ttl:
                # Expired - remove file
                cache_path.unlink(missing_ok=True)
                return None

            return data
        except (json.JSONDecodeError, OSError) as exc:
            logger.warning("Failed to read cache entry %s: %s", cache_path, exc)
            return None

    def set(
        self,
        workflow: ConsultationWorkflow,
        key: str,
        result: ConsultationResult,
        ttl: Optional[int] = None,
    ) -> None:
        """
        Store a consultation result in the cache.

        Args:
            workflow: The consultation workflow
            key: The cache key
            result: The consultation result to cache
            ttl: Time-to-live in seconds (default: default_ttl)
        """
        cache_path = self._get_cache_path(workflow, key)
        cache_path.parent.mkdir(parents=True, exist_ok=True)

        data = {
            "content": result.content,
            "provider_id": result.provider_id,
            "model_used": result.model_used,
            "tokens": result.tokens,
            "timestamp": time.time(),
            "ttl": ttl if ttl is not None else self.default_ttl,
        }

        try:
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
        except OSError as exc:
            logger.warning("Failed to write cache entry %s: %s", cache_path, exc)

    def invalidate(
        self,
        workflow: Optional[ConsultationWorkflow] = None,
        key: Optional[str] = None,
    ) -> int:
        """
        Invalidate cache entries.

        Args:
            workflow: If provided, only invalidate entries for this workflow
            key: If provided (with workflow), only invalidate this specific entry

        Returns:
            Number of entries invalidated
        """
        count = 0

        if workflow is not None and key is not None:
            # Invalidate specific entry
            cache_path = self._get_cache_path(workflow, key)
            if cache_path.exists():
                cache_path.unlink()
                count = 1
        elif workflow is not None:
            # Invalidate all entries for workflow
            workflow_dir = self.base_dir / workflow.value
            if workflow_dir.exists():
                for cache_file in workflow_dir.glob("*.json"):
                    cache_file.unlink()
                    count += 1
        else:
            # Invalidate all entries
            for workflow_enum in ConsultationWorkflow:
                workflow_dir = self.base_dir / workflow_enum.value
                if workflow_dir.exists():
                    for cache_file in workflow_dir.glob("*.json"):
                        cache_file.unlink()
                        count += 1

        return count

    def stats(self) -> Dict[str, Any]:
        """
        Return cache statistics.

        Returns:
            Dict with entry counts per workflow and total size
        """
        stats: Dict[str, Any] = {
            "total_entries": 0,
            "total_size_bytes": 0,
            "by_workflow": {},
        }

        for workflow in ConsultationWorkflow:
            workflow_dir = self.base_dir / workflow.value
            if workflow_dir.exists():
                entries = list(workflow_dir.glob("*.json"))
                size = sum(f.stat().st_size for f in entries if f.exists())
                stats["by_workflow"][workflow.value] = {
                    "entries": len(entries),
                    "size_bytes": size,
                }
                stats["total_entries"] += len(entries)
                stats["total_size_bytes"] += size
            else:
                stats["by_workflow"][workflow.value] = {
                    "entries": 0,
                    "size_bytes": 0,
                }

        return stats


# =============================================================================
# Consultation Orchestrator
# =============================================================================


class ConsultationOrchestrator:
    """
    Central orchestrator for AI consultation workflows.

    Coordinates between prompt templates, the provider registry, and
    the result cache to execute consultation requests. Handles provider
    selection, timeout management, and error handling.

    Attributes:
        cache: ResultCache instance for caching results
        preferred_providers: Ordered list of preferred provider IDs
        default_timeout: Default timeout in seconds

    Example:
        orchestrator = ConsultationOrchestrator()

        if orchestrator.is_available():
            request = ConsultationRequest(
                workflow=ConsultationWorkflow.PLAN_REVIEW,
                prompt_id="spec_review",
                context={"spec_content": "..."},
            )
            result = orchestrator.consult(request)
    """

    def __init__(
        self,
        cache: Optional[ResultCache] = None,
        default_timeout: Optional[float] = None,
        config: Optional["ConsultationConfig"] = None,
    ):
        """
        Initialize the consultation orchestrator.

        Args:
            cache: ResultCache instance (creates default if None)
            default_timeout: Default timeout in seconds (uses config if None)
            config: ConsultationConfig instance (uses global config if None)
        """
        # Lazy import to avoid circular dependency
        from foundry_mcp.core.llm_config import (
            ConsultationConfig,
            get_consultation_config,
        )

        self._config: ConsultationConfig = config or get_consultation_config()
        self.cache = cache or ResultCache(default_ttl=self._config.cache_ttl)
        self.default_timeout = (
            default_timeout
            if default_timeout is not None
            else self._config.default_timeout
        )

        # Parse priority list from config into ProviderSpec objects
        # Priority: 1) config.priority specs
        self._priority_specs: List[ProviderSpec] = []
        if self._config.priority:
            for spec_str in self._config.priority:
                try:
                    self._priority_specs.append(ProviderSpec.parse(spec_str))
                except ValueError as e:
                    logger.warning(
                        f"Invalid provider spec in priority list: {spec_str}: {e}"
                    )

    def is_available(self, provider_id: Optional[str] = None) -> bool:
        """
        Check if consultation services are available.

        Args:
            provider_id: Check specific provider, or any available if None

        Returns:
            True if at least one provider is available
        """
        if provider_id:
            return check_provider_available(provider_id)

        # Check priority providers first
        for spec in self._priority_specs:
            if check_provider_available(spec.provider):
                return True

        # Fall back to any available provider
        return len(available_providers()) > 0

    def get_available_providers(self) -> List[str]:
        """
        Return list of available provider IDs.

        Returns:
            List of available provider IDs
        """
        return sorted(available_providers())

    def _select_provider(self, request: ConsultationRequest) -> str:
        """
        Select the provider to use for a request.

        Args:
            request: The consultation request

        Returns:
            Provider ID to use

        Raises:
            ProviderUnavailableError: If no providers are available
        """
        # Explicit provider requested
        if request.provider_id:
            if check_provider_available(request.provider_id):
                return request.provider_id
            raise ProviderUnavailableError(
                f"Requested provider '{request.provider_id}' is not available",
                provider=request.provider_id,
            )

        # Try priority providers
        for spec in self._priority_specs:
            if check_provider_available(spec.provider):
                return spec.provider

        # Fall back to first available
        providers = available_providers()
        if providers:
            return providers[0]

        raise ProviderUnavailableError(
            "No AI providers are currently available",
            provider=None,
        )

    def _generate_cache_key(self, request: ConsultationRequest) -> str:
        """
        Generate a cache key for a consultation request.

        Args:
            request: The consultation request

        Returns:
            Cache key string
        """
        if request.cache_key:
            return request.cache_key

        # Build a deterministic key from request parameters
        key_parts = [
            request.prompt_id,
            json.dumps(request.context, sort_keys=True),
            request.model or "default",
        ]
        key_string = "|".join(key_parts)
        return hashlib.sha256(key_string.encode()).hexdigest()[:32]

    def _build_prompt(self, request: ConsultationRequest) -> str:
        """
        Build the full prompt from template and context.

        This method delegates to workflow-specific prompt builders.

        Args:
            request: The consultation request

        Returns:
            The rendered prompt string
        """
        # Import prompt builders lazily to avoid circular imports
        from foundry_mcp.core.prompts import get_prompt_builder

        builder = get_prompt_builder(request.workflow)
        return builder.build(request.prompt_id, request.context)

    def _resolve_spec_to_provider(
        self, spec: ProviderSpec
    ) -> Optional[ResolvedProvider]:
        """
        Resolve a ProviderSpec to a ResolvedProvider if available.

        For CLI providers, checks registry availability.
        For API providers, logs a warning (not yet implemented).

        Args:
            spec: The provider specification to resolve

        Returns:
            ResolvedProvider if available, None otherwise
        """
        if spec.type == "api":
            # API providers not yet integrated into registry
            # TODO: Register API providers (openai, anthropic, local) in registry
            logger.debug(
                f"API provider spec '{spec}' skipped - API providers not yet "
                "integrated into consultation registry"
            )
            return None

        # CLI provider - check registry availability
        if not check_provider_available(spec.provider):
            return None

        # Build model string - include backend routing if specified
        model = None
        if spec.backend and spec.model:
            # Backend routing: "openai/gpt-5.1-codex"
            model = f"{spec.backend}/{spec.model}"
        elif spec.model:
            model = spec.model

        # Get overrides from config
        overrides = self._config.get_override(str(spec))

        return ResolvedProvider(
            provider_id=spec.provider,
            model=model,
            overrides=overrides,
            spec_str=str(spec),
        )

    def _get_providers_to_try(
        self, request: ConsultationRequest
    ) -> List[ResolvedProvider]:
        """
        Get ordered list of providers to try for a request.

        Provider selection priority:
        1. Explicit provider_id in request (wraps to ResolvedProvider)
        2. Priority specs from config (parsed ProviderSpec list)
        3. Legacy preferred_providers (for backwards compatibility)
        4. Available providers from registry (fallback)

        Args:
            request: The consultation request

        Returns:
            Ordered list of ResolvedProvider instances to try
        """
        result: List[ResolvedProvider] = []
        seen_providers: set = set()

        # 1. Explicit provider requested - only try that one
        if request.provider_id:
            return [
                ResolvedProvider(
                    provider_id=request.provider_id,
                    model=request.model,
                    spec_str=f"explicit:{request.provider_id}",
                )
            ]

        # 2. Priority specs from config
        for spec in self._priority_specs:
            resolved = self._resolve_spec_to_provider(spec)
            if resolved and resolved.provider_id not in seen_providers:
                result.append(resolved)
                seen_providers.add(resolved.provider_id)

        # 3. Fallback to available providers from registry
        for prov_id in available_providers():
            if prov_id not in seen_providers:
                result.append(
                    ResolvedProvider(
                        provider_id=prov_id,
                        spec_str=f"fallback:{prov_id}",
                    )
                )
                seen_providers.add(prov_id)

        return result

    def _is_retryable_error(self, error: Exception) -> bool:
        """
        Determine if an error warrants a retry.

        Retryable errors include timeouts and rate limits.
        Non-retryable errors include authentication failures and invalid prompts.

        Args:
            error: The exception that occurred

        Returns:
            True if the error is transient and retry may succeed
        """
        error_str = str(error).lower()

        # Timeout errors are retryable
        if "timeout" in error_str or "timed out" in error_str:
            return True

        # Rate limit errors are retryable
        if "rate limit" in error_str or "rate_limit" in error_str or "429" in error_str:
            return True

        # Connection errors may be transient
        if "connection" in error_str and (
            "reset" in error_str or "refused" in error_str
        ):
            return True

        # Server errors (5xx) are potentially retryable
        if any(code in error_str for code in ["500", "502", "503", "504"]):
            return True

        return False

    def _should_try_next_provider(self, error: Exception) -> bool:
        """
        Determine if we should try the next provider after an error.

        Args:
            error: The exception that occurred

        Returns:
            True if fallback to next provider is appropriate
        """
        # Don't fallback if disabled
        if not self._config.fallback_enabled:
            return False

        error_str = str(error).lower()

        # Don't fallback for prompt-level errors (these will fail with any provider)
        if "prompt" in error_str and (
            "too long" in error_str or "invalid" in error_str
        ):
            return False

        # Authentication errors might be provider-specific, so allow fallback
        if "api key" in error_str or "authentication" in error_str:
            return True

        # Fallback for most other errors
        return True

    def _try_provider_with_retries(
        self,
        request: ConsultationRequest,
        prompt: str,
        resolved: ResolvedProvider,
        warnings: List[str],
    ) -> Optional[ProviderResult]:
        """
        Try a single provider with retry logic.

        Args:
            request: The consultation request
            prompt: The rendered prompt
            resolved: Resolved provider information (includes model and overrides)
            warnings: List to append warnings to

        Returns:
            ProviderResult on success, None on failure
        """
        hooks = ProviderHooks()
        last_error: Optional[Exception] = None
        provider_id = resolved.provider_id

        max_attempts = self._config.max_retries + 1  # +1 for initial attempt

        # Determine model: request.model > resolved.model > None
        effective_model = request.model or resolved.model

        # Apply overrides from config
        effective_timeout = (
            resolved.overrides.get("timeout", request.timeout) or self.default_timeout
        )
        effective_temperature = resolved.overrides.get(
            "temperature", request.temperature
        )
        effective_max_tokens = resolved.overrides.get("max_tokens", request.max_tokens)

        for attempt in range(max_attempts):
            try:
                provider = resolve_provider(
                    provider_id, hooks=hooks, model=effective_model
                )
                provider_request = ProviderRequest(
                    prompt=prompt,
                    system_prompt=request.system_prompt_override,
                    model=effective_model,
                    timeout=effective_timeout,
                    temperature=effective_temperature,
                    max_tokens=effective_max_tokens,
                    metadata={
                        "workflow": request.workflow.value,
                        "prompt_id": request.prompt_id,
                    },
                )
                result = provider.generate(provider_request)

                # Success
                if result.status == ProviderStatus.SUCCESS:
                    if attempt > 0:
                        warnings.append(
                            f"Provider {provider_id} succeeded on attempt {attempt + 1}"
                        )
                    return result

                # Non-success status from provider
                error_msg = (
                    f"Provider {provider_id} returned status: {result.status.value}"
                )
                if result.stderr:
                    error_msg += f" - {result.stderr}"
                last_error = Exception(error_msg)

                # Check if this error type is retryable
                if not self._is_retryable_error(last_error):
                    break

            except ProviderUnavailableError as exc:
                last_error = exc
                # Provider unavailable - don't retry, move to fallback
                break

            except Exception as exc:  # noqa: BLE001
                last_error = exc
                if not self._is_retryable_error(exc):
                    break

            # Retry delay
            if attempt < max_attempts - 1:
                warnings.append(
                    f"Provider {provider_id} attempt {attempt + 1} failed: {last_error}, "
                    f"retrying in {self._config.retry_delay}s..."
                )
                time.sleep(self._config.retry_delay)

        # All retries exhausted - collect error for introspection
        if last_error:
            warnings.append(
                f"Provider {provider_id} failed after {max_attempts} attempt(s): {last_error}"
            )
            # Collect provider error for future introspection
            _collect_provider_error(
                provider_id=provider_id,
                error=last_error,
                request_context={
                    "workflow": request.workflow.value,
                    "prompt_id": request.prompt_id,
                    "model": effective_model,
                    "attempts": max_attempts,
                },
            )
        return None

    async def _try_provider_with_retries_async(
        self,
        request: ConsultationRequest,
        prompt: str,
        resolved: ResolvedProvider,
        warnings: List[str],
    ) -> Optional[ProviderResult]:
        """
        Async version of provider execution with retry logic.

        Uses asyncio.sleep() for non-blocking retry delays and runs the
        synchronous provider.generate() in a thread pool executor to avoid
        blocking the event loop.

        Args:
            request: The consultation request
            prompt: The rendered prompt
            resolved: Resolved provider information (includes model and overrides)
            warnings: List to append warnings to

        Returns:
            ProviderResult on success, None on failure
        """
        hooks = ProviderHooks()
        last_error: Optional[Exception] = None
        provider_id = resolved.provider_id

        max_attempts = self._config.max_retries + 1  # +1 for initial attempt

        # Determine model: request.model > resolved.model > None
        effective_model = request.model or resolved.model

        # Apply overrides from config
        effective_timeout = (
            resolved.overrides.get("timeout", request.timeout) or self.default_timeout
        )
        effective_temperature = resolved.overrides.get(
            "temperature", request.temperature
        )
        effective_max_tokens = resolved.overrides.get("max_tokens", request.max_tokens)

        for attempt in range(max_attempts):
            try:
                provider = resolve_provider(
                    provider_id, hooks=hooks, model=effective_model
                )
                provider_request = ProviderRequest(
                    prompt=prompt,
                    system_prompt=request.system_prompt_override,
                    model=effective_model,
                    timeout=effective_timeout,
                    temperature=effective_temperature,
                    max_tokens=effective_max_tokens,
                    metadata={
                        "workflow": request.workflow.value,
                        "prompt_id": request.prompt_id,
                    },
                )

                # Run sync provider.generate() in executor to avoid blocking
                loop = asyncio.get_running_loop()
                result = await loop.run_in_executor(
                    None, provider.generate, provider_request
                )

                # Success
                if result.status == ProviderStatus.SUCCESS:
                    if attempt > 0:
                        warnings.append(
                            f"Provider {provider_id} succeeded on attempt {attempt + 1}"
                        )
                    return result

                # Non-success status from provider
                error_msg = (
                    f"Provider {provider_id} returned status: {result.status.value}"
                )
                if result.stderr:
                    error_msg += f" - {result.stderr}"
                last_error = Exception(error_msg)

                # Check if this error type is retryable
                if not self._is_retryable_error(last_error):
                    break

            except ProviderUnavailableError as exc:
                last_error = exc
                # Provider unavailable - don't retry, move to fallback
                break

            except Exception as exc:  # noqa: BLE001
                last_error = exc
                if not self._is_retryable_error(exc):
                    break

            # Async retry delay (non-blocking)
            if attempt < max_attempts - 1:
                warnings.append(
                    f"Provider {provider_id} attempt {attempt + 1} failed: {last_error}, "
                    f"retrying in {self._config.retry_delay}s..."
                )
                await asyncio.sleep(self._config.retry_delay)

        # All retries exhausted - collect error for introspection
        if last_error:
            warnings.append(
                f"Provider {provider_id} failed after {max_attempts} attempt(s): {last_error}"
            )
            # Collect provider error for future introspection
            _collect_provider_error(
                provider_id=provider_id,
                error=last_error,
                request_context={
                    "workflow": request.workflow.value,
                    "prompt_id": request.prompt_id,
                    "model": effective_model,
                    "attempts": max_attempts,
                },
            )
        return None

    async def _execute_single_provider_async(
        self,
        request: ConsultationRequest,
        prompt: str,
        resolved: ResolvedProvider,
    ) -> ProviderResponse:
        """
        Execute a single provider asynchronously and return a ProviderResponse.

        Wraps _try_provider_with_retries_async and converts the result to
        a ProviderResponse for use in multi-model consensus workflows.

        Args:
            request: The consultation request
            prompt: The rendered prompt
            resolved: Resolved provider information

        Returns:
            ProviderResponse with success/failure status and content
        """
        warnings: List[str] = []
        start_time = time.time()

        result = await self._try_provider_with_retries_async(
            request, prompt, resolved, warnings
        )

        duration_ms = int((time.time() - start_time) * 1000)

        if result is None:
            # Provider failed after all retries
            error_msg = (
                warnings[-1] if warnings else f"Provider {resolved.provider_id} failed"
            )
            return ProviderResponse(
                provider_id=resolved.provider_id,
                model_used=resolved.model or "unknown",
                content="",
                success=False,
                error=error_msg,
                duration_ms=duration_ms,
                cache_hit=False,
            )

        # Success - convert ProviderResult to ProviderResponse
        total_tokens = None
        if result.tokens:
            total_tokens = result.tokens.total_tokens

        return ProviderResponse(
            provider_id=result.provider_id,
            model_used=result.model_used,
            content=result.content,
            success=True,
            error=None,
            tokens=total_tokens,
            duration_ms=duration_ms,
            cache_hit=False,
        )

    async def _execute_parallel_providers_async(
        self,
        request: ConsultationRequest,
        prompt: str,
        providers: List[ResolvedProvider],
        min_models: int = 1,
    ) -> ConsensusResult:
        """
        Execute multiple providers in parallel and return a ConsensusResult.

        Uses asyncio.gather to run all provider executions concurrently,
        then aggregates the results into a ConsensusResult with agreement
        metadata.

        Args:
            request: The consultation request
            prompt: The rendered prompt
            providers: List of resolved providers to execute
            min_models: Minimum successful models required (for warnings)

        Returns:
            ConsensusResult with all provider responses and agreement metadata
        """
        start_time = time.time()
        warnings: List[str] = []

        if not providers:
            return ConsensusResult(
                workflow=request.workflow,
                responses=[],
                duration_ms=0.0,
                warnings=["No providers available for parallel execution"],
            )

        # Create tasks for all providers
        tasks = [
            self._execute_single_provider_async(request, prompt, resolved)
            for resolved in providers
        ]

        # Execute all providers in parallel
        responses: List[ProviderResponse] = await asyncio.gather(*tasks)

        duration_ms = (time.time() - start_time) * 1000

        # Check if we met the minimum model requirement
        successful_count = sum(1 for r in responses if r.success)
        if successful_count < min_models:
            warnings.append(
                f"Only {successful_count} of {min_models} required models succeeded"
            )

        # Log failed providers
        for response in responses:
            if not response.success:
                warnings.append(
                    f"Provider {response.provider_id} failed: {response.error}"
                )

        return ConsensusResult(
            workflow=request.workflow,
            responses=responses,
            duration_ms=duration_ms,
            warnings=warnings,
        )

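    # The fan-out above is plain asyncio.gather over one coroutine per
    # provider. A self-contained sketch of the same shape (all names here are
    # illustrative, not part of this module):
    #
    #     import asyncio
    #
    #     async def ask(provider: str) -> str:
    #         await asyncio.sleep(0.1)  # stands in for a real provider call
    #         return f"{provider}: ok"
    #
    #     async def main() -> None:
    #         results = await asyncio.gather(*(ask(p) for p in ["a", "b", "c"]))
    #         print(results)  # calls run concurrently; results keep input order
    #
    #     asyncio.run(main())
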
    async def _execute_parallel_providers_with_fallback_async(
        self,
        request: ConsultationRequest,
        prompt: str,
        all_providers: List[ResolvedProvider],
        min_models: int = 1,
    ) -> ConsensusResult:
        """
        Execute providers in parallel with sequential fallback on failures.

        Uses a two-phase approach:
        1. Execute the first min_models providers in parallel
        2. If any fail and fallback_enabled, try the remaining providers
           sequentially until min_models succeed or providers are exhausted

        Args:
            request: The consultation request
            prompt: The rendered prompt
            all_providers: Complete priority list of providers to try
            min_models: Minimum successful models required

        Returns:
            ConsensusResult with all attempted provider responses
        """
        start_time = time.time()
        warnings: List[str] = []
        all_responses: List[ProviderResponse] = []

        if not all_providers:
            return ConsensusResult(
                workflow=request.workflow,
                responses=[],
                duration_ms=0.0,
                warnings=["No providers available for parallel execution"],
            )

        # Phase 1: Initial parallel execution of the first min_models providers
        initial_providers = all_providers[:min_models]
        logger.debug(
            f"Phase 1: Executing {len(initial_providers)} providers in parallel"
        )

        tasks = [
            self._execute_single_provider_async(request, prompt, resolved)
            for resolved in initial_providers
        ]
        initial_responses: List[ProviderResponse] = await asyncio.gather(*tasks)
        all_responses.extend(initial_responses)

        # Count successes and log failures.
        # A response is only truly successful if it has non-empty content.
        successful_count = sum(
            1 for r in initial_responses if r.success and r.content.strip()
        )
        for response in initial_responses:
            if not response.success:
                warnings.append(
                    f"Provider {response.provider_id} failed: {response.error}"
                )
            elif not response.content.strip():
                warnings.append(
                    f"Provider {response.provider_id} returned empty content"
                )

        # Phase 2: Sequential fallback if needed and enabled
        if successful_count < min_models and self._config.fallback_enabled:
            needed = min_models - successful_count
            remaining_providers = all_providers[min_models:]

            if remaining_providers:
                warnings.append(
                    f"Initial parallel execution yielded {successful_count}/{min_models} "
                    f"successes, attempting fallback for {needed} more"
                )

            for fallback_provider in remaining_providers:
                # Skip if already tried (shouldn't happen, but safety check)
                if any(
                    r.provider_id == fallback_provider.provider_id
                    for r in all_responses
                ):
                    continue

                # Check if the provider is available
                if not check_provider_available(fallback_provider.provider_id):
                    warnings.append(
                        f"Fallback provider {fallback_provider.provider_id} "
                        "is not available, skipping"
                    )
                    continue

                logger.debug(
                    f"Fallback attempt: trying provider {fallback_provider.provider_id}"
                )

                response = await self._execute_single_provider_async(
                    request, prompt, fallback_provider
                )
                all_responses.append(response)

                if response.success and response.content.strip():
                    successful_count += 1
                    warnings.append(
                        f"Fallback provider {fallback_provider.provider_id} succeeded"
                    )
                    if successful_count >= min_models:
                        logger.debug(
                            f"Reached {min_models} successful providers via fallback"
                        )
                        break
                elif response.success and not response.content.strip():
                    warnings.append(
                        f"Fallback provider {fallback_provider.provider_id} "
                        "returned empty content"
                    )
                else:
                    warnings.append(
                        f"Fallback provider {fallback_provider.provider_id} "
                        f"failed: {response.error}"
                    )

        duration_ms = (time.time() - start_time) * 1000

        # Final warning if still insufficient
        if successful_count < min_models:
            warnings.append(
                f"Only {successful_count} of {min_models} required models succeeded "
                f"after trying {len(all_responses)} provider(s)"
            )

        return ConsensusResult(
            workflow=request.workflow,
            responses=all_responses,
            duration_ms=duration_ms,
            warnings=warnings,
        )

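    # The two-phase shape above, condensed to its core (a sketch: `run` is an
    # assumed helper standing in for _execute_single_provider_async, and the
    # availability/dedup checks are elided):
    #
    #     first, rest = providers[:min_models], providers[min_models:]
    #     responses = list(await asyncio.gather(*(run(p) for p in first)))
    #     ok = sum(1 for r in responses if r.success and r.content.strip())
    #     for p in rest:  # sequential fallback, one provider at a time
    #         if ok >= min_models:
    #             break
    #         r = await run(p)
    #         responses.append(r)
    #         ok += bool(r.success and r.content.strip())
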
    def _execute_with_fallback(
        self,
        request: ConsultationRequest,
        prompt: str,
        providers: List[ResolvedProvider],
        warnings: List[str],
    ) -> tuple[Optional[ProviderResult], str, Optional[str]]:
        """
        Execute a request with fallback across providers.

        Args:
            request: The consultation request
            prompt: The rendered prompt
            providers: Ordered list of ResolvedProvider instances to try
            warnings: List to append warnings to

        Returns:
            Tuple of (result, provider_id, error_message)
        """
        if not providers:
            return None, "none", "No AI providers are currently available"

        last_error: Optional[str] = None
        last_provider_id = providers[0].provider_id

        for i, resolved in enumerate(providers):
            provider_id = resolved.provider_id
            last_provider_id = provider_id

            # Check if the provider is available (may have changed since _get_providers_to_try)
            if not check_provider_available(provider_id):
                warnings.append(f"Provider {provider_id} is not available, skipping")
                continue

            logger.debug(
                f"Trying provider {provider_id} (spec: {resolved.spec_str}, "
                f"model: {resolved.model})"
            )
            result = self._try_provider_with_retries(
                request, prompt, resolved, warnings
            )

            if result is not None:
                return result, provider_id, None

            # Determine whether we should try the next provider
            if i < len(providers) - 1:
                # Check the last warning for the error
                last_warning = warnings[-1] if warnings else ""
                # Create a pseudo-error from the warning to check fallback eligibility
                pseudo_error = Exception(last_warning)
                if self._should_try_next_provider(pseudo_error):
                    warnings.append("Falling back to next provider...")
                else:
                    last_error = (
                        f"Provider {provider_id} failed and fallback is not appropriate"
                    )
                    break
            else:
                last_error = f"All {len(providers)} provider(s) failed"

        return None, last_provider_id, last_error or "All providers failed"

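    # Caller-side sketch for the (result, provider_id, error) tuple contract
    # above (`orchestrator`, `request`, `prompt`, and `providers` are assumed
    # context from this module):
    #
    #     warnings: List[str] = []
    #     result, provider_id, error = orchestrator._execute_with_fallback(
    #         request, prompt, providers, warnings
    #     )
    #     if result is None:
    #         logger.warning(f"{provider_id}: {error}")
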
    def consult(
        self,
        request: ConsultationRequest,
        *,
        use_cache: bool = True,
        cache_ttl: Optional[int] = None,
        workflow_name: Optional[str] = None,
    ) -> ConsultationOutcome:
        """
        Execute a consultation request with retry, fallback, and multi-model support.

        This is the synchronous wrapper for consult_async(). It routes to either
        single-provider or multi-model parallel execution based on the workflow
        configuration's min_models setting.

        The consultation process:
        1. Check the cache for an existing result (single-model mode only)
        2. Build the prompt from template and context
        3. Get the ordered list of providers to try
        4. Execute based on min_models:
           - min_models=1: Sequential with fallback, returns ConsultationResult
           - min_models>1: Parallel execution, returns ConsensusResult
        5. Cache successful results (single-model mode only)

        Retry behavior (configurable via ConsultationConfig):
        - max_retries: Number of retry attempts per provider (default: 2)
        - retry_delay: Delay between retries in seconds (default: 5.0)
        - Retries occur for transient errors (timeouts, rate limits, 5xx errors)

        Fallback behavior (configurable via ConsultationConfig):
        - fallback_enabled: Whether to try the next provider on failure (default: True)
        - Fallback is skipped for prompt-level errors that would fail with any provider

        Args:
            request: The consultation request
            use_cache: Whether to use cached results (default: True)
            cache_ttl: Cache TTL override in seconds
            workflow_name: Override workflow name for config lookup
                (defaults to request.workflow.value)

        Returns:
            ConsultationOutcome: Either ConsultationResult (min_models=1) or
            ConsensusResult (min_models>1)
        """
        # Delegate to the async implementation.
        # Check if we're already in an async context.
        try:
            asyncio.get_running_loop()
            # Already in an async context - use a thread pool to avoid nested asyncio.run()
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(
                    asyncio.run,
                    self.consult_async(
                        request,
                        use_cache=use_cache,
                        cache_ttl=cache_ttl,
                        workflow_name=workflow_name,
                    ),
                )
                return future.result()
        except RuntimeError:
            # No running loop - safe to use asyncio.run()
            return asyncio.run(
                self.consult_async(
                    request,
                    use_cache=use_cache,
                    cache_ttl=cache_ttl,
                    workflow_name=workflow_name,
                )
            )

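    # The sync-over-async bridge above is a general idiom. A self-contained
    # sketch of the same pattern (`run_sync` is illustrative, not part of this
    # module); note this variant narrows the try block to the loop check so a
    # RuntimeError raised by the coroutine itself is not misrouted into the
    # "no loop" branch:
    #
    #     import asyncio
    #     import concurrent.futures
    #
    #     def run_sync(coro):
    #         try:
    #             asyncio.get_running_loop()
    #         except RuntimeError:
    #             return asyncio.run(coro)  # no loop in this thread: run directly
    #         # A loop is already running: run on a fresh thread and event loop
    #         with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex:
    #             return ex.submit(asyncio.run, coro).result()
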
    def consult_multiple(
        self,
        requests: Sequence[ConsultationRequest],
        *,
        use_cache: bool = True,
    ) -> List[ConsultationResult]:
        """
        Execute multiple consultation requests sequentially.

        Args:
            requests: Sequence of consultation requests
            use_cache: Whether to use cached results

        Returns:
            List of ConsultationResult objects in the same order as requests
        """
        return [self.consult(req, use_cache=use_cache) for req in requests]

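    # Usage sketch for sequential batching (the requests are assumed to be
    # ConsultationRequest instances built as elsewhere in this module):
    #
    #     results = orchestrator.consult_multiple([req_a, req_b], use_cache=True)
    #     for req, res in zip([req_a, req_b], results):
    #         print(req.workflow, res.provider_id, res.cache_hit)
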
    async def consult_async(
        self,
        request: ConsultationRequest,
        *,
        use_cache: bool = True,
        cache_ttl: Optional[int] = None,
        workflow_name: Optional[str] = None,
    ) -> ConsultationOutcome:
        """
        Execute a consultation request asynchronously with multi-model support.

        Routes to single-provider or parallel execution based on the workflow
        configuration's min_models setting. Returns ConsultationResult for
        single-provider mode or ConsensusResult for multi-model mode.

        Args:
            request: The consultation request
            use_cache: Whether to use cached results (default: True)
            cache_ttl: Cache TTL override in seconds
            workflow_name: Override workflow name for config lookup
                (defaults to request.workflow.value)

        Returns:
            ConsultationOutcome: Either ConsultationResult (min_models=1) or
            ConsensusResult (min_models>1)
        """
        start_time = time.time()

        # Get workflow config (determines single- vs multi-model mode)
        effective_workflow = workflow_name or request.workflow.value
        workflow_config = self._config.get_workflow_config(effective_workflow)
        min_models = workflow_config.min_models

        # Apply workflow-specific timeout override if configured
        if workflow_config.timeout_override is not None:
            request = replace(request, timeout=workflow_config.timeout_override)

        # Generate cache key
        cache_key = self._generate_cache_key(request)

        # Check cache (only for single-model mode for now)
        if use_cache and min_models == 1:
            cached = self.cache.get(request.workflow, cache_key)
            if cached:
                duration_ms = (time.time() - start_time) * 1000
                return ConsultationResult(
                    workflow=request.workflow,
                    content=cached.get("content", ""),
                    provider_id=cached.get("provider_id", "cached"),
                    model_used=cached.get("model_used", "cached"),
                    tokens=cached.get("tokens", {}),
                    duration_ms=duration_ms,
                    cache_hit=True,
                )

        # Build prompt
        try:
            prompt = self._build_prompt(request)
        except Exception as exc:  # noqa: BLE001 - wrap prompt build errors
            duration_ms = (time.time() - start_time) * 1000
            if min_models > 1:
                return ConsensusResult(
                    workflow=request.workflow,
                    responses=[],
                    duration_ms=duration_ms,
                    warnings=[f"Failed to build prompt: {exc}"],
                )
            return ConsultationResult(
                workflow=request.workflow,
                content="",
                provider_id="none",
                model_used="none",
                duration_ms=duration_ms,
                error=f"Failed to build prompt: {exc}",
            )

        # Get providers to try
        providers = self._get_providers_to_try(request)

        if min_models > 1:
            # Multi-model mode: execute providers in parallel with fallback support.
            # Pass the full provider list - fallback will try additional providers if needed.
            result = await self._execute_parallel_providers_with_fallback_async(
                request, prompt, providers, min_models
            )
            return result
        else:
            # Single-model mode: execute with fallback (using the first success)
            if not providers:
                duration_ms = (time.time() - start_time) * 1000
                return ConsultationResult(
                    workflow=request.workflow,
                    content="",
                    provider_id="none",
                    model_used="none",
                    duration_ms=duration_ms,
                    error="No AI providers are currently available",
                )

            # Try providers in order until one succeeds
            warnings: List[str] = []
            for resolved in providers:
                if not check_provider_available(resolved.provider_id):
                    warnings.append(
                        f"Provider {resolved.provider_id} is not available, skipping"
                    )
                    continue

                response = await self._execute_single_provider_async(
                    request, prompt, resolved
                )

                if response.success:
                    duration_ms = (time.time() - start_time) * 1000
                    result = ConsultationResult(
                        workflow=request.workflow,
                        content=response.content,
                        provider_id=response.provider_id,
                        model_used=response.model_used,
                        tokens={"total_tokens": response.tokens}
                        if response.tokens
                        else {},
                        duration_ms=duration_ms,
                        cache_hit=False,
                        warnings=warnings,
                        error=None,
                    )

                    # Cache successful results
                    if use_cache:
                        self.cache.set(
                            request.workflow, cache_key, result, ttl=cache_ttl
                        )

                    return result

                # Provider failed, try the next one
                warnings.append(
                    f"Provider {resolved.provider_id} failed: {response.error}"
                )

                if not self._config.fallback_enabled:
                    break

            # All providers failed
            duration_ms = (time.time() - start_time) * 1000
            return ConsultationResult(
                workflow=request.workflow,
                content="",
                provider_id=providers[0].provider_id if providers else "none",
                model_used="none",
                duration_ms=duration_ms,
                warnings=warnings,
                error="All providers failed",
            )


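# End-to-end async usage sketch. The constructor arguments are elided, and the
# request is assumed to be a ConsultationRequest built per this module; the
# outcome fields shown are the ones populated above.
#
#     async def main() -> None:
#         orchestrator = ConsultationOrchestrator(...)
#         outcome = await orchestrator.consult_async(request, use_cache=True)
#         if isinstance(outcome, ConsensusResult):
#             print(f"{len(outcome.responses)} responses, "
#                   f"warnings: {outcome.warnings}")
#         else:
#             print(outcome.content or outcome.error)
#
#     asyncio.run(main())

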
# =============================================================================
# Module Exports
# =============================================================================


__all__ = [
    # Workflow types
    "ConsultationWorkflow",
    # Request/Response
    "ConsultationRequest",
    "ConsultationResult",
    "ProviderResponse",
    "AgreementMetadata",
    "ConsensusResult",
    "ConsultationOutcome",
    # Cache
    "ResultCache",
    # Orchestrator
    "ConsultationOrchestrator",
]
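
# Import sketch for the public surface exported above (module path taken from
# the package layout):
#
#     from foundry_mcp.core.ai_consultation import (
#         ConsultationOrchestrator,
#         ConsultationRequest,
#     )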