foundry-mcp 0.7.0__py3-none-any.whl → 0.8.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry_mcp/cli/__init__.py +0 -13
- foundry_mcp/cli/commands/session.py +1 -8
- foundry_mcp/cli/context.py +39 -0
- foundry_mcp/config.py +381 -7
- foundry_mcp/core/batch_operations.py +1196 -0
- foundry_mcp/core/discovery.py +1 -1
- foundry_mcp/core/llm_config.py +8 -0
- foundry_mcp/core/naming.py +25 -2
- foundry_mcp/core/prometheus.py +0 -13
- foundry_mcp/core/providers/__init__.py +12 -0
- foundry_mcp/core/providers/base.py +39 -0
- foundry_mcp/core/providers/claude.py +45 -1
- foundry_mcp/core/providers/codex.py +64 -3
- foundry_mcp/core/providers/cursor_agent.py +22 -3
- foundry_mcp/core/providers/detectors.py +34 -7
- foundry_mcp/core/providers/gemini.py +63 -1
- foundry_mcp/core/providers/opencode.py +95 -71
- foundry_mcp/core/providers/package-lock.json +4 -4
- foundry_mcp/core/providers/package.json +1 -1
- foundry_mcp/core/providers/validation.py +128 -0
- foundry_mcp/core/research/memory.py +103 -0
- foundry_mcp/core/research/models.py +783 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +5 -2
- foundry_mcp/core/research/workflows/base.py +106 -12
- foundry_mcp/core/research/workflows/consensus.py +160 -17
- foundry_mcp/core/research/workflows/deep_research.py +4020 -0
- foundry_mcp/core/responses.py +240 -0
- foundry_mcp/core/spec.py +1 -0
- foundry_mcp/core/task.py +141 -12
- foundry_mcp/core/validation.py +6 -1
- foundry_mcp/server.py +0 -52
- foundry_mcp/tools/unified/__init__.py +37 -18
- foundry_mcp/tools/unified/authoring.py +0 -33
- foundry_mcp/tools/unified/environment.py +202 -29
- foundry_mcp/tools/unified/plan.py +20 -1
- foundry_mcp/tools/unified/provider.py +0 -40
- foundry_mcp/tools/unified/research.py +644 -19
- foundry_mcp/tools/unified/review.py +5 -2
- foundry_mcp/tools/unified/review_helpers.py +16 -1
- foundry_mcp/tools/unified/server.py +9 -24
- foundry_mcp/tools/unified/task.py +528 -9
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/METADATA +2 -1
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/RECORD +52 -46
- foundry_mcp/cli/flags.py +0 -266
- foundry_mcp/core/feature_flags.py +0 -592
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/WHEEL +0 -0
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/entry_points.txt +0 -0
- {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""Tavily search provider for web search.
|
|
2
|
+
|
|
3
|
+
This module implements TavilySearchProvider, which wraps the Tavily Search API
|
|
4
|
+
to provide web search capabilities for the deep research workflow.
|
|
5
|
+
|
|
6
|
+
Tavily API documentation: https://docs.tavily.com/
|
|
7
|
+
|
|
8
|
+
Example usage:
|
|
9
|
+
provider = TavilySearchProvider(api_key="tvly-...")
|
|
10
|
+
sources = await provider.search("machine learning trends", max_results=5)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import Any, Optional
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
from foundry_mcp.core.research.models import ResearchSource, SourceType
|
|
22
|
+
from foundry_mcp.core.research.providers.base import (
|
|
23
|
+
AuthenticationError,
|
|
24
|
+
RateLimitError,
|
|
25
|
+
SearchProvider,
|
|
26
|
+
SearchProviderError,
|
|
27
|
+
SearchResult,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
# Tavily API constants
|
|
33
|
+
TAVILY_API_BASE_URL = "https://api.tavily.com"
|
|
34
|
+
TAVILY_SEARCH_ENDPOINT = "/search"
|
|
35
|
+
DEFAULT_TIMEOUT = 30.0
|
|
36
|
+
DEFAULT_MAX_RETRIES = 3
|
|
37
|
+
DEFAULT_RATE_LIMIT = 1.0 # requests per second
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class TavilySearchProvider(SearchProvider):
    """Tavily Search API provider for web search.

    Wraps the Tavily Search API to provide web search capabilities.
    Supports basic and advanced search depths, domain filtering,
    and automatic content extraction.

    Attributes:
        api_key: Tavily API key (required)
        base_url: API base URL (default: https://api.tavily.com)
        timeout: Request timeout in seconds (default: 30.0)
        max_retries: Maximum retry attempts for rate limits (default: 3)

    Example:
        provider = TavilySearchProvider(api_key="tvly-...")
        sources = await provider.search(
            "AI trends 2024",
            max_results=5,
            search_depth="advanced",
        )
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = TAVILY_API_BASE_URL,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
    ):
        """Initialize Tavily search provider.

        Args:
            api_key: Tavily API key. If not provided, reads from TAVILY_API_KEY env var.
            base_url: API base URL (default: https://api.tavily.com)
            timeout: Request timeout in seconds (default: 30.0)
            max_retries: Maximum retry attempts for rate limits (default: 3)

        Raises:
            ValueError: If no API key is provided or found in environment
        """
        self._api_key = api_key or os.environ.get("TAVILY_API_KEY")
        if not self._api_key:
            raise ValueError(
                "Tavily API key required. Provide via api_key parameter "
                "or TAVILY_API_KEY environment variable."
            )

        # Strip trailing slashes so joining with the endpoint path never
        # produces a double slash.
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout
        self._max_retries = max_retries
        self._rate_limit_value = DEFAULT_RATE_LIMIT

    def get_provider_name(self) -> str:
        """Return the provider identifier.

        Returns:
            "tavily"
        """
        return "tavily"

    @property
    def rate_limit(self) -> Optional[float]:
        """Return the rate limit in requests per second.

        Returns:
            The configured rate limit (DEFAULT_RATE_LIMIT)
        """
        return self._rate_limit_value

    async def search(
        self,
        query: str,
        max_results: int = 10,
        **kwargs: Any,
    ) -> list[ResearchSource]:
        """Execute a web search via Tavily API.

        Args:
            query: The search query string
            max_results: Maximum number of results to return (default: 10).
                Values outside 0-20 are clamped into that range.
            **kwargs: Additional Tavily options:
                - search_depth: "basic" or "advanced" (default: "basic")
                - include_domains: List of domains to include
                - exclude_domains: List of domains to exclude
                - include_answer: Whether to include AI answer (default: False)
                - include_raw_content: Whether to include the full extracted
                  page content for each result (default: False)
                - sub_query_id: SubQuery ID for source tracking

        Returns:
            List of ResearchSource objects

        Raises:
            AuthenticationError: If API key is invalid
            RateLimitError: If rate limit exceeded after all retries
            SearchProviderError: For other API errors
        """
        # Extract Tavily-specific options
        search_depth = kwargs.get("search_depth", "basic")
        include_domains = kwargs.get("include_domains", [])
        exclude_domains = kwargs.get("exclude_domains", [])
        include_answer = kwargs.get("include_answer", False)
        include_raw_content = kwargs.get("include_raw_content", False)
        sub_query_id = kwargs.get("sub_query_id")

        # Clamp on both sides: a negative value would otherwise be sent to
        # the API and rejected as a client error.
        max_results = max(0, min(max_results, 20))

        # Build request payload
        payload: dict[str, Any] = {
            "api_key": self._api_key,
            "query": query,
            "max_results": max_results,
            "search_depth": search_depth,
            "include_answer": include_answer,
            "include_raw_content": include_raw_content,
        }

        # Domain filters are only sent when non-empty.
        if include_domains:
            payload["include_domains"] = include_domains
        if exclude_domains:
            payload["exclude_domains"] = exclude_domains

        # Execute with retry logic
        response_data = await self._execute_with_retry(payload)

        # Parse results
        return self._parse_response(response_data, sub_query_id)

    async def _execute_with_retry(
        self,
        payload: dict[str, Any],
    ) -> dict[str, Any]:
        """Execute API request with exponential backoff retry.

        Timeouts, transport errors and HTTP 429 responses are retried up to
        ``max_retries`` attempts. HTTP 401 and any other status >= 400 are
        raised immediately without retrying.

        Args:
            payload: Request payload

        Returns:
            Parsed JSON response

        Raises:
            AuthenticationError: If API key is invalid
            RateLimitError: If rate limit exceeded after all retries
            SearchProviderError: For other API errors, malformed response
                bodies, or when all attempts fail
        """
        url = f"{self._base_url}{TAVILY_SEARCH_ENDPOINT}"
        last_error: Optional[Exception] = None

        # One client for all attempts so retries can reuse the connection
        # pool instead of rebuilding it each time.
        async with httpx.AsyncClient(timeout=self._timeout) as client:
            for attempt in range(self._max_retries):
                has_retries_left = attempt < self._max_retries - 1

                try:
                    response = await client.post(url, json=payload)
                except httpx.TimeoutException as exc:
                    # Must precede RequestError: TimeoutException is a subclass.
                    last_error = exc
                    if has_retries_left:
                        wait_time = 2**attempt
                        logger.warning(
                            "Tavily request timeout, retrying in %ss "
                            "(attempt %d/%d)",
                            wait_time,
                            attempt + 1,
                            self._max_retries,
                        )
                        await asyncio.sleep(wait_time)
                    continue
                except httpx.RequestError as exc:
                    last_error = exc
                    if has_retries_left:
                        wait_time = 2**attempt
                        logger.warning(
                            "Tavily request error: %s, retrying in %ss "
                            "(attempt %d/%d)",
                            exc,
                            wait_time,
                            attempt + 1,
                            self._max_retries,
                        )
                        await asyncio.sleep(wait_time)
                    continue

                # Handle authentication errors (not retryable)
                if response.status_code == 401:
                    raise AuthenticationError(
                        provider="tavily",
                        message="Invalid API key",
                    )

                # Handle rate limiting
                if response.status_code == 429:
                    retry_after = self._parse_retry_after(response)
                    if has_retries_left:
                        # Prefer the server's hint; a missing or zero hint
                        # falls back to exponential backoff.
                        wait_time = retry_after or (2**attempt)
                        logger.warning(
                            "Tavily rate limit hit, waiting %ss "
                            "(attempt %d/%d)",
                            wait_time,
                            attempt + 1,
                            self._max_retries,
                        )
                        await asyncio.sleep(wait_time)
                        continue
                    raise RateLimitError(
                        provider="tavily",
                        retry_after=retry_after,
                    )

                # Handle other errors
                if response.status_code >= 400:
                    error_msg = self._extract_error_message(response)
                    raise SearchProviderError(
                        provider="tavily",
                        message=f"API error {response.status_code}: {error_msg}",
                        retryable=response.status_code >= 500,
                    )

                # A success status with an unparsable body is surfaced as a
                # provider error rather than a raw decoding exception.
                try:
                    data = response.json()
                except ValueError as exc:
                    raise SearchProviderError(
                        provider="tavily",
                        message="API returned a response that is not valid JSON",
                        retryable=False,
                        original_error=exc,
                    ) from exc

                if not isinstance(data, dict):
                    raise SearchProviderError(
                        provider="tavily",
                        message="API returned an unexpected response shape",
                        retryable=False,
                    )
                return data

        # All retries exhausted
        raise SearchProviderError(
            provider="tavily",
            message=f"Request failed after {self._max_retries} attempts",
            retryable=False,
            original_error=last_error,
        )

    def _parse_retry_after(self, response: httpx.Response) -> Optional[float]:
        """Parse Retry-After header from response.

        Only the delay-in-seconds form is understood; an HTTP-date value is
        treated as absent.

        Args:
            response: HTTP response

        Returns:
            Seconds to wait, or None if not provided or not usable
        """
        retry_after = response.headers.get("Retry-After")
        if not retry_after:
            return None
        try:
            seconds = float(retry_after)
        except ValueError:
            return None
        # Reject negative, NaN and infinite values: float() accepts "inf"
        # and "nan", and sleeping on them would hang or misbehave.
        if 0 <= seconds < float("inf"):
            return seconds
        return None

    def _extract_error_message(self, response: httpx.Response) -> str:
        """Extract error message from response.

        Looks for an "error" or "message" field in a JSON object body and
        falls back to the first 200 characters of the raw body text.

        Args:
            response: HTTP response

        Returns:
            Error message string (never empty)
        """
        try:
            data = response.json()
            if isinstance(data, dict):
                detail = data.get("error", data.get("message"))
                if detail:
                    # The field may be a nested structure; always return text.
                    return detail if isinstance(detail, str) else str(detail)
        except Exception:
            # Best-effort only: this runs while building an error report, so
            # a decoding failure must not mask the original API error.
            pass
        return response.text[:200] if response.text else "Unknown error"

    def _parse_response(
        self,
        data: dict[str, Any],
        sub_query_id: Optional[str] = None,
    ) -> list[ResearchSource]:
        """Parse Tavily API response into ResearchSource objects.

        Args:
            data: Tavily API response JSON
            sub_query_id: SubQuery ID for source tracking

        Returns:
            List of ResearchSource objects
        """
        sources: list[ResearchSource] = []
        # "results" may be missing or explicitly null; both mean no results.
        results = data.get("results") or []

        for result in results:
            if not isinstance(result, dict):
                # Skip malformed entries instead of failing the whole search.
                continue

            url = result.get("url") or ""

            # Create SearchResult from Tavily response
            search_result = SearchResult(
                url=url,
                title=result.get("title") or "Untitled",
                snippet=result.get("content"),  # Tavily uses "content" for snippet
                content=result.get("raw_content"),  # Full content if requested
                score=result.get("score"),
                published_date=self._parse_date(result.get("published_date")),
                source=self._extract_domain(url),
                metadata={
                    "tavily_score": result.get("score"),
                },
            )

            # Convert to ResearchSource
            sources.append(
                search_result.to_research_source(
                    source_type=SourceType.WEB,
                    sub_query_id=sub_query_id,
                )
            )

        return sources

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """Parse date string from Tavily response.

        Args:
            date_str: ISO format date string

        Returns:
            Parsed datetime or None
        """
        # Guard against non-string values, which have no .replace().
        if not date_str or not isinstance(date_str, str):
            return None
        try:
            # A trailing "Z" is rewritten to an explicit UTC offset, which
            # fromisoformat() accepts on every supported Python version.
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except ValueError:
            return None

    def _extract_domain(self, url: str) -> Optional[str]:
        """Extract domain from URL.

        Args:
            url: Full URL

        Returns:
            Domain name (the URL's network location), or None when the URL
            is empty, malformed, or has no network location
        """
        from urllib.parse import urlparse

        if not url:
            return None
        try:
            netloc = urlparse(url).netloc
        except (ValueError, TypeError, AttributeError):
            # urlparse raises ValueError for malformed URLs such as a bad
            # IPv6 literal; the other two cover non-string input.
            return None
        # An empty netloc (e.g. a URL without a scheme) means "no domain".
        return netloc or None

    async def health_check(self) -> bool:
        """Check if Tavily API is accessible.

        Performs a lightweight search to verify API key and connectivity.

        Returns:
            True if provider is healthy, False otherwise
        """
        try:
            # Perform minimal search to verify connectivity
            await self.search("test", max_results=1)
            return True
        except AuthenticationError:
            logger.error("Tavily health check failed: invalid API key")
            return False
        except Exception as e:
            # Health checks report status; they must not raise.
            logger.warning("Tavily health check failed: %s", e)
            return False
|
|
@@ -5,18 +5,21 @@ This package provides the workflow classes for multi-model orchestration:
|
|
|
5
5
|
- ConsensusWorkflow: Multi-model parallel consultation with synthesis
|
|
6
6
|
- ThinkDeepWorkflow: Hypothesis-driven systematic investigation
|
|
7
7
|
- IdeateWorkflow: Creative brainstorming with idea clustering
|
|
8
|
+
- DeepResearchWorkflow: Multi-phase iterative deep research
|
|
8
9
|
"""
|
|
9
10
|
|
|
10
11
|
from foundry_mcp.core.research.workflows.base import ResearchWorkflowBase
|
|
11
12
|
from foundry_mcp.core.research.workflows.chat import ChatWorkflow
|
|
12
13
|
from foundry_mcp.core.research.workflows.consensus import ConsensusWorkflow
|
|
13
|
-
from foundry_mcp.core.research.workflows.
|
|
14
|
+
from foundry_mcp.core.research.workflows.deep_research import DeepResearchWorkflow
|
|
14
15
|
from foundry_mcp.core.research.workflows.ideate import IdeateWorkflow
|
|
16
|
+
from foundry_mcp.core.research.workflows.thinkdeep import ThinkDeepWorkflow
|
|
15
17
|
|
|
16
18
|
__all__ = [
|
|
17
19
|
"ResearchWorkflowBase",
|
|
18
20
|
"ChatWorkflow",
|
|
19
21
|
"ConsensusWorkflow",
|
|
20
|
-
"
|
|
22
|
+
"DeepResearchWorkflow",
|
|
21
23
|
"IdeateWorkflow",
|
|
24
|
+
"ThinkDeepWorkflow",
|
|
22
25
|
]
|
|
@@ -10,12 +10,17 @@ from dataclasses import dataclass
|
|
|
10
10
|
from typing import Any, Optional
|
|
11
11
|
|
|
12
12
|
from foundry_mcp.config import ResearchConfig
|
|
13
|
+
from foundry_mcp.core.llm_config import ProviderSpec
|
|
13
14
|
from foundry_mcp.core.providers import (
|
|
15
|
+
ContextWindowError,
|
|
14
16
|
ProviderContext,
|
|
15
17
|
ProviderHooks,
|
|
16
18
|
ProviderRequest,
|
|
17
19
|
ProviderResult,
|
|
18
20
|
ProviderStatus,
|
|
21
|
+
is_context_window_error,
|
|
22
|
+
extract_token_counts,
|
|
23
|
+
create_context_window_guidance,
|
|
19
24
|
)
|
|
20
25
|
from foundry_mcp.core.providers.registry import available_providers, resolve_provider
|
|
21
26
|
from foundry_mcp.core.research.memory import ResearchMemory
|
|
@@ -23,6 +28,25 @@ from foundry_mcp.core.research.memory import ResearchMemory
|
|
|
23
28
|
logger = logging.getLogger(__name__)
|
|
24
29
|
|
|
25
30
|
|
|
31
|
+
def _estimate_prompt_tokens(prompt: str, system_prompt: str | None = None) -> int:
|
|
32
|
+
"""Estimate token count for a prompt using simple heuristic.
|
|
33
|
+
|
|
34
|
+
Uses ~4 characters per token as a rough estimate. This is conservative
|
|
35
|
+
and works reasonably well for English text.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
prompt: User prompt
|
|
39
|
+
system_prompt: Optional system prompt
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
Estimated token count
|
|
43
|
+
"""
|
|
44
|
+
total_chars = len(prompt)
|
|
45
|
+
if system_prompt:
|
|
46
|
+
total_chars += len(system_prompt)
|
|
47
|
+
return total_chars // 4
|
|
48
|
+
|
|
49
|
+
|
|
26
50
|
@dataclass
|
|
27
51
|
class WorkflowResult:
|
|
28
52
|
"""Result of a workflow execution.
|
|
@@ -33,6 +57,9 @@ class WorkflowResult:
|
|
|
33
57
|
provider_id: Provider that generated the response
|
|
34
58
|
model_used: Model that generated the response
|
|
35
59
|
tokens_used: Total tokens consumed
|
|
60
|
+
input_tokens: Tokens consumed by the prompt
|
|
61
|
+
output_tokens: Tokens generated in the response
|
|
62
|
+
cached_tokens: Tokens served from cache
|
|
36
63
|
duration_ms: Execution duration in milliseconds
|
|
37
64
|
metadata: Additional workflow-specific data
|
|
38
65
|
error: Error message if success is False
|
|
@@ -43,6 +70,9 @@ class WorkflowResult:
|
|
|
43
70
|
provider_id: Optional[str] = None
|
|
44
71
|
model_used: Optional[str] = None
|
|
45
72
|
tokens_used: Optional[int] = None
|
|
73
|
+
input_tokens: Optional[int] = None
|
|
74
|
+
output_tokens: Optional[int] = None
|
|
75
|
+
cached_tokens: Optional[int] = None
|
|
46
76
|
duration_ms: Optional[float] = None
|
|
47
77
|
metadata: dict[str, Any] = None
|
|
48
78
|
error: Optional[str] = None
|
|
@@ -85,30 +115,48 @@ class ResearchWorkflowBase(ABC):
|
|
|
85
115
|
"""Resolve and cache a provider instance.
|
|
86
116
|
|
|
87
117
|
Args:
|
|
88
|
-
provider_id: Provider ID to resolve (uses config default if None)
|
|
118
|
+
provider_id: Provider ID or full spec to resolve (uses config default if None)
|
|
119
|
+
Supports both simple IDs ("codex") and full specs ("[cli]codex:gpt-5.2")
|
|
89
120
|
hooks: Optional provider hooks
|
|
90
121
|
|
|
91
122
|
Returns:
|
|
92
123
|
ProviderContext instance or None if unavailable
|
|
93
124
|
"""
|
|
94
|
-
|
|
125
|
+
provider_spec_str = provider_id or self.config.default_provider
|
|
126
|
+
|
|
127
|
+
# Check cache first (using full spec string as key)
|
|
128
|
+
if provider_spec_str in self._provider_cache:
|
|
129
|
+
return self._provider_cache[provider_spec_str]
|
|
95
130
|
|
|
96
|
-
#
|
|
97
|
-
|
|
98
|
-
|
|
131
|
+
# Parse the provider spec to extract base provider ID
|
|
132
|
+
try:
|
|
133
|
+
spec = ProviderSpec.parse_flexible(provider_spec_str)
|
|
134
|
+
except ValueError as exc:
|
|
135
|
+
logger.warning("Invalid provider spec '%s': %s", provider_spec_str, exc)
|
|
136
|
+
return None
|
|
99
137
|
|
|
100
|
-
# Check availability
|
|
138
|
+
# Check availability using base provider ID
|
|
101
139
|
available = available_providers()
|
|
102
|
-
if
|
|
103
|
-
logger.warning(
|
|
140
|
+
if spec.provider not in available:
|
|
141
|
+
logger.warning(
|
|
142
|
+
"Provider %s (from spec '%s') not available. Available: %s",
|
|
143
|
+
spec.provider,
|
|
144
|
+
provider_spec_str,
|
|
145
|
+
available,
|
|
146
|
+
)
|
|
104
147
|
return None
|
|
105
148
|
|
|
106
149
|
try:
|
|
107
|
-
provider
|
|
108
|
-
|
|
150
|
+
# Resolve using base provider ID and pass model override if specified
|
|
151
|
+
provider = resolve_provider(
|
|
152
|
+
spec.provider,
|
|
153
|
+
hooks=hooks or ProviderHooks(),
|
|
154
|
+
model=spec.model,
|
|
155
|
+
)
|
|
156
|
+
self._provider_cache[provider_spec_str] = provider
|
|
109
157
|
return provider
|
|
110
158
|
except Exception as exc:
|
|
111
|
-
logger.error("Failed to resolve provider %s: %s",
|
|
159
|
+
logger.error("Failed to resolve provider %s: %s", spec.provider, exc)
|
|
112
160
|
return None
|
|
113
161
|
|
|
114
162
|
def _execute_provider(
|
|
@@ -149,11 +197,14 @@ class ResearchWorkflowBase(ABC):
|
|
|
149
197
|
prompt=prompt,
|
|
150
198
|
system_prompt=system_prompt,
|
|
151
199
|
model=model,
|
|
152
|
-
timeout=timeout or
|
|
200
|
+
timeout=timeout or self.config.default_timeout,
|
|
153
201
|
temperature=temperature,
|
|
154
202
|
max_tokens=max_tokens,
|
|
155
203
|
)
|
|
156
204
|
|
|
205
|
+
# Estimate prompt tokens for error reporting
|
|
206
|
+
estimated_tokens = _estimate_prompt_tokens(prompt, system_prompt)
|
|
207
|
+
|
|
157
208
|
try:
|
|
158
209
|
result: ProviderResult = provider.generate(request)
|
|
159
210
|
|
|
@@ -172,10 +223,53 @@ class ResearchWorkflowBase(ABC):
|
|
|
172
223
|
provider_id=result.provider_id,
|
|
173
224
|
model_used=result.model_used,
|
|
174
225
|
tokens_used=result.tokens.total_tokens if result.tokens else None,
|
|
226
|
+
input_tokens=result.tokens.input_tokens if result.tokens else None,
|
|
227
|
+
output_tokens=result.tokens.output_tokens if result.tokens else None,
|
|
228
|
+
cached_tokens=result.tokens.cached_input_tokens if result.tokens else None,
|
|
175
229
|
duration_ms=result.duration_ms,
|
|
176
230
|
)
|
|
177
231
|
|
|
232
|
+
except ContextWindowError:
|
|
233
|
+
# Re-raise context window errors directly
|
|
234
|
+
raise
|
|
235
|
+
|
|
178
236
|
except Exception as exc:
|
|
237
|
+
# Check if this is a context window error
|
|
238
|
+
if is_context_window_error(exc):
|
|
239
|
+
# Extract token counts from error message if available
|
|
240
|
+
prompt_tokens, max_context = extract_token_counts(str(exc))
|
|
241
|
+
|
|
242
|
+
# Use estimated tokens if not extracted
|
|
243
|
+
if prompt_tokens is None:
|
|
244
|
+
prompt_tokens = estimated_tokens
|
|
245
|
+
|
|
246
|
+
# Log detailed context window error
|
|
247
|
+
logger.error(
|
|
248
|
+
"Context window exceeded: prompt_tokens=%s, max_tokens=%s, "
|
|
249
|
+
"estimated_tokens=%d, provider=%s, error=%s",
|
|
250
|
+
prompt_tokens,
|
|
251
|
+
max_context,
|
|
252
|
+
estimated_tokens,
|
|
253
|
+
provider_id,
|
|
254
|
+
str(exc),
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
# Generate actionable guidance
|
|
258
|
+
guidance = create_context_window_guidance(
|
|
259
|
+
prompt_tokens=prompt_tokens,
|
|
260
|
+
max_tokens=max_context,
|
|
261
|
+
provider_id=provider_id,
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
# Raise specific context window error with details
|
|
265
|
+
raise ContextWindowError(
|
|
266
|
+
guidance,
|
|
267
|
+
provider=provider_id,
|
|
268
|
+
prompt_tokens=prompt_tokens,
|
|
269
|
+
max_tokens=max_context,
|
|
270
|
+
) from exc
|
|
271
|
+
|
|
272
|
+
# Non-context-window error - log and return error result
|
|
179
273
|
logger.error("Provider execution failed: %s", exc)
|
|
180
274
|
return WorkflowResult(
|
|
181
275
|
success=False,
|