foundry-mcp 0.3.3__py3-none-any.whl → 0.8.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- foundry_mcp/__init__.py +7 -1
- foundry_mcp/cli/__init__.py +0 -13
- foundry_mcp/cli/commands/plan.py +10 -3
- foundry_mcp/cli/commands/review.py +19 -4
- foundry_mcp/cli/commands/session.py +1 -8
- foundry_mcp/cli/commands/specs.py +38 -208
- foundry_mcp/cli/context.py +39 -0
- foundry_mcp/cli/output.py +3 -3
- foundry_mcp/config.py +615 -11
- foundry_mcp/core/ai_consultation.py +146 -9
- foundry_mcp/core/batch_operations.py +1196 -0
- foundry_mcp/core/discovery.py +7 -7
- foundry_mcp/core/error_store.py +2 -2
- foundry_mcp/core/intake.py +933 -0
- foundry_mcp/core/llm_config.py +28 -2
- foundry_mcp/core/metrics_store.py +2 -2
- foundry_mcp/core/naming.py +25 -2
- foundry_mcp/core/progress.py +70 -0
- foundry_mcp/core/prometheus.py +0 -13
- foundry_mcp/core/prompts/fidelity_review.py +149 -4
- foundry_mcp/core/prompts/markdown_plan_review.py +5 -1
- foundry_mcp/core/prompts/plan_review.py +5 -1
- foundry_mcp/core/providers/__init__.py +12 -0
- foundry_mcp/core/providers/base.py +39 -0
- foundry_mcp/core/providers/claude.py +51 -48
- foundry_mcp/core/providers/codex.py +70 -60
- foundry_mcp/core/providers/cursor_agent.py +25 -47
- foundry_mcp/core/providers/detectors.py +34 -7
- foundry_mcp/core/providers/gemini.py +69 -58
- foundry_mcp/core/providers/opencode.py +101 -47
- foundry_mcp/core/providers/package-lock.json +4 -4
- foundry_mcp/core/providers/package.json +1 -1
- foundry_mcp/core/providers/validation.py +128 -0
- foundry_mcp/core/research/__init__.py +68 -0
- foundry_mcp/core/research/memory.py +528 -0
- foundry_mcp/core/research/models.py +1220 -0
- foundry_mcp/core/research/providers/__init__.py +40 -0
- foundry_mcp/core/research/providers/base.py +242 -0
- foundry_mcp/core/research/providers/google.py +507 -0
- foundry_mcp/core/research/providers/perplexity.py +442 -0
- foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
- foundry_mcp/core/research/providers/tavily.py +383 -0
- foundry_mcp/core/research/workflows/__init__.py +25 -0
- foundry_mcp/core/research/workflows/base.py +298 -0
- foundry_mcp/core/research/workflows/chat.py +271 -0
- foundry_mcp/core/research/workflows/consensus.py +539 -0
- foundry_mcp/core/research/workflows/deep_research.py +4020 -0
- foundry_mcp/core/research/workflows/ideate.py +682 -0
- foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
- foundry_mcp/core/responses.py +690 -0
- foundry_mcp/core/spec.py +2439 -236
- foundry_mcp/core/task.py +1205 -31
- foundry_mcp/core/testing.py +512 -123
- foundry_mcp/core/validation.py +319 -43
- foundry_mcp/dashboard/components/charts.py +0 -57
- foundry_mcp/dashboard/launcher.py +11 -0
- foundry_mcp/dashboard/views/metrics.py +25 -35
- foundry_mcp/dashboard/views/overview.py +1 -65
- foundry_mcp/resources/specs.py +25 -25
- foundry_mcp/schemas/intake-schema.json +89 -0
- foundry_mcp/schemas/sdd-spec-schema.json +33 -5
- foundry_mcp/server.py +0 -14
- foundry_mcp/tools/unified/__init__.py +39 -18
- foundry_mcp/tools/unified/authoring.py +2371 -248
- foundry_mcp/tools/unified/documentation_helpers.py +69 -6
- foundry_mcp/tools/unified/environment.py +434 -32
- foundry_mcp/tools/unified/error.py +18 -1
- foundry_mcp/tools/unified/lifecycle.py +8 -0
- foundry_mcp/tools/unified/plan.py +133 -2
- foundry_mcp/tools/unified/provider.py +0 -40
- foundry_mcp/tools/unified/research.py +1283 -0
- foundry_mcp/tools/unified/review.py +374 -17
- foundry_mcp/tools/unified/review_helpers.py +16 -1
- foundry_mcp/tools/unified/server.py +9 -24
- foundry_mcp/tools/unified/spec.py +367 -0
- foundry_mcp/tools/unified/task.py +1664 -30
- foundry_mcp/tools/unified/test.py +69 -8
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/METADATA +8 -1
- foundry_mcp-0.8.10.dist-info/RECORD +153 -0
- foundry_mcp/cli/flags.py +0 -266
- foundry_mcp/core/feature_flags.py +0 -592
- foundry_mcp-0.3.3.dist-info/RECORD +0 -135
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/WHEEL +0 -0
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/entry_points.txt +0 -0
- {foundry_mcp-0.3.3.dist-info → foundry_mcp-0.8.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""Tavily search provider for web search.
|
|
2
|
+
|
|
3
|
+
This module implements TavilySearchProvider, which wraps the Tavily Search API
|
|
4
|
+
to provide web search capabilities for the deep research workflow.
|
|
5
|
+
|
|
6
|
+
Tavily API documentation: https://docs.tavily.com/
|
|
7
|
+
|
|
8
|
+
Example usage:
|
|
9
|
+
provider = TavilySearchProvider(api_key="tvly-...")
|
|
10
|
+
sources = await provider.search("machine learning trends", max_results=5)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import Any, Optional
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
from foundry_mcp.core.research.models import ResearchSource, SourceType
|
|
22
|
+
from foundry_mcp.core.research.providers.base import (
|
|
23
|
+
AuthenticationError,
|
|
24
|
+
RateLimitError,
|
|
25
|
+
SearchProvider,
|
|
26
|
+
SearchProviderError,
|
|
27
|
+
SearchResult,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)
|
|
31
|
+
|
|
32
|
+
# Tavily API constants
|
|
33
|
+
TAVILY_API_BASE_URL = "https://api.tavily.com"
|
|
34
|
+
TAVILY_SEARCH_ENDPOINT = "/search"
|
|
35
|
+
DEFAULT_TIMEOUT = 30.0
|
|
36
|
+
DEFAULT_MAX_RETRIES = 3
|
|
37
|
+
DEFAULT_RATE_LIMIT = 1.0 # requests per second
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class TavilySearchProvider(SearchProvider):
|
|
41
|
+
"""Tavily Search API provider for web search.
|
|
42
|
+
|
|
43
|
+
Wraps the Tavily Search API to provide web search capabilities.
|
|
44
|
+
Supports basic and advanced search depths, domain filtering,
|
|
45
|
+
and automatic content extraction.
|
|
46
|
+
|
|
47
|
+
Attributes:
|
|
48
|
+
api_key: Tavily API key (required)
|
|
49
|
+
base_url: API base URL (default: https://api.tavily.com)
|
|
50
|
+
timeout: Request timeout in seconds (default: 30.0)
|
|
51
|
+
max_retries: Maximum retry attempts for rate limits (default: 3)
|
|
52
|
+
|
|
53
|
+
Example:
|
|
54
|
+
provider = TavilySearchProvider(api_key="tvly-...")
|
|
55
|
+
sources = await provider.search(
|
|
56
|
+
"AI trends 2024",
|
|
57
|
+
max_results=5,
|
|
58
|
+
search_depth="advanced",
|
|
59
|
+
)
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
def __init__(
|
|
63
|
+
self,
|
|
64
|
+
api_key: Optional[str] = None,
|
|
65
|
+
base_url: str = TAVILY_API_BASE_URL,
|
|
66
|
+
timeout: float = DEFAULT_TIMEOUT,
|
|
67
|
+
max_retries: int = DEFAULT_MAX_RETRIES,
|
|
68
|
+
):
|
|
69
|
+
"""Initialize Tavily search provider.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
api_key: Tavily API key. If not provided, reads from TAVILY_API_KEY env var.
|
|
73
|
+
base_url: API base URL (default: https://api.tavily.com)
|
|
74
|
+
timeout: Request timeout in seconds (default: 30.0)
|
|
75
|
+
max_retries: Maximum retry attempts for rate limits (default: 3)
|
|
76
|
+
|
|
77
|
+
Raises:
|
|
78
|
+
ValueError: If no API key is provided or found in environment
|
|
79
|
+
"""
|
|
80
|
+
self._api_key = api_key or os.environ.get("TAVILY_API_KEY")
|
|
81
|
+
if not self._api_key:
|
|
82
|
+
raise ValueError(
|
|
83
|
+
"Tavily API key required. Provide via api_key parameter "
|
|
84
|
+
"or TAVILY_API_KEY environment variable."
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
self._base_url = base_url.rstrip("/")
|
|
88
|
+
self._timeout = timeout
|
|
89
|
+
self._max_retries = max_retries
|
|
90
|
+
self._rate_limit_value = DEFAULT_RATE_LIMIT
|
|
91
|
+
|
|
92
|
+
def get_provider_name(self) -> str:
|
|
93
|
+
"""Return the provider identifier.
|
|
94
|
+
|
|
95
|
+
Returns:
|
|
96
|
+
"tavily"
|
|
97
|
+
"""
|
|
98
|
+
return "tavily"
|
|
99
|
+
|
|
100
|
+
@property
|
|
101
|
+
def rate_limit(self) -> Optional[float]:
|
|
102
|
+
"""Return the rate limit in requests per second.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
1.0 (one request per second)
|
|
106
|
+
"""
|
|
107
|
+
return self._rate_limit_value
|
|
108
|
+
|
|
109
|
+
async def search(
|
|
110
|
+
self,
|
|
111
|
+
query: str,
|
|
112
|
+
max_results: int = 10,
|
|
113
|
+
**kwargs: Any,
|
|
114
|
+
) -> list[ResearchSource]:
|
|
115
|
+
"""Execute a web search via Tavily API.
|
|
116
|
+
|
|
117
|
+
Args:
|
|
118
|
+
query: The search query string
|
|
119
|
+
max_results: Maximum number of results to return (default: 10, max: 20)
|
|
120
|
+
**kwargs: Additional Tavily options:
|
|
121
|
+
- search_depth: "basic" or "advanced" (default: "basic")
|
|
122
|
+
- include_domains: List of domains to include
|
|
123
|
+
- exclude_domains: List of domains to exclude
|
|
124
|
+
- include_answer: Whether to include AI answer (default: False)
|
|
125
|
+
- include_raw_content: Whether to include raw HTML (default: False)
|
|
126
|
+
- sub_query_id: SubQuery ID for source tracking
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
List of ResearchSource objects
|
|
130
|
+
|
|
131
|
+
Raises:
|
|
132
|
+
AuthenticationError: If API key is invalid
|
|
133
|
+
RateLimitError: If rate limit exceeded after all retries
|
|
134
|
+
SearchProviderError: For other API errors
|
|
135
|
+
"""
|
|
136
|
+
# Extract Tavily-specific options
|
|
137
|
+
search_depth = kwargs.get("search_depth", "basic")
|
|
138
|
+
include_domains = kwargs.get("include_domains", [])
|
|
139
|
+
exclude_domains = kwargs.get("exclude_domains", [])
|
|
140
|
+
include_answer = kwargs.get("include_answer", False)
|
|
141
|
+
include_raw_content = kwargs.get("include_raw_content", False)
|
|
142
|
+
sub_query_id = kwargs.get("sub_query_id")
|
|
143
|
+
|
|
144
|
+
# Clamp max_results to Tavily's limit
|
|
145
|
+
max_results = min(max_results, 20)
|
|
146
|
+
|
|
147
|
+
# Build request payload
|
|
148
|
+
payload = {
|
|
149
|
+
"api_key": self._api_key,
|
|
150
|
+
"query": query,
|
|
151
|
+
"max_results": max_results,
|
|
152
|
+
"search_depth": search_depth,
|
|
153
|
+
"include_answer": include_answer,
|
|
154
|
+
"include_raw_content": include_raw_content,
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
if include_domains:
|
|
158
|
+
payload["include_domains"] = include_domains
|
|
159
|
+
if exclude_domains:
|
|
160
|
+
payload["exclude_domains"] = exclude_domains
|
|
161
|
+
|
|
162
|
+
# Execute with retry logic
|
|
163
|
+
response_data = await self._execute_with_retry(payload)
|
|
164
|
+
|
|
165
|
+
# Parse results
|
|
166
|
+
return self._parse_response(response_data, sub_query_id)
|
|
167
|
+
|
|
168
|
+
    async def _execute_with_retry(
        self,
        payload: dict[str, Any],
    ) -> dict[str, Any]:
        """Execute API request with exponential backoff retry.

        Retries rate-limit (429) responses, timeouts, and transport errors
        up to ``max_retries`` times, honoring the server's ``Retry-After``
        value when provided and otherwise waiting 2**attempt seconds.
        Authentication failures and 4xx API errors are raised immediately.

        Args:
            payload: Request payload

        Returns:
            Parsed JSON response

        Raises:
            AuthenticationError: If API key is invalid
            RateLimitError: If rate limit exceeded after all retries
            SearchProviderError: For other API errors
        """
        url = f"{self._base_url}{TAVILY_SEARCH_ENDPOINT}"
        last_error: Optional[Exception] = None

        for attempt in range(self._max_retries):
            try:
                # A fresh client per attempt keeps connection state simple.
                async with httpx.AsyncClient(timeout=self._timeout) as client:
                    response = await client.post(url, json=payload)

                    # Handle authentication errors (not retryable)
                    if response.status_code == 401:
                        raise AuthenticationError(
                            provider="tavily",
                            message="Invalid API key",
                        )

                    # Handle rate limiting
                    if response.status_code == 429:
                        retry_after = self._parse_retry_after(response)
                        if attempt < self._max_retries - 1:
                            # Prefer the server-suggested wait; fall back to
                            # exponential backoff (1s, 2s, 4s, ...).
                            wait_time = retry_after or (2**attempt)
                            logger.warning(
                                f"Tavily rate limit hit, waiting {wait_time}s "
                                f"(attempt {attempt + 1}/{self._max_retries})"
                            )
                            await asyncio.sleep(wait_time)
                            continue
                        raise RateLimitError(
                            provider="tavily",
                            retry_after=retry_after,
                        )

                    # Handle other errors
                    if response.status_code >= 400:
                        error_msg = self._extract_error_message(response)
                        raise SearchProviderError(
                            provider="tavily",
                            message=f"API error {response.status_code}: {error_msg}",
                            # Only server-side (5xx) failures are marked retryable.
                            retryable=response.status_code >= 500,
                        )

                    return response.json()

            except httpx.TimeoutException as e:
                last_error = e
                if attempt < self._max_retries - 1:
                    wait_time = 2**attempt
                    logger.warning(
                        f"Tavily request timeout, retrying in {wait_time}s "
                        f"(attempt {attempt + 1}/{self._max_retries})"
                    )
                    await asyncio.sleep(wait_time)
                    continue

            except httpx.RequestError as e:
                last_error = e
                if attempt < self._max_retries - 1:
                    wait_time = 2**attempt
                    logger.warning(
                        f"Tavily request error: {e}, retrying in {wait_time}s "
                        f"(attempt {attempt + 1}/{self._max_retries})"
                    )
                    await asyncio.sleep(wait_time)
                    continue

            except (AuthenticationError, RateLimitError, SearchProviderError):
                # Provider-level errors are final; re-raise without retrying.
                raise

        # All retries exhausted
        raise SearchProviderError(
            provider="tavily",
            message=f"Request failed after {self._max_retries} attempts",
            retryable=False,
            original_error=last_error,
        )
|
|
259
|
+
|
|
260
|
+
def _parse_retry_after(self, response: httpx.Response) -> Optional[float]:
|
|
261
|
+
"""Parse Retry-After header from response.
|
|
262
|
+
|
|
263
|
+
Args:
|
|
264
|
+
response: HTTP response
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
Seconds to wait, or None if not provided
|
|
268
|
+
"""
|
|
269
|
+
retry_after = response.headers.get("Retry-After")
|
|
270
|
+
if retry_after:
|
|
271
|
+
try:
|
|
272
|
+
return float(retry_after)
|
|
273
|
+
except ValueError:
|
|
274
|
+
pass
|
|
275
|
+
return None
|
|
276
|
+
|
|
277
|
+
def _extract_error_message(self, response: httpx.Response) -> str:
|
|
278
|
+
"""Extract error message from response.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
response: HTTP response
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
Error message string
|
|
285
|
+
"""
|
|
286
|
+
try:
|
|
287
|
+
data = response.json()
|
|
288
|
+
return data.get("error", data.get("message", response.text[:200]))
|
|
289
|
+
except Exception:
|
|
290
|
+
return response.text[:200] if response.text else "Unknown error"
|
|
291
|
+
|
|
292
|
+
    def _parse_response(
        self,
        data: dict[str, Any],
        sub_query_id: Optional[str] = None,
    ) -> list[ResearchSource]:
        """Parse Tavily API response into ResearchSource objects.

        Reads the top-level "results" array; each entry is mapped through
        an intermediate SearchResult before conversion to a ResearchSource
        tagged with SourceType.WEB.

        Args:
            data: Tavily API response JSON
            sub_query_id: SubQuery ID for source tracking

        Returns:
            List of ResearchSource objects (empty when "results" is missing)
        """
        sources: list[ResearchSource] = []
        results = data.get("results", [])

        for result in results:
            # Create SearchResult from Tavily response; missing fields fall
            # back to safe defaults ("" url, "Untitled" title, None values).
            search_result = SearchResult(
                url=result.get("url", ""),
                title=result.get("title", "Untitled"),
                snippet=result.get("content"),  # Tavily uses "content" for snippet
                content=result.get("raw_content"),  # Full content if requested
                score=result.get("score"),
                published_date=self._parse_date(result.get("published_date")),
                source=self._extract_domain(result.get("url", "")),
                metadata={
                    # Preserve the raw relevance score for downstream ranking.
                    "tavily_score": result.get("score"),
                },
            )

            # Convert to ResearchSource
            research_source = search_result.to_research_source(
                source_type=SourceType.WEB,
                sub_query_id=sub_query_id,
            )
            sources.append(research_source)

        return sources
|
|
332
|
+
|
|
333
|
+
def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
|
|
334
|
+
"""Parse date string from Tavily response.
|
|
335
|
+
|
|
336
|
+
Args:
|
|
337
|
+
date_str: ISO format date string
|
|
338
|
+
|
|
339
|
+
Returns:
|
|
340
|
+
Parsed datetime or None
|
|
341
|
+
"""
|
|
342
|
+
if not date_str:
|
|
343
|
+
return None
|
|
344
|
+
try:
|
|
345
|
+
return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
|
346
|
+
except ValueError:
|
|
347
|
+
return None
|
|
348
|
+
|
|
349
|
+
def _extract_domain(self, url: str) -> Optional[str]:
|
|
350
|
+
"""Extract domain from URL.
|
|
351
|
+
|
|
352
|
+
Args:
|
|
353
|
+
url: Full URL
|
|
354
|
+
|
|
355
|
+
Returns:
|
|
356
|
+
Domain name or None
|
|
357
|
+
"""
|
|
358
|
+
try:
|
|
359
|
+
from urllib.parse import urlparse
|
|
360
|
+
|
|
361
|
+
parsed = urlparse(url)
|
|
362
|
+
return parsed.netloc
|
|
363
|
+
except Exception:
|
|
364
|
+
return None
|
|
365
|
+
|
|
366
|
+
async def health_check(self) -> bool:
|
|
367
|
+
"""Check if Tavily API is accessible.
|
|
368
|
+
|
|
369
|
+
Performs a lightweight search to verify API key and connectivity.
|
|
370
|
+
|
|
371
|
+
Returns:
|
|
372
|
+
True if provider is healthy, False otherwise
|
|
373
|
+
"""
|
|
374
|
+
try:
|
|
375
|
+
# Perform minimal search to verify connectivity
|
|
376
|
+
await self.search("test", max_results=1)
|
|
377
|
+
return True
|
|
378
|
+
except AuthenticationError:
|
|
379
|
+
logger.error("Tavily health check failed: invalid API key")
|
|
380
|
+
return False
|
|
381
|
+
except Exception as e:
|
|
382
|
+
logger.warning(f"Tavily health check failed: {e}")
|
|
383
|
+
return False
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Research workflow implementations.
|
|
2
|
+
|
|
3
|
+
This package provides the workflow classes for multi-model orchestration:
|
|
4
|
+
- ChatWorkflow: Single-model conversation with thread persistence
|
|
5
|
+
- ConsensusWorkflow: Multi-model parallel consultation with synthesis
|
|
6
|
+
- ThinkDeepWorkflow: Hypothesis-driven systematic investigation
|
|
7
|
+
- IdeateWorkflow: Creative brainstorming with idea clustering
|
|
8
|
+
- DeepResearchWorkflow: Multi-phase iterative deep research
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from foundry_mcp.core.research.workflows.base import ResearchWorkflowBase
|
|
12
|
+
from foundry_mcp.core.research.workflows.chat import ChatWorkflow
|
|
13
|
+
from foundry_mcp.core.research.workflows.consensus import ConsensusWorkflow
|
|
14
|
+
from foundry_mcp.core.research.workflows.deep_research import DeepResearchWorkflow
|
|
15
|
+
from foundry_mcp.core.research.workflows.ideate import IdeateWorkflow
|
|
16
|
+
from foundry_mcp.core.research.workflows.thinkdeep import ThinkDeepWorkflow
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"ResearchWorkflowBase",
|
|
20
|
+
"ChatWorkflow",
|
|
21
|
+
"ConsensusWorkflow",
|
|
22
|
+
"DeepResearchWorkflow",
|
|
23
|
+
"IdeateWorkflow",
|
|
24
|
+
"ThinkDeepWorkflow",
|
|
25
|
+
]
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Base class for research workflows.
|
|
2
|
+
|
|
3
|
+
Provides common infrastructure for provider integration, error handling,
|
|
4
|
+
and response normalization across all research workflow types.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
|
|
12
|
+
from foundry_mcp.config import ResearchConfig
|
|
13
|
+
from foundry_mcp.core.llm_config import ProviderSpec
|
|
14
|
+
from foundry_mcp.core.providers import (
|
|
15
|
+
ContextWindowError,
|
|
16
|
+
ProviderContext,
|
|
17
|
+
ProviderHooks,
|
|
18
|
+
ProviderRequest,
|
|
19
|
+
ProviderResult,
|
|
20
|
+
ProviderStatus,
|
|
21
|
+
is_context_window_error,
|
|
22
|
+
extract_token_counts,
|
|
23
|
+
create_context_window_guidance,
|
|
24
|
+
)
|
|
25
|
+
from foundry_mcp.core.providers.registry import available_providers, resolve_provider
|
|
26
|
+
from foundry_mcp.core.research.memory import ResearchMemory
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _estimate_prompt_tokens(prompt: str, system_prompt: str | None = None) -> int:
|
|
32
|
+
"""Estimate token count for a prompt using simple heuristic.
|
|
33
|
+
|
|
34
|
+
Uses ~4 characters per token as a rough estimate. This is conservative
|
|
35
|
+
and works reasonably well for English text.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
prompt: User prompt
|
|
39
|
+
system_prompt: Optional system prompt
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
Estimated token count
|
|
43
|
+
"""
|
|
44
|
+
total_chars = len(prompt)
|
|
45
|
+
if system_prompt:
|
|
46
|
+
total_chars += len(system_prompt)
|
|
47
|
+
return total_chars // 4
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class WorkflowResult:
    """Result of a workflow execution.

    Attributes:
        success: Whether the workflow completed successfully
        content: Main response content
        provider_id: Provider that generated the response
        model_used: Model that generated the response
        tokens_used: Total tokens consumed
        input_tokens: Tokens consumed by the prompt
        output_tokens: Tokens generated in the response
        cached_tokens: Tokens served from cache
        duration_ms: Execution duration in milliseconds
        metadata: Additional workflow-specific data (never None after init)
        error: Error message if success is False
    """

    success: bool
    content: str
    provider_id: Optional[str] = None
    model_used: Optional[str] = None
    tokens_used: Optional[int] = None
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    cached_tokens: Optional[int] = None
    duration_ms: Optional[float] = None
    # Fixed annotation: the default is None, so the field type must be
    # Optional; __post_init__ normalizes it to a dict for all callers.
    metadata: Optional[dict[str, Any]] = None
    error: Optional[str] = None

    def __post_init__(self) -> None:
        # Normalize so callers can always treat metadata as a dict.
        if self.metadata is None:
            self.metadata = {}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ResearchWorkflowBase(ABC):
|
|
86
|
+
"""Base class for all research workflows.
|
|
87
|
+
|
|
88
|
+
Provides common functionality for provider resolution, request execution,
|
|
89
|
+
and memory management.
|
|
90
|
+
"""
|
|
91
|
+
|
|
92
|
+
def __init__(
|
|
93
|
+
self,
|
|
94
|
+
config: ResearchConfig,
|
|
95
|
+
memory: Optional[ResearchMemory] = None,
|
|
96
|
+
) -> None:
|
|
97
|
+
"""Initialize workflow with configuration and memory.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
config: Research configuration
|
|
101
|
+
memory: Optional memory instance (creates default if not provided)
|
|
102
|
+
"""
|
|
103
|
+
self.config = config
|
|
104
|
+
self.memory = memory or ResearchMemory(
|
|
105
|
+
base_path=config.get_storage_path(),
|
|
106
|
+
ttl_hours=config.ttl_hours,
|
|
107
|
+
)
|
|
108
|
+
self._provider_cache: dict[str, ProviderContext] = {}
|
|
109
|
+
|
|
110
|
+
def _resolve_provider(
|
|
111
|
+
self,
|
|
112
|
+
provider_id: Optional[str] = None,
|
|
113
|
+
hooks: Optional[ProviderHooks] = None,
|
|
114
|
+
) -> Optional[ProviderContext]:
|
|
115
|
+
"""Resolve and cache a provider instance.
|
|
116
|
+
|
|
117
|
+
Args:
|
|
118
|
+
provider_id: Provider ID or full spec to resolve (uses config default if None)
|
|
119
|
+
Supports both simple IDs ("codex") and full specs ("[cli]codex:gpt-5.2")
|
|
120
|
+
hooks: Optional provider hooks
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
ProviderContext instance or None if unavailable
|
|
124
|
+
"""
|
|
125
|
+
provider_spec_str = provider_id or self.config.default_provider
|
|
126
|
+
|
|
127
|
+
# Check cache first (using full spec string as key)
|
|
128
|
+
if provider_spec_str in self._provider_cache:
|
|
129
|
+
return self._provider_cache[provider_spec_str]
|
|
130
|
+
|
|
131
|
+
# Parse the provider spec to extract base provider ID
|
|
132
|
+
try:
|
|
133
|
+
spec = ProviderSpec.parse_flexible(provider_spec_str)
|
|
134
|
+
except ValueError as exc:
|
|
135
|
+
logger.warning("Invalid provider spec '%s': %s", provider_spec_str, exc)
|
|
136
|
+
return None
|
|
137
|
+
|
|
138
|
+
# Check availability using base provider ID
|
|
139
|
+
available = available_providers()
|
|
140
|
+
if spec.provider not in available:
|
|
141
|
+
logger.warning(
|
|
142
|
+
"Provider %s (from spec '%s') not available. Available: %s",
|
|
143
|
+
spec.provider,
|
|
144
|
+
provider_spec_str,
|
|
145
|
+
available,
|
|
146
|
+
)
|
|
147
|
+
return None
|
|
148
|
+
|
|
149
|
+
try:
|
|
150
|
+
# Resolve using base provider ID and pass model override if specified
|
|
151
|
+
provider = resolve_provider(
|
|
152
|
+
spec.provider,
|
|
153
|
+
hooks=hooks or ProviderHooks(),
|
|
154
|
+
model=spec.model,
|
|
155
|
+
)
|
|
156
|
+
self._provider_cache[provider_spec_str] = provider
|
|
157
|
+
return provider
|
|
158
|
+
except Exception as exc:
|
|
159
|
+
logger.error("Failed to resolve provider %s: %s", spec.provider, exc)
|
|
160
|
+
return None
|
|
161
|
+
|
|
162
|
+
    def _execute_provider(
        self,
        prompt: str,
        provider_id: Optional[str] = None,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[float] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        hooks: Optional[ProviderHooks] = None,
    ) -> WorkflowResult:
        """Execute a single provider request.

        Resolves the provider (or returns a failed result if unavailable),
        issues one generate() call, and maps the outcome to a WorkflowResult.
        Context-window overflows are re-raised as ContextWindowError with
        actionable guidance; all other exceptions become error results.

        Args:
            prompt: User prompt
            provider_id: Provider to use (uses config default if None)
            system_prompt: Optional system prompt
            model: Optional model override
            timeout: Optional timeout in seconds
            temperature: Optional temperature setting
            max_tokens: Optional max tokens
            hooks: Optional provider hooks

        Returns:
            WorkflowResult with response or error

        Raises:
            ContextWindowError: If the prompt exceeds the provider's context window
        """
        provider = self._resolve_provider(provider_id, hooks)
        if provider is None:
            return WorkflowResult(
                success=False,
                content="",
                error=f"Provider '{provider_id or self.config.default_provider}' is not available",
            )

        request = ProviderRequest(
            prompt=prompt,
            system_prompt=system_prompt,
            model=model,
            # Fall back to the configured default when no timeout is given.
            timeout=timeout or self.config.default_timeout,
            temperature=temperature,
            max_tokens=max_tokens,
        )

        # Estimate prompt tokens for error reporting
        estimated_tokens = _estimate_prompt_tokens(prompt, system_prompt)

        try:
            result: ProviderResult = provider.generate(request)

            if result.status != ProviderStatus.SUCCESS:
                return WorkflowResult(
                    success=False,
                    content=result.content or "",
                    provider_id=result.provider_id,
                    model_used=result.model_used,
                    error=f"Provider returned status: {result.status.value}",
                )

            return WorkflowResult(
                success=True,
                content=result.content,
                provider_id=result.provider_id,
                model_used=result.model_used,
                tokens_used=result.tokens.total_tokens if result.tokens else None,
                input_tokens=result.tokens.input_tokens if result.tokens else None,
                output_tokens=result.tokens.output_tokens if result.tokens else None,
                cached_tokens=result.tokens.cached_input_tokens if result.tokens else None,
                duration_ms=result.duration_ms,
            )

        except ContextWindowError:
            # Re-raise context window errors directly
            raise

        except Exception as exc:
            # Check if this is a context window error
            if is_context_window_error(exc):
                # Extract token counts from error message if available
                prompt_tokens, max_context = extract_token_counts(str(exc))

                # Use estimated tokens if not extracted
                if prompt_tokens is None:
                    prompt_tokens = estimated_tokens

                # Log detailed context window error
                logger.error(
                    "Context window exceeded: prompt_tokens=%s, max_tokens=%s, "
                    "estimated_tokens=%d, provider=%s, error=%s",
                    prompt_tokens,
                    max_context,
                    estimated_tokens,
                    provider_id,
                    str(exc),
                )

                # Generate actionable guidance
                guidance = create_context_window_guidance(
                    prompt_tokens=prompt_tokens,
                    max_tokens=max_context,
                    provider_id=provider_id,
                )

                # Raise specific context window error with details
                raise ContextWindowError(
                    guidance,
                    provider=provider_id,
                    prompt_tokens=prompt_tokens,
                    max_tokens=max_context,
                ) from exc

            # Non-context-window error - log and return error result
            logger.error("Provider execution failed: %s", exc)
            return WorkflowResult(
                success=False,
                content="",
                provider_id=provider_id,
                error=str(exc),
            )
|
|
280
|
+
|
|
281
|
+
def get_available_providers(self) -> list[str]:
|
|
282
|
+
"""Get list of available provider IDs.
|
|
283
|
+
|
|
284
|
+
Returns:
|
|
285
|
+
List of available provider identifiers
|
|
286
|
+
"""
|
|
287
|
+
return available_providers()
|
|
288
|
+
|
|
289
|
+
    @abstractmethod
    def execute(self, **kwargs: Any) -> WorkflowResult:
        """Execute the workflow.

        Subclasses must implement this method with their specific logic.

        Args:
            **kwargs: Workflow-specific options; each subclass documents its own.

        Returns:
            WorkflowResult with response or error
        """
        ...
|