foundry-mcp 0.7.0__py3-none-any.whl → 0.8.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. foundry_mcp/cli/__init__.py +0 -13
  2. foundry_mcp/cli/commands/session.py +1 -8
  3. foundry_mcp/cli/context.py +39 -0
  4. foundry_mcp/config.py +381 -7
  5. foundry_mcp/core/batch_operations.py +1196 -0
  6. foundry_mcp/core/discovery.py +1 -1
  7. foundry_mcp/core/llm_config.py +8 -0
  8. foundry_mcp/core/naming.py +25 -2
  9. foundry_mcp/core/prometheus.py +0 -13
  10. foundry_mcp/core/providers/__init__.py +12 -0
  11. foundry_mcp/core/providers/base.py +39 -0
  12. foundry_mcp/core/providers/claude.py +45 -1
  13. foundry_mcp/core/providers/codex.py +64 -3
  14. foundry_mcp/core/providers/cursor_agent.py +22 -3
  15. foundry_mcp/core/providers/detectors.py +34 -7
  16. foundry_mcp/core/providers/gemini.py +63 -1
  17. foundry_mcp/core/providers/opencode.py +95 -71
  18. foundry_mcp/core/providers/package-lock.json +4 -4
  19. foundry_mcp/core/providers/package.json +1 -1
  20. foundry_mcp/core/providers/validation.py +128 -0
  21. foundry_mcp/core/research/memory.py +103 -0
  22. foundry_mcp/core/research/models.py +783 -0
  23. foundry_mcp/core/research/providers/__init__.py +40 -0
  24. foundry_mcp/core/research/providers/base.py +242 -0
  25. foundry_mcp/core/research/providers/google.py +507 -0
  26. foundry_mcp/core/research/providers/perplexity.py +442 -0
  27. foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
  28. foundry_mcp/core/research/providers/tavily.py +383 -0
  29. foundry_mcp/core/research/workflows/__init__.py +5 -2
  30. foundry_mcp/core/research/workflows/base.py +106 -12
  31. foundry_mcp/core/research/workflows/consensus.py +160 -17
  32. foundry_mcp/core/research/workflows/deep_research.py +4020 -0
  33. foundry_mcp/core/responses.py +240 -0
  34. foundry_mcp/core/spec.py +1 -0
  35. foundry_mcp/core/task.py +141 -12
  36. foundry_mcp/core/validation.py +6 -1
  37. foundry_mcp/server.py +0 -52
  38. foundry_mcp/tools/unified/__init__.py +37 -18
  39. foundry_mcp/tools/unified/authoring.py +0 -33
  40. foundry_mcp/tools/unified/environment.py +202 -29
  41. foundry_mcp/tools/unified/plan.py +20 -1
  42. foundry_mcp/tools/unified/provider.py +0 -40
  43. foundry_mcp/tools/unified/research.py +644 -19
  44. foundry_mcp/tools/unified/review.py +5 -2
  45. foundry_mcp/tools/unified/review_helpers.py +16 -1
  46. foundry_mcp/tools/unified/server.py +9 -24
  47. foundry_mcp/tools/unified/task.py +528 -9
  48. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/METADATA +2 -1
  49. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/RECORD +52 -46
  50. foundry_mcp/cli/flags.py +0 -266
  51. foundry_mcp/core/feature_flags.py +0 -592
  52. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/WHEEL +0 -0
  53. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/entry_points.txt +0 -0
  54. {foundry_mcp-0.7.0.dist-info → foundry_mcp-0.8.10.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,507 @@
1
+ """Google Custom Search provider for web search.
2
+
3
+ This module implements GoogleSearchProvider, which wraps the Google Custom Search
4
+ JSON API to provide web search capabilities for the deep research workflow.
5
+
6
+ Google Custom Search API documentation:
7
+ https://developers.google.com/custom-search/v1/reference/rest/v1/cse/list
8
+
9
+ Example usage:
10
+ provider = GoogleSearchProvider(
11
+ api_key="AIza...",
12
+ cx="017576662512468239146:omuauf_lfve",
13
+ )
14
+ sources = await provider.search("machine learning trends", max_results=5)
15
+ """
16
+
17
+ import asyncio
18
+ import logging
19
+ import os
20
+ from datetime import datetime
21
+ from typing import Any, Optional
22
+
23
+ import httpx
24
+
25
+ from foundry_mcp.core.research.models import ResearchSource, SourceType
26
+ from foundry_mcp.core.research.providers.base import (
27
+ AuthenticationError,
28
+ RateLimitError,
29
+ SearchProvider,
30
+ SearchProviderError,
31
+ SearchResult,
32
+ )
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ # Google Custom Search API constants
37
+ GOOGLE_API_BASE_URL = "https://www.googleapis.com/customsearch/v1"
38
+ DEFAULT_TIMEOUT = 30.0
39
+ DEFAULT_MAX_RETRIES = 3
40
+ DEFAULT_RATE_LIMIT = 1.0 # requests per second (Google CSE has daily quota limits)
41
+
42
+
43
+ class GoogleSearchProvider(SearchProvider):
44
+ """Google Custom Search API provider for web search.
45
+
46
+ Wraps the Google Custom Search JSON API to provide web search capabilities.
47
+ Requires a Google API key and a Custom Search Engine (CSE) ID.
48
+
49
+ To set up:
50
+ 1. Create a project in Google Cloud Console
51
+ 2. Enable the Custom Search API
52
+ 3. Create an API key
53
+ 4. Create a Custom Search Engine at https://cse.google.com/
54
+ 5. Get the Search Engine ID (cx parameter)
55
+
56
+ Attributes:
57
+ api_key: Google API key (required)
58
+ cx: Custom Search Engine ID (required)
59
+ base_url: API base URL (default: https://www.googleapis.com/customsearch/v1)
60
+ timeout: Request timeout in seconds (default: 30.0)
61
+ max_retries: Maximum retry attempts for rate limits (default: 3)
62
+
63
+ Example:
64
+ provider = GoogleSearchProvider(
65
+ api_key="AIza...",
66
+ cx="017576662512468239146:omuauf_lfve",
67
+ )
68
+ sources = await provider.search(
69
+ "AI trends 2024",
70
+ max_results=5,
71
+ )
72
+ """
73
+
74
def __init__(
    self,
    api_key: Optional[str] = None,
    cx: Optional[str] = None,
    base_url: str = GOOGLE_API_BASE_URL,
    timeout: float = DEFAULT_TIMEOUT,
    max_retries: int = DEFAULT_MAX_RETRIES,
):
    """Set up credentials and request settings for Google Custom Search.

    Args:
        api_key: Google API key. Falls back to the GOOGLE_API_KEY
            environment variable when omitted or empty.
        cx: Custom Search Engine ID. Falls back to the GOOGLE_CSE_ID
            environment variable when omitted or empty.
        base_url: API endpoint; any trailing "/" is removed.
        timeout: Request timeout in seconds.
        max_retries: Number of request attempts made before giving up.

    Raises:
        ValueError: If no API key or no CSE ID can be resolved.
    """
    env = os.environ

    # Explicit arguments win; the environment is only a fallback.
    self._api_key = api_key if api_key else env.get("GOOGLE_API_KEY")
    if not self._api_key:
        raise ValueError(
            "Google API key required. Provide via api_key parameter "
            "or GOOGLE_API_KEY environment variable."
        )

    self._cx = cx if cx else env.get("GOOGLE_CSE_ID")
    if not self._cx:
        raise ValueError(
            "Google Custom Search Engine ID required. Provide via cx parameter "
            "or GOOGLE_CSE_ID environment variable."
        )

    self._base_url = base_url.rstrip("/")
    self._timeout, self._max_retries = timeout, max_retries
    self._rate_limit_value = DEFAULT_RATE_LIMIT
112
+
113
def get_provider_name(self) -> str:
    """Identify this provider.

    Returns:
        The fixed identifier "google".
    """
    provider_name = "google"
    return provider_name
120
+
121
@property
def rate_limit(self) -> Optional[float]:
    """Expose the request rate configured for this provider.

    Returns:
        The stored rate limit in requests per second.
    """
    configured_rate = self._rate_limit_value
    return configured_rate
129
+
130
async def search(
    self,
    query: str,
    max_results: int = 10,
    **kwargs: Any,
) -> list[ResearchSource]:
    """Execute a web search via the Google Custom Search API.

    Args:
        query: The search query string.
        max_results: Maximum number of results to return. The API serves
            at most 10 results per request, so larger values are capped
            at 10. Values below 1 return an empty list without calling
            the API.
        **kwargs: Additional Google CSE options:
            - site_search: Restrict results to a specific site
            - date_restrict: Restrict by date (e.g., "d7" for past week,
              "m1" for past month)
            - file_type: Restrict to specific file types (e.g., "pdf")
            - safe: SafeSearch setting, forwarded unchanged (default
              "off"). The current API reference documents "active" and
              "off".
            - sub_query_id: SubQuery ID for source tracking

    Returns:
        List of ResearchSource objects.

    Raises:
        AuthenticationError: If API key is invalid
        RateLimitError: If rate limit/quota exceeded after all retries
        SearchProviderError: For other API errors
    """
    # `num` must be between 1 and 10; asking for nothing is answered
    # locally instead of sending a request the API would reject.
    if max_results < 1:
        return []

    params: dict[str, Any] = {
        "key": self._api_key,
        "cx": self._cx,
        "q": query,
        # More than 10 results would need pagination via `start`.
        "num": min(max_results, 10),
        "safe": kwargs.get("safe", "off"),
    }

    # Optional filters are only sent when the caller supplied a value.
    optional_filters = {
        "siteSearch": kwargs.get("site_search"),
        "dateRestrict": kwargs.get("date_restrict"),
        "fileType": kwargs.get("file_type"),
    }
    params.update(
        {name: value for name, value in optional_filters.items() if value}
    )

    response_data = await self._execute_with_retry(params)
    return self._parse_response(response_data, kwargs.get("sub_query_id"))
188
+
189
async def _execute_with_retry(
    self,
    params: dict[str, Any],
) -> dict[str, Any]:
    """Execute the API request, retrying transient failures with backoff.

    Up to ``self._max_retries`` attempts are made. Timeouts, transport
    errors, HTTP 429 and quota-style HTTP 403 responses are retried after
    waiting ``Retry-After`` seconds when provided, otherwise ``2**attempt``
    seconds. HTTP 401, other 403 responses and any other status >= 400
    raise immediately (5xx errors are flagged ``retryable`` for the caller
    but are not retried here).

    Args:
        params: Query parameters

    Returns:
        Parsed JSON response

    Raises:
        AuthenticationError: If API key is invalid or access is denied
        RateLimitError: If rate limit exceeded after all retries
        SearchProviderError: For other API errors, an undecodable response
            body, or when every attempt failed at the transport level
    """
    last_error: Optional[Exception] = None
    final_attempt = self._max_retries - 1

    for attempt in range(self._max_retries):
        attempt_label = f"(attempt {attempt + 1}/{self._max_retries})"

        try:
            # The client is closed before any backoff sleep so no
            # connection is held open while waiting.
            async with httpx.AsyncClient(timeout=self._timeout) as client:
                response = await client.get(self._base_url, params=params)
        except (httpx.TimeoutException, httpx.RequestError) as e:
            last_error = e
            if attempt < final_attempt:
                wait_time = 2**attempt
                reason = (
                    "request timeout"
                    if isinstance(e, httpx.TimeoutException)
                    else f"request error: {e}"
                )
                logger.warning(
                    f"Google CSE {reason}, retrying in {wait_time}s {attempt_label}"
                )
                await asyncio.sleep(wait_time)
            continue

        status = response.status_code

        # Authentication errors are never retried.
        if status == 401:
            raise AuthenticationError(
                provider="google",
                message="Invalid API key",
            )

        # A 403 is either a quota problem (retryable) or an access problem
        # such as a bad CSE ID or a disabled API (not retryable).
        throttle_kind: Optional[str] = None
        if status == 403:
            error_data = self._parse_error_response(response)
            lowered = error_data.lower()
            if "quota" in lowered or "limit" in lowered:
                throttle_kind = "quota"
            else:
                raise AuthenticationError(
                    provider="google",
                    message=f"Access denied: {error_data}",
                )
        elif status == 429:
            throttle_kind = "rate"

        if throttle_kind is not None:
            retry_after = self._parse_retry_after(response)
            if attempt < final_attempt:
                wait_time = retry_after or (2**attempt)
                logger.warning(
                    f"Google CSE {throttle_kind} limit hit, waiting {wait_time}s "
                    f"{attempt_label}"
                )
                await asyncio.sleep(wait_time)
                continue
            raise RateLimitError(
                provider="google",
                retry_after=retry_after,
            )

        if status >= 400:
            error_msg = self._parse_error_response(response)
            raise SearchProviderError(
                provider="google",
                message=f"API error {status}: {error_msg}",
                retryable=status >= 500,
            )

        try:
            return response.json()
        except ValueError as e:
            # A success status with an undecodable body is reported as a
            # provider error rather than leaking a raw decode exception.
            raise SearchProviderError(
                provider="google",
                message="Invalid JSON in API response",
                retryable=False,
                original_error=e,
            ) from e

    # All attempts failed at the transport level.
    raise SearchProviderError(
        provider="google",
        message=f"Request failed after {self._max_retries} attempts",
        retryable=False,
        original_error=last_error,
    ) from last_error
303
+
304
+ def _parse_retry_after(self, response: httpx.Response) -> Optional[float]:
305
+ """Parse Retry-After header from response.
306
+
307
+ Args:
308
+ response: HTTP response
309
+
310
+ Returns:
311
+ Seconds to wait, or None if not provided
312
+ """
313
+ retry_after = response.headers.get("Retry-After")
314
+ if retry_after:
315
+ try:
316
+ return float(retry_after)
317
+ except ValueError:
318
+ pass
319
+ return None
320
+
321
+ def _parse_error_response(self, response: httpx.Response) -> str:
322
+ """Extract error message from Google API error response.
323
+
324
+ Google API returns errors in format:
325
+ {
326
+ "error": {
327
+ "code": 403,
328
+ "message": "...",
329
+ "errors": [...]
330
+ }
331
+ }
332
+
333
+ Args:
334
+ response: HTTP response
335
+
336
+ Returns:
337
+ Error message string
338
+ """
339
+ try:
340
+ data = response.json()
341
+ error = data.get("error", {})
342
+ if isinstance(error, dict):
343
+ return error.get("message", str(error))
344
+ return str(error)
345
+ except Exception:
346
+ return response.text[:200] if response.text else "Unknown error"
347
+
348
+ def _parse_response(
349
+ self,
350
+ data: dict[str, Any],
351
+ sub_query_id: Optional[str] = None,
352
+ ) -> list[ResearchSource]:
353
+ """Parse Google Custom Search API response into ResearchSource objects.
354
+
355
+ Google CSE response structure:
356
+ {
357
+ "items": [
358
+ {
359
+ "title": "...",
360
+ "link": "...",
361
+ "snippet": "...",
362
+ "displayLink": "example.com",
363
+ "pagemap": {
364
+ "metatags": [{"og:description": "...", "article:published_time": "..."}]
365
+ }
366
+ }
367
+ ],
368
+ "searchInformation": {
369
+ "totalResults": "123456"
370
+ }
371
+ }
372
+
373
+ Args:
374
+ data: Google CSE API response JSON
375
+ sub_query_id: SubQuery ID for source tracking
376
+
377
+ Returns:
378
+ List of ResearchSource objects
379
+ """
380
+ sources: list[ResearchSource] = []
381
+ items = data.get("items", [])
382
+
383
+ for item in items:
384
+ # Extract published date from pagemap metatags if available
385
+ published_date = self._extract_published_date(item)
386
+
387
+ # Create SearchResult from Google response
388
+ search_result = SearchResult(
389
+ url=item.get("link", ""),
390
+ title=item.get("title", "Untitled"),
391
+ snippet=item.get("snippet"),
392
+ content=None, # Google CSE doesn't provide full content
393
+ score=None, # Google CSE doesn't provide relevance scores
394
+ published_date=published_date,
395
+ source=item.get("displayLink"),
396
+ metadata={
397
+ "google_cache_id": item.get("cacheId"),
398
+ "mime_type": item.get("mime"),
399
+ "file_format": item.get("fileFormat"),
400
+ },
401
+ )
402
+
403
+ # Convert to ResearchSource
404
+ research_source = search_result.to_research_source(
405
+ source_type=SourceType.WEB,
406
+ sub_query_id=sub_query_id,
407
+ )
408
+ sources.append(research_source)
409
+
410
+ return sources
411
+
412
+ def _extract_published_date(self, item: dict[str, Any]) -> Optional[datetime]:
413
+ """Extract published date from Google CSE item pagemap.
414
+
415
+ Looks for common metatag fields that contain publication dates:
416
+ - article:published_time
417
+ - datePublished
418
+ - og:published_time
419
+ - article:modified_time (fallback)
420
+
421
+ Args:
422
+ item: Single item from Google CSE response
423
+
424
+ Returns:
425
+ Parsed datetime or None
426
+ """
427
+ pagemap = item.get("pagemap", {})
428
+ metatags = pagemap.get("metatags", [])
429
+
430
+ if not metatags:
431
+ return None
432
+
433
+ # Metatags is a list, typically with one element
434
+ tags = metatags[0] if metatags else {}
435
+
436
+ # Try various date fields in order of preference
437
+ date_fields = [
438
+ "article:published_time",
439
+ "datepublished",
440
+ "og:published_time",
441
+ "article:modified_time",
442
+ "datemodified",
443
+ ]
444
+
445
+ for field in date_fields:
446
+ date_str = tags.get(field)
447
+ if date_str:
448
+ parsed = self._parse_date(date_str)
449
+ if parsed:
450
+ return parsed
451
+
452
+ return None
453
+
454
+ def _parse_date(self, date_str: str) -> Optional[datetime]:
455
+ """Parse date string from various formats.
456
+
457
+ Args:
458
+ date_str: Date string (ISO format or other common formats)
459
+
460
+ Returns:
461
+ Parsed datetime or None
462
+ """
463
+ if not date_str:
464
+ return None
465
+
466
+ # Try ISO format first
467
+ try:
468
+ return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
469
+ except ValueError:
470
+ pass
471
+
472
+ # Try common date formats
473
+ formats = [
474
+ "%Y-%m-%d",
475
+ "%Y/%m/%d",
476
+ "%d-%m-%Y",
477
+ "%d/%m/%Y",
478
+ "%B %d, %Y",
479
+ "%b %d, %Y",
480
+ ]
481
+
482
+ for fmt in formats:
483
+ try:
484
+ return datetime.strptime(date_str, fmt)
485
+ except ValueError:
486
+ continue
487
+
488
+ return None
489
+
490
async def health_check(self) -> bool:
    """Report whether the Google Custom Search API can be reached.

    Runs a one-result search for "test"; any failure is logged and
    reported as unhealthy instead of being raised.

    Returns:
        True if the search completed, False otherwise
    """
    try:
        # Smallest possible query: one result is enough to prove access.
        await self.search("test", max_results=1)
    except AuthenticationError:
        logger.error("Google CSE health check failed: invalid API key or CSE ID")
        return False
    except Exception as e:
        logger.warning(f"Google CSE health check failed: {e}")
        return False
    else:
        return True