amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5621
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.14.3.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -729
  180. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,518 +1,518 @@
1
- #!/usr/bin/env python
2
- #
3
- # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
4
- # SPDX-License-Identifier: MIT
5
-
6
- """
7
- External MCP Services Integration
8
-
9
- Provides wrappers for external MCP services like Context7 and Perplexity
10
- that run as separate processes via npx commands.
11
- """
12
-
13
- import json
14
- import os
15
- import subprocess
16
- import time
17
- from typing import Any, Dict, List, Optional
18
-
19
- from gaia.logger import get_logger
20
-
21
- logger = get_logger(__name__)
22
-
23
-
24
- class ExternalMCPService:
25
- """Base class for managing external MCP services via subprocess."""
26
-
27
- def __init__(
28
- self,
29
- command: List[str],
30
- env: Optional[Dict[str, str]] = None,
31
- timeout: int = 30,
32
- ):
33
- """
34
- Initialize external MCP service.
35
-
36
- Args:
37
- command: Command to start the MCP service (e.g., ["npx", "-y", "package"])
38
- env: Additional environment variables
39
- timeout: Timeout in seconds for subprocess calls
40
- """
41
- self.command = command
42
- self.env = {**os.environ.copy(), **(env or {})}
43
- self.timeout = timeout
44
- self.process = None
45
-
46
- def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
47
- """
48
- Call a tool on the external MCP service.
49
-
50
- Args:
51
- tool_name: Name of the tool to call
52
- arguments: Tool arguments as a dictionary
53
-
54
- Returns:
55
- Tool execution result
56
- """
57
- try:
58
- # Create JSON-RPC request
59
- request = {
60
- "jsonrpc": "2.0",
61
- "id": int(time.time() * 1000),
62
- "method": "tools/call",
63
- "params": {"name": tool_name, "arguments": arguments},
64
- }
65
-
66
- # Call the MCP service via subprocess
67
- result = subprocess.run(
68
- self.command,
69
- input=json.dumps(request) + "\n",
70
- capture_output=True,
71
- text=True,
72
- env=self.env,
73
- timeout=self.timeout,
74
- )
75
-
76
- if result.returncode != 0:
77
- logger.error(
78
- f"MCP service error (exit {result.returncode}): {result.stderr}"
79
- )
80
- return {"error": f"Service failed: {result.stderr or 'Unknown error'}"}
81
-
82
- # Parse response
83
- try:
84
- response = json.loads(result.stdout)
85
-
86
- # Extract result from JSON-RPC response
87
- if "result" in response:
88
- return response["result"]
89
- elif "error" in response:
90
- return {"error": response["error"].get("message", "Unknown error")}
91
- else:
92
- return {"error": "Invalid response format"}
93
-
94
- except json.JSONDecodeError as e:
95
- logger.error(f"Failed to parse MCP response: {e}")
96
- logger.debug(f"Raw output: {result.stdout}")
97
- return {"error": f"Invalid JSON response: {str(e)}"}
98
-
99
- except subprocess.TimeoutExpired:
100
- logger.error(f"MCP service call timed out after {self.timeout}s")
101
- return {"error": f"Request timed out after {self.timeout} seconds"}
102
- except Exception as e:
103
- logger.error(f"MCP service call failed: {e}")
104
- return {"error": str(e)}
105
-
106
- def list_tools(self) -> List[Dict[str, Any]]:
107
- """
108
- List available tools from the MCP service.
109
-
110
- Returns:
111
- List of tool definitions
112
- """
113
- try:
114
- request = {"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}
115
-
116
- result = subprocess.run(
117
- self.command,
118
- input=json.dumps(request) + "\n",
119
- capture_output=True,
120
- text=True,
121
- env=self.env,
122
- timeout=self.timeout,
123
- )
124
-
125
- if result.returncode == 0:
126
- response = json.loads(result.stdout)
127
- return response.get("result", {}).get("tools", [])
128
-
129
- except Exception as e:
130
- logger.warning(f"Failed to list tools: {e}")
131
-
132
- return []
133
-
134
-
135
- class Context7Service(ExternalMCPService):
136
- """Context7 documentation search service with caching and rate protection.
137
-
138
- This is an OPTIONAL service - the system works without it.
139
- """
140
-
141
- # Class-level availability tracking (cached after first check)
142
- _availability_checked: bool = False
143
- _is_available: bool = False
144
-
145
- def __init__(self, api_key: Optional[str] = None):
146
- """Initialize Context7 MCP service.
147
-
148
- Args:
149
- api_key: Optional Context7 API key (defaults to CONTEXT7_API_KEY env var)
150
- """
151
- # Get API key from parameter or environment
152
- api_key = api_key or os.getenv("CONTEXT7_API_KEY")
153
- env = {"CONTEXT7_API_KEY": api_key} if api_key else {}
154
-
155
- super().__init__(command=["npx", "-y", "@upstash/context7-mcp"], env=env)
156
-
157
- # Use persistent cache instead of session cache
158
- from gaia.mcp.context7_cache import Context7Cache, Context7RateLimiter
159
-
160
- self._cache = Context7Cache()
161
- self._rate_limiter = Context7RateLimiter()
162
-
163
- @classmethod
164
- def check_availability(cls) -> bool:
165
- """Check if Context7 can be used (npx available, package works).
166
-
167
- This check is cached after the first call to avoid repeated slow checks.
168
-
169
- Returns:
170
- True if Context7 is available and working, False otherwise
171
- """
172
- if cls._availability_checked:
173
- return cls._is_available
174
-
175
- cls._availability_checked = True
176
-
177
- # Check if npx is available
178
- try:
179
- import shutil
180
-
181
- if not shutil.which("npx"):
182
- logger.info("Context7 unavailable: npx not found in PATH")
183
- cls._is_available = False
184
- return False
185
- except Exception as e:
186
- logger.info(f"Context7 unavailable: failed to check for npx: {e}")
187
- cls._is_available = False
188
- return False
189
-
190
- # Try a simple operation to verify Context7 works
191
- try:
192
- service = cls()
193
- tools = service.list_tools()
194
- cls._is_available = len(tools) > 0
195
- if cls._is_available:
196
- logger.info(f"Context7 available ({len(tools)} tools found)")
197
- else:
198
- logger.info("Context7 unavailable: no tools returned from service")
199
- except Exception as e:
200
- logger.info(f"Context7 unavailable: {type(e).__name__}: {e}")
201
- cls._is_available = False
202
-
203
- return cls._is_available
204
-
205
- def _get_resolved_library_id(self, library: str) -> Optional[str]:
206
- """Resolve a library name to Context7-compatible ID with persistent caching.
207
-
208
- Args:
209
- library: Library name (e.g., "nextjs") or full ID (e.g., "/vercel/next.js")
210
-
211
- Returns:
212
- Resolved library ID or None if resolution failed
213
- """
214
- # Already a full ID (has /org/project format)
215
- if library.count("/") >= 2:
216
- return library if library.startswith("/") else f"/{library}"
217
-
218
- # Check persistent cache first
219
- cached = self._cache.get_library_id(library)
220
- if cached is not None:
221
- logger.debug(f"Cache hit for library ID: {library} -> {cached}")
222
- return cached
223
-
224
- # Rate limit check before API call
225
- can_proceed, reason = self._rate_limiter.can_make_request()
226
- if not can_proceed:
227
- logger.warning(f"Context7 rate limited: {reason}")
228
- return None
229
-
230
- # Resolve via API
231
- logger.info(f"Resolving library ID for '{library}' via Context7 API")
232
- self._rate_limiter.consume_token()
233
- resolved_id = self.resolve_library_id(library)
234
-
235
- # Record success/failure for circuit breaker
236
- if resolved_id:
237
- self._rate_limiter.record_success()
238
- logger.info(f"Resolved '{library}' to '{resolved_id}'")
239
- else:
240
- self._rate_limiter.record_failure()
241
- logger.warning(f"Could not resolve library ID for '{library}'")
242
-
243
- # Cache result (even None to avoid repeated failures)
244
- self._cache.set_library_id(library, resolved_id)
245
-
246
- return resolved_id
247
-
248
- def search_documentation(
249
- self, query: str, library: Optional[str] = None
250
- ) -> Dict[str, Any]:
251
- """Search documentation using Context7 with caching and rate protection.
252
-
253
- Args:
254
- query: Search query (e.g., "how to use useState")
255
- library: Optional library name to search in (e.g., "react")
256
-
257
- Returns:
258
- Documentation search results with code examples and references
259
- """
260
- # Check availability first (cached after first check)
261
- if not self.check_availability():
262
- return {
263
- "success": False,
264
- "documentation": "",
265
- "error": "Context7 not available - use embedded knowledge",
266
- "unavailable": True, # Signal to LLM to use embedded patterns
267
- }
268
-
269
- # Resolve library ID first
270
- resolved_id = None
271
- if library:
272
- resolved_id = self._get_resolved_library_id(library)
273
- if not resolved_id:
274
- logger.warning(f"Could not resolve library '{library}'")
275
-
276
- # Check documentation cache
277
- cache_key_lib = resolved_id or "global"
278
- cached_docs = self._cache.get_documentation(cache_key_lib, query)
279
- if cached_docs:
280
- logger.info(f"Cache hit for documentation: {cache_key_lib}:{query[:30]}...")
281
- return {
282
- "success": True,
283
- "documentation": cached_docs,
284
- "cached": True,
285
- }
286
-
287
- # Rate limit check before API call
288
- can_proceed, reason = self._rate_limiter.can_make_request()
289
- if not can_proceed:
290
- logger.warning(f"Context7 rate limited: {reason}")
291
- return {
292
- "success": False,
293
- "error": reason,
294
- "documentation": "",
295
- }
296
-
297
- # Make API call
298
- self._rate_limiter.consume_token()
299
- arguments = {"topic": query}
300
- if resolved_id:
301
- arguments["context7CompatibleLibraryID"] = resolved_id
302
-
303
- result = self.call_tool("get-library-docs", arguments)
304
-
305
- if "error" in result:
306
- # Check if it's a rate limit error (HTTP 429)
307
- is_rate_limit = "429" in str(result.get("error", ""))
308
- self._rate_limiter.record_failure(is_rate_limit)
309
-
310
- logger.error(f"Context7 search failed: {result['error']}")
311
- return {
312
- "success": False,
313
- "error": result["error"],
314
- "documentation": "",
315
- }
316
-
317
- # Success - cache and return
318
- self._rate_limiter.record_success()
319
- docs = (
320
- result.get("content", [{}])[0].get("text", "")
321
- if result.get("content")
322
- else ""
323
- )
324
- self._cache.set_documentation(cache_key_lib, query, docs)
325
-
326
- return {
327
- "success": True,
328
- "documentation": docs,
329
- "cached": False,
330
- "raw_result": result,
331
- }
332
-
333
- def resolve_library_id(self, library_name: str) -> Optional[str]:
334
- """
335
- Resolve a library name to Context7-compatible library ID.
336
-
337
- Args:
338
- library_name: Library name (e.g., "react", "tensorflow")
339
-
340
- Returns:
341
- Context7-compatible library ID (e.g., "/facebook/react") or None
342
- """
343
- result = self.call_tool("resolve-library-id", {"libraryName": library_name})
344
-
345
- if "error" in result:
346
- logger.warning(f"Failed to resolve library ID: {result['error']}")
347
- return None
348
-
349
- # Extract library ID from response
350
- content = result.get("content", [])
351
- if not content or len(content) == 0:
352
- logger.warning("Empty content in Context7 response")
353
- return None
354
-
355
- text = content[0].get("text", "")
356
- logger.debug(f"Context7 resolve-library-id response text:\n{text[:800]}")
357
-
358
- import re
359
-
360
- # Parse ALL libraries from response (separated by ----------)
361
- # Multiple libraries may have the same title - need smart selection
362
- libraries = []
363
- blocks = text.split("----------")
364
-
365
- for block in blocks:
366
- if not block.strip():
367
- continue
368
-
369
- title_match = re.search(r"Title:\s*(.+)", block)
370
- id_match = re.search(
371
- r"Context7-compatible library ID:\s*(/[\w.-]+/[\w.-]+(?:/[\w.-]+)?)",
372
- block,
373
- )
374
- score_match = re.search(r"Benchmark Score:\s*([\d.]+)", block)
375
- versions_match = re.search(r"Versions:\s*(.+)", block)
376
-
377
- if id_match:
378
- libraries.append(
379
- {
380
- "title": title_match.group(1).strip() if title_match else "",
381
- "id": id_match.group(1),
382
- "score": float(score_match.group(1)) if score_match else 0,
383
- "has_versions": versions_match is not None,
384
- }
385
- )
386
-
387
- if not libraries:
388
- logger.warning(f"No library IDs found in response for '{library_name}'")
389
- return None
390
-
391
- # Selection strategy (in order of priority):
392
- # 1. Exact title match that has versions (indicates official repo)
393
- # 2. Exact title match with highest score
394
- # 3. Title contains search term, prefer ones with versions
395
- # 4. Highest benchmark score overall
396
-
397
- # Normalize for comparison (remove dots, spaces, dashes)
398
- def normalize(s):
399
- return s.lower().replace(".", "").replace("-", "").replace(" ", "")
400
-
401
- normalized_search = normalize(library_name)
402
-
403
- # Find exact matches (after normalization)
404
- exact_matches = [
405
- lib for lib in libraries if normalize(lib["title"]) == normalized_search
406
- ]
407
-
408
- if exact_matches:
409
- # Prefer ones with versions (usually the official repo)
410
- versioned = [lib for lib in exact_matches if lib["has_versions"]]
411
- if versioned:
412
- best = max(versioned, key=lambda x: x["score"])
413
- logger.info(
414
- f"Resolved '{library_name}' to '{best['id']}' (exact match with versions, score={best['score']})"
415
- )
416
- return best["id"]
417
-
418
- # No versions, pick highest score
419
- best = max(exact_matches, key=lambda x: x["score"])
420
- logger.info(
421
- f"Resolved '{library_name}' to '{best['id']}' (exact match, score={best['score']})"
422
- )
423
- return best["id"]
424
-
425
- # No exact match - look for title containing search term
426
- partial_matches = [
427
- lib for lib in libraries if normalized_search in normalize(lib["title"])
428
- ]
429
- if partial_matches:
430
- versioned = [lib for lib in partial_matches if lib["has_versions"]]
431
- if versioned:
432
- best = max(versioned, key=lambda x: x["score"])
433
- logger.info(
434
- f"Resolved '{library_name}' to '{best['id']}' (partial match with versions, score={best['score']})"
435
- )
436
- return best["id"]
437
-
438
- best = max(partial_matches, key=lambda x: x["score"])
439
- logger.info(
440
- f"Resolved '{library_name}' to '{best['id']}' (partial match, score={best['score']})"
441
- )
442
- return best["id"]
443
-
444
- # Fallback: highest score overall
445
- best = max(libraries, key=lambda x: x["score"])
446
- logger.info(
447
- f"Resolved '{library_name}' to '{best['id']}' (fallback: highest score={best['score']})"
448
- )
449
- return best["id"]
450
-
451
-
452
- class PerplexityService(ExternalMCPService):
453
- """Perplexity web search service."""
454
-
455
- def __init__(self, api_key: Optional[str] = None):
456
- """
457
- Initialize Perplexity MCP service.
458
-
459
- Args:
460
- api_key: Perplexity API key (defaults to PERPLEXITY_API_KEY env var)
461
- """
462
- api_key = api_key or os.getenv("PERPLEXITY_API_KEY")
463
- if not api_key:
464
- logger.warning(
465
- "PERPLEXITY_API_KEY not set - web search will not be available"
466
- )
467
-
468
- super().__init__(
469
- command=["npx", "-y", "server-perplexity-ask"],
470
- env={"PERPLEXITY_API_KEY": api_key} if api_key else {},
471
- )
472
-
473
- def search_web(self, query: str) -> Dict[str, Any]:
474
- """
475
- Search the web using Perplexity.
476
-
477
- Args:
478
- query: Search query
479
-
480
- Returns:
481
- Web search results with answer and sources
482
- """
483
- result = self.call_tool(
484
- "perplexity_ask", {"messages": [{"role": "user", "content": query}]}
485
- )
486
-
487
- if "error" in result:
488
- logger.error(f"Perplexity search failed: {result['error']}")
489
- return {"success": False, "error": result["error"], "answer": ""}
490
-
491
- # Extract answer from response
492
- content = result.get("content", [])
493
- answer = ""
494
- if content and len(content) > 0:
495
- answer = content[0].get("text", "")
496
-
497
- return {"success": True, "answer": answer, "raw_result": result}
498
-
499
-
500
- # Singleton instances for reuse
501
- _context7_service: Optional[Context7Service] = None
502
- _perplexity_service: Optional[PerplexityService] = None
503
-
504
-
505
- def get_context7_service() -> Context7Service:
506
- """Get or create Context7 service singleton."""
507
- global _context7_service
508
- if _context7_service is None:
509
- _context7_service = Context7Service()
510
- return _context7_service
511
-
512
-
513
- def get_perplexity_service() -> PerplexityService:
514
- """Get or create Perplexity service singleton."""
515
- global _perplexity_service
516
- if _perplexity_service is None:
517
- _perplexity_service = PerplexityService()
518
- return _perplexity_service
1
+ #!/usr/bin/env python
2
+ #
3
+ # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
4
+ # SPDX-License-Identifier: MIT
5
+
6
+ """
7
+ External MCP Services Integration
8
+
9
+ Provides wrappers for external MCP services like Context7 and Perplexity
10
+ that run as separate processes via npx commands.
11
+ """
12
+
13
+ import json
14
+ import os
15
+ import subprocess
16
+ import time
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ from gaia.logger import get_logger
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
+ class ExternalMCPService:
25
+ """Base class for managing external MCP services via subprocess."""
26
+
27
+ def __init__(
28
+ self,
29
+ command: List[str],
30
+ env: Optional[Dict[str, str]] = None,
31
+ timeout: int = 30,
32
+ ):
33
+ """
34
+ Initialize external MCP service.
35
+
36
+ Args:
37
+ command: Command to start the MCP service (e.g., ["npx", "-y", "package"])
38
+ env: Additional environment variables
39
+ timeout: Timeout in seconds for subprocess calls
40
+ """
41
+ self.command = command
42
+ self.env = {**os.environ.copy(), **(env or {})}
43
+ self.timeout = timeout
44
+ self.process = None
45
+
46
+ def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
47
+ """
48
+ Call a tool on the external MCP service.
49
+
50
+ Args:
51
+ tool_name: Name of the tool to call
52
+ arguments: Tool arguments as a dictionary
53
+
54
+ Returns:
55
+ Tool execution result
56
+ """
57
+ try:
58
+ # Create JSON-RPC request
59
+ request = {
60
+ "jsonrpc": "2.0",
61
+ "id": int(time.time() * 1000),
62
+ "method": "tools/call",
63
+ "params": {"name": tool_name, "arguments": arguments},
64
+ }
65
+
66
+ # Call the MCP service via subprocess
67
+ result = subprocess.run(
68
+ self.command,
69
+ input=json.dumps(request) + "\n",
70
+ capture_output=True,
71
+ text=True,
72
+ env=self.env,
73
+ timeout=self.timeout,
74
+ )
75
+
76
+ if result.returncode != 0:
77
+ logger.error(
78
+ f"MCP service error (exit {result.returncode}): {result.stderr}"
79
+ )
80
+ return {"error": f"Service failed: {result.stderr or 'Unknown error'}"}
81
+
82
+ # Parse response
83
+ try:
84
+ response = json.loads(result.stdout)
85
+
86
+ # Extract result from JSON-RPC response
87
+ if "result" in response:
88
+ return response["result"]
89
+ elif "error" in response:
90
+ return {"error": response["error"].get("message", "Unknown error")}
91
+ else:
92
+ return {"error": "Invalid response format"}
93
+
94
+ except json.JSONDecodeError as e:
95
+ logger.error(f"Failed to parse MCP response: {e}")
96
+ logger.debug(f"Raw output: {result.stdout}")
97
+ return {"error": f"Invalid JSON response: {str(e)}"}
98
+
99
+ except subprocess.TimeoutExpired:
100
+ logger.error(f"MCP service call timed out after {self.timeout}s")
101
+ return {"error": f"Request timed out after {self.timeout} seconds"}
102
+ except Exception as e:
103
+ logger.error(f"MCP service call failed: {e}")
104
+ return {"error": str(e)}
105
+
106
+ def list_tools(self) -> List[Dict[str, Any]]:
107
+ """
108
+ List available tools from the MCP service.
109
+
110
+ Returns:
111
+ List of tool definitions
112
+ """
113
+ try:
114
+ request = {"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}
115
+
116
+ result = subprocess.run(
117
+ self.command,
118
+ input=json.dumps(request) + "\n",
119
+ capture_output=True,
120
+ text=True,
121
+ env=self.env,
122
+ timeout=self.timeout,
123
+ )
124
+
125
+ if result.returncode == 0:
126
+ response = json.loads(result.stdout)
127
+ return response.get("result", {}).get("tools", [])
128
+
129
+ except Exception as e:
130
+ logger.warning(f"Failed to list tools: {e}")
131
+
132
+ return []
133
+
134
+
135
+ class Context7Service(ExternalMCPService):
136
+ """Context7 documentation search service with caching and rate protection.
137
+
138
+ This is an OPTIONAL service - the system works without it.
139
+ """
140
+
141
+ # Class-level availability tracking (cached after first check)
142
+ _availability_checked: bool = False
143
+ _is_available: bool = False
144
+
145
+ def __init__(self, api_key: Optional[str] = None):
146
+ """Initialize Context7 MCP service.
147
+
148
+ Args:
149
+ api_key: Optional Context7 API key (defaults to CONTEXT7_API_KEY env var)
150
+ """
151
+ # Get API key from parameter or environment
152
+ api_key = api_key or os.getenv("CONTEXT7_API_KEY")
153
+ env = {"CONTEXT7_API_KEY": api_key} if api_key else {}
154
+
155
+ super().__init__(command=["npx", "-y", "@upstash/context7-mcp"], env=env)
156
+
157
+ # Use persistent cache instead of session cache
158
+ from gaia.mcp.context7_cache import Context7Cache, Context7RateLimiter
159
+
160
+ self._cache = Context7Cache()
161
+ self._rate_limiter = Context7RateLimiter()
162
+
163
+ @classmethod
164
+ def check_availability(cls) -> bool:
165
+ """Check if Context7 can be used (npx available, package works).
166
+
167
+ This check is cached after the first call to avoid repeated slow checks.
168
+
169
+ Returns:
170
+ True if Context7 is available and working, False otherwise
171
+ """
172
+ if cls._availability_checked:
173
+ return cls._is_available
174
+
175
+ cls._availability_checked = True
176
+
177
+ # Check if npx is available
178
+ try:
179
+ import shutil
180
+
181
+ if not shutil.which("npx"):
182
+ logger.info("Context7 unavailable: npx not found in PATH")
183
+ cls._is_available = False
184
+ return False
185
+ except Exception as e:
186
+ logger.info(f"Context7 unavailable: failed to check for npx: {e}")
187
+ cls._is_available = False
188
+ return False
189
+
190
+ # Try a simple operation to verify Context7 works
191
+ try:
192
+ service = cls()
193
+ tools = service.list_tools()
194
+ cls._is_available = len(tools) > 0
195
+ if cls._is_available:
196
+ logger.info(f"Context7 available ({len(tools)} tools found)")
197
+ else:
198
+ logger.info("Context7 unavailable: no tools returned from service")
199
+ except Exception as e:
200
+ logger.info(f"Context7 unavailable: {type(e).__name__}: {e}")
201
+ cls._is_available = False
202
+
203
+ return cls._is_available
204
+
205
+ def _get_resolved_library_id(self, library: str) -> Optional[str]:
206
+ """Resolve a library name to Context7-compatible ID with persistent caching.
207
+
208
+ Args:
209
+ library: Library name (e.g., "nextjs") or full ID (e.g., "/vercel/next.js")
210
+
211
+ Returns:
212
+ Resolved library ID or None if resolution failed
213
+ """
214
+ # Already a full ID (has /org/project format)
215
+ if library.count("/") >= 2:
216
+ return library if library.startswith("/") else f"/{library}"
217
+
218
+ # Check persistent cache first
219
+ cached = self._cache.get_library_id(library)
220
+ if cached is not None:
221
+ logger.debug(f"Cache hit for library ID: {library} -> {cached}")
222
+ return cached
223
+
224
+ # Rate limit check before API call
225
+ can_proceed, reason = self._rate_limiter.can_make_request()
226
+ if not can_proceed:
227
+ logger.warning(f"Context7 rate limited: {reason}")
228
+ return None
229
+
230
+ # Resolve via API
231
+ logger.info(f"Resolving library ID for '{library}' via Context7 API")
232
+ self._rate_limiter.consume_token()
233
+ resolved_id = self.resolve_library_id(library)
234
+
235
+ # Record success/failure for circuit breaker
236
+ if resolved_id:
237
+ self._rate_limiter.record_success()
238
+ logger.info(f"Resolved '{library}' to '{resolved_id}'")
239
+ else:
240
+ self._rate_limiter.record_failure()
241
+ logger.warning(f"Could not resolve library ID for '{library}'")
242
+
243
+ # Cache result (even None to avoid repeated failures)
244
+ self._cache.set_library_id(library, resolved_id)
245
+
246
+ return resolved_id
247
+
248
def search_documentation(
    self, query: str, library: Optional[str] = None
) -> Dict[str, Any]:
    """Look up documentation through Context7, with caching and rate limiting.

    Args:
        query: Search query (e.g., "how to use useState")
        library: Optional library name to search in (e.g., "react")

    Returns:
        A dict with "success" and "documentation" keys. Failures also carry
        "error"; successes carry "cached", and fresh results additionally
        include "raw_result".
    """
    # Bail out early when Context7 cannot be used at all.
    if not self.check_availability():
        return {
            "success": False,
            "documentation": "",
            "error": "Context7 not available - use embedded knowledge",
            "unavailable": True,  # Signal to LLM to use embedded patterns
        }

    # Resolve the library (if one was given) before anything else.
    library_id = self._get_resolved_library_id(library) if library else None
    if library and not library_id:
        logger.warning(f"Could not resolve library '{library}'")

    # Cached documentation is keyed by resolved ID, or "global" without one.
    scope = library_id or "global"
    hit = self._cache.get_documentation(scope, query)
    if hit:
        logger.info(f"Cache hit for documentation: {scope}:{query[:30]}...")
        return {
            "success": True,
            "documentation": hit,
            "cached": True,
        }

    # Ask the rate limiter for permission before calling the API.
    allowed, why_not = self._rate_limiter.can_make_request()
    if not allowed:
        logger.warning(f"Context7 rate limited: {why_not}")
        return {
            "success": False,
            "error": why_not,
            "documentation": "",
        }

    self._rate_limiter.consume_token()
    tool_args = {"topic": query}
    if library_id:
        tool_args["context7CompatibleLibraryID"] = library_id

    result = self.call_tool("get-library-docs", tool_args)

    if "error" in result:
        error = result["error"]
        # An error mentioning "429" is reported as a rate-limit failure.
        self._rate_limiter.record_failure("429" in str(error))

        logger.error(f"Context7 search failed: {error}")
        return {
            "success": False,
            "error": error,
            "documentation": "",
        }

    # Success: take the text of the first content item, store and return it.
    self._rate_limiter.record_success()
    content = result.get("content")
    docs = content[0].get("text", "") if content else ""
    self._cache.set_documentation(scope, query, docs)

    return {
        "success": True,
        "documentation": docs,
        "cached": False,
        "raw_result": result,
    }
332
+
333
def resolve_library_id(self, library_name: str) -> Optional[str]:
    """Resolve a library name to a Context7-compatible library ID.

    Calls the ``resolve-library-id`` tool, parses every candidate in the
    response text (candidates are separated by ``----------``) and picks one
    using this priority:

    1. Exact title match that lists versions (highest score wins)
    2. Exact title match with the highest score
    3. Title containing the search term, preferring entries with versions
    4. Highest benchmark score overall

    Titles and the search term are compared case-insensitively with dots,
    dashes and spaces removed.

    Args:
        library_name: Library name (e.g., "react", "tensorflow")

    Returns:
        Context7-compatible library ID (e.g., "/facebook/react"), or None
        when the tool reports an error, returns no content, or the response
        contains no parsable library ID.
    """
    import re

    result = self.call_tool("resolve-library-id", {"libraryName": library_name})

    if "error" in result:
        logger.warning(f"Failed to resolve library ID: {result['error']}")
        return None

    content = result.get("content", [])
    if not content:
        logger.warning("Empty content in Context7 response")
        return None

    # Treat a missing or null text payload as an empty response body.
    text = content[0].get("text", "") or ""
    logger.debug(f"Context7 resolve-library-id response text:\n{text[:800]}")

    def parse_score(block):
        """Return the block's benchmark score, or 0 if absent or malformed."""
        match = re.search(r"Benchmark Score:\s*([\d.]+)", block)
        if not match:
            return 0
        try:
            return float(match.group(1))
        except ValueError:
            # e.g. "1.2.3" or "." - one bad score must not abort resolution.
            return 0

    def normalize(s):
        """Lower-case and drop dots, dashes and spaces for loose comparison."""
        return s.lower().replace(".", "").replace("-", "").replace(" ", "")

    def pick_best(candidates, label):
        """Log and return the best ID, preferring candidates with versions."""
        versioned = [lib for lib in candidates if lib["has_versions"]]
        if versioned:
            best = max(versioned, key=lambda lib: lib["score"])
            how = f"{label} with versions"
        else:
            best = max(candidates, key=lambda lib: lib["score"])
            how = label
        logger.info(
            f"Resolved '{library_name}' to '{best['id']}' ({how}, score={best['score']})"
        )
        return best["id"]

    # Parse ALL libraries from the response; several may share a title.
    libraries = []
    for block in text.split("----------"):
        if not block.strip():
            continue

        id_match = re.search(
            r"Context7-compatible library ID:\s*(/[\w.-]+/[\w.-]+(?:/[\w.-]+)?)",
            block,
        )
        if not id_match:
            # A block without an ID cannot be selected.
            continue

        title_match = re.search(r"Title:\s*(.+)", block)
        libraries.append(
            {
                "title": title_match.group(1).strip() if title_match else "",
                "id": id_match.group(1),
                "score": parse_score(block),
                "has_versions": re.search(r"Versions:\s*(.+)", block) is not None,
            }
        )

    if not libraries:
        logger.warning(f"No library IDs found in response for '{library_name}'")
        return None

    normalized_search = normalize(library_name)

    # Priority 1 and 2: titles equal to the search term after normalization.
    exact_matches = [
        lib for lib in libraries if normalize(lib["title"]) == normalized_search
    ]
    if exact_matches:
        return pick_best(exact_matches, "exact match")

    # Priority 3: titles that merely contain the search term.
    partial_matches = [
        lib for lib in libraries if normalized_search in normalize(lib["title"])
    ]
    if partial_matches:
        return pick_best(partial_matches, "partial match")

    # Priority 4: nothing matched by title, so take the highest score overall.
    best = max(libraries, key=lambda lib: lib["score"])
    logger.info(
        f"Resolved '{library_name}' to '{best['id']}' (fallback: highest score={best['score']})"
    )
    return best["id"]
450
+
451
+
452
class PerplexityService(ExternalMCPService):
    """Web search through the Perplexity MCP server."""

    def __init__(self, api_key: Optional[str] = None):
        """Configure the Perplexity MCP service.

        A missing key is only logged as a warning; the service is still
        constructed, with an empty environment.

        Args:
            api_key: Perplexity API key (defaults to PERPLEXITY_API_KEY env var)
        """
        key = api_key or os.getenv("PERPLEXITY_API_KEY")
        if not key:
            logger.warning(
                "PERPLEXITY_API_KEY not set - web search will not be available"
            )

        # Only forward the key to the child environment when one is known.
        env = {"PERPLEXITY_API_KEY": key} if key else {}
        super().__init__(command=["npx", "-y", "server-perplexity-ask"], env=env)

    def search_web(self, query: str) -> Dict[str, Any]:
        """Ask Perplexity a question and return its answer.

        Args:
            query: Search query

        Returns:
            On success: {"success": True, "answer": ..., "raw_result": ...}.
            On failure: {"success": False, "error": ..., "answer": ""}.
        """
        # The query is sent as a single user message.
        messages = [{"role": "user", "content": query}]
        result = self.call_tool("perplexity_ask", {"messages": messages})

        if "error" in result:
            logger.error(f"Perplexity search failed: {result['error']}")
            return {"success": False, "error": result["error"], "answer": ""}

        # The answer is the text of the first content item, if there is one.
        content = result.get("content", [])
        answer = content[0].get("text", "") if content else ""

        return {"success": True, "answer": answer, "raw_result": result}
498
+
499
+
500
# Shared service instances, reused across calls. Both start out as None and
# are filled in on first use by the get_*_service() accessors.
_context7_service: Optional[Context7Service] = None
_perplexity_service: Optional[PerplexityService] = None
503
+
504
+
505
def get_context7_service() -> Context7Service:
    """Return the shared Context7Service, creating it on first use."""
    global _context7_service
    service = _context7_service
    if service is None:
        # First call: build the instance and remember it for later callers.
        service = _context7_service = Context7Service()
    return service
511
+
512
+
513
def get_perplexity_service() -> PerplexityService:
    """Return the shared PerplexityService, creating it on first use."""
    global _perplexity_service
    service = _perplexity_service
    if service is None:
        # First call: build the instance and remember it for later callers.
        service = _perplexity_service = PerplexityService()
    return service