amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
@@ -1,518 +1,518 @@
1
- #!/usr/bin/env python
2
- #
3
- # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
4
- # SPDX-License-Identifier: MIT
5
-
6
- """
7
- External MCP Services Integration
8
-
9
- Provides wrappers for external MCP services like Context7 and Perplexity
10
- that run as separate processes via npx commands.
11
- """
12
-
13
- import json
14
- import os
15
- import subprocess
16
- import time
17
- from typing import Any, Dict, List, Optional
18
-
19
- from gaia.logger import get_logger
20
-
21
- logger = get_logger(__name__)
22
-
23
-
24
class ExternalMCPService:
    """Base class for managing external MCP services via subprocess.

    Every request spawns ``command`` as a new child process, writes one
    JSON-RPC message to its stdin and decodes the reply from its stdout.
    """

    def __init__(
        self,
        command: List[str],
        env: Optional[Dict[str, str]] = None,
        timeout: int = 30,
    ):
        """
        Initialize external MCP service.

        Args:
            command: Command to start the MCP service (e.g., ["npx", "-y", "package"])
            env: Additional environment variables
            timeout: Timeout in seconds for subprocess calls
        """
        self.command = command
        # Child environment = current environment overlaid with the extras.
        self.env = {**os.environ, **(env or {})}
        self.timeout = timeout
        self.process = None

    def _run_request(self, request: Dict[str, Any]) -> subprocess.CompletedProcess:
        """Run the service once, feeding it ``request`` as one JSON line on stdin.

        Raises:
            subprocess.TimeoutExpired: If the child exceeds ``self.timeout``.
            TypeError: If ``request`` is not JSON serializable.
        """
        return subprocess.run(
            self.command,
            input=json.dumps(request) + "\n",
            capture_output=True,
            text=True,
            env=self.env,
            timeout=self.timeout,
            check=False,
        )

    @staticmethod
    def _parse_response(stdout: str, request_id: Any = None) -> Any:
        """Decode the JSON-RPC reply contained in ``stdout``.

        The whole output is parsed first. If that fails (for example because
        the child also printed a banner or a notification), each line is
        scanned for a JSON object carrying ``result`` or ``error``; a reply
        whose ``id`` equals ``request_id`` wins, otherwise the first reply.

        Raises:
            json.JSONDecodeError: If no reply can be decoded at all.
        """
        try:
            return json.loads(stdout)
        except json.JSONDecodeError as whole_error:
            first_reply = None
            for raw_line in stdout.splitlines():
                line = raw_line.strip()
                if not line.startswith("{"):
                    continue
                try:
                    message = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(message, dict):
                    continue
                if "result" not in message and "error" not in message:
                    # Not a reply (e.g. a notification) - keep looking.
                    continue
                if message.get("id") == request_id:
                    return message
                if first_reply is None:
                    first_reply = message
            if first_reply is not None:
                return first_reply
            raise whole_error

    def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Call a tool on the external MCP service.

        This method never raises: every failure is reported as a dictionary
        with a single ``"error"`` key.

        Args:
            tool_name: Name of the tool to call
            arguments: Tool arguments as a dictionary

        Returns:
            Tool execution result
        """
        try:
            # Create JSON-RPC request (millisecond timestamp used as the id)
            request = {
                "jsonrpc": "2.0",
                "id": int(time.time() * 1000),
                "method": "tools/call",
                "params": {"name": tool_name, "arguments": arguments},
            }

            # Call the MCP service via subprocess
            result = self._run_request(request)

            if result.returncode != 0:
                logger.error(
                    f"MCP service error (exit {result.returncode}): {result.stderr}"
                )
                return {"error": f"Service failed: {result.stderr or 'Unknown error'}"}

            # Parse response
            try:
                response = self._parse_response(result.stdout, request["id"])
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse MCP response: {e}")
                logger.debug(f"Raw output: {result.stdout}")
                return {"error": f"Invalid JSON response: {str(e)}"}

            if not isinstance(response, dict):
                return {"error": "Invalid response format"}

            # Extract result from JSON-RPC response
            if "result" in response:
                return response["result"]
            if "error" in response:
                error = response["error"]
                if isinstance(error, dict):
                    return {"error": error.get("message", "Unknown error")}
                # Tolerate services that send the error as a bare value.
                return {"error": str(error) if error else "Unknown error"}
            return {"error": "Invalid response format"}

        except subprocess.TimeoutExpired:
            logger.error(f"MCP service call timed out after {self.timeout}s")
            return {"error": f"Request timed out after {self.timeout} seconds"}
        except Exception as e:
            # Boundary handler: callers rely on an error dict, not an exception.
            logger.error(f"MCP service call failed: {e}")
            return {"error": str(e)}

    def list_tools(self) -> List[Dict[str, Any]]:
        """
        List available tools from the MCP service.

        Returns:
            List of tool definitions; an empty list when the service fails,
            times out, or returns something that is not a tool list.
        """
        request = {"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}
        try:
            result = self._run_request(request)

            if result.returncode != 0:
                logger.warning(
                    f"Failed to list tools (exit {result.returncode}): {result.stderr}"
                )
                return []

            response = self._parse_response(result.stdout, request["id"])
            tools = (response.get("result") or {}).get("tools")
            return tools if isinstance(tools, list) else []

        except Exception as e:
            logger.warning(f"Failed to list tools: {e}")

        return []
133
-
134
-
135
class Context7Service(ExternalMCPService):
    """Context7 documentation search service with caching and rate protection.

    This is an OPTIONAL service - the system works without it.
    """

    # Class-level availability tracking (cached after first check)
    _availability_checked: bool = False
    _is_available: bool = False

    def __init__(self, api_key: Optional[str] = None):
        """Initialize Context7 MCP service.

        Args:
            api_key: Optional Context7 API key (defaults to CONTEXT7_API_KEY env var)
        """
        # Get API key from parameter or environment
        api_key = api_key or os.getenv("CONTEXT7_API_KEY")
        env = {"CONTEXT7_API_KEY": api_key} if api_key else {}

        super().__init__(command=["npx", "-y", "@upstash/context7-mcp"], env=env)

        # Use persistent cache instead of session cache
        from gaia.mcp.context7_cache import Context7Cache, Context7RateLimiter

        self._cache = Context7Cache()
        self._rate_limiter = Context7RateLimiter()

    @classmethod
    def check_availability(cls) -> bool:
        """Check if Context7 can be used (npx available, package works).

        This check is cached after the first call to avoid repeated slow checks.

        Returns:
            True if Context7 is available and working, False otherwise
        """
        if cls._availability_checked:
            return cls._is_available

        # Mark as checked up front so a failing probe is not repeated.
        cls._availability_checked = True
        cls._is_available = False

        # Check if npx is available
        try:
            import shutil

            npx_path = shutil.which("npx")
        except Exception as e:
            logger.info(f"Context7 unavailable: failed to check for npx: {e}")
            return False

        if not npx_path:
            logger.info("Context7 unavailable: npx not found in PATH")
            return False

        # Try a simple operation to verify Context7 works
        try:
            tools = cls().list_tools()
            cls._is_available = len(tools) > 0
            if cls._is_available:
                logger.info(f"Context7 available ({len(tools)} tools found)")
            else:
                logger.info("Context7 unavailable: no tools returned from service")
        except Exception as e:
            logger.info(f"Context7 unavailable: {type(e).__name__}: {e}")
            cls._is_available = False

        return cls._is_available

    @staticmethod
    def _first_text(result: Dict[str, Any]) -> str:
        """Return the ``text`` of the first content item of a tool result, or ""."""
        content = result.get("content")
        if not content or not isinstance(content, list):
            return ""
        first = content[0]
        return first.get("text", "") if isinstance(first, dict) else ""

    @classmethod
    def _tool_failure(cls, result: Any) -> Optional[Any]:
        """Return the error carried by a ``call_tool`` result, or None on success.

        A result counts as failed when it is not a dictionary, when it has an
        ``"error"`` key, or when it is an MCP tool result flagged ``isError``.
        """
        if not isinstance(result, dict):
            return "Invalid response format"
        if "error" in result:
            error = result["error"]
            return error if error is not None else "Unknown error"
        if result.get("isError"):
            # MCP reports tool-level failures inside a normal result.
            return cls._first_text(result) or "Tool reported an error"
        return None

    def _get_resolved_library_id(self, library: str) -> Optional[str]:
        """Resolve a library name to Context7-compatible ID with persistent caching.

        Args:
            library: Library name (e.g., "nextjs") or full ID (e.g., "/vercel/next.js")

        Returns:
            Resolved library ID or None if resolution failed
        """
        # Already a full ID (has /org/project format)
        if library.count("/") >= 2:
            return library if library.startswith("/") else f"/{library}"

        # Check persistent cache first
        cached = self._cache.get_library_id(library)
        if cached is not None:
            logger.debug(f"Cache hit for library ID: {library} -> {cached}")
            return cached

        # Rate limit check before API call
        can_proceed, reason = self._rate_limiter.can_make_request()
        if not can_proceed:
            logger.warning(f"Context7 rate limited: {reason}")
            return None

        # Resolve via API
        logger.info(f"Resolving library ID for '{library}' via Context7 API")
        self._rate_limiter.consume_token()
        resolved_id = self.resolve_library_id(library)

        # Record success/failure for circuit breaker
        if resolved_id:
            self._rate_limiter.record_success()
            logger.info(f"Resolved '{library}' to '{resolved_id}'")
        else:
            self._rate_limiter.record_failure()
            logger.warning(f"Could not resolve library ID for '{library}'")

        # Cache result (even None to avoid repeated failures)
        # NOTE(review): the lookup above treats None as a miss, so whether a
        # cached None really suppresses retries depends on Context7Cache - confirm.
        self._cache.set_library_id(library, resolved_id)

        return resolved_id

    def search_documentation(
        self, query: str, library: Optional[str] = None
    ) -> Dict[str, Any]:
        """Search documentation using Context7 with caching and rate protection.

        Args:
            query: Search query (e.g., "how to use useState")
            library: Optional library name to search in (e.g., "react")

        Returns:
            Dictionary with ``success`` and ``documentation`` keys. Failures
            add ``error`` (and ``unavailable`` when Context7 cannot be used);
            successes add ``cached`` and, for fresh results, ``raw_result``.
        """
        # Check availability first (cached after first check)
        if not self.check_availability():
            return {
                "success": False,
                "documentation": "",
                "error": "Context7 not available - use embedded knowledge",
                "unavailable": True,  # Signal to LLM to use embedded patterns
            }

        # Resolve library ID first
        resolved_id = None
        if library:
            resolved_id = self._get_resolved_library_id(library)
            if not resolved_id:
                logger.warning(f"Could not resolve library '{library}'")

        # Check documentation cache
        cache_key_lib = resolved_id or "global"
        cached_docs = self._cache.get_documentation(cache_key_lib, query)
        if cached_docs:
            logger.info(f"Cache hit for documentation: {cache_key_lib}:{query[:30]}...")
            return {
                "success": True,
                "documentation": cached_docs,
                "cached": True,
            }

        # Rate limit check before API call
        can_proceed, reason = self._rate_limiter.can_make_request()
        if not can_proceed:
            logger.warning(f"Context7 rate limited: {reason}")
            return {
                "success": False,
                "error": reason,
                "documentation": "",
            }

        # Make API call
        self._rate_limiter.consume_token()
        arguments = {"topic": query}
        if resolved_id:
            arguments["context7CompatibleLibraryID"] = resolved_id

        result = self.call_tool("get-library-docs", arguments)

        failure = self._tool_failure(result)
        if failure is not None:
            # Check if it's a rate limit error (HTTP 429)
            is_rate_limit = "429" in str(failure)
            self._rate_limiter.record_failure(is_rate_limit)

            logger.error(f"Context7 search failed: {failure}")
            # Failed results are never written to the documentation cache.
            return {
                "success": False,
                "error": failure,
                "documentation": "",
            }

        # Success - cache and return
        self._rate_limiter.record_success()
        docs = self._first_text(result)
        self._cache.set_documentation(cache_key_lib, query, docs)

        return {
            "success": True,
            "documentation": docs,
            "cached": False,
            "raw_result": result,
        }

    @staticmethod
    def _parse_libraries(text: str) -> List[Dict[str, Any]]:
        """Extract library entries from a resolve-library-id response text.

        Entries are separated by ``----------``; blocks without a
        Context7-compatible library ID are ignored.
        """
        import re

        libraries = []
        for block in text.split("----------"):
            if not block.strip():
                continue

            id_match = re.search(
                r"Context7-compatible library ID:\s*(/[\w.-]+/[\w.-]+(?:/[\w.-]+)?)",
                block,
            )
            if not id_match:
                continue

            title_match = re.search(r"Title:\s*(.+)", block)
            score_match = re.search(r"Benchmark Score:\s*([\d.]+)", block)
            versions_match = re.search(r"Versions:\s*(.+)", block)

            score = 0
            if score_match:
                try:
                    score = float(score_match.group(1))
                except ValueError:
                    # e.g. "1.2.3" or "." - treat as unscored instead of failing.
                    score = 0

            libraries.append(
                {
                    "title": title_match.group(1).strip() if title_match else "",
                    "id": id_match.group(1),
                    "score": score,
                    "has_versions": versions_match is not None,
                }
            )
        return libraries

    def resolve_library_id(self, library_name: str) -> Optional[str]:
        """
        Resolve a library name to Context7-compatible library ID.

        Args:
            library_name: Library name (e.g., "react", "tensorflow")

        Returns:
            Context7-compatible library ID (e.g., "/facebook/react") or None
        """
        result = self.call_tool("resolve-library-id", {"libraryName": library_name})

        failure = self._tool_failure(result)
        if failure is not None:
            logger.warning(f"Failed to resolve library ID: {failure}")
            return None

        # Extract library ID from response
        if not result.get("content"):
            logger.warning("Empty content in Context7 response")
            return None

        text = self._first_text(result)
        logger.debug(f"Context7 resolve-library-id response text:\n{text[:800]}")

        # Parse ALL libraries from response (separated by ----------)
        # Multiple libraries may have the same title - need smart selection
        libraries = self._parse_libraries(text)

        if not libraries:
            logger.warning(f"No library IDs found in response for '{library_name}'")
            return None

        # Selection strategy (in order of priority):
        # 1. Exact title match that has versions (indicates official repo)
        # 2. Exact title match with highest score
        # 3. Title contains search term, prefer ones with versions
        # 4. Highest benchmark score overall

        # Normalize for comparison (remove dots, spaces, dashes)
        def normalize(s):
            return s.lower().replace(".", "").replace("-", "").replace(" ", "")

        def by_score(lib):
            return lib["score"]

        normalized_search = normalize(library_name)

        exact_matches = [
            lib for lib in libraries if normalize(lib["title"]) == normalized_search
        ]
        partial_matches = [
            lib for lib in libraries if normalized_search in normalize(lib["title"])
        ]

        for candidates, label in (
            (exact_matches, "exact match"),
            (partial_matches, "partial match"),
        ):
            if not candidates:
                continue

            # Prefer ones with versions (usually the official repo)
            versioned = [lib for lib in candidates if lib["has_versions"]]
            if versioned:
                best = max(versioned, key=by_score)
                logger.info(
                    f"Resolved '{library_name}' to '{best['id']}' ({label} with versions, score={best['score']})"
                )
            else:
                # No versions, pick highest score
                best = max(candidates, key=by_score)
                logger.info(
                    f"Resolved '{library_name}' to '{best['id']}' ({label}, score={best['score']})"
                )
            return best["id"]

        # Fallback: highest score overall
        best = max(libraries, key=by_score)
        logger.info(
            f"Resolved '{library_name}' to '{best['id']}' (fallback: highest score={best['score']})"
        )
        return best["id"]
450
-
451
-
452
class PerplexityService(ExternalMCPService):
    """Perplexity web search service."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize Perplexity MCP service.

        Args:
            api_key: Perplexity API key (defaults to PERPLEXITY_API_KEY env var)
        """
        api_key = api_key or os.getenv("PERPLEXITY_API_KEY")
        if not api_key:
            # The service is still constructed; only a warning is logged here.
            logger.warning(
                "PERPLEXITY_API_KEY not set - web search will not be available"
            )

        super().__init__(
            command=["npx", "-y", "server-perplexity-ask"],
            env={"PERPLEXITY_API_KEY": api_key} if api_key else {},
        )

    def search_web(self, query: str) -> Dict[str, Any]:
        """
        Search the web using Perplexity.

        Args:
            query: Search query

        Returns:
            Web search results with answer and sources. On failure the
            dictionary has ``success`` False, an ``error`` and an empty
            ``answer``.
        """
        result = self.call_tool(
            "perplexity_ask", {"messages": [{"role": "user", "content": query}]}
        )

        if not isinstance(result, dict):
            logger.error("Perplexity search failed: Invalid response format")
            return {
                "success": False,
                "error": "Invalid response format",
                "answer": "",
            }

        if "error" in result:
            logger.error(f"Perplexity search failed: {result['error']}")
            return {"success": False, "error": result["error"], "answer": ""}

        # Extract answer from response
        content = result.get("content", [])
        answer = ""
        if content and isinstance(content, list) and isinstance(content[0], dict):
            answer = content[0].get("text", "")

        if result.get("isError"):
            # MCP reports tool-level failures inside a normal result; the
            # content text then describes the error rather than an answer.
            error = answer or "Tool reported an error"
            logger.error(f"Perplexity search failed: {error}")
            return {"success": False, "error": error, "answer": ""}

        return {"success": True, "answer": answer, "raw_result": result}
498
-
499
-
500
- # Singleton instances for reuse
501
- _context7_service: Optional[Context7Service] = None
502
- _perplexity_service: Optional[PerplexityService] = None
503
-
504
-
505
def get_context7_service() -> Context7Service:
    """Return the shared Context7 service, creating it on first use."""
    global _context7_service
    service = _context7_service
    if service is None:
        # First call: build the instance and remember it for later callers.
        service = Context7Service()
        _context7_service = service
    return service
511
-
512
-
513
def get_perplexity_service() -> PerplexityService:
    """Return the shared Perplexity service, creating it on first use."""
    global _perplexity_service
    service = _perplexity_service
    if service is None:
        # First call: build the instance and remember it for later callers.
        service = PerplexityService()
        _perplexity_service = service
    return service
1
+ #!/usr/bin/env python
2
+ #
3
+ # Copyright(C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
4
+ # SPDX-License-Identifier: MIT
5
+
6
+ """
7
+ External MCP Services Integration
8
+
9
+ Provides wrappers for external MCP services like Context7 and Perplexity
10
+ that run as separate processes via npx commands.
11
+ """
12
+
13
+ import json
14
+ import os
15
+ import subprocess
16
+ import time
17
+ from typing import Any, Dict, List, Optional
18
+
19
+ from gaia.logger import get_logger
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
class ExternalMCPService:
    """Base class for managing external MCP services via subprocess.

    Every request spawns ``command`` as a new child process, writes one
    JSON-RPC message to its stdin and decodes the reply from its stdout.
    """

    def __init__(
        self,
        command: List[str],
        env: Optional[Dict[str, str]] = None,
        timeout: int = 30,
    ):
        """
        Initialize external MCP service.

        Args:
            command: Command to start the MCP service (e.g., ["npx", "-y", "package"])
            env: Additional environment variables
            timeout: Timeout in seconds for subprocess calls
        """
        self.command = command
        # Child environment = current environment overlaid with the extras.
        self.env = {**os.environ, **(env or {})}
        self.timeout = timeout
        self.process = None

    def _run_request(self, request: Dict[str, Any]) -> subprocess.CompletedProcess:
        """Run the service once, feeding it ``request`` as one JSON line on stdin.

        Raises:
            subprocess.TimeoutExpired: If the child exceeds ``self.timeout``.
            TypeError: If ``request`` is not JSON serializable.
        """
        return subprocess.run(
            self.command,
            input=json.dumps(request) + "\n",
            capture_output=True,
            text=True,
            env=self.env,
            timeout=self.timeout,
            check=False,
        )

    @staticmethod
    def _parse_response(stdout: str, request_id: Any = None) -> Any:
        """Decode the JSON-RPC reply contained in ``stdout``.

        The whole output is parsed first. If that fails (for example because
        the child also printed a banner or a notification), each line is
        scanned for a JSON object carrying ``result`` or ``error``; a reply
        whose ``id`` equals ``request_id`` wins, otherwise the first reply.

        Raises:
            json.JSONDecodeError: If no reply can be decoded at all.
        """
        try:
            return json.loads(stdout)
        except json.JSONDecodeError as whole_error:
            first_reply = None
            for raw_line in stdout.splitlines():
                line = raw_line.strip()
                if not line.startswith("{"):
                    continue
                try:
                    message = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(message, dict):
                    continue
                if "result" not in message and "error" not in message:
                    # Not a reply (e.g. a notification) - keep looking.
                    continue
                if message.get("id") == request_id:
                    return message
                if first_reply is None:
                    first_reply = message
            if first_reply is not None:
                return first_reply
            raise whole_error

    def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """
        Call a tool on the external MCP service.

        This method never raises: every failure is reported as a dictionary
        with a single ``"error"`` key.

        Args:
            tool_name: Name of the tool to call
            arguments: Tool arguments as a dictionary

        Returns:
            Tool execution result
        """
        try:
            # Create JSON-RPC request (millisecond timestamp used as the id)
            request = {
                "jsonrpc": "2.0",
                "id": int(time.time() * 1000),
                "method": "tools/call",
                "params": {"name": tool_name, "arguments": arguments},
            }

            # Call the MCP service via subprocess
            result = self._run_request(request)

            if result.returncode != 0:
                logger.error(
                    f"MCP service error (exit {result.returncode}): {result.stderr}"
                )
                return {"error": f"Service failed: {result.stderr or 'Unknown error'}"}

            # Parse response
            try:
                response = self._parse_response(result.stdout, request["id"])
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse MCP response: {e}")
                logger.debug(f"Raw output: {result.stdout}")
                return {"error": f"Invalid JSON response: {str(e)}"}

            if not isinstance(response, dict):
                return {"error": "Invalid response format"}

            # Extract result from JSON-RPC response
            if "result" in response:
                return response["result"]
            if "error" in response:
                error = response["error"]
                if isinstance(error, dict):
                    return {"error": error.get("message", "Unknown error")}
                # Tolerate services that send the error as a bare value.
                return {"error": str(error) if error else "Unknown error"}
            return {"error": "Invalid response format"}

        except subprocess.TimeoutExpired:
            logger.error(f"MCP service call timed out after {self.timeout}s")
            return {"error": f"Request timed out after {self.timeout} seconds"}
        except Exception as e:
            # Boundary handler: callers rely on an error dict, not an exception.
            logger.error(f"MCP service call failed: {e}")
            return {"error": str(e)}

    def list_tools(self) -> List[Dict[str, Any]]:
        """
        List available tools from the MCP service.

        Returns:
            List of tool definitions; an empty list when the service fails,
            times out, or returns something that is not a tool list.
        """
        request = {"jsonrpc": "2.0", "id": 1, "method": "tools/list", "params": {}}
        try:
            result = self._run_request(request)

            if result.returncode != 0:
                logger.warning(
                    f"Failed to list tools (exit {result.returncode}): {result.stderr}"
                )
                return []

            response = self._parse_response(result.stdout, request["id"])
            tools = (response.get("result") or {}).get("tools")
            return tools if isinstance(tools, list) else []

        except Exception as e:
            logger.warning(f"Failed to list tools: {e}")

        return []
133
+
134
+
135
+ class Context7Service(ExternalMCPService):
136
+ """Context7 documentation search service with caching and rate protection.
137
+
138
+ This is an OPTIONAL service - the system works without it.
139
+ """
140
+
141
+ # Class-level availability tracking (cached after first check)
142
+ _availability_checked: bool = False
143
+ _is_available: bool = False
144
+
145
def __init__(self, api_key: Optional[str] = None):
    """Set up the Context7 MCP service wrapper.

    Args:
        api_key: Optional Context7 API key; when omitted, the
            CONTEXT7_API_KEY environment variable is used if set.
    """
    # An explicit key wins over the environment variable.
    key = api_key or os.getenv("CONTEXT7_API_KEY")
    extra_env = {}
    if key:
        extra_env["CONTEXT7_API_KEY"] = key

    super().__init__(
        command=["npx", "-y", "@upstash/context7-mcp"],
        env=extra_env,
    )

    # Persistent cache and rate limiter come from the companion module.
    from gaia.mcp.context7_cache import Context7Cache, Context7RateLimiter

    self._cache = Context7Cache()
    self._rate_limiter = Context7RateLimiter()
162
+
163
@classmethod
def check_availability(cls) -> bool:
    """Report whether Context7 is usable (npx on PATH and tools listed).

    The outcome is stored on the class after the first call, so later
    calls return the stored value without probing again.

    Returns:
        True if Context7 is available and working, False otherwise
    """
    if cls._availability_checked:
        return cls._is_available

    cls._availability_checked = True

    # Step 1: npx must be discoverable on PATH.
    try:
        import shutil

        npx_found = bool(shutil.which("npx"))
    except Exception as e:
        logger.info(f"Context7 unavailable: failed to check for npx: {e}")
        cls._is_available = False
        return False

    if not npx_found:
        logger.info("Context7 unavailable: npx not found in PATH")
        cls._is_available = False
        return False

    # Step 2: the service must answer a tool listing with at least one tool.
    try:
        probe = cls()
        found_tools = probe.list_tools()
        cls._is_available = len(found_tools) > 0
        if not cls._is_available:
            logger.info("Context7 unavailable: no tools returned from service")
        else:
            logger.info(f"Context7 available ({len(found_tools)} tools found)")
    except Exception as e:
        logger.info(f"Context7 unavailable: {type(e).__name__}: {e}")
        cls._is_available = False

    return cls._is_available
204
+
205
def _get_resolved_library_id(self, library: str) -> Optional[str]:
    """Turn a library name into a Context7-compatible ID, using the persistent cache.

    Lookup order: input that already looks like a full ID, then the cache,
    then (if the rate limiter allows it) a call to resolve_library_id().

    Args:
        library: Library name (e.g., "nextjs") or full ID (e.g., "/vercel/next.js")

    Returns:
        Resolved library ID, or None if the request was rate limited or
        resolution failed
    """
    # Two or more slashes: treat the input as an ID already and only make
    # sure it carries the leading slash.
    if library.count("/") >= 2:
        if library.startswith("/"):
            return library
        return f"/{library}"

    # Persistent cache comes before any API traffic.
    cache_hit = self._cache.get_library_id(library)
    if cache_hit is not None:
        logger.debug(f"Cache hit for library ID: {library} -> {cache_hit}")
        return cache_hit

    # Ask the rate limiter before spending a request.
    allowed, reason = self._rate_limiter.can_make_request()
    if not allowed:
        logger.warning(f"Context7 rate limited: {reason}")
        return None

    # Resolve via API
    logger.info(f"Resolving library ID for '{library}' via Context7 API")
    self._rate_limiter.consume_token()
    library_id = self.resolve_library_id(library)

    # Report the outcome to the rate limiter (circuit breaker bookkeeping).
    if not library_id:
        self._rate_limiter.record_failure()
        logger.warning(f"Could not resolve library ID for '{library}'")
    else:
        self._rate_limiter.record_success()
        logger.info(f"Resolved '{library}' to '{library_id}'")

    # Cache result (even None to avoid repeated failures)
    self._cache.set_library_id(library, library_id)
    return library_id
247
+
248
def search_documentation(
    self, query: str, library: Optional[str] = None
) -> Dict[str, Any]:
    """Look up documentation through Context7, with caching and rate protection.

    Args:
        query: Search query (e.g., "how to use useState")
        library: Optional library name to search in (e.g., "react")

    Returns:
        A dict that always has "success" and "documentation". Failures also
        carry "error" (plus "unavailable" when Context7 cannot be used);
        successes carry "cached", and "raw_result" when freshly fetched.
    """
    # Availability is probed once and cached on the class afterwards.
    if not self.check_availability():
        return {
            "success": False,
            "documentation": "",
            "error": "Context7 not available - use embedded knowledge",
            "unavailable": True,  # Signal to LLM to use embedded patterns
        }

    # Resolve the library (when one was given) to a Context7 ID.
    library_id = self._get_resolved_library_id(library) if library else None
    if library and not library_id:
        logger.warning(f"Could not resolve library '{library}'")

    # Documentation cache, keyed by resolved ID or "global" without one.
    cache_scope = library_id or "global"
    cached_text = self._cache.get_documentation(cache_scope, query)
    if cached_text:
        logger.info(f"Cache hit for documentation: {cache_scope}:{query[:30]}...")
        return {"success": True, "documentation": cached_text, "cached": True}

    # Ask the rate limiter before spending a request.
    allowed, reason = self._rate_limiter.can_make_request()
    if not allowed:
        logger.warning(f"Context7 rate limited: {reason}")
        return {"success": False, "error": reason, "documentation": ""}

    # Make API call
    self._rate_limiter.consume_token()
    tool_args = {"topic": query}
    if library_id:
        tool_args["context7CompatibleLibraryID"] = library_id
    response = self.call_tool("get-library-docs", tool_args)

    if "error" in response:
        failure = response["error"]
        # An error mentioning 429 is reported to the limiter as a rate-limit hit.
        self._rate_limiter.record_failure("429" in str(failure))

        logger.error(f"Context7 search failed: {failure}")
        return {"success": False, "error": failure, "documentation": ""}

    # Success - cache and return
    self._rate_limiter.record_success()
    content = response.get("content")
    docs = content[0].get("text", "") if content else ""
    self._cache.set_documentation(cache_scope, query, docs)

    return {
        "success": True,
        "documentation": docs,
        "cached": False,
        "raw_result": response,
    }
332
+
333
def resolve_library_id(self, library_name: str) -> Optional[str]:
    """
    Resolve a library name to Context7-compatible library ID.

    Calls the ``resolve-library-id`` tool, parses every candidate out of the
    text response (candidates are separated by ``----------``) and selects
    one, in order of priority:

    1. Exact title match that has versions (indicates official repo)
    2. Exact title match with highest score
    3. Title contains search term, prefer ones with versions
    4. Highest benchmark score overall

    Titles and the search term are compared case-insensitively with dots,
    dashes and spaces removed.

    Args:
        library_name: Library name (e.g., "react", "tensorflow")

    Returns:
        Context7-compatible library ID (e.g., "/facebook/react"), or None
        when the tool reports an error, returns no content, or the response
        contains no library ID
    """
    result = self.call_tool("resolve-library-id", {"libraryName": library_name})

    if "error" in result:
        logger.warning(f"Failed to resolve library ID: {result['error']}")
        return None

    # Extract library ID from response
    content = result.get("content", [])
    if not content:
        logger.warning("Empty content in Context7 response")
        return None

    # A present-but-null "text" field must not break the slicing below.
    text = content[0].get("text") or ""
    logger.debug(f"Context7 resolve-library-id response text:\n{text[:800]}")

    import re

    # Parse ALL libraries from response (separated by ----------)
    # Multiple libraries may have the same title - need smart selection
    libraries = []
    for block in text.split("----------"):
        if not block.strip():
            continue

        id_match = re.search(
            r"Context7-compatible library ID:\s*(/[\w.-]+/[\w.-]+(?:/[\w.-]+)?)",
            block,
        )
        if not id_match:
            # Header text or an entry without an ID: nothing to select.
            continue

        title_match = re.search(r"Title:\s*(.+)", block)
        # Only capture text float() accepts ("85", "85.", "85.5", ".5"), so a
        # malformed score such as "1.2.3" cannot raise ValueError and abort
        # the whole resolution.
        score_match = re.search(
            r"Benchmark Score:\s*(\d+(?:\.\d*)?|\.\d+)", block
        )
        versions_match = re.search(r"Versions:\s*(.+)", block)

        libraries.append(
            {
                "title": title_match.group(1).strip() if title_match else "",
                "id": id_match.group(1),
                "score": float(score_match.group(1)) if score_match else 0,
                "has_versions": versions_match is not None,
            }
        )

    if not libraries:
        logger.warning(f"No library IDs found in response for '{library_name}'")
        return None

    # Normalize for comparison (remove dots, spaces, dashes)
    def normalize(s):
        return s.lower().replace(".", "").replace("-", "").replace(" ", "")

    def pick_best(candidates, label):
        """Return the ID of the best candidate, preferring versioned entries."""
        # Entries that list versions are usually the official repo.
        versioned = [lib for lib in candidates if lib["has_versions"]]
        if versioned:
            pool, suffix = versioned, " with versions"
        else:
            pool, suffix = candidates, ""
        best = max(pool, key=lambda lib: lib["score"])
        logger.info(
            f"Resolved '{library_name}' to '{best['id']}' ({label}{suffix}, score={best['score']})"
        )
        return best["id"]

    normalized_search = normalize(library_name)

    # Find exact matches (after normalization)
    exact_matches = [
        lib for lib in libraries if normalize(lib["title"]) == normalized_search
    ]
    if exact_matches:
        return pick_best(exact_matches, "exact match")

    # No exact match - look for title containing search term
    partial_matches = [
        lib for lib in libraries if normalized_search in normalize(lib["title"])
    ]
    if partial_matches:
        return pick_best(partial_matches, "partial match")

    # Fallback: highest score overall
    best = max(libraries, key=lambda lib: lib["score"])
    logger.info(
        f"Resolved '{library_name}' to '{best['id']}' (fallback: highest score={best['score']})"
    )
    return best["id"]
450
+
451
+
452
class PerplexityService(ExternalMCPService):
    """Perplexity web search service."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Set up the Perplexity MCP service.

        Args:
            api_key: Perplexity API key (defaults to PERPLEXITY_API_KEY env var).
                A warning is logged when no key can be found.
        """
        resolved_key = api_key or os.getenv("PERPLEXITY_API_KEY")

        # Forward the key only when one is known; otherwise warn and pass
        # an empty env mapping to the base class.
        service_env = {}
        if resolved_key:
            service_env["PERPLEXITY_API_KEY"] = resolved_key
        else:
            logger.warning(
                "PERPLEXITY_API_KEY not set - web search will not be available"
            )

        super().__init__(
            command=["npx", "-y", "server-perplexity-ask"],
            env=service_env,
        )

    def search_web(self, query: str) -> Dict[str, Any]:
        """
        Run a web search through the ``perplexity_ask`` tool.

        Args:
            query: Search query

        Returns:
            On success: {"success": True, "answer": ..., "raw_result": ...}.
            On failure: {"success": False, "error": ..., "answer": ""}.
        """
        # The query is sent as a single user message.
        messages = [{"role": "user", "content": query}]
        response = self.call_tool("perplexity_ask", {"messages": messages})

        if "error" in response:
            logger.error(f"Perplexity search failed: {response['error']}")
            return {"success": False, "error": response["error"], "answer": ""}

        # The answer is the text of the first content item, if there is one.
        parts = response.get("content", [])
        answer = parts[0].get("text", "") if parts else ""

        return {"success": True, "answer": answer, "raw_result": response}
498
+
499
+
500
# Singleton instances for reuse.
# Each starts as None and is created on first use by the matching
# get_context7_service() / get_perplexity_service() accessor.
_context7_service: Optional[Context7Service] = None
_perplexity_service: Optional[PerplexityService] = None
503
+
504
+
505
def get_context7_service() -> Context7Service:
    """Return the shared Context7Service, creating it on the first call."""
    global _context7_service
    if _context7_service is not None:
        return _context7_service

    # First use: build the instance and keep it for later callers.
    _context7_service = Context7Service()
    return _context7_service
511
+
512
+
513
def get_perplexity_service() -> PerplexityService:
    """Return the shared PerplexityService, creating it on the first call."""
    global _perplexity_service
    if _perplexity_service is not None:
        return _perplexity_service

    # First use: build the instance and keep it for later callers.
    _perplexity_service = PerplexityService()
    return _perplexity_service