nexus_dev-3.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexus_dev/__init__.py +4 -0
- nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev/agents/__init__.py +20 -0
- nexus_dev/agents/agent_config.py +97 -0
- nexus_dev/agents/agent_executor.py +197 -0
- nexus_dev/agents/agent_manager.py +104 -0
- nexus_dev/agents/prompt_factory.py +91 -0
- nexus_dev/chunkers/__init__.py +168 -0
- nexus_dev/chunkers/base.py +202 -0
- nexus_dev/chunkers/docs_chunker.py +291 -0
- nexus_dev/chunkers/java_chunker.py +343 -0
- nexus_dev/chunkers/javascript_chunker.py +312 -0
- nexus_dev/chunkers/python_chunker.py +308 -0
- nexus_dev/cli.py +2017 -0
- nexus_dev/config.py +261 -0
- nexus_dev/database.py +569 -0
- nexus_dev/embeddings.py +703 -0
- nexus_dev/gateway/__init__.py +10 -0
- nexus_dev/gateway/connection_manager.py +348 -0
- nexus_dev/github_importer.py +247 -0
- nexus_dev/mcp_client.py +281 -0
- nexus_dev/mcp_config.py +184 -0
- nexus_dev/schemas/mcp_config_schema.json +166 -0
- nexus_dev/server.py +1866 -0
- nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.dist-info/METADATA +668 -0
- nexus_dev-3.3.1.dist-info/RECORD +48 -0
- nexus_dev-3.3.1.dist-info/WHEEL +4 -0
- nexus_dev-3.3.1.dist-info/entry_points.txt +14 -0
- nexus_dev-3.3.1.dist-info/licenses/LICENSE +21 -0
nexus_dev/gateway/connection_manager.py
@@ -0,0 +1,348 @@
"""MCP Connection Manager for Gateway Mode."""

from __future__ import annotations

import asyncio
import logging
from dataclasses import dataclass, field
from typing import Any

import httpx
from mcp import ClientSession
from mcp.client.sse import sse_client
from mcp.client.stdio import StdioServerParameters, stdio_client
from mcp.client.streamable_http import streamable_http_client

from ..mcp_config import MCPServerConfig

logger = logging.getLogger(__name__)


class MCPConnectionError(Exception):
    """Failed to connect to MCP server."""

    pass


class MCPTimeoutError(Exception):
    """Tool invocation timed out."""

    pass


@dataclass
class MCPConnection:
    """Active connection to an MCP server."""

    name: str
    config: MCPServerConfig
    session: ClientSession | None = None
    _lock: asyncio.Lock = field(default_factory=asyncio.Lock)
    _cleanup_stack: list[Any] = field(default_factory=list)

    # Retry configuration
    max_retries: int = 3
    retry_delay: float = 1.0  # seconds (base delay for exponential backoff)

    @property
    def timeout(self) -> float:
        """Get tool execution timeout from config."""
        return self.config.timeout

    @property
    def connect_timeout(self) -> float:
        """Get connection timeout from config."""
        return self.config.connect_timeout

    async def connect(self) -> ClientSession:
        """Get or create connection with retry logic."""
        async with self._lock:
            # For HTTP transport, always create fresh connections to avoid
            # anyio TaskGroup conflicts with streamable_http_client
            if self.config.transport == "http":
                if self.session is not None:
                    logger.debug("[%s] Cleaning up previous HTTP session", self.name)
                    await self._cleanup()
            elif self.session is not None:
                # Check if existing session is still alive (non-HTTP transports only)
                try:
                    logger.debug("[%s] Pinging existing session", self.name)
                    await self.session.send_ping()
                    logger.debug("[%s] Existing session is alive", self.name)
                    return self.session
                except Exception as e:
                    logger.warning(
                        "[%s] Connection lost or ping failed, reconnecting... Error: %s",
                        self.name,
                        e,
                    )
                    await self._cleanup()

            # Try to connect with retries within total connect_timeout
            last_error: Exception | None = None
            try:
                async with asyncio.timeout(self.connect_timeout):
                    for attempt in range(self.max_retries):
                        try:
                            logger.info(
                                "[%s] Connection attempt %d/%d",
                                self.name,
                                attempt + 1,
                                self.max_retries,
                            )
                            return await self._do_connect()
                        except Exception as e:
                            last_error = e
                            logger.warning(
                                "[%s] Connection attempt %d/%d failed: %s",
                                self.name,
                                attempt + 1,
                                self.max_retries,
                                e,
                            )
                            if attempt < self.max_retries - 1:
                                delay = self.retry_delay * (2**attempt)
                                logger.debug("[%s] Retrying in %.1fs...", self.name, delay)
                                await asyncio.sleep(delay)
            except TimeoutError:
                logger.error(
                    "[%s] Connection timed out after %.1fs", self.name, self.connect_timeout
                )
                raise MCPConnectionError(
                    f"Failed to connect to {self.name} due to timeout after {self.connect_timeout}s"
                ) from last_error

            logger.error("[%s] All connection attempts failed", self.name)
            raise MCPConnectionError(
                f"Failed to connect to {self.name} after {self.max_retries} attempts"
            ) from last_error

    async def _do_connect(self) -> ClientSession:
        """Perform actual connection to MCP server.

        Note: We don't use asyncio.wait_for() here because anyio-based transports
        (like streamable_http_client) use their own cancel scopes which conflict
        with asyncio's cancellation. The httpx client has its own timeout configured.
        """
        logger.debug("[%s] Connecting...", self.name)
        try:
            result = await self._do_connect_impl()
            logger.info("[%s] Connection successful", self.name)
            return result
        except Exception as e:
            logger.error("[%s] Connection failed: %s", self.name, e)
            raise

    async def _do_connect_impl(self) -> ClientSession:
        """Internal connection implementation for SSE and stdio transports.

        Note: HTTP transport does NOT use this method - it uses _invoke_http instead
        to properly handle anyio's structured concurrency requirements.
        """
        if self.config.transport == "sse":
            if not self.config.url:
                raise ValueError(f"URL required for SSE transport: {self.name}")

            logger.debug("[%s] Using SSE transport to %s", self.name, self.config.url)
            transport_cm = sse_client(
                url=self.config.url,
                headers=self.config.headers,
            )
        elif self.config.transport == "stdio":
            logger.debug(
                "[%s] Using stdio transport with command: %s", self.name, self.config.command
            )
            server_params = StdioServerParameters(
                command=self.config.command,  # type: ignore
                args=self.config.args,
                env=self.config.env,
            )
            transport_cm = stdio_client(server_params)
        else:
            raise ValueError(f"Unsupported transport for pooling: {self.config.transport}")

        logger.debug("[%s] Entering transport context manager", self.name)
        read, write = await transport_cm.__aenter__()
        self._cleanup_stack.append(transport_cm)

        logger.debug("[%s] Creating client session", self.name)
        session_cm = ClientSession(read, write)
        self.session = await session_cm.__aenter__()
        self._cleanup_stack.append(session_cm)

        logger.debug("[%s] Initializing session", self.name)
        await self.session.initialize()
        logger.info("[%s] Connected to MCP server successfully", self.name)
        return self.session

    async def _cleanup(self) -> None:
        """Clean up connection resources (called with lock held)."""
        while self._cleanup_stack:
            cm = self._cleanup_stack.pop()
            try:
                await cm.__aexit__(None, None, None)
            except Exception as e:
                logger.debug("Cleanup error for %s: %s", self.name, e)
        self.session = None

    async def disconnect(self) -> None:
        """Close connection."""
        async with self._lock:
            await self._cleanup()
            logger.info("Disconnected from MCP server: %s", self.name)

    async def _invoke_http(self, tool: str, arguments: dict[str, Any]) -> Any:
        """Invoke a tool using HTTP transport with proper context manager handling.

        For HTTP transport, we must use proper async with blocks because
        streamable_http_client uses anyio TaskGroups internally that conflict
        with manual __aenter__/__aexit__ calls.
        """
        logger.debug("[%s] Using HTTP transport for tool: %s", self.name, tool)

        if not self.config.url:
            raise ValueError(f"URL required for HTTP transport: {self.name}")

        async with (
            httpx.AsyncClient(
                headers=self.config.headers,
                timeout=httpx.Timeout(self.config.timeout),
            ) as http_client,
            streamable_http_client(
                url=self.config.url,
                http_client=http_client,
                terminate_on_close=True,
            ) as (read, write, _),
            ClientSession(read, write) as session,
        ):
            await session.initialize()
            logger.debug("[%s] HTTP session initialized, calling tool: %s", self.name, tool)
            result = await session.call_tool(tool, arguments)
            logger.debug("[%s] Tool call completed: %s", self.name, tool)
            return result

    async def _invoke_impl(self, tool: str, arguments: dict[str, Any]) -> Any:
        """Internal implementation of tool invocation.

        Args:
            tool: Tool name to invoke.
            arguments: Tool arguments.

        Returns:
            Tool execution result.
        """
        # For HTTP transport, use the dedicated method with proper async with handling
        if self.config.transport == "http":
            return await self._invoke_http(tool, arguments)

        # For other transports (stdio, sse), use connection pooling
        logger.debug("[%s] Getting connection for tool: %s", self.name, tool)
        session = await self.connect()
        logger.debug("[%s] Connection established, calling tool: %s", self.name, tool)
        result = await session.call_tool(tool, arguments)
        logger.debug("[%s] Tool call completed: %s", self.name, tool)
        return result

    async def invoke_with_timeout(self, tool: str, arguments: dict[str, Any]) -> Any:
        """Invoke a tool with timeout protection.

        Note: We don't use asyncio.wait_for() for HTTP because anyio-based transports
        (like streamable_http_client) use their own cancel scopes which conflict
        with asyncio's cancellation. The httpx client has its own timeout configured.

        Args:
            tool: Tool name to invoke.
            arguments: Tool arguments.

        Returns:
            Tool execution result.

        Raises:
            MCPTimeoutError: If tool invocation times out.
            MCPConnectionError: If connection fails.
        """
        logger.info("[%s] Starting invoke_tool: %s with args: %s", self.name, tool, arguments)

        try:
            if self.config.transport == "http":
                # HTTP has its own timeout in httpx
                result = await self._invoke_impl(tool, arguments)
            else:
                # Use asyncio.wait_for for stdio/sse as they don't have built-in timeout
                result = await asyncio.wait_for(
                    self._invoke_impl(tool, arguments), timeout=self.timeout
                )
            logger.info("[%s] Tool invocation successful: %s", self.name, tool)
            return result
        except TimeoutError:
            logger.error("[%s] Tool invocation timed out: %s", self.name, tool)
            raise MCPTimeoutError(f"Tool '{tool}' timed out after {self.timeout}s") from None
        except Exception as e:
            logger.error("[%s] Tool invocation failed: %s - %s", self.name, tool, e)
            # Clean up on any error (only for non-HTTP transports)
            if self.config.transport != "http":
                async with self._lock:
                    await self._cleanup()
            raise


class ConnectionManager:
    """Manages a pool of MCP connections."""

    def __init__(self) -> None:
        self._connections: dict[str, MCPConnection] = {}
        self._lock = asyncio.Lock()

    async def get_connection(self, name: str, config: MCPServerConfig) -> ClientSession:
        """Get connection for a named server, creating if needed."""
        async with self._lock:
            if name not in self._connections:
                self._connections[name] = MCPConnection(name=name, config=config)

            connection = self._connections[name]

        # Connect outside the manager lock to avoid blocking other requests
        return await connection.connect()

    async def disconnect_all(self) -> None:
        """Close all active connections."""
        async with self._lock:
            coros = [conn.disconnect() for conn in self._connections.values()]
            if coros:
                await asyncio.gather(*coros, return_exceptions=True)
            self._connections.clear()

    async def invoke_tool(
        self, name: str, config: MCPServerConfig, tool: str, arguments: dict[str, Any]
    ) -> Any:
        """Invoke a tool on a backend MCP server with timeout.

        Args:
            name: Server name for connection pooling.
            config: Server configuration.
            tool: Tool name to invoke.
            arguments: Tool arguments.

        Returns:
            Tool execution result (MCP CallToolResult).

        Raises:
            MCPConnectionError: If connection fails after retries.
            MCPTimeoutError: If tool invocation times out.
        """
        async with self._lock:
            if name not in self._connections:
                self._connections[name] = MCPConnection(name=name, config=config)
            connection = self._connections[name]

        return await connection.invoke_with_timeout(tool, arguments)
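A note on how this module is meant to be driven, with a minimal usage sketch (not part of the package): ConnectionManager.invoke_tool() lazily creates one pooled MCPConnection per server name and delegates to invoke_with_timeout(). With the defaults above (max_retries=3, retry_delay=1.0), a failing stdio/SSE connection is retried after 1s and then 2s, the whole loop bounded by config.connect_timeout via asyncio.timeout(). The MCPServerConfig field names below are assumed from the attribute access in this module; its definition lives in nexus_dev/mcp_config.py, which this diff lists but does not reproduce.

import asyncio

from nexus_dev.gateway.connection_manager import (
    ConnectionManager,
    MCPConnectionError,
    MCPTimeoutError,
)
from nexus_dev.mcp_config import MCPServerConfig


async def main() -> None:
    manager = ConnectionManager()
    # Hypothetical stdio server config; field names assumed from this module's usage.
    config = MCPServerConfig(
        transport="stdio",
        command="my-mcp-server",  # hypothetical executable
        args=[],
        env={},
    )
    try:
        # First call creates and pools the connection; later calls ping and reuse it.
        result = await manager.invoke_tool("my-server", config, "echo", {"text": "hi"})
        print(result)
    except (MCPConnectionError, MCPTimeoutError) as exc:
        print(f"Invocation failed: {exc}")
    finally:
        await manager.disconnect_all()


asyncio.run(main())

Note that HTTP-transport servers bypass the pool entirely: each invoke_tool() call opens a fresh streamable_http_client session inside _invoke_http() and tears it down when the call returns, trading connection reuse for compatibility with anyio's structured concurrency.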
nexus_dev/github_importer.py
@@ -0,0 +1,247 @@
"""GitHub Knowledge Importer.

Imports issues and pull requests from GitHub via the MCP server
and indexes them into the Nexus knowledge base.
"""

from __future__ import annotations

import json
import logging
from typing import Any

from .database import Document, DocumentType, NexusDatabase, generate_document_id
from .mcp_client import MCPClientManager, MCPServerConnection
from .mcp_config import MCPConfig

logger = logging.getLogger(__name__)


class GitHubImporter:
    """Imports GitHub data into the Nexus knowledge base."""

    def __init__(
        self,
        database: NexusDatabase,
        project_id: str,
        client_manager: MCPClientManager | None = None,
        mcp_config: MCPConfig | None = None,
    ) -> None:
        """Initialize importer.

        Args:
            database: Database instance to store documents.
            project_id: Project ID to associate documents with.
            client_manager: Optional MCP client manager (uses default if None).
            mcp_config: MCP configuration (required to find the 'github' server).
        """
        self.database = database
        self.project_id = project_id
        self.client_manager = client_manager or MCPClientManager()
        self.mcp_config = mcp_config

    async def import_issues(
        self,
        owner: str,
        repo: str,
        limit: int = 20,
        state: str = "all",
    ) -> int:
        """Import GitHub issues and pull requests.

        Args:
            owner: Repository owner.
            repo: Repository name.
            limit: Maximum number of issues to import.
            state: Issue state ("open", "closed", "all").

        Returns:
            Number of items imported.
        """
        # Connect to the GitHub MCP server
        try:
            if not self.mcp_config:
                raise ValueError("MCP config is required to find the 'github' server.")

            server_name = "github"
            server_config = self.mcp_config.servers.get(server_name)
            if not server_config:
                raise ValueError("Server 'github' not found in MCP config.")

            # Create connection object
            connection = MCPServerConnection(
                name=server_name,
                command=server_config.command or "",
                args=server_config.args,
                env=server_config.env,
                transport=server_config.transport,
                url=server_config.url,
                headers=server_config.headers,
                timeout=server_config.timeout,
            )

            all_items = []

            # 1. Fetch issues
            issues = await self._fetch_tool_items(
                connection, "list_issues", owner, repo, limit, state
            )
            logger.info("Fetched %d issues", len(issues))
            all_items.extend(issues)

            # 2. Fetch pull requests
            prs = await self._fetch_tool_items(
                connection, "list_pull_requests", owner, repo, limit, state
            )
            logger.info("Fetched %d PRs", len(prs))
            all_items.extend(prs)

            if not all_items:
                return 0

            return await self._index_issues(all_items, owner, repo)

        except Exception as e:
            logger.error("Failed to import GitHub data: %s", e)
            raise

    async def _fetch_tool_items(
        self,
        connection: MCPServerConnection,
        tool_name: str,
        owner: str,
        repo: str,
        limit: int,
        state: str,
    ) -> list[dict[str, Any]]:
        """Fetch items using a specific GitHub MCP tool."""
        args = {
            "owner": owner,
            "repo": repo,
            "state": state,
            "per_page": limit,
        }

        try:
            result = await self.client_manager.call_tool(connection, tool_name, args)
        except Exception as e:
            logger.warning("Failed to call tool %s: %s", tool_name, e)
            return []

        if not result:
            return []

        # Extract text content from the tool result
        content_list = []
        if hasattr(result, "content"):
            content_list = result.content
        elif isinstance(result, dict) and "content" in result:
            content_list = result["content"]

        text_content = ""
        for content in content_list:
            if hasattr(content, "text"):
                text_content += content.text
            elif isinstance(content, dict) and "text" in content:
                text_content += content["text"]

        if not text_content:
            return []

        try:
            items = json.loads(text_content)
        except json.JSONDecodeError:
            logger.error("Failed to parse %s response as JSON", tool_name)
            return []

        if not isinstance(items, list):
            # Handle a dictionary response
            if isinstance(items, dict):
                # Check for common wrapper keys
                for key in ["items", "issues", "pull_requests", "data"]:
                    if key in items:
                        items = items[key]
                        break

            if items is None:
                items = []
            elif not isinstance(items, list):
                # Fallback: a non-empty dict without a wrapped list is treated as a
                # single item; anything else is an unexpected format and is dropped.
                items = [items] if isinstance(items, dict) and items else []

        return items

    async def _index_issues(self, issues: list[dict[str, Any]], owner: str, repo: str) -> int:
        """Index a list of issue dictionaries."""
        docs = []
        embedder = self.database.embedder

        # Collect the texts first so they can be embedded in a single batch.
        texts_to_embed = []
        valid_issues = []

        for issue in issues:
            # The GitHub API treats PRs as issues: items from the list_issues API
            # carry a 'pull_request' key, while items from list_pull_requests carry
            # a 'base' key. Use either marker to pick the document type.
            is_pr = "pull_request" in issue or "base" in issue
            doc_type = DocumentType.GITHUB_PR if is_pr else DocumentType.GITHUB_ISSUE

            number = issue.get("number")
            title = issue.get("title", "")
            body = issue.get("body", "") or ""
            url = issue.get("html_url", "")

            # Create a rich text representation for RAG
            text = f"""GitHub {doc_type.value.replace("_", " ").title()} #{number}: {title}
Repo: {owner}/{repo}
URL: {url}
State: {issue.get("state")}
Author: {issue.get("user", {}).get("login")}

{body}
"""
            texts_to_embed.append(text)
            valid_issues.append((issue, doc_type, text, number, url, title))

        if not texts_to_embed:
            return 0

        # Generate embeddings in one batch
        embeddings = await embedder.embed_batch(texts_to_embed)

        for (_, doc_type, text, number, url, title), vector in zip(
            valid_issues, embeddings, strict=True
        ):
            doc_id = generate_document_id(
                self.project_id, f"github://{owner}/{repo}/issues/{number}", str(number), 0
            )

            # Use 'name' as the human-readable label, e.g. "Issue #123: ..."
            name = f"Issue #{number}: {title}"

            doc = Document(
                id=doc_id,
                text=text,
                vector=vector,
                project_id=self.project_id,
                file_path=url,  # Use URL as file_path for clickable links
                doc_type=doc_type,
                chunk_type="issue" if doc_type == DocumentType.GITHUB_ISSUE else "pr",
                language="markdown",
                name=name,
                start_line=0,
                end_line=0,
            )
            docs.append(doc)

        await self.database.upsert_documents(docs)
        return len(docs)
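For completeness, a hypothetical driver for the importer (not part of the package). NexusDatabase and MCPConfig construction is elided because their definitions live in nexus_dev/database.py and nexus_dev/mcp_config.py, which this diff lists but does not reproduce; whatever config is used must define a server named "github".

import asyncio

from nexus_dev.database import NexusDatabase
from nexus_dev.github_importer import GitHubImporter
from nexus_dev.mcp_config import MCPConfig


async def main() -> None:
    database: NexusDatabase = ...  # construction not shown in this diff
    mcp_config: MCPConfig = ...  # must contain a "github" server entry

    importer = GitHubImporter(
        database=database,
        project_id="my-project",
        mcp_config=mcp_config,  # required: import_issues raises ValueError without it
    )
    # Fetches up to `limit` issues and `limit` PRs via the github server's
    # list_issues / list_pull_requests tools, embeds them in one batch, and
    # upserts them as GITHUB_ISSUE / GITHUB_PR documents keyed by issue number.
    count = await importer.import_issues(owner="octocat", repo="hello-world", limit=20)
    print(f"Indexed {count} items")


asyncio.run(main())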