nexus-dev 3.3.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nexus_dev/__init__.py +4 -0
  2. nexus_dev/agent_templates/__init__.py +26 -0
  3. nexus_dev/agent_templates/api_designer.yaml +26 -0
  4. nexus_dev/agent_templates/code_reviewer.yaml +26 -0
  5. nexus_dev/agent_templates/debug_detective.yaml +26 -0
  6. nexus_dev/agent_templates/doc_writer.yaml +26 -0
  7. nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
  8. nexus_dev/agent_templates/refactor_architect.yaml +26 -0
  9. nexus_dev/agent_templates/security_auditor.yaml +26 -0
  10. nexus_dev/agent_templates/test_engineer.yaml +26 -0
  11. nexus_dev/agents/__init__.py +20 -0
  12. nexus_dev/agents/agent_config.py +97 -0
  13. nexus_dev/agents/agent_executor.py +197 -0
  14. nexus_dev/agents/agent_manager.py +104 -0
  15. nexus_dev/agents/prompt_factory.py +91 -0
  16. nexus_dev/chunkers/__init__.py +168 -0
  17. nexus_dev/chunkers/base.py +202 -0
  18. nexus_dev/chunkers/docs_chunker.py +291 -0
  19. nexus_dev/chunkers/java_chunker.py +343 -0
  20. nexus_dev/chunkers/javascript_chunker.py +312 -0
  21. nexus_dev/chunkers/python_chunker.py +308 -0
  22. nexus_dev/cli.py +2017 -0
  23. nexus_dev/config.py +261 -0
  24. nexus_dev/database.py +569 -0
  25. nexus_dev/embeddings.py +703 -0
  26. nexus_dev/gateway/__init__.py +10 -0
  27. nexus_dev/gateway/connection_manager.py +348 -0
  28. nexus_dev/github_importer.py +247 -0
  29. nexus_dev/mcp_client.py +281 -0
  30. nexus_dev/mcp_config.py +184 -0
  31. nexus_dev/schemas/mcp_config_schema.json +166 -0
  32. nexus_dev/server.py +1866 -0
  33. nexus_dev/templates/pre-commit-hook +56 -0
  34. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/__init__.py +26 -0
  35. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
  36. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
  37. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
  38. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
  39. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
  40. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
  41. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
  42. nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
  43. nexus_dev-3.3.1.data/data/nexus_dev/templates/pre-commit-hook +56 -0
  44. nexus_dev-3.3.1.dist-info/METADATA +668 -0
  45. nexus_dev-3.3.1.dist-info/RECORD +48 -0
  46. nexus_dev-3.3.1.dist-info/WHEEL +4 -0
  47. nexus_dev-3.3.1.dist-info/entry_points.txt +14 -0
  48. nexus_dev-3.3.1.dist-info/licenses/LICENSE +21 -0
nexus_dev/gateway/connection_manager.py
@@ -0,0 +1,348 @@
+"""MCP Connection Manager for Gateway Mode."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from dataclasses import dataclass, field
+from typing import Any
+
+import httpx
+from mcp import ClientSession
+from mcp.client.sse import sse_client
+from mcp.client.stdio import StdioServerParameters, stdio_client
+from mcp.client.streamable_http import streamable_http_client
+
+from ..mcp_config import MCPServerConfig
+
+logger = logging.getLogger(__name__)
+
+
+class MCPConnectionError(Exception):
+    """Failed to connect to MCP server."""
+
+    pass
+
+
+class MCPTimeoutError(Exception):
+    """Tool invocation timed out."""
+
+    pass
+
+
+@dataclass
+class MCPConnection:
+    """Active connection to an MCP server."""
+
+    name: str
+    config: MCPServerConfig
+    session: ClientSession | None = None
+    _lock: asyncio.Lock = field(default_factory=asyncio.Lock)
+    _cleanup_stack: list[Any] = field(default_factory=list)
+
+    # Retry configuration
+    max_retries: int = 3
+    retry_delay: float = 1.0  # seconds (base delay for exponential backoff)
+
+    @property
+    def timeout(self) -> float:
+        """Get tool execution timeout from config."""
+        return self.config.timeout
+
+    @property
+    def connect_timeout(self) -> float:
+        """Get connection timeout from config."""
+        return self.config.connect_timeout
+
+    async def connect(self) -> ClientSession:
+        """Get or create connection with retry logic."""
+        async with self._lock:
+            # For HTTP transport, always create fresh connections to avoid
+            # anyio TaskGroup conflicts with streamable_http_client
+            if self.config.transport == "http":
+                if self.session is not None:
+                    logger.debug("[%s] Cleaning up previous HTTP session", self.name)
+                    await self._cleanup()
+            elif self.session is not None:
+                # Check if existing session is still alive (non-HTTP transports only)
+                try:
+                    logger.debug("[%s] Pinging existing session", self.name)
+                    await self.session.send_ping()
+                    logger.debug("[%s] Existing session is alive", self.name)
+                    return self.session
+                except Exception as e:
+                    logger.warning(
+                        "[%s] Connection lost or ping failed, reconnecting... Error: %s",
+                        self.name,
+                        e,
+                    )
+                    await self._cleanup()
+
+            # Try to connect with retries within the total connect_timeout
+            last_error: Exception | None = None
+            try:
+                async with asyncio.timeout(self.connect_timeout):
+                    for attempt in range(self.max_retries):
+                        try:
+                            logger.info(
+                                "[%s] Connection attempt %d/%d",
+                                self.name,
+                                attempt + 1,
+                                self.max_retries,
+                            )
+                            return await self._do_connect()
+                        except Exception as e:
+                            last_error = e
+                            logger.warning(
+                                "[%s] Connection attempt %d/%d failed: %s",
+                                self.name,
+                                attempt + 1,
+                                self.max_retries,
+                                e,
+                            )
+                            if attempt < self.max_retries - 1:
+                                delay = self.retry_delay * (2**attempt)
+                                logger.debug("[%s] Retrying in %.1fs...", self.name, delay)
+                                await asyncio.sleep(delay)
+            except TimeoutError:
+                logger.error(
+                    "[%s] Connection timed out after %.1fs", self.name, self.connect_timeout
+                )
+                raise MCPConnectionError(
+                    f"Failed to connect to {self.name} due to timeout after {self.connect_timeout}s"
+                ) from last_error
+
+            logger.error("[%s] All connection attempts failed", self.name)
+            raise MCPConnectionError(
+                f"Failed to connect to {self.name} after {self.max_retries} attempts"
+            ) from last_error
+
+    async def _do_connect(self) -> ClientSession:
+        """Perform actual connection to MCP server.
+
+        Note: We don't use asyncio.wait_for() here because anyio-based transports
+        (like streamable_http_client) use their own cancel scopes which conflict
+        with asyncio's cancellation. The httpx client has its own timeout configured.
+        """
+        logger.debug("[%s] Connecting...", self.name)
+        try:
+            result = await self._do_connect_impl()
+            logger.info("[%s] Connection successful", self.name)
+            return result
+        except Exception as e:
+            logger.error("[%s] Connection failed: %s", self.name, e)
+            raise
+
+    async def _do_connect_impl(self) -> ClientSession:
+        """Internal connection implementation for SSE and stdio transports.
+
+        Note: HTTP transport does NOT use this method - it uses _invoke_http instead
+        to properly handle anyio's structured concurrency requirements.
+        """
+        if self.config.transport == "sse":
+            if not self.config.url:
+                raise ValueError(f"URL required for SSE transport: {self.name}")
+
+            logger.debug("[%s] Using SSE transport to %s", self.name, self.config.url)
+            transport_cm = sse_client(
+                url=self.config.url,
+                headers=self.config.headers,
+            )
+        elif self.config.transport == "stdio":
+            logger.debug(
+                "[%s] Using stdio transport with command: %s", self.name, self.config.command
+            )
+            server_params = StdioServerParameters(
+                command=self.config.command,  # type: ignore
+                args=self.config.args,
+                env=self.config.env,
+            )
+            transport_cm = stdio_client(server_params)
+        else:
+            raise ValueError(f"Unsupported transport for pooling: {self.config.transport}")
+
+        logger.debug("[%s] Entering transport context manager", self.name)
+        read, write = await transport_cm.__aenter__()
+        self._cleanup_stack.append(transport_cm)
+
+        logger.debug("[%s] Creating client session", self.name)
+        session_cm = ClientSession(read, write)
+        self.session = await session_cm.__aenter__()
+        self._cleanup_stack.append(session_cm)
+
+        logger.debug("[%s] Initializing session", self.name)
+        await self.session.initialize()
+        logger.info("[%s] Connected to MCP server successfully", self.name)
+        return self.session
+
+    async def _cleanup(self) -> None:
+        """Clean up connection resources (called with the lock held)."""
+        while self._cleanup_stack:
+            cm = self._cleanup_stack.pop()
+            try:
+                await cm.__aexit__(None, None, None)
+            except Exception as e:
+                logger.debug("Cleanup error for %s: %s", self.name, e)
+        self.session = None
+
+    async def disconnect(self) -> None:
+        """Close connection."""
+        async with self._lock:
+            await self._cleanup()
+        logger.info("Disconnected from MCP server: %s", self.name)
+
+    async def _invoke_http(self, tool: str, arguments: dict[str, Any]) -> Any:
+        """Invoke a tool using HTTP transport with proper context manager handling.
+
+        For HTTP transport, we must use proper async with blocks because
+        streamable_http_client uses anyio TaskGroups internally that conflict
+        with manual __aenter__/__aexit__ calls.
+        """
+        logger.debug("[%s] Using HTTP transport for tool: %s", self.name, tool)
+
+        if not self.config.url:
+            raise ValueError(f"URL required for HTTP transport: {self.name}")
+
+        async with (
+            httpx.AsyncClient(
+                headers=self.config.headers,
+                timeout=httpx.Timeout(self.config.timeout),
+            ) as http_client,
+            streamable_http_client(
+                url=self.config.url,
+                http_client=http_client,
+                terminate_on_close=True,
+            ) as (read, write, _),
+            ClientSession(read, write) as session,
+        ):
+            await session.initialize()
+            logger.debug("[%s] HTTP session initialized, calling tool: %s", self.name, tool)
+            result = await session.call_tool(tool, arguments)
+            logger.debug("[%s] Tool call completed: %s", self.name, tool)
+            return result
+
+    async def _invoke_impl(self, tool: str, arguments: dict[str, Any]) -> Any:
+        """Internal implementation of tool invocation.
+
+        Args:
+            tool: Tool name to invoke.
+            arguments: Tool arguments.
+
+        Returns:
+            Tool execution result.
+        """
+        # For HTTP transport, use the dedicated method with proper async with handling
+        if self.config.transport == "http":
+            return await self._invoke_http(tool, arguments)
+
+        # For other transports (stdio, sse), use connection pooling
+        logger.debug("[%s] Getting connection for tool: %s", self.name, tool)
+        session = await self.connect()
+        logger.debug("[%s] Connection established, calling tool: %s", self.name, tool)
+        result = await session.call_tool(tool, arguments)
+        logger.debug("[%s] Tool call completed: %s", self.name, tool)
+        return result
+
+    async def invoke_with_timeout(self, tool: str, arguments: dict[str, Any]) -> Any:
+        """Invoke a tool with timeout protection.
+
+        Note: For HTTP transport we don't use asyncio.wait_for() because anyio-based
+        transports (like streamable_http_client) use their own cancel scopes, which
+        conflict with asyncio's cancellation; the httpx client has its own timeout
+        configured instead.
+
+        Args:
+            tool: Tool name to invoke.
+            arguments: Tool arguments.
+
+        Returns:
+            Tool execution result.
+
+        Raises:
+            MCPTimeoutError: If tool invocation times out.
+            MCPConnectionError: If connection fails.
+        """
+        logger.info("[%s] Starting invoke_tool: %s with args: %s", self.name, tool, arguments)
+
+        try:
+            if self.config.transport == "http":
+                # HTTP has its own timeout in httpx
+                result = await self._invoke_impl(tool, arguments)
+            else:
+                # Use asyncio.wait_for for stdio/sse as they don't have a built-in timeout
+                result = await asyncio.wait_for(
+                    self._invoke_impl(tool, arguments), timeout=self.timeout
+                )
+            logger.info("[%s] Tool invocation successful: %s", self.name, tool)
+            return result
+        except TimeoutError:
+            logger.error("[%s] Tool invocation timed out: %s", self.name, tool)
+            raise MCPTimeoutError(f"Tool '{tool}' timed out after {self.timeout}s") from None
+        except Exception as e:
+            logger.error("[%s] Tool invocation failed: %s - %s", self.name, tool, e)
+            # Clean up on any error (non-HTTP transports only)
+            if self.config.transport != "http":
+                async with self._lock:
+                    await self._cleanup()
+            raise
+
+
+class ConnectionManager:
+    """Manages a pool of MCP connections."""
+
+    def __init__(self) -> None:
+        self._connections: dict[str, MCPConnection] = {}
+        self._lock = asyncio.Lock()
+
+    async def get_connection(self, name: str, config: MCPServerConfig) -> ClientSession:
+        """Get connection for a named server, creating it if needed."""
+        async with self._lock:
+            if name not in self._connections:
+                self._connections[name] = MCPConnection(name=name, config=config)
+
+            connection = self._connections[name]
+
+        # Connect outside the manager lock to avoid blocking other requests
+        return await connection.connect()
+
+    async def disconnect_all(self) -> None:
+        """Close all active connections."""
+        async with self._lock:
+            coros = [conn.disconnect() for conn in self._connections.values()]
+            if coros:
+                await asyncio.gather(*coros, return_exceptions=True)
+            self._connections.clear()
+
+    async def invoke_tool(
+        self, name: str, config: MCPServerConfig, tool: str, arguments: dict[str, Any]
+    ) -> Any:
+        """Invoke a tool on a backend MCP server with timeout.
+
+        Args:
+            name: Server name for connection pooling.
+            config: Server configuration.
+            tool: Tool name to invoke.
+            arguments: Tool arguments.
+
+        Returns:
+            Tool execution result (MCP CallToolResult).
+
+        Raises:
+            MCPConnectionError: If connection fails after retries.
+            MCPTimeoutError: If tool invocation times out.
+        """
+        async with self._lock:
+            if name not in self._connections:
+                self._connections[name] = MCPConnection(name=name, config=config)
+            connection = self._connections[name]
+
+        return await connection.invoke_with_timeout(tool, arguments)
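
Taken together, MCPConnection handles per-server retry and timeout mechanics (exponential backoff of retry_delay * 2**attempt, i.e. 1s then 2s with the defaults), while ConnectionManager pools one connection per server name. Below is a minimal usage sketch, assuming MCPServerConfig can be instantiated directly with the fields the manager reads (transport, url, headers, timeout, connect_timeout); the server name, endpoint, tool, and arguments are made-up placeholders, not values from the package.

import asyncio

from nexus_dev.gateway.connection_manager import ConnectionManager
from nexus_dev.mcp_config import MCPServerConfig


async def main() -> None:
    manager = ConnectionManager()
    # Assumed constructor: the field names mirror what the connection code
    # reads from config; the URL is a made-up placeholder.
    config = MCPServerConfig(transport="sse", url="http://localhost:8080/sse")
    try:
        # stdio/sse calls are wrapped in asyncio.wait_for with the configured
        # tool timeout; HTTP relies on httpx's own timeout instead.
        result = await manager.invoke_tool("example", config, "echo", {"text": "hi"})
        print(result)
    finally:
        await manager.disconnect_all()


asyncio.run(main())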
nexus_dev/github_importer.py
@@ -0,0 +1,247 @@
+"""GitHub Knowledge Importer.
+
+Imports issues and pull requests from GitHub via the MCP server
+and indexes them into the Nexus knowledge base.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any
+
+from .database import Document, DocumentType, NexusDatabase, generate_document_id
+from .mcp_client import MCPClientManager, MCPServerConnection
+from .mcp_config import MCPConfig
+
+logger = logging.getLogger(__name__)
+
+
+class GitHubImporter:
+    """Imports GitHub data into the Nexus knowledge base."""
+
+    def __init__(
+        self,
+        database: NexusDatabase,
+        project_id: str,
+        client_manager: MCPClientManager | None = None,
+        mcp_config: MCPConfig | None = None,
+    ) -> None:
+        """Initialize importer.
+
+        Args:
+            database: Database instance to store documents.
+            project_id: Project ID to associate documents with.
+            client_manager: Optional MCP client manager (uses default if None).
+            mcp_config: MCP configuration (required to find the 'github' server).
+        """
+        self.database = database
+        self.project_id = project_id
+        self.client_manager = client_manager or MCPClientManager()
+        self.mcp_config = mcp_config
+
+    async def import_issues(
+        self,
+        owner: str,
+        repo: str,
+        limit: int = 20,
+        state: str = "all",
+    ) -> int:
+        """Import GitHub issues and pull requests.
+
+        Args:
+            owner: Repository owner.
+            repo: Repository name.
+            limit: Maximum number of items to import per tool.
+            state: Issue state ("open", "closed", "all").
+
+        Returns:
+            Number of items imported.
+        """
+        # Connect to the GitHub MCP server
+        try:
+            if not self.mcp_config:
+                raise ValueError("MCP config is required to find the 'github' server.")
+
+            server_name = "github"
+            server_config = self.mcp_config.servers.get(server_name)
+            if not server_config:
+                raise ValueError("Server 'github' not found in MCP config.")
+
+            # Create connection object
+            connection = MCPServerConnection(
+                name=server_name,
+                command=server_config.command or "",
+                args=server_config.args,
+                env=server_config.env,
+                transport=server_config.transport,
+                url=server_config.url,
+                headers=server_config.headers,
+                timeout=server_config.timeout,
+            )
+
+            all_items = []
+
+            # 1. Fetch issues
+            issues = await self._fetch_tool_items(
+                connection, "list_issues", owner, repo, limit, state
+            )
+            logger.info("Fetched %d issues", len(issues))
+            all_items.extend(issues)
+
+            # 2. Fetch pull requests
+            prs = await self._fetch_tool_items(
+                connection, "list_pull_requests", owner, repo, limit, state
+            )
+            logger.info("Fetched %d PRs", len(prs))
+            all_items.extend(prs)
+
+            if not all_items:
+                return 0
+
+            return await self._index_issues(all_items, owner, repo)
+
+        except Exception as e:
+            logger.error("Failed to import GitHub data: %s", e)
+            raise
+
+    async def _fetch_tool_items(
+        self,
+        connection: MCPServerConnection,
+        tool_name: str,
+        owner: str,
+        repo: str,
+        limit: int,
+        state: str,
+    ) -> list[dict[str, Any]]:
+        """Fetch items using a specific GitHub MCP tool."""
+        args = {
+            "owner": owner,
+            "repo": repo,
+            "state": state,
+            "per_page": limit,
+        }
+
+        try:
+            result = await self.client_manager.call_tool(connection, tool_name, args)
+        except Exception as e:
+            logger.warning("Failed to call tool %s: %s", tool_name, e)
+            return []
+
+        if not result:
+            return []
+
+        # Extract text content from the MCP result (object or dict form)
+        content_list = []
+        if hasattr(result, "content"):
+            content_list = result.content
+        elif isinstance(result, dict) and "content" in result:
+            content_list = result["content"]
+
+        text_content = ""
+        for content in content_list:
+            if hasattr(content, "text"):
+                text_content += content.text
+            elif isinstance(content, dict) and "text" in content:
+                text_content += content["text"]
+
+        if not text_content:
+            return []
+
+        try:
+            items = json.loads(text_content)
+        except json.JSONDecodeError:
+            logger.error("Failed to parse %s response as JSON", tool_name)
+            return []
+
+        if not isinstance(items, list):
+            # Handle dictionary responses by unwrapping common wrapper keys
+            if isinstance(items, dict):
+                for key in ["items", "issues", "pull_requests", "data"]:
+                    if key in items:
+                        items = items[key]
+                        break
+
+            if items is None:
+                items = []
+            elif not isinstance(items, list):
+                # Fallback: a dict without a recognized wrapper key is treated
+                # as a single item; anything else is an unexpected format.
+                items = [items] if isinstance(items, dict) and items else []
+
+        return items
+
+    async def _index_issues(self, issues: list[dict[str, Any]], owner: str, repo: str) -> int:
+        """Index a list of issue dictionaries."""
+        docs = []
+        embedder = self.database.embedder
+
+        # Prepare the text for every item first so the embeddings can be
+        # generated in a single batch.
+        texts_to_embed = []
+        valid_issues = []
+
+        for issue in issues:
+            # GitHub API quirk: PRs are issues. Items from list_issues carry a
+            # 'pull_request' key; items from list_pull_requests carry a 'base' key.
+            is_pr = "pull_request" in issue or "base" in issue
+            doc_type = DocumentType.GITHUB_PR if is_pr else DocumentType.GITHUB_ISSUE
+
+            number = issue.get("number")
+            title = issue.get("title", "")
+            body = issue.get("body", "") or ""
+            url = issue.get("html_url", "")
+
+            # Create a rich text representation for RAG
+            text = f"""GitHub {doc_type.value.replace("_", " ").title()} #{number}: {title}
+Repo: {owner}/{repo}
+URL: {url}
+State: {issue.get("state")}
+Author: {issue.get("user", {}).get("login")}
+
+{body}
+"""
+            texts_to_embed.append(text)
+            valid_issues.append((issue, doc_type, text, number, url, title))
+
+        if not texts_to_embed:
+            return 0
+
+        # Generate embeddings (assumes the embedder provides embed_batch)
+        embeddings = await embedder.embed_batch(texts_to_embed)
+
+        for (_, doc_type, text, number, url, title), vector in zip(
+            valid_issues, embeddings, strict=True
+        ):
+            doc_id = generate_document_id(
+                self.project_id, f"github://{owner}/{repo}/issues/{number}", str(number), 0
+            )
+
+            # Human-readable name, e.g. "Issue #123: <title>"
+            name = f"Issue #{number}: {title}"
+
+            doc = Document(
+                id=doc_id,
+                text=text,
+                vector=vector,
+                project_id=self.project_id,
+                file_path=url,  # Use URL as file_path for clickable links
+                doc_type=doc_type,
+                chunk_type="issue" if doc_type == DocumentType.GITHUB_ISSUE else "pr",
+                language="markdown",
+                name=name,
+                start_line=0,
+                end_line=0,
+            )
+            docs.append(doc)
+
+        await self.database.upsert_documents(docs)
+        return len(docs)
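
End to end, GitHubImporter pulls issues and PRs through the MCP client, renders each item as a text block, batch-embeds the blocks, and upserts Document rows. Below is a minimal wiring sketch; how NexusDatabase and MCPConfig are actually constructed lives elsewhere in the package, so the two constructor calls are assumptions, and the owner/repo/project values are placeholders.

import asyncio

from nexus_dev.database import NexusDatabase
from nexus_dev.github_importer import GitHubImporter
from nexus_dev.mcp_config import MCPConfig


async def main() -> None:
    database = NexusDatabase()     # assumed default constructor
    mcp_config = MCPConfig.load()  # assumed loader; must define a "github" server
    importer = GitHubImporter(database, project_id="my-project", mcp_config=mcp_config)
    count = await importer.import_issues(owner="octocat", repo="hello-world", limit=10)
    print(f"Indexed {count} issues and PRs")


asyncio.run(main())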