hanzo-mcp 0.7.7__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hanzo-mcp might be problematic.

Files changed (178)
  1. hanzo_mcp/__init__.py +6 -0
  2. hanzo_mcp/__main__.py +1 -1
  3. hanzo_mcp/analytics/__init__.py +2 -2
  4. hanzo_mcp/analytics/posthog_analytics.py +76 -82
  5. hanzo_mcp/cli.py +31 -36
  6. hanzo_mcp/cli_enhanced.py +94 -72
  7. hanzo_mcp/cli_plugin.py +27 -17
  8. hanzo_mcp/config/__init__.py +2 -2
  9. hanzo_mcp/config/settings.py +112 -88
  10. hanzo_mcp/config/tool_config.py +32 -34
  11. hanzo_mcp/dev_server.py +66 -67
  12. hanzo_mcp/prompts/__init__.py +94 -12
  13. hanzo_mcp/prompts/enhanced_prompts.py +809 -0
  14. hanzo_mcp/prompts/example_custom_prompt.py +6 -5
  15. hanzo_mcp/prompts/project_todo_reminder.py +0 -1
  16. hanzo_mcp/prompts/tool_explorer.py +10 -7
  17. hanzo_mcp/server.py +17 -21
  18. hanzo_mcp/server_enhanced.py +15 -22
  19. hanzo_mcp/tools/__init__.py +56 -28
  20. hanzo_mcp/tools/agent/__init__.py +16 -19
  21. hanzo_mcp/tools/agent/agent.py +82 -65
  22. hanzo_mcp/tools/agent/agent_tool.py +152 -122
  23. hanzo_mcp/tools/agent/agent_tool_v1_deprecated.py +66 -62
  24. hanzo_mcp/tools/agent/clarification_protocol.py +55 -50
  25. hanzo_mcp/tools/agent/clarification_tool.py +11 -10
  26. hanzo_mcp/tools/agent/claude_cli_tool.py +21 -20
  27. hanzo_mcp/tools/agent/claude_desktop_auth.py +130 -144
  28. hanzo_mcp/tools/agent/cli_agent_base.py +59 -53
  29. hanzo_mcp/tools/agent/code_auth.py +102 -107
  30. hanzo_mcp/tools/agent/code_auth_tool.py +28 -27
  31. hanzo_mcp/tools/agent/codex_cli_tool.py +20 -19
  32. hanzo_mcp/tools/agent/critic_tool.py +86 -73
  33. hanzo_mcp/tools/agent/gemini_cli_tool.py +21 -20
  34. hanzo_mcp/tools/agent/grok_cli_tool.py +21 -20
  35. hanzo_mcp/tools/agent/iching_tool.py +404 -139
  36. hanzo_mcp/tools/agent/network_tool.py +89 -73
  37. hanzo_mcp/tools/agent/prompt.py +2 -1
  38. hanzo_mcp/tools/agent/review_tool.py +101 -98
  39. hanzo_mcp/tools/agent/swarm_alias.py +87 -0
  40. hanzo_mcp/tools/agent/swarm_tool.py +246 -161
  41. hanzo_mcp/tools/agent/swarm_tool_v1_deprecated.py +134 -92
  42. hanzo_mcp/tools/agent/tool_adapter.py +21 -11
  43. hanzo_mcp/tools/common/__init__.py +1 -1
  44. hanzo_mcp/tools/common/base.py +3 -5
  45. hanzo_mcp/tools/common/batch_tool.py +46 -39
  46. hanzo_mcp/tools/common/config_tool.py +120 -84
  47. hanzo_mcp/tools/common/context.py +1 -5
  48. hanzo_mcp/tools/common/context_fix.py +5 -3
  49. hanzo_mcp/tools/common/critic_tool.py +4 -8
  50. hanzo_mcp/tools/common/decorators.py +58 -56
  51. hanzo_mcp/tools/common/enhanced_base.py +29 -32
  52. hanzo_mcp/tools/common/fastmcp_pagination.py +91 -94
  53. hanzo_mcp/tools/common/forgiving_edit.py +91 -87
  54. hanzo_mcp/tools/common/mode.py +15 -17
  55. hanzo_mcp/tools/common/mode_loader.py +27 -24
  56. hanzo_mcp/tools/common/paginated_base.py +61 -53
  57. hanzo_mcp/tools/common/paginated_response.py +72 -79
  58. hanzo_mcp/tools/common/pagination.py +50 -53
  59. hanzo_mcp/tools/common/permissions.py +4 -4
  60. hanzo_mcp/tools/common/personality.py +186 -138
  61. hanzo_mcp/tools/common/plugin_loader.py +54 -54
  62. hanzo_mcp/tools/common/stats.py +65 -47
  63. hanzo_mcp/tools/common/test_helpers.py +31 -0
  64. hanzo_mcp/tools/common/thinking_tool.py +4 -8
  65. hanzo_mcp/tools/common/tool_disable.py +17 -12
  66. hanzo_mcp/tools/common/tool_enable.py +13 -14
  67. hanzo_mcp/tools/common/tool_list.py +36 -28
  68. hanzo_mcp/tools/common/truncate.py +23 -23
  69. hanzo_mcp/tools/config/__init__.py +4 -4
  70. hanzo_mcp/tools/config/config_tool.py +42 -29
  71. hanzo_mcp/tools/config/index_config.py +37 -34
  72. hanzo_mcp/tools/config/mode_tool.py +175 -55
  73. hanzo_mcp/tools/database/__init__.py +15 -12
  74. hanzo_mcp/tools/database/database_manager.py +77 -75
  75. hanzo_mcp/tools/database/graph.py +137 -91
  76. hanzo_mcp/tools/database/graph_add.py +30 -18
  77. hanzo_mcp/tools/database/graph_query.py +178 -102
  78. hanzo_mcp/tools/database/graph_remove.py +33 -28
  79. hanzo_mcp/tools/database/graph_search.py +97 -75
  80. hanzo_mcp/tools/database/graph_stats.py +91 -59
  81. hanzo_mcp/tools/database/sql.py +107 -79
  82. hanzo_mcp/tools/database/sql_query.py +30 -24
  83. hanzo_mcp/tools/database/sql_search.py +29 -25
  84. hanzo_mcp/tools/database/sql_stats.py +47 -35
  85. hanzo_mcp/tools/editor/neovim_command.py +25 -28
  86. hanzo_mcp/tools/editor/neovim_edit.py +21 -23
  87. hanzo_mcp/tools/editor/neovim_session.py +60 -54
  88. hanzo_mcp/tools/filesystem/__init__.py +31 -30
  89. hanzo_mcp/tools/filesystem/ast_multi_edit.py +329 -249
  90. hanzo_mcp/tools/filesystem/ast_tool.py +4 -4
  91. hanzo_mcp/tools/filesystem/base.py +1 -1
  92. hanzo_mcp/tools/filesystem/batch_search.py +316 -224
  93. hanzo_mcp/tools/filesystem/content_replace.py +4 -4
  94. hanzo_mcp/tools/filesystem/diff.py +71 -59
  95. hanzo_mcp/tools/filesystem/directory_tree.py +7 -7
  96. hanzo_mcp/tools/filesystem/directory_tree_paginated.py +49 -37
  97. hanzo_mcp/tools/filesystem/edit.py +4 -4
  98. hanzo_mcp/tools/filesystem/find.py +173 -80
  99. hanzo_mcp/tools/filesystem/find_files.py +73 -52
  100. hanzo_mcp/tools/filesystem/git_search.py +157 -104
  101. hanzo_mcp/tools/filesystem/grep.py +8 -8
  102. hanzo_mcp/tools/filesystem/multi_edit.py +4 -8
  103. hanzo_mcp/tools/filesystem/read.py +12 -10
  104. hanzo_mcp/tools/filesystem/rules_tool.py +59 -43
  105. hanzo_mcp/tools/filesystem/search_tool.py +263 -207
  106. hanzo_mcp/tools/filesystem/symbols_tool.py +94 -54
  107. hanzo_mcp/tools/filesystem/tree.py +35 -33
  108. hanzo_mcp/tools/filesystem/unix_aliases.py +13 -18
  109. hanzo_mcp/tools/filesystem/watch.py +37 -36
  110. hanzo_mcp/tools/filesystem/write.py +4 -8
  111. hanzo_mcp/tools/jupyter/__init__.py +4 -4
  112. hanzo_mcp/tools/jupyter/base.py +4 -5
  113. hanzo_mcp/tools/jupyter/jupyter.py +67 -47
  114. hanzo_mcp/tools/jupyter/notebook_edit.py +4 -4
  115. hanzo_mcp/tools/jupyter/notebook_read.py +4 -7
  116. hanzo_mcp/tools/llm/__init__.py +5 -7
  117. hanzo_mcp/tools/llm/consensus_tool.py +72 -52
  118. hanzo_mcp/tools/llm/llm_manage.py +101 -60
  119. hanzo_mcp/tools/llm/llm_tool.py +226 -166
  120. hanzo_mcp/tools/llm/provider_tools.py +25 -26
  121. hanzo_mcp/tools/lsp/__init__.py +1 -1
  122. hanzo_mcp/tools/lsp/lsp_tool.py +228 -143
  123. hanzo_mcp/tools/mcp/__init__.py +2 -3
  124. hanzo_mcp/tools/mcp/mcp_add.py +27 -25
  125. hanzo_mcp/tools/mcp/mcp_remove.py +7 -8
  126. hanzo_mcp/tools/mcp/mcp_stats.py +23 -22
  127. hanzo_mcp/tools/mcp/mcp_tool.py +129 -98
  128. hanzo_mcp/tools/memory/__init__.py +39 -21
  129. hanzo_mcp/tools/memory/knowledge_tools.py +124 -99
  130. hanzo_mcp/tools/memory/memory_tools.py +90 -108
  131. hanzo_mcp/tools/search/__init__.py +7 -2
  132. hanzo_mcp/tools/search/find_tool.py +297 -212
  133. hanzo_mcp/tools/search/unified_search.py +366 -314
  134. hanzo_mcp/tools/shell/__init__.py +8 -7
  135. hanzo_mcp/tools/shell/auto_background.py +56 -49
  136. hanzo_mcp/tools/shell/base.py +1 -1
  137. hanzo_mcp/tools/shell/base_process.py +75 -75
  138. hanzo_mcp/tools/shell/bash_session.py +2 -2
  139. hanzo_mcp/tools/shell/bash_session_executor.py +4 -4
  140. hanzo_mcp/tools/shell/bash_tool.py +24 -31
  141. hanzo_mcp/tools/shell/command_executor.py +12 -12
  142. hanzo_mcp/tools/shell/logs.py +43 -33
  143. hanzo_mcp/tools/shell/npx.py +13 -13
  144. hanzo_mcp/tools/shell/npx_background.py +24 -21
  145. hanzo_mcp/tools/shell/npx_tool.py +18 -22
  146. hanzo_mcp/tools/shell/open.py +19 -21
  147. hanzo_mcp/tools/shell/pkill.py +31 -26
  148. hanzo_mcp/tools/shell/process_tool.py +32 -32
  149. hanzo_mcp/tools/shell/processes.py +57 -58
  150. hanzo_mcp/tools/shell/run_background.py +24 -25
  151. hanzo_mcp/tools/shell/run_command.py +5 -5
  152. hanzo_mcp/tools/shell/run_command_windows.py +5 -5
  153. hanzo_mcp/tools/shell/session_storage.py +3 -3
  154. hanzo_mcp/tools/shell/streaming_command.py +141 -126
  155. hanzo_mcp/tools/shell/uvx.py +24 -25
  156. hanzo_mcp/tools/shell/uvx_background.py +35 -33
  157. hanzo_mcp/tools/shell/uvx_tool.py +18 -22
  158. hanzo_mcp/tools/todo/__init__.py +6 -2
  159. hanzo_mcp/tools/todo/todo.py +50 -37
  160. hanzo_mcp/tools/todo/todo_read.py +5 -8
  161. hanzo_mcp/tools/todo/todo_write.py +5 -7
  162. hanzo_mcp/tools/vector/__init__.py +40 -28
  163. hanzo_mcp/tools/vector/ast_analyzer.py +176 -143
  164. hanzo_mcp/tools/vector/git_ingester.py +170 -179
  165. hanzo_mcp/tools/vector/index_tool.py +96 -44
  166. hanzo_mcp/tools/vector/infinity_store.py +283 -228
  167. hanzo_mcp/tools/vector/mock_infinity.py +39 -40
  168. hanzo_mcp/tools/vector/project_manager.py +88 -78
  169. hanzo_mcp/tools/vector/vector.py +59 -42
  170. hanzo_mcp/tools/vector/vector_index.py +30 -27
  171. hanzo_mcp/tools/vector/vector_search.py +64 -45
  172. hanzo_mcp/types.py +6 -4
  173. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/METADATA +1 -1
  174. hanzo_mcp-0.8.1.dist-info/RECORD +185 -0
  175. hanzo_mcp-0.7.7.dist-info/RECORD +0 -182
  176. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/WHEEL +0 -0
  177. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/entry_points.txt +0 -0
  178. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/vector/infinity_store.py

@@ -2,24 +2,27 @@
 
 import json
 import hashlib
+from typing import Any, Dict, List, Tuple, Optional
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
 from dataclasses import dataclass
 
 try:
     import infinity_embedded
+
     INFINITY_AVAILABLE = True
 except ImportError:
     # Use mock implementation when infinity_embedded is not available
     from . import mock_infinity as infinity_embedded
+
     INFINITY_AVAILABLE = True  # Mock is always available
 
-from .ast_analyzer import ASTAnalyzer, FileAST, Symbol, create_symbol_embedding_text
+from .ast_analyzer import Symbol, FileAST, ASTAnalyzer, create_symbol_embedding_text
 
 
 @dataclass
 class Document:
     """Document representation for vector storage."""
+
     id: str
     content: str
     metadata: Dict[str, Any]
@@ -30,6 +33,7 @@ class Document:
 @dataclass
 class SearchResult:
     """Search result from vector database."""
+
     document: Document
     score: float
     distance: float
@@ -38,6 +42,7 @@ class SearchResult:
 @dataclass
 class SymbolSearchResult:
     """Search result for symbols."""
+
     symbol: Symbol
     score: float
     context_document: Optional[Document] = None
@@ -46,6 +51,7 @@ class SymbolSearchResult:
 @dataclass
 class UnifiedSearchResult:
     """Search result combining text, vector, and symbol search."""
+
     type: str  # 'document', 'symbol', 'reference'
     content: str
     file_path: str
@@ -58,7 +64,7 @@ class UnifiedSearchResult:
 
 class InfinityVectorStore:
     """Local vector database using Infinity."""
-
+
     def __init__(
         self,
         data_path: Optional[str] = None,
@@ -66,43 +72,46 @@ class InfinityVectorStore:
         dimension: int = 1536,  # Default for OpenAI text-embedding-3-small
     ):
         """Initialize the Infinity vector store.
-
+
         Args:
             data_path: Path to store vector database (default: ~/.config/hanzo/vector-store)
             embedding_model: Embedding model to use
             dimension: Vector dimension (must match embedding model)
         """
         if not INFINITY_AVAILABLE:
-            raise ImportError("infinity_embedded is required for vector store functionality")
-
+            raise ImportError(
+                "infinity_embedded is required for vector store functionality"
+            )
+
         # Set up data path
         if data_path:
             self.data_path = Path(data_path)
         else:
             from hanzo_mcp.config.settings import get_config_dir
+
             self.data_path = get_config_dir() / "vector-store"
-
+
         self.data_path.mkdir(parents=True, exist_ok=True)
-
+
         self.embedding_model = embedding_model
         self.dimension = dimension
-
+
         # Initialize AST analyzer
         self.ast_analyzer = ASTAnalyzer()
-
+
         # Connect to Infinity
         self.infinity = infinity_embedded.connect(str(self.data_path))
         self.db = self.infinity.get_database("hanzo_mcp")
-
+
         # Initialize tables
         self._initialize_tables()
-
+
     def _initialize_tables(self):
         """Initialize database tables if they don't exist."""
         # Documents table
         try:
             self.documents_table = self.db.get_table("documents")
-        except:
+        except Exception:
             self.documents_table = self.db.create_table(
                 "documents",
                 {
@@ -112,13 +121,13 @@ class InfinityVectorStore:
                     "chunk_index": {"type": "integer"},
                     "metadata": {"type": "varchar"},  # JSON string
                     "embedding": {"type": f"vector,{self.dimension},float"},
-                }
+                },
             )
-
+
         # Symbols table for code symbols
         try:
             self.symbols_table = self.db.get_table("symbols")
-        except:
+        except Exception:
             self.symbols_table = self.db.create_table(
                 "symbols",
                 {
@@ -134,13 +143,13 @@ class InfinityVectorStore:
                     "docstring": {"type": "varchar"},
                     "metadata": {"type": "varchar"},  # JSON string
                     "embedding": {"type": f"vector,{self.dimension},float"},
-                }
+                },
            )
-
+
         # AST table for storing complete file ASTs
         try:
             self.ast_table = self.db.get_table("ast_files")
-        except:
+        except Exception:
             self.ast_table = self.db.create_table(
                 "ast_files",
                 {
@@ -149,13 +158,13 @@ class InfinityVectorStore:
                     "language": {"type": "varchar"},
                     "ast_data": {"type": "varchar"},  # JSON string of complete AST
                     "last_updated": {"type": "varchar"},  # ISO timestamp
-                }
+                },
             )
-
+
         # References table for cross-file references
         try:
             self.references_table = self.db.get_table("references")
-        except:
+        except Exception:
             self.references_table = self.db.create_table(
                 "references",
                 {
@@ -163,18 +172,22 @@ class InfinityVectorStore:
                     "source_file": {"type": "varchar"},
                     "target_file": {"type": "varchar"},
                     "symbol_name": {"type": "varchar"},
-                    "reference_type": {"type": "varchar"},  # import, call, inheritance, etc.
+                    "reference_type": {
+                        "type": "varchar"
+                    },  # import, call, inheritance, etc.
                     "line_number": {"type": "integer"},
                     "metadata": {"type": "varchar"},  # JSON string
-                }
+                },
             )
-
-    def _generate_doc_id(self, content: str, file_path: str = "", chunk_index: int = 0) -> str:
+
+    def _generate_doc_id(
+        self, content: str, file_path: str = "", chunk_index: int = 0
+    ) -> str:
         """Generate a unique document ID."""
         content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]
         path_hash = hashlib.sha256(file_path.encode()).hexdigest()[:8]
         return f"doc_{path_hash}_{chunk_index}_{content_hash}"
-
+
     def add_document(
         self,
         content: str,
@@ -184,39 +197,43 @@ class InfinityVectorStore:
         embedding: Optional[List[float]] = None,
     ) -> str:
         """Add a document to the vector store.
-
+
         Args:
             content: Document content
             metadata: Additional metadata
             file_path: Source file path
             chunk_index: Chunk index if document is part of larger file
             embedding: Pre-computed embedding (if None, will compute)
-
+
         Returns:
             Document ID
         """
         doc_id = self._generate_doc_id(content, file_path or "", chunk_index)
-
+
         # Generate embedding if not provided
         if embedding is None:
             embedding = self._generate_embedding(content)
-
+
         # Prepare metadata
         metadata = metadata or {}
         metadata_json = json.dumps(metadata)
-
+
         # Insert document
-        self.documents_table.insert([{
-            "id": doc_id,
-            "content": content,
-            "file_path": file_path or "",
-            "chunk_index": chunk_index,
-            "metadata": metadata_json,
-            "embedding": embedding,
-        }])
-
+        self.documents_table.insert(
+            [
+                {
+                    "id": doc_id,
+                    "content": content,
+                    "file_path": file_path or "",
+                    "chunk_index": chunk_index,
+                    "metadata": metadata_json,
+                    "embedding": embedding,
+                }
+            ]
+        )
+
         return doc_id
-
+
     def add_file(
         self,
         file_path: str,
@@ -225,45 +242,47 @@ class InfinityVectorStore:
         metadata: Dict[str, Any] = None,
     ) -> List[str]:
         """Add a file to the vector store by chunking it.
-
+
         Args:
             file_path: Path to the file to add
             chunk_size: Maximum characters per chunk
             chunk_overlap: Characters to overlap between chunks
             metadata: Additional metadata for all chunks
-
+
         Returns:
             List of document IDs for all chunks
         """
         path = Path(file_path)
         if not path.exists():
             raise FileNotFoundError(f"File not found: {file_path}")
-
+
         # Read file content
         try:
-            content = path.read_text(encoding='utf-8')
+            content = path.read_text(encoding="utf-8")
         except UnicodeDecodeError:
             # Try with different encoding
-            content = path.read_text(encoding='latin-1')
-
+            content = path.read_text(encoding="latin-1")
+
         # Chunk the content
         chunks = self._chunk_text(content, chunk_size, chunk_overlap)
-
+
         # Add metadata
         file_metadata = metadata or {}
-        file_metadata.update({
-            "file_name": path.name,
-            "file_extension": path.suffix,
-            "file_size": path.stat().st_size,
-        })
-
+        file_metadata.update(
+            {
+                "file_name": path.name,
+                "file_extension": path.suffix,
+                "file_size": path.stat().st_size,
+            }
+        )
+
         # Add each chunk
         doc_ids = []
         for i, chunk in enumerate(chunks):
             chunk_metadata = file_metadata.copy()
             chunk_metadata["chunk_number"] = i
             chunk_metadata["total_chunks"] = len(chunks)
-
+
             doc_id = self.add_document(
                 content=chunk,
                 metadata=chunk_metadata,
@@ -271,9 +290,9 @@ class InfinityVectorStore:
                 chunk_index=i,
             )
             doc_ids.append(doc_id)
-
+
         return doc_ids
-
+
     def add_file_with_ast(
         self,
         file_path: str,
@@ -282,138 +301,146 @@ class InfinityVectorStore:
         metadata: Dict[str, Any] = None,
     ) -> Tuple[List[str], Optional[FileAST]]:
         """Add a file with full AST analysis and symbol extraction.
-
+
         Args:
             file_path: Path to the file to add
             chunk_size: Maximum characters per chunk for content
             chunk_overlap: Characters to overlap between chunks
             metadata: Additional metadata for all chunks
-
+
         Returns:
             Tuple of (document IDs for content chunks, FileAST object)
         """
         path = Path(file_path)
         if not path.exists():
             raise FileNotFoundError(f"File not found: {file_path}")
-
+
         # First add file content using existing method
         doc_ids = self.add_file(file_path, chunk_size, chunk_overlap, metadata)
-
+
         # Analyze AST and symbols
         file_ast = self.ast_analyzer.analyze_file(file_path)
         if not file_ast:
             return doc_ids, None
-
+
         # Store complete AST
         self._store_file_ast(file_ast)
-
+
         # Store individual symbols with embeddings
         self._store_symbols(file_ast.symbols)
-
+
         # Store cross-references
         self._store_references(file_ast)
-
+
         return doc_ids, file_ast
-
+
     def _store_file_ast(self, file_ast: FileAST):
         """Store complete file AST information."""
         from datetime import datetime
-
+
         # Remove existing AST for this file
         try:
             self.ast_table.delete(f"file_path = '{file_ast.file_path}'")
-        except:
+        except Exception:
             pass
-
+
         # Insert new AST
-        self.ast_table.insert([{
-            "file_path": file_ast.file_path,
-            "file_hash": file_ast.file_hash,
-            "language": file_ast.language,
-            "ast_data": json.dumps(file_ast.to_dict()),
-            "last_updated": datetime.now().isoformat(),
-        }])
-
+        self.ast_table.insert(
+            [
+                {
+                    "file_path": file_ast.file_path,
+                    "file_hash": file_ast.file_hash,
+                    "language": file_ast.language,
+                    "ast_data": json.dumps(file_ast.to_dict()),
+                    "last_updated": datetime.now().isoformat(),
+                }
+            ]
+        )
+
     def _store_symbols(self, symbols: List[Symbol]):
         """Store symbols with vector embeddings."""
         if not symbols:
             return
-
+
         # Remove existing symbols for these files
         file_paths = list(set(symbol.file_path for symbol in symbols))
         for file_path in file_paths:
             try:
                 self.symbols_table.delete(f"file_path = '{file_path}'")
-            except:
+            except Exception:
                 pass
-
+
         # Insert new symbols
         symbol_records = []
         for symbol in symbols:
             # Create embedding text for symbol
             embedding_text = create_symbol_embedding_text(symbol)
             embedding = self._generate_embedding(embedding_text)
-
+
             # Generate symbol ID
             symbol_id = self._generate_symbol_id(symbol)
-
+
             # Prepare metadata
             symbol_metadata = {
                 "references": symbol.references,
                 "embedding_text": embedding_text,
             }
-
-            symbol_records.append({
-                "id": symbol_id,
-                "name": symbol.name,
-                "type": symbol.type,
-                "file_path": symbol.file_path,
-                "line_start": symbol.line_start,
-                "line_end": symbol.line_end,
-                "scope": symbol.scope or "",
-                "parent": symbol.parent or "",
-                "signature": symbol.signature or "",
-                "docstring": symbol.docstring or "",
-                "metadata": json.dumps(symbol_metadata),
-                "embedding": embedding,
-            })
-
+
+            symbol_records.append(
+                {
+                    "id": symbol_id,
+                    "name": symbol.name,
+                    "type": symbol.type,
+                    "file_path": symbol.file_path,
+                    "line_start": symbol.line_start,
+                    "line_end": symbol.line_end,
+                    "scope": symbol.scope or "",
+                    "parent": symbol.parent or "",
+                    "signature": symbol.signature or "",
+                    "docstring": symbol.docstring or "",
+                    "metadata": json.dumps(symbol_metadata),
+                    "embedding": embedding,
+                }
+            )
+
         if symbol_records:
             self.symbols_table.insert(symbol_records)
-
+
     def _store_references(self, file_ast: FileAST):
         """Store cross-file references."""
         if not file_ast.dependencies:
             return
-
+
         # Remove existing references for this file
         try:
             self.references_table.delete(f"source_file = '{file_ast.file_path}'")
-        except:
+        except Exception:
             pass
-
+
         # Insert new references
         reference_records = []
         for i, dependency in enumerate(file_ast.dependencies):
             ref_id = f"{file_ast.file_path}_{dependency}_{i}"
-            reference_records.append({
-                "id": ref_id,
-                "source_file": file_ast.file_path,
-                "target_file": dependency,
-                "symbol_name": dependency,
-                "reference_type": "import",
-                "line_number": 0,  # Could be enhanced to track actual line numbers
-                "metadata": json.dumps({}),
-            })
-
+            reference_records.append(
+                {
+                    "id": ref_id,
+                    "source_file": file_ast.file_path,
+                    "target_file": dependency,
+                    "symbol_name": dependency,
+                    "reference_type": "import",
+                    "line_number": 0,  # Could be enhanced to track actual line numbers
+                    "metadata": json.dumps({}),
+                }
            )
+
         if reference_records:
             self.references_table.insert(reference_records)
-
+
     def _generate_symbol_id(self, symbol: Symbol) -> str:
         """Generate unique symbol ID."""
         text = f"{symbol.file_path}_{symbol.type}_{symbol.name}_{symbol.line_start}"
         return hashlib.sha256(text.encode()).hexdigest()[:16]
-
+
     def search_symbols(
         self,
         query: str,
@@ -423,37 +450,37 @@ class InfinityVectorStore:
         score_threshold: float = 0.0,
     ) -> List[SymbolSearchResult]:
         """Search for symbols using vector similarity.
-
+
         Args:
             query: Search query
             symbol_type: Filter by symbol type (function, class, variable, etc.)
             file_path: Filter by file path
             limit: Maximum number of results
             score_threshold: Minimum similarity score
-
+
         Returns:
             List of symbol search results
         """
         # Generate query embedding
         query_embedding = self._generate_embedding(query)
-
+
         # Build search query
         search_query = self.symbols_table.output(["*"]).match_dense(
-            "embedding", 
-            query_embedding, 
-            "float", 
+            "embedding",
+            query_embedding,
+            "float",
             "ip",  # Inner product
-            limit * 2  # Get more results for filtering
+            limit * 2,  # Get more results for filtering
         )
-
+
         # Apply filters
         if symbol_type:
             search_query = search_query.filter(f"type = '{symbol_type}'")
         if file_path:
             search_query = search_query.filter(f"file_path = '{file_path}'")
-
+
         search_results = search_query.to_pl()
-
+
         # Convert to SymbolSearchResult objects
         results = []
         for row in search_results.iter_rows(named=True):
@@ -462,9 +489,9 @@ class InfinityVectorStore:
             # Parse metadata
             try:
                 metadata = json.loads(row["metadata"])
-            except:
+            except Exception:
                 metadata = {}
-
+
             # Create Symbol object
             symbol = Symbol(
                 name=row["name"],
@@ -473,21 +500,23 @@ class InfinityVectorStore:
                 line_start=row["line_start"],
                 line_end=row["line_end"],
                 column_start=0,  # Not stored in table
-                column_end=0,   # Not stored in table
+                column_end=0,  # Not stored in table
                 scope=row["scope"],
                 parent=row["parent"] if row["parent"] else None,
                 docstring=row["docstring"] if row["docstring"] else None,
                 signature=row["signature"] if row["signature"] else None,
                 references=metadata.get("references", []),
             )
-
-            results.append(SymbolSearchResult(
-                symbol=symbol,
-                score=score,
-            ))
-
+
+            results.append(
+                SymbolSearchResult(
+                    symbol=symbol,
+                    score=score,
+                )
+            )
+
         return results[:limit]
-
+
     def search_ast_nodes(
         self,
         file_path: str,
@@ -495,24 +524,28 @@ class InfinityVectorStore:
         node_name: Optional[str] = None,
     ) -> Optional[FileAST]:
         """Search AST nodes within a specific file.
-
+
         Args:
             file_path: File to search in
             node_type: Filter by AST node type
             node_name: Filter by node name
-
+
         Returns:
             FileAST object if file found, None otherwise
         """
         try:
-            results = self.ast_table.output(["*"]).filter(f"file_path = '{file_path}'").to_pl()
-
+            results = (
+                self.ast_table.output(["*"])
+                .filter(f"file_path = '{file_path}'")
+                .to_pl()
+            )
+
             if len(results) == 0:
                 return None
-
+
             row = next(results.iter_rows(named=True))
             ast_data = json.loads(row["ast_data"])
-
+
             # Reconstruct FileAST object
             file_ast = FileAST(
                 file_path=ast_data["file_path"],
@@ -524,44 +557,52 @@ class InfinityVectorStore:
                 exports=ast_data["exports"],
                 dependencies=ast_data["dependencies"],
             )
-
+
             return file_ast
-
+
         except Exception as e:
             import logging
+
             logger = logging.getLogger(__name__)
             logger.error(f"Error searching AST nodes: {e}")
             return None
-
+
     def get_file_references(self, file_path: str) -> List[Dict[str, Any]]:
         """Get all files that reference the given file.
-
+
         Args:
             file_path: File to find references for
-
+
         Returns:
             List of reference information
         """
         try:
-            results = self.references_table.output(["*"]).filter(f"target_file = '{file_path}'").to_pl()
-
+            results = (
+                self.references_table.output(["*"])
+                .filter(f"target_file = '{file_path}'")
+                .to_pl()
+            )
+
             references = []
             for row in results.iter_rows(named=True):
-                references.append({
-                    "source_file": row["source_file"],
-                    "symbol_name": row["symbol_name"],
-                    "reference_type": row["reference_type"],
-                    "line_number": row["line_number"],
-                })
-
+                references.append(
+                    {
+                        "source_file": row["source_file"],
+                        "symbol_name": row["symbol_name"],
+                        "reference_type": row["reference_type"],
+                        "line_number": row["line_number"],
+                    }
+                )
+
             return references
-
+
         except Exception as e:
             import logging
+
             logger = logging.getLogger(__name__)
             logger.error(f"Error getting file references: {e}")
             return []
-
+
     def search(
         self,
         query: str,
@@ -570,37 +611,41 @@ class InfinityVectorStore:
         filters: Dict[str, Any] = None,
     ) -> List[SearchResult]:
         """Search for similar documents.
-
+
         Args:
             query: Search query
             limit: Maximum number of results
             score_threshold: Minimum similarity score
             filters: Metadata filters (not yet implemented)
-
+
         Returns:
             List of search results
         """
         # Generate query embedding
         query_embedding = self._generate_embedding(query)
-
+
         # Perform vector search
-        search_results = self.documents_table.output(["*"]).match_dense(
-            "embedding",
-            query_embedding,
-            "float",
-            "ip",  # Inner product (cosine similarity)
-            limit
-        ).to_pl()
-
+        search_results = (
+            self.documents_table.output(["*"])
+            .match_dense(
+                "embedding",
+                query_embedding,
+                "float",
+                "ip",  # Inner product (cosine similarity)
+                limit,
+            )
+            .to_pl()
+        )
+
         # Convert to SearchResult objects
         results = []
         for row in search_results.iter_rows(named=True):
             # Parse metadata
             try:
                 metadata = json.loads(row["metadata"])
-            except:
+            except Exception:
                 metadata = {}
-
+
             # Create document
             document = Document(
                 id=row["id"],
@@ -609,64 +654,70 @@ class InfinityVectorStore:
                 file_path=row["file_path"] if row["file_path"] else None,
                 chunk_index=row["chunk_index"],
             )
-
+
             # Score is the similarity (higher is better)
             score = row.get("score", 0.0)
             distance = 1.0 - score  # Convert similarity to distance
-
+
             if score >= score_threshold:
-                results.append(SearchResult(
-                    document=document,
-                    score=score,
-                    distance=distance,
-                ))
-
+                results.append(
+                    SearchResult(
+                        document=document,
+                        score=score,
+                        distance=distance,
+                    )
+                )
+
         return results
-
+
     def delete_document(self, doc_id: str) -> bool:
         """Delete a document by ID.
-
+
         Args:
             doc_id: Document ID to delete
-
+
         Returns:
             True if document was deleted
         """
         try:
             self.documents_table.delete(f"id = '{doc_id}'")
             return True
-        except:
+        except Exception:
             return False
-
+
     def delete_file(self, file_path: str) -> int:
         """Delete all documents from a specific file.
-
+
         Args:
             file_path: File path to delete documents for
-
+
         Returns:
             Number of documents deleted
         """
         try:
             # Get count first
-            results = self.documents_table.output(["id"]).filter(f"file_path = '{file_path}'").to_pl()
+            results = (
+                self.documents_table.output(["id"])
+                .filter(f"file_path = '{file_path}'")
+                .to_pl()
+            )
             count = len(results)
-
+
             # Delete all documents for this file
             self.documents_table.delete(f"file_path = '{file_path}'")
             return count
-        except:
+        except Exception:
             return 0
-
+
     def list_files(self) -> List[Dict[str, Any]]:
         """List all indexed files.
-
+
         Returns:
             List of file information
         """
         try:
             results = self.documents_table.output(["file_path", "metadata"]).to_pl()
-
+
             files = {}
             for row in results.iter_rows(named=True):
                 file_path = row["file_path"]
@@ -675,63 +726,66 @@ class InfinityVectorStore:
                         metadata = json.loads(row["metadata"])
                         files[file_path] = {
                             "file_path": file_path,
-                            "file_name": metadata.get("file_name", Path(file_path).name),
+                            "file_name": metadata.get(
+                                "file_name", Path(file_path).name
+                            ),
                             "file_size": metadata.get("file_size", 0),
                             "total_chunks": metadata.get("total_chunks", 1),
                         }
-                    except:
+                    except Exception:
                         files[file_path] = {
                             "file_path": file_path,
                             "file_name": Path(file_path).name,
                         }
-
+
             return list(files.values())
-        except:
+        except Exception:
             return []
-
+
     def _chunk_text(self, text: str, chunk_size: int, overlap: int) -> List[str]:
         """Split text into overlapping chunks."""
         if len(text) <= chunk_size:
             return [text]
-
+
         chunks = []
         start = 0
-
+
         while start < len(text):
             end = start + chunk_size
-
+
             # Try to break at word boundary
             if end < len(text):
                 # Look back for a good break point
                 break_point = end
                 for i in range(end - 100, start + 100, -1):
-                    if i > 0 and text[i] in '\n\r.!?':
+                    if i > 0 and text[i] in "\n\r.!?":
                         break_point = i + 1
                         break
                 end = break_point
-
+
             chunk = text[start:end].strip()
             if chunk:
                 chunks.append(chunk)
-
+
             start = max(start + chunk_size - overlap, end)
-
+
         return chunks
-
+
     def _generate_embedding(self, text: str) -> List[float]:
         """Generate embedding for text.
-
+
         For now, this returns a dummy embedding. In a real implementation,
         you would call an embedding API (OpenAI, Cohere, etc.) or use a local model.
         """
         # This is a placeholder - you would implement actual embedding generation here
         # For now, return a random embedding of the correct dimension
         import random
+
         return [random.random() for _ in range(self.dimension)]
-
+
     async def get_stats(self) -> Dict[str, Any]:
         """Get statistics about the vector store.
-
+
         Returns:
             Dictionary with statistics
         """
@@ -739,30 +793,30 @@ class InfinityVectorStore:
             # Get document count
             doc_count_result = self.documents_table.output(["count(*)"]).to_pl()
             doc_count = doc_count_result.item(0, 0) if len(doc_count_result) > 0 else 0
-
+
             # Get unique file count
             file_result = self.documents_table.output(["file_path"]).to_pl()
             unique_files = set()
             for row in file_result.iter_rows():
                 if row[0]:
                     unique_files.add(row[0])
-
+
             # Get symbol count
             symbol_count = 0
             try:
                 symbol_result = self.symbols_table.output(["count(*)"]).to_pl()
                 symbol_count = symbol_result.item(0, 0) if len(symbol_result) > 0 else 0
-            except:
+            except Exception:
                 pass
-
+
             # Get AST count
             ast_count = 0
             try:
                 ast_result = self.ast_table.output(["count(*)"]).to_pl()
                 ast_count = ast_result.item(0, 0) if len(ast_result) > 0 else 0
-            except:
+            except Exception:
                 pass
-
+
             return {
                 "document_count": doc_count,
                 "vector_count": doc_count,  # Each document has a vector
@@ -779,57 +833,58 @@ class InfinityVectorStore:
                 "document_count": 0,
                 "vector_count": 0,
             }
-
+
     async def clear(self) -> bool:
         """Clear all data from the vector store.
-
+
         Returns:
             True if successful
         """
         try:
             # Delete all records from all tables
             self.documents_table.delete()
-
+
             try:
                 self.symbols_table.delete()
-            except:
+            except Exception:
                 pass
-
+
             try:
                 self.ast_table.delete()
-            except:
+            except Exception:
                 pass
-
+
             try:
                 self.references_table.delete()
-            except:
+            except Exception:
                 pass
-
+
             return True
         except Exception as e:
             import logging
+
             logger = logging.getLogger(__name__)
             logger.error(f"Error clearing vector store: {e}")
             return False
-
+
     async def index_document(
         self,
         content: str,
         metadata: Dict[str, Any] = None,
    ) -> str:
         """Async version of add_document for consistency.
-
+
         Args:
             content: Document content
             metadata: Additional metadata
-
+
         Returns:
             Document ID
         """
         file_path = metadata.get("path") if metadata else None
         return self.add_document(content, metadata, file_path)
-
+
     def close(self):
         """Close the database connection."""
-        if hasattr(self, 'infinity'):
-            self.infinity.disconnect()
+        if hasattr(self, "infinity"):
+            self.infinity.disconnect()
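For orientation, here is a minimal, hypothetical usage sketch of the InfinityVectorStore API touched by this diff, based only on the signatures and docstrings visible above. The data path and file name are illustrative, the chunk_size/chunk_overlap values are arbitrary, and because _generate_embedding is still the module's own random-vector placeholder, the similarity scores returned are not meaningful.

# Hypothetical demo of InfinityVectorStore; if infinity_embedded is not
# installed, the module falls back to its bundled mock_infinity backend.
from hanzo_mcp.tools.vector.infinity_store import InfinityVectorStore

store = InfinityVectorStore(data_path="/tmp/hanzo-vector-demo")  # path is illustrative

# Chunk and index a file; one document ID comes back per chunk.
doc_ids = store.add_file("example.py", chunk_size=1000, chunk_overlap=100)
print(f"indexed {len(doc_ids)} chunks")

# Dense-vector search over indexed document chunks.
for result in store.search("connect to the vector database", limit=5):
    print(result.score, result.document.file_path)

# Symbol-level search; the symbols table is populated by add_file_with_ast.
for hit in store.search_symbols("connect", symbol_type="function", limit=5):
    print(hit.score, hit.symbol.name, hit.symbol.file_path)

store.close()

The sketch assumes the default table schemas, which _initialize_tables() creates on first use as shown in the hunks above.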