emdash-core 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,692 @@
|
|
|
1
|
+
"""Kuzu database connection management."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import json
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional, Generator, Any
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
|
|
10
|
+
import kuzu
|
|
11
|
+
|
|
12
|
+
from ..core.config import KuzuConfig, get_config
|
|
13
|
+
from ..core.exceptions import DatabaseConnectionError
|
|
14
|
+
from ..utils.logger import log
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Lock file constants
# Advisory JSON lock file written next to the database directory; used by
# acquire_write_lock()/release_write_lock() to coordinate writers across processes.
LOCK_FILE_NAME = "kuzu.lock"
LOCK_STALE_SECONDS = 1800  # 30 minutes for long operations like indexing
LOCK_WRITE_TIMEOUT = 60  # Wait up to 60 seconds to acquire write lock
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class KuzuQueryResult:
    """Wrapper for Kuzu query results providing Neo4j-compatible API.

    Rows are pulled lazily from the underlying kuzu.QueryResult the first
    time any accessor is called, then served from an in-memory list.
    """

    def __init__(self, result: kuzu.QueryResult):
        self._result = result
        self._columns = result.get_column_names()
        self._rows: list[dict] = []
        self._consumed = False

    def _consume(self):
        """Drain the underlying result into self._rows, at most once."""
        if self._consumed:
            return
        columns = self._columns
        source = self._result
        while source.has_next():
            self._rows.append(dict(zip(columns, source.get_next())))
        self._consumed = True

    def single(self) -> Optional[dict]:
        """Return the first record, or None if the result set is empty
        (Neo4j-compatible API)."""
        self._consume()
        if not self._rows:
            return None
        return self._rows[0]

    def __iter__(self):
        """Iterate over records as column-name -> value dicts."""
        self._consume()
        return iter(self._rows)

    def __len__(self):
        """Number of records in the result set."""
        self._consume()
        return len(self._rows)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class KuzuSessionWrapper:
    """Wrapper providing Neo4j-compatible session API for Kuzu."""

    def __init__(self, conn: kuzu.Connection):
        self._conn = conn

    def run(self, query: str, **parameters) -> KuzuQueryResult:
        """Execute query with Neo4j-compatible API.

        Args:
            query: Cypher query
            **parameters: Query parameters as keyword arguments

        Returns:
            KuzuQueryResult with Neo4j-compatible methods

        Raises:
            DatabaseConnectionError: wrapping any failure during execution
        """
        try:
            raw = self._conn.execute(query, parameters or {})
            return KuzuQueryResult(raw)
        except Exception as exc:
            raise DatabaseConnectionError(f"Query execution failed: {exc}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class KuzuConnection:
    """Manages Kuzu embedded database connections.

    Wraps a single kuzu.Database/kuzu.Connection pair, adding connection
    retry logic, Neo4j-style session/transaction helpers, and convenience
    queries for introspection and teardown.
    """

    def __init__(self, config: Optional[KuzuConfig] = None):
        """Initialize Kuzu connection.

        Args:
            config: Kuzu configuration. If None, loads from environment.
        """
        if config is None:
            config = get_config().kuzu

        self.config = config
        self._db: Optional[kuzu.Database] = None
        self._conn: Optional[kuzu.Connection] = None

    @staticmethod
    def _used_params(query: str) -> set:
        """Return the set of $name parameter placeholders used in the query."""
        import re
        return set(re.findall(r'\$(\w+)', query))

    @staticmethod
    def _filter_params(query: str, parameters: Optional[dict]) -> dict:
        """Keep only parameters whose $name placeholder appears in the query.

        Kuzu requires the supplied parameter set to exactly match the
        placeholders used in the query, so extra entries must be dropped.
        """
        params = parameters or {}
        used = KuzuConnection._used_params(query)
        return {k: v for k, v in params.items() if k in used}

    @staticmethod
    def _list_tables(conn: kuzu.Connection) -> list[tuple]:
        """Return (name, type) pairs for every table from show_tables().

        show_tables() rows are [id, name, type, database_name, comment];
        row[0] is a numeric id, NOT the table name.
        """
        result = conn.execute("CALL show_tables() RETURN *")
        tables = []
        while result.has_next():
            row = result.get_next()
            name = row[1] if len(row) > 1 else str(row[0])
            table_type = row[2] if len(row) > 2 else "NODE"
            tables.append((name, table_type))
        return tables

    def connect(self, max_retries: int = 3, retry_delay: float = 0.5) -> kuzu.Connection:
        """Establish connection to Kuzu database.

        Uses retry logic with exponential backoff to handle transient lock issues.

        Args:
            max_retries: Maximum number of connection attempts
            retry_delay: Initial delay between retries (doubles each attempt)

        Returns:
            Kuzu connection instance

        Raises:
            DatabaseConnectionError: If connection fails after all retries
        """
        if self._conn is not None:
            return self._conn

        last_error = None
        for attempt in range(max_retries):
            try:
                # Ensure database directory exists
                db_path = Path(self.config.database_path)
                db_path.parent.mkdir(parents=True, exist_ok=True)

                if attempt == 0:
                    log.info(f"Connecting to Kuzu database at {self.config.database_path}")
                else:
                    log.info(f"Retrying Kuzu connection (attempt {attempt + 1}/{max_retries})")

                self._db = kuzu.Database(str(db_path), read_only=self.config.read_only)
                self._conn = kuzu.Connection(self._db)

                # Smoke-test the connection before handing it out
                result = self._conn.execute("RETURN 1 AS num")
                result.get_next()

                log.info("Successfully connected to Kuzu database")
                return self._conn

            except Exception as e:
                last_error = e
                error_str = str(e).lower()

                # Lock conflicts are often transient (another process holds
                # the database); retry those with exponential backoff.
                if "lock" in error_str or "could not set lock" in error_str:
                    if attempt < max_retries - 1:
                        delay = retry_delay * (2 ** attempt)
                        log.warning(f"Database lock conflict, retrying in {delay:.1f}s...")
                        time.sleep(delay)
                        continue

                # Non-retryable error or max retries reached
                break

        raise DatabaseConnectionError(f"Failed to connect to Kuzu: {last_error}")

    def close(self):
        """Close the Kuzu connection.

        Kuzu has no explicit close(); dropping the references releases the
        underlying resources once garbage-collected.
        """
        if self._conn is not None:
            log.info("Closing Kuzu connection")
            self._conn = None
        if self._db is not None:
            self._db = None

    def execute(self, query: str, parameters: Optional[dict] = None) -> list[dict]:
        """Execute a read query and return results as list of dicts.

        Args:
            query: Cypher query to execute
            parameters: Query parameters

        Returns:
            List of result dictionaries

        Raises:
            DatabaseConnectionError: If query execution fails
        """
        conn = self.connect()
        filtered_params = self._filter_params(query, parameters)

        try:
            result = conn.execute(query, filtered_params)
            columns = result.get_column_names()
            rows = []
            while result.has_next():
                values = result.get_next()
                rows.append(dict(zip(columns, values)))
            return rows
        except Exception as e:
            raise DatabaseConnectionError(f"Query execution failed: {e}")

    def execute_write(self, query: str, parameters: Optional[dict] = None) -> None:
        """Execute a write query.

        Args:
            query: Cypher query to execute
            parameters: Query parameters

        Raises:
            DatabaseConnectionError: If query execution fails
        """
        conn = self.connect()
        filtered_params = self._filter_params(query, parameters)

        try:
            conn.execute(query, filtered_params)
        except Exception as e:
            # Debug: log detailed info when there's an error
            error_str = str(e)
            if "not found" in error_str.lower():
                log.warning(f"KUZU DEBUG - Error: {e}")
                log.warning(f"KUZU DEBUG - Query: {repr(query)}")
                log.warning(f"KUZU DEBUG - Used params: {self._used_params(query)}")
                log.warning(f"KUZU DEBUG - Filtered params: {list(filtered_params.keys())}")
            raise DatabaseConnectionError(f"Write query failed: {e}")

    @contextmanager
    def session(self) -> Generator[KuzuSessionWrapper, None, None]:
        """Create a context-managed Kuzu session.

        Note: Kuzu doesn't have separate sessions like Neo4j.
        This returns a wrapper for API compatibility with Neo4j-style code.

        Yields:
            KuzuSessionWrapper with Neo4j-compatible run() method

        Example:
            with connection.session() as session:
                result = session.run("MATCH (n) RETURN count(n)")
                record = result.single()
        """
        # The underlying connection persists beyond the session; no cleanup needed.
        yield KuzuSessionWrapper(self.connect())

    @contextmanager
    def transaction(self) -> Generator[kuzu.Connection, None, None]:
        """Create a context-managed transaction.

        Note: Kuzu has automatic transaction management.
        This is provided for API compatibility.

        Yields:
            Kuzu connection
        """
        yield self.connect()

    def verify_connection(self) -> bool:
        """Verify that the connection works.

        Returns:
            True if connection is successful, False otherwise
        """
        try:
            result = self.connect().execute("RETURN 1 AS num")
            result.get_next()
            return True
        except Exception as e:
            log.error(f"Connection verification failed: {e}")
            return False

    @staticmethod
    def _count_label(conn: kuzu.Connection, label: str) -> int:
        """Best-effort node count for one label; returns 0 on any failure."""
        try:
            result = conn.execute(f"MATCH (n:{label}) RETURN count(n)")
            return result.get_next()[0] if result.has_next() else 0
        except Exception:
            return 0

    def get_database_info(self) -> dict:
        """Get information about the Kuzu database.

        Returns:
            Dictionary with database information including specific node counts
        """
        conn = self.connect()

        # Get total node count
        result = conn.execute("MATCH (n) RETURN count(n) AS node_count")
        node_count = result.get_next()[0] if result.has_next() else 0

        # Get relationship count
        result = conn.execute("MATCH ()-[r]->() RETURN count(r) AS rel_count")
        rel_count = result.get_next()[0] if result.has_next() else 0

        # Partition table names into node labels and relationship types
        labels = []
        rel_types = []
        for table_name, table_type in self._list_tables(conn):
            if table_type == "NODE":
                labels.append(table_name)
            elif table_type == "REL":
                rel_types.append(table_name)

        # Per-label counts used for index status reporting
        file_count = self._count_label(conn, "File") if "File" in labels else 0
        function_count = self._count_label(conn, "Function") if "Function" in labels else 0
        class_count = self._count_label(conn, "Class") if "Class" in labels else 0
        community_count = self._count_label(conn, "Community") if "Community" in labels else 0

        return {
            "node_count": node_count,
            "relationship_count": rel_count,
            "labels": labels,
            "relationship_types": rel_types,
            "file_count": file_count,
            "function_count": function_count,
            "class_count": class_count,
            "community_count": community_count,
        }

    def clear_database(self):
        """Clear all nodes and relationships from the database.

        Warning: This will delete all data!
        """
        log.warning("Clearing database - all data will be deleted!")
        conn = self.connect()

        # BUG FIX: this previously read row[0] (the numeric id) as the table
        # name and row[1] (the actual name) as the type, so the type never
        # equaled "REL"/"NODE" and no table was ever dropped.
        tables = self._list_tables(conn)

        # Drop relationship tables first (they reference node tables), then node tables
        for table_name, table_type in tables:
            if table_type == "REL":
                try:
                    conn.execute(f"DROP TABLE {table_name}")
                except Exception:
                    pass

        for table_name, table_type in tables:
            if table_type == "NODE":
                try:
                    conn.execute(f"DROP TABLE {table_name}")
                except Exception:
                    pass

        log.info("Database cleared successfully")

    def __enter__(self):
        """Context manager entry: eagerly connect."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: release the connection."""
        self.close()
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
# Global connection instance
# Process-wide read-write singleton, lazily created by get_connection().
_connection: Optional[KuzuConnection] = None
# Lazily created threading.Lock guarding singleton creation/replacement.
_connection_lock = None  # Will be initialized on first use
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _get_lock():
    """Lazily create and return the lock guarding the global connection."""
    global _connection_lock
    if _connection_lock is not None:
        return _connection_lock
    import threading
    _connection_lock = threading.Lock()
    return _connection_lock
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def get_connection() -> KuzuConnection:
    """Get the global Kuzu connection instance.

    Thread-safe singleton pattern ensures only one connection exists per process.
    The connection is lazily initialized on first use.
    """
    global _connection

    # Fast path: connection already exists
    existing = _connection
    if existing is not None:
        return existing

    # Slow path: create under the lock, re-checking after acquisition
    with _get_lock():
        if _connection is None:
            _connection = KuzuConnection()
            try:
                # Eagerly connect to catch issues early
                _connection.connect()
            except DatabaseConnectionError as e:
                # Still return the connection - it will retry on next execute()
                log.warning(f"Failed to connect on initialization: {e}")
        return _connection
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def set_connection(connection: KuzuConnection):
    """Set the global Kuzu connection instance.

    Closes any previously installed connection before replacing it.
    """
    global _connection
    with _get_lock():
        previous = _connection
        if previous is not None:
            previous.close()
        _connection = connection
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def configure_for_repo(repo_root: Path) -> KuzuConnection:
    """Configure and return a connection for a specific repository.

    Sets the database path to {repo_root}/.emdash/kuzu_db.
    This ensures each repository has its own isolated database.
    Any previously installed global connection is closed and replaced.

    Args:
        repo_root: Path to the repository root

    Returns:
        KuzuConnection configured for this repo
    """
    global _connection

    repo_root = Path(repo_root).resolve()
    db_path = repo_root / ".emdash" / "kuzu_db"

    with _get_lock():
        # Close existing connection if any
        if _connection is not None:
            _connection.close()

        # Create new connection with repo-specific path
        config = KuzuConfig(database_path=str(db_path))
        _connection = KuzuConnection(config)

        log.info(f"Configured database for repo: {repo_root}")
        log.info(f"Database path: {db_path}")

    return _connection
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def close_connection():
    """Close and clear the global connection.

    Call this when shutting down or before running tests that need fresh state.
    """
    global _connection
    with _get_lock():
        current = _connection
        if current is None:
            return
        current.close()
        _connection = None
        log.debug("Global connection closed")
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
# Read-only connection for concurrent access: read-only connections can
# coexist with each other and with a single writer (see get_read_connection()).
_read_connection: Optional[KuzuConnection] = None
# Lazily created threading.Lock guarding the read connection singleton.
_read_connection_lock = None
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _get_read_lock():
    """Lazily create and return the lock guarding the read connection."""
    global _read_connection_lock
    if _read_connection_lock is not None:
        return _read_connection_lock
    import threading
    _read_connection_lock = threading.Lock()
    return _read_connection_lock
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def get_read_connection() -> KuzuConnection:
    """Get a read-only connection for queries.

    Read-only connections can coexist with other read-only connections
    and with a single write connection. Use this for all query operations
    to avoid lock conflicts with write operations.

    Returns:
        KuzuConnection configured for read-only access
    """
    global _read_connection

    # Fast path: already created
    ro = _read_connection
    if ro is not None:
        return ro

    # Slow path: create under the lock, re-checking after acquisition
    with _get_read_lock():
        if _read_connection is None:
            # Clone the configured database path but force read-only mode
            base_config = get_config().kuzu
            ro_config = KuzuConfig(
                database_path=base_config.database_path,
                read_only=True
            )
            _read_connection = KuzuConnection(ro_config)
            try:
                _read_connection.connect()
                log.debug("Created read-only connection")
            except DatabaseConnectionError as e:
                log.warning(f"Failed to create read-only connection: {e}")
        return _read_connection
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def close_read_connection():
    """Close the global read-only connection."""
    global _read_connection
    with _get_read_lock():
        current = _read_connection
        if current is None:
            return
        current.close()
        _read_connection = None
        log.debug("Read-only connection closed")
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
def get_write_connection() -> KuzuConnection:
    """Get a write connection with lock acquisition.

    This is an alias for get_connection() but makes the intent clear
    that write access is needed. Note that this does NOT touch the
    kuzu.lock advisory file - callers performing writes should still
    use write_lock_context() / acquire_write_lock() for cross-process
    coordination.

    Returns:
        KuzuConnection configured for read-write access
    """
    return get_connection()
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
# Lock file management for write coordination
|
|
556
|
+
def _get_lock_file_path() -> Path:
    """Return the advisory lock file path, next to the configured database."""
    db_dir = Path(get_config().kuzu.database_path).parent
    return db_dir / LOCK_FILE_NAME
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
def _is_process_alive(pid: int) -> bool:
|
|
564
|
+
"""Check if a process with the given PID is still running."""
|
|
565
|
+
try:
|
|
566
|
+
os.kill(pid, 0)
|
|
567
|
+
return True
|
|
568
|
+
except OSError:
|
|
569
|
+
return False
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _is_lock_stale(lock_info: dict) -> bool:
    """Check if a lock file is stale (old or dead process)."""
    # Stale if the holder process no longer exists...
    pid = lock_info.get("pid")
    if pid and not _is_process_alive(pid):
        log.debug(f"Lock held by dead process {pid}")
        return True

    # ...or if the lock has outlived the allowed window.
    age = time.time() - lock_info.get("timestamp", 0)
    if age > LOCK_STALE_SECONDS:
        log.debug(f"Lock is stale ({age:.0f}s old)")
        return True

    return False
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def acquire_write_lock(operation: str = "write") -> bool:
    """Attempt to acquire the write lock.

    Writes a JSON lock file containing this process's PID, a timestamp,
    and the operation name. An existing lock blocks acquisition unless it
    is stale (dead holder or older than LOCK_STALE_SECONDS), in which
    case it is removed and taken over.

    NOTE(review): the exists-check and write_text are not atomic, so two
    processes racing here could both believe they hold the lock - confirm
    whether this advisory scheme is acceptable or should use O_CREAT|O_EXCL.

    Args:
        operation: Name of the operation (for logging)

    Returns:
        True if lock was acquired, False otherwise
    """
    lock_path = _get_lock_file_path()
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    # Check for existing lock
    if lock_path.exists():
        try:
            lock_info = json.loads(lock_path.read_text())
            if not _is_lock_stale(lock_info):
                # Live lock held by another operation/process - give up.
                holder_op = lock_info.get("operation", "unknown")
                holder_pid = lock_info.get("pid", "?")
                log.warning(f"Database locked by {holder_op} (PID {holder_pid})")
                return False
            # Stale lock - remove it
            log.info("Removing stale lock file")
            lock_path.unlink(missing_ok=True)
        except (json.JSONDecodeError, IOError):
            # Corrupted lock file - remove it
            lock_path.unlink(missing_ok=True)

    # Create lock file
    lock_info = {
        "pid": os.getpid(),
        "timestamp": time.time(),
        "operation": operation,
    }
    lock_path.write_text(json.dumps(lock_info))
    log.debug(f"Acquired write lock for {operation}")
    return True
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def release_write_lock():
    """Release the write lock if held by this process.

    Locks owned by other PIDs are left untouched; unreadable lock files
    are ignored and left for stale-lock cleanup.
    """
    lock_path = _get_lock_file_path()
    if not lock_path.exists():
        return
    try:
        info = json.loads(lock_path.read_text())
        if info.get("pid") == os.getpid():
            lock_path.unlink(missing_ok=True)
            log.debug("Released write lock")
    except (json.JSONDecodeError, IOError):
        pass
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def wait_for_write_lock(operation: str = "write", timeout: float = LOCK_WRITE_TIMEOUT) -> bool:
    """Wait to acquire the write lock with timeout.

    Args:
        operation: Name of the operation (for logging)
        timeout: Maximum seconds to wait

    Returns:
        True if lock was acquired, False if timeout
    """
    deadline = time.time() + timeout
    attempt = 0

    while time.time() < deadline:
        if acquire_write_lock(operation):
            return True

        attempt += 1
        # Exponential backoff between attempts, capped at 5 seconds.
        delay = min(1.0 * (1.5 ** attempt), 5.0)
        remaining = deadline - time.time()

        if remaining <= delay:
            # Not enough time left for another full wait - give up.
            break
        log.info(f"Waiting for database lock ({remaining:.0f}s remaining)...")
        time.sleep(delay)

    return False
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
@contextmanager
def write_lock_context(operation: str = "write", timeout: float = LOCK_WRITE_TIMEOUT):
    """Context manager for write operations with lock management.

    Usage:
        with write_lock_context("indexing"):
            # perform write operations

    Raises:
        DatabaseConnectionError if lock cannot be acquired
    """
    acquired = wait_for_write_lock(operation, timeout)
    if not acquired:
        raise DatabaseConnectionError(
            f"Could not acquire database lock for {operation}. "
            "Another process may be writing to the database."
        )
    try:
        yield
    finally:
        # Always release, even if the guarded writes raised.
        release_write_lock()