emdash-core 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. emdash_core/__init__.py +3 -0
  2. emdash_core/agent/__init__.py +37 -0
  3. emdash_core/agent/agents.py +225 -0
  4. emdash_core/agent/code_reviewer.py +476 -0
  5. emdash_core/agent/compaction.py +143 -0
  6. emdash_core/agent/context_manager.py +140 -0
  7. emdash_core/agent/events.py +338 -0
  8. emdash_core/agent/handlers.py +224 -0
  9. emdash_core/agent/inprocess_subagent.py +377 -0
  10. emdash_core/agent/mcp/__init__.py +50 -0
  11. emdash_core/agent/mcp/client.py +346 -0
  12. emdash_core/agent/mcp/config.py +302 -0
  13. emdash_core/agent/mcp/manager.py +496 -0
  14. emdash_core/agent/mcp/tool_factory.py +213 -0
  15. emdash_core/agent/prompts/__init__.py +38 -0
  16. emdash_core/agent/prompts/main_agent.py +104 -0
  17. emdash_core/agent/prompts/subagents.py +131 -0
  18. emdash_core/agent/prompts/workflow.py +136 -0
  19. emdash_core/agent/providers/__init__.py +34 -0
  20. emdash_core/agent/providers/base.py +143 -0
  21. emdash_core/agent/providers/factory.py +80 -0
  22. emdash_core/agent/providers/models.py +220 -0
  23. emdash_core/agent/providers/openai_provider.py +463 -0
  24. emdash_core/agent/providers/transformers_provider.py +217 -0
  25. emdash_core/agent/research/__init__.py +81 -0
  26. emdash_core/agent/research/agent.py +143 -0
  27. emdash_core/agent/research/controller.py +254 -0
  28. emdash_core/agent/research/critic.py +428 -0
  29. emdash_core/agent/research/macros.py +469 -0
  30. emdash_core/agent/research/planner.py +449 -0
  31. emdash_core/agent/research/researcher.py +436 -0
  32. emdash_core/agent/research/state.py +523 -0
  33. emdash_core/agent/research/synthesizer.py +594 -0
  34. emdash_core/agent/reviewer_profile.py +475 -0
  35. emdash_core/agent/rules.py +123 -0
  36. emdash_core/agent/runner.py +601 -0
  37. emdash_core/agent/session.py +262 -0
  38. emdash_core/agent/spec_schema.py +66 -0
  39. emdash_core/agent/specification.py +479 -0
  40. emdash_core/agent/subagent.py +397 -0
  41. emdash_core/agent/subagent_prompts.py +13 -0
  42. emdash_core/agent/toolkit.py +482 -0
  43. emdash_core/agent/toolkits/__init__.py +64 -0
  44. emdash_core/agent/toolkits/base.py +96 -0
  45. emdash_core/agent/toolkits/explore.py +47 -0
  46. emdash_core/agent/toolkits/plan.py +55 -0
  47. emdash_core/agent/tools/__init__.py +141 -0
  48. emdash_core/agent/tools/analytics.py +436 -0
  49. emdash_core/agent/tools/base.py +131 -0
  50. emdash_core/agent/tools/coding.py +484 -0
  51. emdash_core/agent/tools/github_mcp.py +592 -0
  52. emdash_core/agent/tools/history.py +13 -0
  53. emdash_core/agent/tools/modes.py +153 -0
  54. emdash_core/agent/tools/plan.py +206 -0
  55. emdash_core/agent/tools/plan_write.py +135 -0
  56. emdash_core/agent/tools/search.py +412 -0
  57. emdash_core/agent/tools/spec.py +341 -0
  58. emdash_core/agent/tools/task.py +262 -0
  59. emdash_core/agent/tools/task_output.py +204 -0
  60. emdash_core/agent/tools/tasks.py +454 -0
  61. emdash_core/agent/tools/traversal.py +588 -0
  62. emdash_core/agent/tools/web.py +179 -0
  63. emdash_core/analytics/__init__.py +5 -0
  64. emdash_core/analytics/engine.py +1286 -0
  65. emdash_core/api/__init__.py +5 -0
  66. emdash_core/api/agent.py +308 -0
  67. emdash_core/api/agents.py +154 -0
  68. emdash_core/api/analyze.py +264 -0
  69. emdash_core/api/auth.py +173 -0
  70. emdash_core/api/context.py +77 -0
  71. emdash_core/api/db.py +121 -0
  72. emdash_core/api/embed.py +131 -0
  73. emdash_core/api/feature.py +143 -0
  74. emdash_core/api/health.py +93 -0
  75. emdash_core/api/index.py +162 -0
  76. emdash_core/api/plan.py +110 -0
  77. emdash_core/api/projectmd.py +210 -0
  78. emdash_core/api/query.py +320 -0
  79. emdash_core/api/research.py +122 -0
  80. emdash_core/api/review.py +161 -0
  81. emdash_core/api/router.py +76 -0
  82. emdash_core/api/rules.py +116 -0
  83. emdash_core/api/search.py +119 -0
  84. emdash_core/api/spec.py +99 -0
  85. emdash_core/api/swarm.py +223 -0
  86. emdash_core/api/tasks.py +109 -0
  87. emdash_core/api/team.py +120 -0
  88. emdash_core/auth/__init__.py +17 -0
  89. emdash_core/auth/github.py +389 -0
  90. emdash_core/config.py +74 -0
  91. emdash_core/context/__init__.py +52 -0
  92. emdash_core/context/models.py +50 -0
  93. emdash_core/context/providers/__init__.py +11 -0
  94. emdash_core/context/providers/base.py +74 -0
  95. emdash_core/context/providers/explored_areas.py +183 -0
  96. emdash_core/context/providers/touched_areas.py +360 -0
  97. emdash_core/context/registry.py +73 -0
  98. emdash_core/context/reranker.py +199 -0
  99. emdash_core/context/service.py +260 -0
  100. emdash_core/context/session.py +352 -0
  101. emdash_core/core/__init__.py +104 -0
  102. emdash_core/core/config.py +454 -0
  103. emdash_core/core/exceptions.py +55 -0
  104. emdash_core/core/models.py +265 -0
  105. emdash_core/core/review_config.py +57 -0
  106. emdash_core/db/__init__.py +67 -0
  107. emdash_core/db/auth.py +134 -0
  108. emdash_core/db/models.py +91 -0
  109. emdash_core/db/provider.py +222 -0
  110. emdash_core/db/providers/__init__.py +5 -0
  111. emdash_core/db/providers/supabase.py +452 -0
  112. emdash_core/embeddings/__init__.py +24 -0
  113. emdash_core/embeddings/indexer.py +534 -0
  114. emdash_core/embeddings/models.py +192 -0
  115. emdash_core/embeddings/providers/__init__.py +7 -0
  116. emdash_core/embeddings/providers/base.py +112 -0
  117. emdash_core/embeddings/providers/fireworks.py +141 -0
  118. emdash_core/embeddings/providers/openai.py +104 -0
  119. emdash_core/embeddings/registry.py +146 -0
  120. emdash_core/embeddings/service.py +215 -0
  121. emdash_core/graph/__init__.py +26 -0
  122. emdash_core/graph/builder.py +134 -0
  123. emdash_core/graph/connection.py +692 -0
  124. emdash_core/graph/schema.py +416 -0
  125. emdash_core/graph/writer.py +667 -0
  126. emdash_core/ingestion/__init__.py +7 -0
  127. emdash_core/ingestion/change_detector.py +150 -0
  128. emdash_core/ingestion/git/__init__.py +5 -0
  129. emdash_core/ingestion/git/commit_analyzer.py +196 -0
  130. emdash_core/ingestion/github/__init__.py +6 -0
  131. emdash_core/ingestion/github/pr_fetcher.py +296 -0
  132. emdash_core/ingestion/github/task_extractor.py +100 -0
  133. emdash_core/ingestion/orchestrator.py +540 -0
  134. emdash_core/ingestion/parsers/__init__.py +10 -0
  135. emdash_core/ingestion/parsers/base_parser.py +66 -0
  136. emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
  137. emdash_core/ingestion/parsers/class_extractor.py +154 -0
  138. emdash_core/ingestion/parsers/function_extractor.py +202 -0
  139. emdash_core/ingestion/parsers/import_analyzer.py +119 -0
  140. emdash_core/ingestion/parsers/python_parser.py +123 -0
  141. emdash_core/ingestion/parsers/registry.py +72 -0
  142. emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
  143. emdash_core/ingestion/parsers/typescript_parser.py +278 -0
  144. emdash_core/ingestion/repository.py +346 -0
  145. emdash_core/models/__init__.py +38 -0
  146. emdash_core/models/agent.py +68 -0
  147. emdash_core/models/index.py +77 -0
  148. emdash_core/models/query.py +113 -0
  149. emdash_core/planning/__init__.py +7 -0
  150. emdash_core/planning/agent_api.py +413 -0
  151. emdash_core/planning/context_builder.py +265 -0
  152. emdash_core/planning/feature_context.py +232 -0
  153. emdash_core/planning/feature_expander.py +646 -0
  154. emdash_core/planning/llm_explainer.py +198 -0
  155. emdash_core/planning/similarity.py +509 -0
  156. emdash_core/planning/team_focus.py +821 -0
  157. emdash_core/server.py +153 -0
  158. emdash_core/sse/__init__.py +5 -0
  159. emdash_core/sse/stream.py +196 -0
  160. emdash_core/swarm/__init__.py +17 -0
  161. emdash_core/swarm/merge_agent.py +383 -0
  162. emdash_core/swarm/session_manager.py +274 -0
  163. emdash_core/swarm/swarm_runner.py +226 -0
  164. emdash_core/swarm/task_definition.py +137 -0
  165. emdash_core/swarm/worker_spawner.py +319 -0
  166. emdash_core/swarm/worktree_manager.py +278 -0
  167. emdash_core/templates/__init__.py +10 -0
  168. emdash_core/templates/defaults/agent-builder.md.template +82 -0
  169. emdash_core/templates/defaults/focus.md.template +115 -0
  170. emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
  171. emdash_core/templates/defaults/pr-review.md.template +80 -0
  172. emdash_core/templates/defaults/project.md.template +85 -0
  173. emdash_core/templates/defaults/research_critic.md.template +112 -0
  174. emdash_core/templates/defaults/research_planner.md.template +85 -0
  175. emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
  176. emdash_core/templates/defaults/reviewer.md.template +81 -0
  177. emdash_core/templates/defaults/spec.md.template +41 -0
  178. emdash_core/templates/defaults/tasks.md.template +78 -0
  179. emdash_core/templates/loader.py +296 -0
  180. emdash_core/utils/__init__.py +45 -0
  181. emdash_core/utils/git.py +84 -0
  182. emdash_core/utils/image.py +502 -0
  183. emdash_core/utils/logger.py +51 -0
  184. emdash_core-0.1.7.dist-info/METADATA +35 -0
  185. emdash_core-0.1.7.dist-info/RECORD +187 -0
  186. emdash_core-0.1.7.dist-info/WHEEL +4 -0
  187. emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,692 @@
1
+ """Kuzu database connection management."""
2
+
3
+ import os
4
+ import json
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Optional, Generator, Any
8
+ from contextlib import contextmanager
9
+
10
+ import kuzu
11
+
12
+ from ..core.config import KuzuConfig, get_config
13
+ from ..core.exceptions import DatabaseConnectionError
14
+ from ..utils.logger import log
15
+
16
+
17
+ # Lock file constants
18
+ LOCK_FILE_NAME = "kuzu.lock"
19
+ LOCK_STALE_SECONDS = 1800 # 30 minutes for long operations like indexing
20
+ LOCK_WRITE_TIMEOUT = 60 # Wait up to 60 seconds to acquire write lock
21
+
22
+
23
class KuzuQueryResult:
    """Buffered view over a Kuzu query result with a Neo4j-like surface.

    Rows are pulled from the wrapped result the first time they are needed
    and cached as ``{column_name: value}`` dictionaries, so the result can be
    iterated or measured more than once.
    """

    def __init__(self, result: kuzu.QueryResult):
        self._result = result
        self._columns = result.get_column_names()
        self._rows: list[dict] = []
        self._consumed = False

    def _consume(self):
        """Drain the wrapped result into ``self._rows``; later calls are no-ops."""
        if self._consumed:
            return
        cursor = self._result
        while cursor.has_next():
            record = cursor.get_next()
            self._rows.append(dict(zip(self._columns, record)))
        self._consumed = True

    def single(self) -> Optional[dict]:
        """Return the first row, or ``None`` when the result has no rows."""
        self._consume()
        if not self._rows:
            return None
        return self._rows[0]

    def __iter__(self):
        """Iterate over the buffered rows."""
        self._consume()
        return iter(self._rows)

    def __len__(self):
        """Return the number of buffered rows."""
        self._consume()
        return len(self._rows)
54
+
55
+
56
class KuzuSessionWrapper:
    """Neo4j-style session facade over a single Kuzu connection."""

    def __init__(self, conn: kuzu.Connection):
        self._conn = conn

    def run(self, query: str, **parameters) -> KuzuQueryResult:
        """Execute a query using the Neo4j ``session.run(query, **params)`` shape.

        Args:
            query: Cypher query
            **parameters: Query parameters as keyword arguments. Keywords that
                do not appear as ``$name`` placeholders in ``query`` are
                dropped before execution.

        Returns:
            KuzuQueryResult with Neo4j-compatible methods

        Raises:
            DatabaseConnectionError: If the query fails or its result cannot
                be wrapped; the original exception is attached as the cause.
        """
        import re

        # Kuzu requires an exact parameter match, so forward only the
        # placeholders the query references. KuzuConnection.execute() applies
        # the same filter; without it, Neo4j-style callers that pass a
        # superset of parameters fail here but succeed there.
        referenced = set(re.findall(r'\$(\w+)', query))
        bound = {k: v for k, v in parameters.items() if k in referenced}

        try:
            result = self._conn.execute(query, bound)
            return KuzuQueryResult(result)
        except Exception as e:
            raise DatabaseConnectionError(f"Query execution failed: {e}") from e
77
+
78
+
79
class KuzuConnection:
    """Owns one Kuzu embedded database handle and a single connection to it.

    ``__init__`` does not open the database; :meth:`connect` does, and every
    query helper calls ``connect()`` first. Instances also work as context
    managers (connect on enter, close on exit).
    """

    # Node labels whose individual counts are reported by get_database_info().
    _COUNTED_LABELS = ("File", "Function", "Class", "Community")

    def __init__(self, config: Optional[KuzuConfig] = None):
        """Initialize Kuzu connection.

        Args:
            config: Kuzu configuration. If None, ``get_config().kuzu`` is used.
        """
        if config is None:
            config = get_config().kuzu

        self.config = config
        self._db: Optional[kuzu.Database] = None
        self._conn: Optional[kuzu.Connection] = None

    @staticmethod
    def _filter_params(query: str, parameters: Optional[dict]) -> tuple[set, dict]:
        """Return ``(placeholders, params)`` limited to ``$name`` tokens in ``query``.

        Kuzu requires an exact parameter match, so parameters the query does
        not reference must be removed before execution.
        """
        import re

        used = set(re.findall(r'\$(\w+)', query))
        supplied = parameters or {}
        return used, {k: v for k, v in supplied.items() if k in used}

    @staticmethod
    def _table_name_and_type(row) -> tuple[str, str]:
        """Extract ``(name, type)`` from a ``show_tables()`` row.

        Rows are laid out as ``[id, name, type, database_name, comment]``.
        Shorter rows fall back to the first column as the name and ``"NODE"``
        as the type.
        """
        name = row[1] if len(row) > 1 else str(row[0])
        kind = row[2] if len(row) > 2 else "NODE"
        return name, kind

    @staticmethod
    def _scalar(conn, query: str):
        """Run ``query`` and return the first column of its first row, or 0."""
        result = conn.execute(query)
        return result.get_next()[0] if result.has_next() else 0

    def connect(self, max_retries: int = 3, retry_delay: float = 0.5) -> kuzu.Connection:
        """Establish connection to Kuzu database.

        Errors whose message mentions a lock are retried with exponential
        backoff; any other error aborts immediately.

        Args:
            max_retries: Maximum number of connection attempts
            retry_delay: Initial delay between retries (doubles each attempt)

        Returns:
            Kuzu connection instance (the cached one if already connected)

        Raises:
            DatabaseConnectionError: If connection fails after all retries
        """
        if self._conn is not None:
            return self._conn

        last_error = None
        for attempt in range(max_retries):
            try:
                # Ensure database directory exists
                db_path = Path(self.config.database_path)
                db_path.parent.mkdir(parents=True, exist_ok=True)

                if attempt == 0:
                    log.info(f"Connecting to Kuzu database at {self.config.database_path}")
                else:
                    log.info(f"Retrying Kuzu connection (attempt {attempt + 1}/{max_retries})")

                # Build on locals and publish to self only after the probe
                # query succeeds. Otherwise a failed probe would leave a
                # broken connection cached, and the early return above would
                # hand it out on every later call.
                db = kuzu.Database(str(db_path), read_only=self.config.read_only)
                conn = kuzu.Connection(db)
                conn.execute("RETURN 1 AS num").get_next()
            except Exception as e:
                last_error = e
                lock_conflict = "lock" in str(e).lower()
                if lock_conflict and attempt < max_retries - 1:
                    delay = retry_delay * (2 ** attempt)
                    log.warning(f"Database lock conflict, retrying in {delay:.1f}s...")
                    time.sleep(delay)
                    continue
                # Non-retryable error or max retries reached
                break
            else:
                self._db = db
                self._conn = conn
                log.info("Successfully connected to Kuzu database")
                return conn

        raise DatabaseConnectionError(
            f"Failed to connect to Kuzu: {last_error}"
        ) from last_error

    def close(self):
        """Drop the connection and database references.

        No explicit close call is made on the Kuzu objects; the references are
        simply released. A later ``connect()`` opens a fresh connection.
        """
        if self._conn is not None:
            log.info("Closing Kuzu connection")
            self._conn = None
        if self._db is not None:
            self._db = None

    def execute(self, query: str, parameters: Optional[dict] = None) -> list[dict]:
        """Execute a read query and return results as list of dicts.

        Args:
            query: Cypher query to execute
            parameters: Query parameters; entries not referenced as ``$name``
                in ``query`` are ignored

        Returns:
            List of result dictionaries keyed by column name

        Raises:
            DatabaseConnectionError: If connecting or executing fails
        """
        conn = self.connect()
        _, filtered_params = self._filter_params(query, parameters)

        try:
            result = conn.execute(query, filtered_params)
            columns = result.get_column_names()
            rows = []
            while result.has_next():
                rows.append(dict(zip(columns, result.get_next())))
            return rows
        except Exception as e:
            raise DatabaseConnectionError(f"Query execution failed: {e}") from e

    def execute_write(self, query: str, parameters: Optional[dict] = None) -> None:
        """Execute a write query.

        Args:
            query: Cypher query to execute
            parameters: Query parameters; entries not referenced as ``$name``
                in ``query`` are ignored

        Raises:
            DatabaseConnectionError: If connecting or executing fails
        """
        conn = self.connect()
        used_params, filtered_params = self._filter_params(query, parameters)

        try:
            conn.execute(query, filtered_params)
        except Exception as e:
            # "not found" errors are often parameter mismatches; dump the
            # query and parameter names to make them diagnosable.
            if "not found" in str(e).lower():
                log.warning(f"KUZU DEBUG - Error: {e}")
                log.warning(f"KUZU DEBUG - Query: {repr(query)}")
                log.warning(f"KUZU DEBUG - Used params: {used_params}")
                log.warning(f"KUZU DEBUG - Filtered params: {list(filtered_params.keys())}")
            raise DatabaseConnectionError(f"Write query failed: {e}") from e

    @contextmanager
    def session(self) -> Generator[KuzuSessionWrapper, None, None]:
        """Create a context-managed Kuzu session.

        This yields a thin wrapper around the shared connection for API
        compatibility with Neo4j-style code. Nothing is closed on exit; the
        connection persists.

        Yields:
            KuzuSessionWrapper with Neo4j-compatible run() method

        Example:
            with connection.session() as session:
                result = session.run("MATCH (n) RETURN count(n)")
                record = result.single()
        """
        yield KuzuSessionWrapper(self.connect())

    @contextmanager
    def transaction(self) -> Generator[kuzu.Connection, None, None]:
        """Yield the raw connection, for API compatibility.

        No explicit transaction is started, committed or rolled back here.
        Exceptions raised inside the ``with`` body propagate unchanged.

        Yields:
            Kuzu connection
        """
        yield self.connect()

    def verify_connection(self) -> bool:
        """Verify that the connection works.

        Returns:
            True if a trivial query succeeds, False otherwise (the failure is
            logged, not raised)
        """
        try:
            conn = self.connect()
            result = conn.execute("RETURN 1 AS num")
            result.get_next()
            return True
        except Exception as e:
            log.error(f"Connection verification failed: {e}")
            return False

    def get_database_info(self) -> dict:
        """Get information about the Kuzu database.

        Returns:
            Dictionary with total node and relationship counts, node labels,
            relationship types, and per-label counts for File, Function,
            Class and Community (0 when the label is absent or its count
            query fails).
        """
        conn = self.connect()

        node_count = self._scalar(conn, "MATCH (n) RETURN count(n) AS node_count")
        rel_count = self._scalar(conn, "MATCH ()-[r]->() RETURN count(r) AS rel_count")

        # Split table names into node labels and relationship types.
        result = conn.execute("CALL show_tables() RETURN *")
        labels = []
        rel_types = []
        while result.has_next():
            table_name, table_type = self._table_name_and_type(result.get_next())
            if table_type == "NODE":
                labels.append(table_name)
            elif table_type == "REL":
                rel_types.append(table_name)

        # Per-label counts are best-effort: a failing count query must not
        # break the whole status report, but it is logged rather than hidden.
        counts = dict.fromkeys(self._COUNTED_LABELS, 0)
        for label in self._COUNTED_LABELS:
            if label not in labels:
                continue
            try:
                counts[label] = self._scalar(conn, f"MATCH (n:{label}) RETURN count(n)")
            except Exception as e:
                log.debug(f"Could not count {label} nodes: {e}")

        return {
            "node_count": node_count,
            "relationship_count": rel_count,
            "labels": labels,
            "relationship_types": rel_types,
            "file_count": counts["File"],
            "function_count": counts["Function"],
            "class_count": counts["Class"],
            "community_count": counts["Community"],
        }

    def clear_database(self):
        """Drop every relationship table and node table in the database.

        Warning: This will delete all data!

        Individual ``DROP TABLE`` failures are logged and do not stop the
        remaining drops.
        """
        log.warning("Clearing database - all data will be deleted!")
        conn = self.connect()

        # Column 1 is the table name and column 2 its type (column 0 is the
        # numeric id). Reading (row[0], row[1]) here would pair the id with
        # the name, so no table would ever match "REL"/"NODE" and nothing
        # would be dropped.
        result = conn.execute("CALL show_tables() RETURN *")
        tables = []
        while result.has_next():
            tables.append(self._table_name_and_type(result.get_next()))

        # Drop relationship tables first, then node tables
        failures = 0
        for wanted_type in ("REL", "NODE"):
            for table_name, table_type in tables:
                if table_type != wanted_type:
                    continue
                try:
                    conn.execute(f"DROP TABLE {table_name}")
                except Exception as e:
                    failures += 1
                    log.warning(f"Failed to drop table {table_name}: {e}")

        if failures:
            log.warning(f"Database clear finished with {failures} table(s) not dropped")
        else:
            log.info("Database cleared successfully")

    def __enter__(self):
        """Context manager entry: connect and return self."""
        self.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: close the connection."""
        self.close()
389
+
390
+
391
+ # Global connection instance
392
+ _connection: Optional[KuzuConnection] = None
393
+ _connection_lock = None # Will be initialized on first use
394
+
395
+
396
def _get_lock():
    """Return the lock guarding the global connection, creating it on first call."""
    global _connection_lock
    if _connection_lock is not None:
        return _connection_lock

    # threading is imported only when the lock is first needed.
    import threading
    _connection_lock = threading.Lock()
    return _connection_lock
403
+
404
+
405
def get_connection() -> KuzuConnection:
    """Return the process-wide KuzuConnection, creating it on first use.

    Creation happens under the module lock with a second ``None`` check, so
    callers racing on the first call end up sharing one instance. A failed
    initial connect is only logged: the instance is still returned, and its
    query helpers call ``connect()`` again when used.
    """
    global _connection

    if _connection is None:
        with _get_lock():
            # Another caller may have created it while this one waited.
            if _connection is None:
                _connection = KuzuConnection()
                # Eagerly connect to catch issues early
                try:
                    _connection.connect()
                except DatabaseConnectionError as e:
                    log.warning(f"Failed to connect on initialization: {e}")
    return _connection
429
+
430
+
431
def set_connection(connection: KuzuConnection):
    """Replace the global connection, closing the previous one if there was one."""
    global _connection
    with _get_lock():
        previous = _connection
        if previous is not None:
            previous.close()
        _connection = connection
438
+
439
+
440
def configure_for_repo(repo_root: Path) -> KuzuConnection:
    """Point the global connection at a repository-local database.

    The database path is ``{repo_root}/.emdash/kuzu_db`` (with ``repo_root``
    resolved to an absolute path). Any existing global connection is closed
    and replaced. The new connection is not opened here.

    Args:
        repo_root: Path to the repository root

    Returns:
        KuzuConnection configured for this repo
    """
    global _connection

    repo_root = Path(repo_root).resolve()
    # NOTE(review): this function's docstring used to advertise
    # .emdash/index/kuzu_db, but the code has always built .emdash/kuzu_db.
    # Confirm which location is intended.
    db_path = repo_root.joinpath(".emdash", "kuzu_db")

    with _get_lock():
        if _connection is not None:
            _connection.close()
        _connection = KuzuConnection(KuzuConfig(database_path=str(db_path)))

    log.info(f"Configured database for repo: {repo_root}")
    log.info(f"Database path: {db_path}")

    return _connection
470
+
471
+
472
def close_connection():
    """Close the global connection and reset it to ``None``.

    Does nothing when no global connection exists. Intended for shutdown and
    for tests that need fresh state.
    """
    global _connection
    with _get_lock():
        if _connection is None:
            return
        _connection.close()
        _connection = None
        log.debug("Global connection closed")
483
+
484
+
485
+ # Read-only connection for concurrent access
486
+ _read_connection: Optional[KuzuConnection] = None
487
+ _read_connection_lock = None
488
+
489
+
490
def _get_read_lock():
    """Return the lock guarding the read-only connection, creating it on first call."""
    global _read_connection_lock
    if _read_connection_lock is not None:
        return _read_connection_lock

    # threading is imported only when the lock is first needed.
    import threading
    _read_connection_lock = threading.Lock()
    return _read_connection_lock
497
+
498
+
499
def get_read_connection() -> KuzuConnection:
    """Return the process-wide read-only connection, creating it on first use.

    The connection uses the database path from ``get_config().kuzu`` with
    ``read_only=True``. A failed initial connect is logged and the instance
    is still returned.

    NOTE(review): whether a read-only handle can coexist with an open write
    handle depends on Kuzu's locking rules, which this module does not show.
    Verify before relying on it.

    Returns:
        KuzuConnection configured for read-only access
    """
    global _read_connection

    if _read_connection is not None:
        return _read_connection

    with _get_read_lock():
        # Re-check: another caller may have created it while this one waited.
        if _read_connection is None:
            read_only_config = KuzuConfig(
                database_path=get_config().kuzu.database_path,
                read_only=True,
            )
            _read_connection = KuzuConnection(read_only_config)
            try:
                _read_connection.connect()
                log.debug("Created read-only connection")
            except DatabaseConnectionError as e:
                log.warning(f"Failed to create read-only connection: {e}")
        return _read_connection
531
+
532
+
533
def close_read_connection():
    """Close the global read-only connection and reset it to ``None``."""
    global _read_connection
    with _get_read_lock():
        if _read_connection is None:
            return
        _read_connection.close()
        _read_connection = None
        log.debug("Read-only connection closed")
541
+
542
+
543
def get_write_connection() -> KuzuConnection:
    """Return the global read-write connection.

    This is a plain alias for get_connection() that makes write intent
    explicit at the call site. It does not acquire the write lock file.

    Returns:
        KuzuConnection as returned by get_connection()
    """
    writer = get_connection()
    return writer
553
+
554
+
555
+ # Lock file management for write coordination
556
def _get_lock_file_path() -> Path:
    """Return the lock file location: LOCK_FILE_NAME next to the configured database path."""
    database_path = Path(get_config().kuzu.database_path)
    return database_path.parent.joinpath(LOCK_FILE_NAME)
561
+
562
+
563
+ def _is_process_alive(pid: int) -> bool:
564
+ """Check if a process with the given PID is still running."""
565
+ try:
566
+ os.kill(pid, 0)
567
+ return True
568
+ except OSError:
569
+ return False
570
+
571
+
572
def _is_lock_stale(lock_info: dict) -> bool:
    """Decide whether a lock record may be discarded.

    A lock is stale when its recorded ``pid`` is set and that process is no
    longer alive, or when its ``timestamp`` (default 0) is more than
    LOCK_STALE_SECONDS in the past.
    """
    holder = lock_info.get("pid")
    if holder and not _is_process_alive(holder):
        log.debug(f"Lock held by dead process {holder}")
        return True

    age = time.time() - lock_info.get("timestamp", 0)
    if age <= LOCK_STALE_SECONDS:
        return False

    log.debug(f"Lock is stale ({age:.0f}s old)")
    return True
589
+
590
+
591
def acquire_write_lock(operation: str = "write") -> bool:
    """Attempt to acquire the write lock without waiting.

    The lock is a JSON file (pid, timestamp, operation) created with
    ``O_CREAT | O_EXCL``, so when several processes race for a free lock
    exactly one of them creates the file. A stale or unreadable lock file is
    removed and creation is retried.

    Args:
        operation: Name of the operation (for logging)

    Returns:
        True if lock was acquired, False otherwise
    """
    lock_path = _get_lock_file_path()
    lock_path.parent.mkdir(parents=True, exist_ok=True)

    payload = json.dumps({
        "pid": os.getpid(),
        "timestamp": time.time(),
        "operation": operation,
    })

    # A few passes are enough: each pass either creates the file, reports a
    # live holder, or removes a stale/corrupt file and tries again.
    for _ in range(3):
        try:
            # Atomic create-if-absent. A separate exists() check followed by
            # a write lets two processes both believe they hold the lock.
            fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o666)
        except FileExistsError:
            pass
        else:
            try:
                with os.fdopen(fd, "w") as handle:
                    handle.write(payload)
            except Exception:
                # Do not leave an empty lock file behind if the write fails.
                lock_path.unlink(missing_ok=True)
                raise
            log.debug(f"Acquired write lock for {operation}")
            return True

        # The file already exists: inspect its holder.
        try:
            lock_info = json.loads(lock_path.read_text())
            if not isinstance(lock_info, dict):
                raise ValueError("lock file does not contain a JSON object")
            stale = _is_lock_stale(lock_info)
        except FileNotFoundError:
            # Released between the create attempt and the read; try again.
            continue
        except (ValueError, TypeError, OSError):
            # Corrupted lock file (bad JSON, bad encoding, wrong field
            # types) - remove it.
            # NOTE(review): a holder that has created the file but not yet
            # written it looks corrupt for an instant; the window is small
            # but not zero.
            lock_path.unlink(missing_ok=True)
            continue

        if not stale:
            holder_op = lock_info.get("operation", "unknown")
            holder_pid = lock_info.get("pid", "?")
            log.warning(f"Database locked by {holder_op} (PID {holder_pid})")
            return False

        # Stale lock - remove it and retry the atomic create.
        log.info("Removing stale lock file")
        lock_path.unlink(missing_ok=True)

    return False
628
+
629
+
630
def release_write_lock():
    """Remove the lock file, but only when it records this process's pid.

    A missing, unreadable or undecodable lock file is left alone.
    """
    lock_path = _get_lock_file_path()
    if not lock_path.exists():
        return

    try:
        owner_pid = json.loads(lock_path.read_text()).get("pid")
        if owner_pid == os.getpid():
            lock_path.unlink(missing_ok=True)
            log.debug("Released write lock")
    except (json.JSONDecodeError, IOError):
        pass
641
+
642
+
643
def wait_for_write_lock(operation: str = "write", timeout: float = LOCK_WRITE_TIMEOUT) -> bool:
    """Wait to acquire the write lock with timeout.

    At least one acquisition attempt is always made, including when
    ``timeout`` is 0. Between attempts the call sleeps with exponential
    backoff (capped at 5s), never sleeping past the deadline, and makes a
    final attempt once the deadline is reached.

    Args:
        operation: Name of the operation (for logging)
        timeout: Maximum seconds to wait

    Returns:
        True if lock was acquired, False if timeout
    """
    # The monotonic clock keeps the deadline unaffected by wall-clock changes.
    deadline = time.monotonic() + timeout
    attempt = 0

    while True:
        if acquire_write_lock(operation):
            return True

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False

        attempt += 1
        # 1.5 ** 4 already exceeds the 5s cap; bounding the exponent keeps
        # the power from overflowing on very long waits.
        backoff = min(1.0 * (1.5 ** min(attempt, 4)), 5.0)
        # Sleep only as long as is left. Giving up as soon as the next
        # backoff exceeds the remaining time would return early and, for
        # short timeouts, would never wait at all.
        delay = min(backoff, remaining)
        log.info(f"Waiting for database lock ({remaining:.0f}s remaining)...")
        time.sleep(delay)
671
+
672
+
673
@contextmanager
def write_lock_context(operation: str = "write", timeout: float = LOCK_WRITE_TIMEOUT):
    """Hold the write lock for the duration of a ``with`` block.

    The lock is obtained through wait_for_write_lock() and released through
    release_write_lock() when the block exits, whether normally or by
    exception.

    Usage:
        with write_lock_context("indexing"):
            # perform write operations

    Raises:
        DatabaseConnectionError if lock cannot be acquired
    """
    acquired = wait_for_write_lock(operation, timeout)
    if not acquired:
        raise DatabaseConnectionError(
            f"Could not acquire database lock for {operation}. "
            "Another process may be writing to the database."
        )

    try:
        yield
    finally:
        release_write_lock()