emdash-core 0.1.37__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. emdash_core/agent/agents.py +9 -0
  2. emdash_core/agent/background.py +481 -0
  3. emdash_core/agent/inprocess_subagent.py +70 -1
  4. emdash_core/agent/mcp/config.py +78 -2
  5. emdash_core/agent/prompts/main_agent.py +53 -1
  6. emdash_core/agent/prompts/plan_mode.py +65 -44
  7. emdash_core/agent/prompts/subagents.py +73 -1
  8. emdash_core/agent/prompts/workflow.py +179 -28
  9. emdash_core/agent/providers/models.py +1 -1
  10. emdash_core/agent/providers/openai_provider.py +10 -0
  11. emdash_core/agent/research/researcher.py +154 -45
  12. emdash_core/agent/runner/agent_runner.py +145 -19
  13. emdash_core/agent/runner/sdk_runner.py +29 -2
  14. emdash_core/agent/skills.py +81 -1
  15. emdash_core/agent/toolkit.py +87 -11
  16. emdash_core/agent/tools/__init__.py +2 -0
  17. emdash_core/agent/tools/coding.py +344 -52
  18. emdash_core/agent/tools/lsp.py +361 -0
  19. emdash_core/agent/tools/skill.py +21 -1
  20. emdash_core/agent/tools/task.py +16 -19
  21. emdash_core/agent/tools/task_output.py +262 -32
  22. emdash_core/agent/verifier/__init__.py +11 -0
  23. emdash_core/agent/verifier/manager.py +295 -0
  24. emdash_core/agent/verifier/models.py +97 -0
  25. emdash_core/{swarm/worktree_manager.py → agent/worktree.py} +19 -1
  26. emdash_core/api/agent.py +297 -2
  27. emdash_core/api/research.py +3 -3
  28. emdash_core/api/router.py +0 -4
  29. emdash_core/context/longevity.py +197 -0
  30. emdash_core/context/providers/explored_areas.py +83 -39
  31. emdash_core/context/reranker.py +35 -144
  32. emdash_core/context/simple_reranker.py +500 -0
  33. emdash_core/context/tool_relevance.py +84 -0
  34. emdash_core/core/config.py +8 -0
  35. emdash_core/graph/__init__.py +8 -1
  36. emdash_core/graph/connection.py +24 -3
  37. emdash_core/graph/writer.py +7 -1
  38. emdash_core/models/agent.py +10 -0
  39. emdash_core/server.py +1 -6
  40. emdash_core/sse/stream.py +16 -1
  41. emdash_core/utils/__init__.py +0 -2
  42. emdash_core/utils/git.py +103 -0
  43. emdash_core/utils/image.py +147 -160
  44. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/METADATA +6 -6
  45. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/RECORD +47 -52
  46. emdash_core/api/swarm.py +0 -223
  47. emdash_core/db/__init__.py +0 -67
  48. emdash_core/db/auth.py +0 -134
  49. emdash_core/db/models.py +0 -91
  50. emdash_core/db/provider.py +0 -222
  51. emdash_core/db/providers/__init__.py +0 -5
  52. emdash_core/db/providers/supabase.py +0 -452
  53. emdash_core/swarm/__init__.py +0 -17
  54. emdash_core/swarm/merge_agent.py +0 -383
  55. emdash_core/swarm/session_manager.py +0 -274
  56. emdash_core/swarm/swarm_runner.py +0 -226
  57. emdash_core/swarm/task_definition.py +0 -137
  58. emdash_core/swarm/worker_spawner.py +0 -319
  59. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/WHEEL +0 -0
  60. {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/entry_points.txt +0 -0
emdash_core/context/simple_reranker.py (new file)
@@ -0,0 +1,500 @@
+ """Simple re-ranker for context items - no ML dependencies.
+
+ A lightweight alternative to the cross-encoder reranker that uses:
+ 1. Text matching (query terms vs entity names/paths/descriptions)
+ 2. Graph signals (pagerank, betweenness centrality)
+ 3. Session signals (recency, touch frequency)
+ 4. Longevity signals (items that keep appearing are important)
+
+ This reranker requires zero external dependencies and runs in <10ms.
+ """
+
+ import math
+ import os
+ import re
+ from datetime import datetime
+ from typing import Optional
+
+ from .models import ContextItem
+ from .longevity import get_longevity_score, record_reranked_items
+ from ..utils.logger import log
+
+
+ # ============================================================================
+ # Tokenization
+ # ============================================================================
+
+ # Common code stopwords to filter out
+ CODE_STOPWORDS = frozenset({
+     "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
+     "have", "has", "had", "do", "does", "did", "will", "would", "could",
+     "should", "may", "might", "must", "shall", "can", "need", "dare",
+     "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
+     "from", "as", "into", "through", "during", "before", "after", "above",
+     "below", "between", "under", "again", "further", "then", "once",
+     "and", "but", "or", "nor", "so", "yet", "both", "either", "neither",
+     "not", "only", "own", "same", "than", "too", "very", "just",
+     "get", "set", "init", "self", "cls", "this", "that", "these", "those",
+     "def", "class", "function", "method", "return", "import", "from",
+     "if", "else", "elif", "try", "except", "finally", "raise", "assert",
+     "for", "while", "break", "continue", "pass", "lambda", "yield",
+     "true", "false", "none", "null", "undefined",
+ })
+
+ # Regex for splitting code identifiers
+ SPLIT_PATTERN = re.compile(r'[._/\\:\-\s]+|(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])')
+
+
+ def tokenize(text: str) -> set[str]:
+     """Tokenize text into normalized terms for matching.
+
+     Handles:
+     - camelCase: "getUserName" -> {"get", "user", "name"}
+     - snake_case: "get_user_name" -> {"get", "user", "name"}
+     - paths: "src/auth/service.py" -> {"src", "auth", "service", "py"}
+
+     Args:
+         text: Text to tokenize
+
+     Returns:
+         Set of lowercase tokens (stopwords removed)
+     """
+     if not text:
+         return set()
+
+     # Split by common delimiters and camelCase boundaries
+     # Important: split BEFORE lowercasing to preserve camelCase boundaries
+     tokens = SPLIT_PATTERN.split(text)
+
+     # Lowercase and filter: non-empty, not stopwords, length > 1
+     return {
+         token.lower() for token in tokens
+         if token and len(token) > 1 and token.lower() not in CODE_STOPWORDS
+     }
+
+
+ def tokenize_query(query: str) -> set[str]:
+     """Tokenize a user query, preserving important terms.
+
+     Less aggressive filtering than code tokenization.
+     """
+     if not query:
+         return set()
+
+     # Simple word split for queries
+     words = re.split(r'\s+', query.lower())
+
+     # Also split camelCase/snake_case if present
+     tokens = set()
+     for word in words:
+         tokens.update(SPLIT_PATTERN.split(word))
+
+     # Filter but keep more terms (queries are short)
+     return {token for token in tokens if token and len(token) > 1}
+
+
+ # ============================================================================
+ # Scoring Functions
+ # ============================================================================
+
+ def text_match_score(query_tokens: set[str], item: ContextItem) -> float:
+     """Score based on query term overlap with entity text.
+
+     Args:
+         query_tokens: Pre-tokenized query terms
+         item: Context item to score
+
+     Returns:
+         Score between 0.0 and 1.0
+     """
+     if not query_tokens:
+         return 0.0
+
+     # Tokenize item fields
+     name_tokens = tokenize(item.qualified_name)
+     path_tokens = tokenize(item.file_path) if item.file_path else set()
+     desc_tokens = tokenize(item.description) if item.description else set()
+
+     # Calculate overlap ratios
+     query_size = len(query_tokens)
+
+     # Name matches are most important
+     name_overlap = len(query_tokens & name_tokens)
+     name_score = name_overlap / query_size if query_size else 0
+
+     # Boost for exact substring match in name
+     name_lower = item.qualified_name.lower()
+     exact_boost = 0.0
+     for token in query_tokens:
+         if token in name_lower:
+             exact_boost += 0.1
+     exact_boost = min(0.3, exact_boost)  # Cap at 0.3
+
+     # Path matches
+     path_overlap = len(query_tokens & path_tokens)
+     path_score = path_overlap / query_size if query_size else 0
+
+     # Description matches
+     desc_overlap = len(query_tokens & desc_tokens)
+     desc_score = desc_overlap / query_size if query_size else 0
+
+     # Weighted combination
+     score = (
+         name_score * 0.50 +
+         exact_boost +
+         path_score * 0.15 +
+         desc_score * 0.05
+     )
+
+     return min(1.0, score)
+
+
+ def graph_boost_score(
+     item: ContextItem,
+     connection=None,
+     _cache: dict = {},
+ ) -> float:
+     """Score based on graph centrality metrics.
+
+     Uses pagerank and betweenness centrality from the graph database.
+     Results are cached per qualified_name to avoid repeated queries.
+
+     Args:
+         item: Context item to score
+         connection: Kuzu database connection (optional)
+
+     Returns:
+         Score between 0.0 and 1.0
+     """
+     if connection is None:
+         return 0.0
+
+     qname = item.qualified_name
+
+     # Check cache
+     if qname in _cache:
+         return _cache[qname]
+
+     try:
+         # Determine node table from entity type
+         entity_type = item.entity_type
+         if entity_type not in ("Function", "Class", "File"):
+             _cache[qname] = 0.0
+             return 0.0
+
+         # Query for centrality metrics
+         conn = connection.connect()
+
+         if entity_type == "File":
+             # Files use path as key
+             query = f"""
+                 MATCH (n:File {{path: $path}})
+                 RETURN n.commit_importance, n.commit_count
+             """
+             result = conn.execute(query, {"path": item.file_path or qname})
+         else:
+             # Functions/Classes use qualified_name
+             query = f"""
+                 MATCH (n:{entity_type} {{qualified_name: $qname}})
+                 RETURN n.pagerank, n.betweenness
+             """
+             result = conn.execute(query, {"qname": qname})
+
+         if result.has_next():
+             row = result.get_next()
+             val1 = row[0] or 0.0
+             val2 = row[1] or 0.0
+
+             if entity_type == "File":
+                 # For files: use commit importance and count
+                 score = min(1.0, val1 * 0.5 + (val2 / 100) * 0.5)
+             else:
+                 # For functions/classes: pagerank + betweenness
+                 # Normalize assuming pagerank max ~0.1, betweenness max ~0.5
+                 score = min(1.0, val1 * 5 + val2 * 1)
+
+             _cache[qname] = score
+             return score
+
+         _cache[qname] = 0.0
+         return 0.0
+
+     except Exception as e:
+         log.debug(f"Graph boost query failed for {qname}: {e}")
+         _cache[qname] = 0.0
+         return 0.0
+
+
+ def session_boost_score(item: ContextItem, now: Optional[datetime] = None) -> float:
+     """Score based on session activity signals.
+
+     Args:
+         item: Context item to score
+         now: Current time (for testing)
+
+     Returns:
+         Score between 0.0 and 1.0
+     """
+     if now is None:
+         now = datetime.now()
+
+     # Recency score: very slow decay (half-life = 48 hours = 2880 minutes)
+     recency = 0.0
+     if item.last_touched:
+         age_seconds = (now - item.last_touched).total_seconds()
+         age_minutes = max(0, age_seconds / 60)
+         recency = math.exp(-0.693 * age_minutes / 2880)
+
+     # Frequency score: logarithmic scaling (diminishing returns)
+     frequency = 0.0
+     if item.touch_count > 0:
+         # log(1) = 0, log(e) = 1, log(e^2) = 2, etc.
+         # Normalize so touch_count=10 gives ~0.7
+         frequency = min(1.0, math.log(item.touch_count + 1) / 3)
+
+     # Combine: frequency is more important now (recency barely decays)
+     return recency * 0.3 + frequency * 0.7
+
+
+ def neighbor_boost_score(query_tokens: set[str], item: ContextItem) -> float:
+     """Boost score if neighbors match query terms.
+
+     Args:
+         query_tokens: Pre-tokenized query terms
+         item: Context item with neighbors
+
+     Returns:
+         Boost score between 0.0 and 0.3
+     """
+     if not query_tokens or not item.neighbors:
+         return 0.0
+
+     # Check if any neighbor names match query
+     matches = 0
+     for neighbor in item.neighbors[:10]:  # Limit to first 10
+         neighbor_tokens = tokenize(neighbor)
+         if query_tokens & neighbor_tokens:
+             matches += 1
+
+     # Cap at 3 matches = 0.3 boost
+     return min(0.3, matches * 0.1)
+
+
+ def file_cooccurrence_boost(
+     item: ContextItem, file_entity_counts: dict[str, int]
+ ) -> float:
+     """Boost score for files with multiple entities in context.
+
+     If multiple entities from the same file appear in the context frame,
+     it suggests the file is a focal point of the current work.
+
+     Args:
+         item: Context item to score
+         file_entity_counts: Dict mapping file_path to entity count in context
+
+     Returns:
+         Score between 0.0 and 1.0
+     """
+     if not item.file_path:
+         return 0.0
+
+     count = file_entity_counts.get(item.file_path, 0)
+
+     # Score progression:
+     # 1 entity = 0.0 (no co-occurrence)
+     # 2 entities = 0.3
+     # 3 entities = 0.5
+     # 4 entities = 0.6
+     # 5+ entities = 0.7 (capped)
+     if count <= 1:
+         return 0.0
+
+     # Log scale for diminishing returns
+     return min(0.7, math.log(count) / 2.5)
+
+
+ # ============================================================================
+ # Main Reranking Function
+ # ============================================================================
+
+ def simple_rerank_items(
+     items: list[ContextItem],
+     query: str,
+     connection=None,
+     top_k: Optional[int] = None,
+     weights: Optional[dict] = None,
+ ) -> list[ContextItem]:
+     """Re-rank context items using simple heuristics.
+
+     Scoring formula:
+         final_score = (
+             base_score * W_BASE +
+             text_match * W_TEXT +
+             graph_boost * W_GRAPH +
+             session_boost * W_SESSION +
+             neighbor_boost * W_NEIGHBOR +
+             longevity * W_LONGEVITY +
+             file_cooccur * W_FILE_COOCCUR
+         )
+
+     Args:
+         items: List of context items to re-rank
+         query: User query for relevance matching
+         connection: Optional Kuzu connection for graph signals
+         top_k: Number of top items to return (default: 20)
+         weights: Optional weight overrides {base, text, graph, session, neighbor, longevity, file_cooccur}
+
+     Returns:
+         Sorted list of top-k items (most relevant first)
+     """
+     import time
+     start_time = time.time()
+
+     if not items:
+         return items
+
+     if not query or not query.strip():
+         log.debug("No query provided, returning items by base score")
+         return sorted(items, key=lambda x: x.score, reverse=True)[:top_k or 20]
+
+     # Default weights
+     w = {
+         "base": 0.15,  # Tool-based relevance (already computed)
+         "text": 0.35,  # Query-entity text matching
+         "graph": 0.10,  # PageRank/betweenness
+         "session": 0.10,  # Recency and frequency
+         "neighbor": 0.05,  # Neighbor matching
+         "longevity": 0.15,  # Items that keep appearing
+         "file_cooccur": 0.10,  # Files with multiple entities
+     }
+     if weights:
+         w.update(weights)
+
+     # Pre-tokenize query once
+     query_tokens = tokenize_query(query)
+     now = datetime.now()
+
+     # Build file entity counts for co-occurrence boost
+     file_entity_counts: dict[str, int] = {}
+     for item in items:
+         if item.file_path:
+             file_entity_counts[item.file_path] = file_entity_counts.get(item.file_path, 0) + 1
+
+     # Score all items
+     scored_items = []
+     for item in items:
+         # Base score from tool-based relevance
+         base = item.score
+
+         # Text matching score
+         text = text_match_score(query_tokens, item)
+
+         # Graph centrality boost (if connection available)
+         graph = graph_boost_score(item, connection) if connection else 0.0
+
+         # Session activity boost
+         session = session_boost_score(item, now)
+
+         # Neighbor matching boost
+         neighbor = neighbor_boost_score(query_tokens, item)
+
+         # Longevity boost (items that keep appearing)
+         longevity = get_longevity_score(item.qualified_name)
+
+         # File co-occurrence boost (multiple entities from same file)
+         file_cooccur = file_cooccurrence_boost(item, file_entity_counts)
+
+         # Compute final score
+         final_score = (
+             base * w["base"] +
+             text * w["text"] +
+             graph * w["graph"] +
+             session * w["session"] +
+             neighbor * w["neighbor"] +
+             longevity * w["longevity"] +
+             file_cooccur * w["file_cooccur"]
+         )
+
+         scored_items.append((item, final_score))
+
+     # Sort by score descending
+     scored_items.sort(key=lambda x: x[1], reverse=True)
+
+     # Determine how many to keep
+     if top_k is None:
+         top_k = int(os.getenv("CONTEXT_RERANK_TOP_K", "20"))
+     keep_count = min(top_k, len(scored_items))
+
+     duration_ms = (time.time() - start_time) * 1000
+
+     # Log statistics
+     if scored_items:
+         max_score = scored_items[0][1]
+         min_score = scored_items[-1][1] if scored_items else 0
+         log.info(
+             f"Simple re-rank: {len(items)} -> {keep_count} items "
+             f"in {duration_ms:.1f}ms | "
+             f"scores [{min_score:.3f}-{max_score:.3f}] | "
+             f"query: '{query[:40]}...'"
+         )
+
+     # Record appearances for longevity tracking
+     result_items = [item for item, score in scored_items[:keep_count]]
+     record_reranked_items([item.qualified_name for item in result_items])
+
+     return result_items
+
+
+ def get_simple_rerank_scores(
+     items: list[ContextItem],
+     query: str,
+     connection=None,
+ ) -> list[tuple[ContextItem, float, dict]]:
+     """Get detailed scoring breakdown for debugging.
+
+     Args:
+         items: List of context items
+         query: Query to score against
+         connection: Optional Kuzu connection
+
+     Returns:
+         List of (item, total_score, component_scores) sorted by score
+     """
+     if not items:
+         return []
+
+     query_tokens = tokenize_query(query) if query else set()
+     now = datetime.now()
+
+     # Build file entity counts for co-occurrence boost
+     file_entity_counts: dict[str, int] = {}
+     for item in items:
+         if item.file_path:
+             file_entity_counts[item.file_path] = file_entity_counts.get(item.file_path, 0) + 1
+
+     results = []
+     for item in items:
+         components = {
+             "base": item.score,
+             "text": text_match_score(query_tokens, item),
+             "graph": graph_boost_score(item, connection) if connection else 0.0,
+             "session": session_boost_score(item, now),
+             "neighbor": neighbor_boost_score(query_tokens, item),
+             "longevity": get_longevity_score(item.qualified_name),
+             "file_cooccur": file_cooccurrence_boost(item, file_entity_counts),
+         }
+
+         total = (
+             components["base"] * 0.15 +
+             components["text"] * 0.35 +
+             components["graph"] * 0.10 +
+             components["session"] * 0.10 +
+             components["neighbor"] * 0.05 +
+             components["longevity"] * 0.15 +
+             components["file_cooccur"] * 0.10
+         )
+
+         results.append((item, total, components))
+
+     results.sort(key=lambda x: x[1], reverse=True)
+     return results
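
For orientation, a minimal usage sketch of the new module (assuming the caller already holds a list of ContextItem objects, here called context_items; the weight keys mirror the defaults listed above):

    from emdash_core.context.simple_reranker import simple_rerank_items, get_simple_rerank_scores

    # Hypothetical call site: re-rank a gathered context frame against the current query,
    # nudging the weights toward text match and longevity (keys mirror the defaults above).
    top_items = simple_rerank_items(
        items=context_items,              # list[ContextItem] collected during exploration (assumed)
        query="where is the auth token refreshed?",
        connection=None,                  # no Kuzu connection -> graph boost contributes 0.0
        top_k=10,
        weights={"text": 0.45, "longevity": 0.20},
    )

    # Debugging aid: per-signal breakdown for the best-ranked item.
    item, total, parts = get_simple_rerank_scores(context_items, "auth token refresh")[0]
    print(item.qualified_name, round(total, 3), parts)
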
emdash_core/context/tool_relevance.py (new file)
@@ -0,0 +1,84 @@
+ """Tool relevance scores for context ranking.
+
+ This module defines how much relevance weight each tool type contributes
+ when an entity is discovered through that tool during exploration.
+
+ Scoring Philosophy:
+ - Highest: Actions that modify code (write_file, apply_diff) - these are
+   what the agent is actively working on
+ - High: Deliberate investigation (expand_node, get_callers, read_file)
+ - Medium: Targeted search (semantic_search, text_search, grep)
+ - Low: Broad discovery (list_files, graph algorithms)
+ """
+
+ # Tool-based relevance scores
+ # These scores become the base_score for ContextItems
+ TOOL_RELEVANCE = {
+     # Highest relevance - active modifications (what we're working on NOW)
+     "write_to_file": 1.0,
+     "apply_diff": 1.0,
+     "execute_command": 0.9,  # Often running tests/builds on specific files
+
+     # High relevance - deliberate investigation
+     "get_callers": 0.9,
+     "get_callees": 0.9,
+     "get_class_hierarchy": 0.85,
+     "get_impact_analysis": 0.85,
+     "read_file": 0.8,
+     "get_neighbors": 0.8,
+
+     # Medium-high relevance
+     "expand_node": 0.6,
+
+     # Medium relevance - targeted search
+     "semantic_search": 0.7,
+     "text_search": 0.65,
+     "grep": 0.6,
+     "get_file_dependencies": 0.6,
+     "find_entity": 0.55,
+
+     # Lower relevance - broad discovery
+     "list_files": 0.3,
+     "glob": 0.3,
+
+     # Lowest relevance - graph algorithms (bulk results, less targeted)
+     "get_top_pagerank": 0.2,
+     "get_communities": 0.2,
+     "get_central_nodes": 0.2,
+ }
+
+ # Default score for unknown tools
+ DEFAULT_TOOL_RELEVANCE = 0.3
+
+ # Tools where only top N results are considered highly relevant
+ SEARCH_TOOLS = {"semantic_search", "text_search", "grep", "find_entity"}
+
+ # How many top results from search tools get full relevance score
+ TOP_RESULTS_LIMIT = 3
+
+ # Score multiplier for non-top search results
+ NON_TOP_RESULT_MULTIPLIER = 0.5
+
+
+ def get_tool_relevance(tool_name: str) -> float:
+     """Get the relevance score for a tool.
+
+     Args:
+         tool_name: Name of the tool
+
+     Returns:
+         Relevance score between 0.0 and 1.0
+     """
+     return TOOL_RELEVANCE.get(tool_name, DEFAULT_TOOL_RELEVANCE)
+
+
+ def is_search_tool(tool_name: str) -> bool:
+     """Check if a tool is a search tool (where only top results are relevant).
+
+     Args:
+         tool_name: Name of the tool
+
+     Returns:
+         True if it's a search tool
+     """
+     return tool_name in SEARCH_TOOLS
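
How these constants are intended to combine is easiest to see with a small sketch; the base_score_for helper below is hypothetical (the actual wiring lives elsewhere in the context pipeline), but the arithmetic follows the constants above: a semantic_search hit outside the top 3 results starts at 0.7 * 0.5 = 0.35.

    from emdash_core.context.tool_relevance import (
        NON_TOP_RESULT_MULTIPLIER,
        TOP_RESULTS_LIMIT,
        get_tool_relevance,
        is_search_tool,
    )

    def base_score_for(tool_name: str, result_rank: int) -> float:
        """Hypothetical helper: derive a ContextItem base score from the discovering tool."""
        score = get_tool_relevance(tool_name)  # unknown tools fall back to 0.3
        # For search tools, only the first TOP_RESULTS_LIMIT hits keep the full weight.
        if is_search_tool(tool_name) and result_rank >= TOP_RESULTS_LIMIT:
            score *= NON_TOP_RESULT_MULTIPLIER
        return score

    print(base_score_for("read_file", 0))        # 0.8
    print(base_score_for("semantic_search", 5))  # 0.35
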
emdash_core/core/config.py
@@ -281,6 +281,13 @@ class AgentConfig(BaseModel):
          description="Maximum tokens for tool output (estimated at ~4 chars/token)",
      )

+     tool_parallel_workers: int = Field(
+         default=6,
+         ge=1,
+         le=16,
+         description="Maximum parallel workers for concurrent tool execution",
+     )
+
      context_compact_threshold: float = Field(
          default=0.8,
          ge=0.5,
@@ -302,6 +309,7 @@ class AgentConfig(BaseModel):
              max_context_messages=int(os.getenv("EMDASH_MAX_CONTEXT_MESSAGES", "25")),
              max_iterations=int(os.getenv("EMDASH_MAX_ITERATIONS", "100")),
              tool_max_output_tokens=int(os.getenv("EMDASH_TOOL_MAX_OUTPUT", "25000")),
+             tool_parallel_workers=int(os.getenv("EMDASH_TOOL_PARALLEL_WORKERS", "6")),
              context_compact_threshold=float(os.getenv("EMDASH_CONTEXT_COMPACT_THRESHOLD", "0.8")),
              context_compact_target=float(os.getenv("EMDASH_CONTEXT_COMPACT_TARGET", "0.5")),
          )
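
The new field follows the same environment-variable pattern as the surrounding settings, so the tool worker pool can be sized per deployment; a sketch (it assumes the remaining AgentConfig fields have defaults, and the factory that reads the env vars is the one shown in the hunk above, whose name is not visible here):

    import os
    from emdash_core.core.config import AgentConfig

    # Per-deployment override, picked up by the env-reading factory shown above.
    os.environ["EMDASH_TOOL_PARALLEL_WORKERS"] = "12"

    # Direct construction also works; the field is validated to the 1..16 range.
    config = AgentConfig(tool_parallel_workers=12)
    print(config.tool_parallel_workers)  # 12
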
emdash_core/graph/__init__.py
@@ -1,16 +1,23 @@
- """Graph module for Kuzu database operations."""
+ """Graph module for Kuzu database operations.
+
+ Note: Kuzu is an optional dependency. Check KUZU_AVAILABLE before using
+ graph features, or install with: pip install 'emdash-ai[graph]'
+ """

  from .connection import (
      KuzuConnection,
      get_connection,
      set_connection,
      close_connection,
+     KUZU_AVAILABLE,
  )
  from .schema import SchemaManager, initialize_database
  from .builder import GraphBuilder
  from .writer import GraphWriter

  __all__ = [
+     # Availability check
+     "KUZU_AVAILABLE",
      # Connection
      "KuzuConnection",
      "get_connection",
emdash_core/graph/connection.py
@@ -1,13 +1,21 @@
  """Kuzu database connection management."""

+ from __future__ import annotations
+
  import os
  import json
  import time
  from pathlib import Path
- from typing import Optional, Generator, Any
+ from typing import Optional, Generator, Any, TYPE_CHECKING
  from contextlib import contextmanager

- import kuzu
+ # Lazy import for kuzu - it's an optional dependency
+ try:
+     import kuzu
+     KUZU_AVAILABLE = True
+ except ImportError:
+     kuzu = None  # type: ignore
+     KUZU_AVAILABLE = False

  from ..core.config import KuzuConfig, get_config
  from ..core.exceptions import DatabaseConnectionError
@@ -20,6 +28,16 @@ LOCK_STALE_SECONDS = 1800 # 30 minutes for long operations like indexing
  LOCK_WRITE_TIMEOUT = 60 # Wait up to 60 seconds to acquire write lock


+ def _require_kuzu():
+     """Check that kuzu is available, raise helpful error if not."""
+     if not KUZU_AVAILABLE:
+         raise ImportError(
+             "Kuzu graph database is not installed. "
+             "Install with: pip install 'emdash-ai[graph]'\n"
+             "Or: pip install kuzu"
+         )
+
+
  class KuzuQueryResult:
      """Wrapper for Kuzu query results providing Neo4j-compatible API."""

@@ -92,7 +110,7 @@ class KuzuConnection:
          self._db: Optional[kuzu.Database] = None
          self._conn: Optional[kuzu.Connection] = None

-     def connect(self, max_retries: int = 3, retry_delay: float = 0.5) -> kuzu.Connection:
+     def connect(self, max_retries: int = 3, retry_delay: float = 0.5):
          """Establish connection to Kuzu database.

          Uses retry logic with exponential backoff to handle transient lock issues.
@@ -106,7 +124,10 @@

          Raises:
              DatabaseConnectionError: If connection fails after all retries
+             ImportError: If kuzu is not installed
          """
+         _require_kuzu()
+
          if self._conn is not None:
              return self._conn

emdash_core/graph/writer.py
@@ -3,7 +3,13 @@
  from typing import List
  from datetime import datetime

- import kuzu
+ # Lazy import for kuzu - it's an optional dependency
+ try:
+     import kuzu
+     KUZU_AVAILABLE = True
+ except ImportError:
+     kuzu = None  # type: ignore
+     KUZU_AVAILABLE = False

  from ..core.models import (
      FileEntity,