emdash-core 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. emdash_core/__init__.py +3 -0
  2. emdash_core/agent/__init__.py +37 -0
  3. emdash_core/agent/agents.py +225 -0
  4. emdash_core/agent/code_reviewer.py +476 -0
  5. emdash_core/agent/compaction.py +143 -0
  6. emdash_core/agent/context_manager.py +140 -0
  7. emdash_core/agent/events.py +338 -0
  8. emdash_core/agent/handlers.py +224 -0
  9. emdash_core/agent/inprocess_subagent.py +377 -0
  10. emdash_core/agent/mcp/__init__.py +50 -0
  11. emdash_core/agent/mcp/client.py +346 -0
  12. emdash_core/agent/mcp/config.py +302 -0
  13. emdash_core/agent/mcp/manager.py +496 -0
  14. emdash_core/agent/mcp/tool_factory.py +213 -0
  15. emdash_core/agent/prompts/__init__.py +38 -0
  16. emdash_core/agent/prompts/main_agent.py +104 -0
  17. emdash_core/agent/prompts/subagents.py +131 -0
  18. emdash_core/agent/prompts/workflow.py +136 -0
  19. emdash_core/agent/providers/__init__.py +34 -0
  20. emdash_core/agent/providers/base.py +143 -0
  21. emdash_core/agent/providers/factory.py +80 -0
  22. emdash_core/agent/providers/models.py +220 -0
  23. emdash_core/agent/providers/openai_provider.py +463 -0
  24. emdash_core/agent/providers/transformers_provider.py +217 -0
  25. emdash_core/agent/research/__init__.py +81 -0
  26. emdash_core/agent/research/agent.py +143 -0
  27. emdash_core/agent/research/controller.py +254 -0
  28. emdash_core/agent/research/critic.py +428 -0
  29. emdash_core/agent/research/macros.py +469 -0
  30. emdash_core/agent/research/planner.py +449 -0
  31. emdash_core/agent/research/researcher.py +436 -0
  32. emdash_core/agent/research/state.py +523 -0
  33. emdash_core/agent/research/synthesizer.py +594 -0
  34. emdash_core/agent/reviewer_profile.py +475 -0
  35. emdash_core/agent/rules.py +123 -0
  36. emdash_core/agent/runner.py +601 -0
  37. emdash_core/agent/session.py +262 -0
  38. emdash_core/agent/spec_schema.py +66 -0
  39. emdash_core/agent/specification.py +479 -0
  40. emdash_core/agent/subagent.py +397 -0
  41. emdash_core/agent/subagent_prompts.py +13 -0
  42. emdash_core/agent/toolkit.py +482 -0
  43. emdash_core/agent/toolkits/__init__.py +64 -0
  44. emdash_core/agent/toolkits/base.py +96 -0
  45. emdash_core/agent/toolkits/explore.py +47 -0
  46. emdash_core/agent/toolkits/plan.py +55 -0
  47. emdash_core/agent/tools/__init__.py +141 -0
  48. emdash_core/agent/tools/analytics.py +436 -0
  49. emdash_core/agent/tools/base.py +131 -0
  50. emdash_core/agent/tools/coding.py +484 -0
  51. emdash_core/agent/tools/github_mcp.py +592 -0
  52. emdash_core/agent/tools/history.py +13 -0
  53. emdash_core/agent/tools/modes.py +153 -0
  54. emdash_core/agent/tools/plan.py +206 -0
  55. emdash_core/agent/tools/plan_write.py +135 -0
  56. emdash_core/agent/tools/search.py +412 -0
  57. emdash_core/agent/tools/spec.py +341 -0
  58. emdash_core/agent/tools/task.py +262 -0
  59. emdash_core/agent/tools/task_output.py +204 -0
  60. emdash_core/agent/tools/tasks.py +454 -0
  61. emdash_core/agent/tools/traversal.py +588 -0
  62. emdash_core/agent/tools/web.py +179 -0
  63. emdash_core/analytics/__init__.py +5 -0
  64. emdash_core/analytics/engine.py +1286 -0
  65. emdash_core/api/__init__.py +5 -0
  66. emdash_core/api/agent.py +308 -0
  67. emdash_core/api/agents.py +154 -0
  68. emdash_core/api/analyze.py +264 -0
  69. emdash_core/api/auth.py +173 -0
  70. emdash_core/api/context.py +77 -0
  71. emdash_core/api/db.py +121 -0
  72. emdash_core/api/embed.py +131 -0
  73. emdash_core/api/feature.py +143 -0
  74. emdash_core/api/health.py +93 -0
  75. emdash_core/api/index.py +162 -0
  76. emdash_core/api/plan.py +110 -0
  77. emdash_core/api/projectmd.py +210 -0
  78. emdash_core/api/query.py +320 -0
  79. emdash_core/api/research.py +122 -0
  80. emdash_core/api/review.py +161 -0
  81. emdash_core/api/router.py +76 -0
  82. emdash_core/api/rules.py +116 -0
  83. emdash_core/api/search.py +119 -0
  84. emdash_core/api/spec.py +99 -0
  85. emdash_core/api/swarm.py +223 -0
  86. emdash_core/api/tasks.py +109 -0
  87. emdash_core/api/team.py +120 -0
  88. emdash_core/auth/__init__.py +17 -0
  89. emdash_core/auth/github.py +389 -0
  90. emdash_core/config.py +74 -0
  91. emdash_core/context/__init__.py +52 -0
  92. emdash_core/context/models.py +50 -0
  93. emdash_core/context/providers/__init__.py +11 -0
  94. emdash_core/context/providers/base.py +74 -0
  95. emdash_core/context/providers/explored_areas.py +183 -0
  96. emdash_core/context/providers/touched_areas.py +360 -0
  97. emdash_core/context/registry.py +73 -0
  98. emdash_core/context/reranker.py +199 -0
  99. emdash_core/context/service.py +260 -0
  100. emdash_core/context/session.py +352 -0
  101. emdash_core/core/__init__.py +104 -0
  102. emdash_core/core/config.py +454 -0
  103. emdash_core/core/exceptions.py +55 -0
  104. emdash_core/core/models.py +265 -0
  105. emdash_core/core/review_config.py +57 -0
  106. emdash_core/db/__init__.py +67 -0
  107. emdash_core/db/auth.py +134 -0
  108. emdash_core/db/models.py +91 -0
  109. emdash_core/db/provider.py +222 -0
  110. emdash_core/db/providers/__init__.py +5 -0
  111. emdash_core/db/providers/supabase.py +452 -0
  112. emdash_core/embeddings/__init__.py +24 -0
  113. emdash_core/embeddings/indexer.py +534 -0
  114. emdash_core/embeddings/models.py +192 -0
  115. emdash_core/embeddings/providers/__init__.py +7 -0
  116. emdash_core/embeddings/providers/base.py +112 -0
  117. emdash_core/embeddings/providers/fireworks.py +141 -0
  118. emdash_core/embeddings/providers/openai.py +104 -0
  119. emdash_core/embeddings/registry.py +146 -0
  120. emdash_core/embeddings/service.py +215 -0
  121. emdash_core/graph/__init__.py +26 -0
  122. emdash_core/graph/builder.py +134 -0
  123. emdash_core/graph/connection.py +692 -0
  124. emdash_core/graph/schema.py +416 -0
  125. emdash_core/graph/writer.py +667 -0
  126. emdash_core/ingestion/__init__.py +7 -0
  127. emdash_core/ingestion/change_detector.py +150 -0
  128. emdash_core/ingestion/git/__init__.py +5 -0
  129. emdash_core/ingestion/git/commit_analyzer.py +196 -0
  130. emdash_core/ingestion/github/__init__.py +6 -0
  131. emdash_core/ingestion/github/pr_fetcher.py +296 -0
  132. emdash_core/ingestion/github/task_extractor.py +100 -0
  133. emdash_core/ingestion/orchestrator.py +540 -0
  134. emdash_core/ingestion/parsers/__init__.py +10 -0
  135. emdash_core/ingestion/parsers/base_parser.py +66 -0
  136. emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
  137. emdash_core/ingestion/parsers/class_extractor.py +154 -0
  138. emdash_core/ingestion/parsers/function_extractor.py +202 -0
  139. emdash_core/ingestion/parsers/import_analyzer.py +119 -0
  140. emdash_core/ingestion/parsers/python_parser.py +123 -0
  141. emdash_core/ingestion/parsers/registry.py +72 -0
  142. emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
  143. emdash_core/ingestion/parsers/typescript_parser.py +278 -0
  144. emdash_core/ingestion/repository.py +346 -0
  145. emdash_core/models/__init__.py +38 -0
  146. emdash_core/models/agent.py +68 -0
  147. emdash_core/models/index.py +77 -0
  148. emdash_core/models/query.py +113 -0
  149. emdash_core/planning/__init__.py +7 -0
  150. emdash_core/planning/agent_api.py +413 -0
  151. emdash_core/planning/context_builder.py +265 -0
  152. emdash_core/planning/feature_context.py +232 -0
  153. emdash_core/planning/feature_expander.py +646 -0
  154. emdash_core/planning/llm_explainer.py +198 -0
  155. emdash_core/planning/similarity.py +509 -0
  156. emdash_core/planning/team_focus.py +821 -0
  157. emdash_core/server.py +153 -0
  158. emdash_core/sse/__init__.py +5 -0
  159. emdash_core/sse/stream.py +196 -0
  160. emdash_core/swarm/__init__.py +17 -0
  161. emdash_core/swarm/merge_agent.py +383 -0
  162. emdash_core/swarm/session_manager.py +274 -0
  163. emdash_core/swarm/swarm_runner.py +226 -0
  164. emdash_core/swarm/task_definition.py +137 -0
  165. emdash_core/swarm/worker_spawner.py +319 -0
  166. emdash_core/swarm/worktree_manager.py +278 -0
  167. emdash_core/templates/__init__.py +10 -0
  168. emdash_core/templates/defaults/agent-builder.md.template +82 -0
  169. emdash_core/templates/defaults/focus.md.template +115 -0
  170. emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
  171. emdash_core/templates/defaults/pr-review.md.template +80 -0
  172. emdash_core/templates/defaults/project.md.template +85 -0
  173. emdash_core/templates/defaults/research_critic.md.template +112 -0
  174. emdash_core/templates/defaults/research_planner.md.template +85 -0
  175. emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
  176. emdash_core/templates/defaults/reviewer.md.template +81 -0
  177. emdash_core/templates/defaults/spec.md.template +41 -0
  178. emdash_core/templates/defaults/tasks.md.template +78 -0
  179. emdash_core/templates/loader.py +296 -0
  180. emdash_core/utils/__init__.py +45 -0
  181. emdash_core/utils/git.py +84 -0
  182. emdash_core/utils/image.py +502 -0
  183. emdash_core/utils/logger.py +51 -0
  184. emdash_core-0.1.7.dist-info/METADATA +35 -0
  185. emdash_core-0.1.7.dist-info/RECORD +187 -0
  186. emdash_core-0.1.7.dist-info/WHEEL +4 -0
  187. emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,667 @@
1
+ """Batch write operations for Kuzu graph construction."""
2
+
3
+ from typing import List
4
+ from datetime import datetime
5
+
6
+ import kuzu
7
+
8
+ from ..core.models import (
9
+ FileEntity,
10
+ ClassEntity,
11
+ FunctionEntity,
12
+ ModuleEntity,
13
+ ImportStatement,
14
+ CommitEntity,
15
+ AuthorEntity,
16
+ FileModification,
17
+ PullRequestEntity,
18
+ TaskEntity,
19
+ )
20
+ from .connection import KuzuConnection
21
+ from ..utils.logger import log
22
+
23
+
24
class GraphWriter:
    """Handles batch writes to Kuzu database.

    Node and relationship writers convert entities to plain dictionaries via
    ``_entity_to_dict`` and submit them through ``connection.execute_write``.
    Writers are best-effort: a failing batch (or entity) is logged and
    skipped, and the closing log line reports how many items were actually
    submitted without error.
    """

    def __init__(self, connection: KuzuConnection, batch_size: int = 1000):
        """Initialize graph writer.

        Args:
            connection: Kuzu connection
            batch_size: Number of entities to write per batch
        """
        self.connection = connection
        self.batch_size = batch_size

    def _batch_iter(self, items: list):
        """Yield consecutive slices of ``items`` of at most ``batch_size``.

        Args:
            items: List of items to batch

        Yields:
            Batches of items

        Raises:
            ValueError: If ``self.batch_size`` is not a positive integer.
                Raised when iteration starts, because this is a generator.
        """
        # A negative step would make range() empty and silently drop every
        # item; zero already raises ValueError inside range(). Fail loudly
        # and uniformly for both.
        if self.batch_size < 1:
            raise ValueError(
                f"batch_size must be a positive integer, got {self.batch_size!r}"
            )
        for start in range(0, len(items), self.batch_size):
            yield items[start:start + self.batch_size]

    def write_files(self, files: List[FileEntity]):
        """Write file nodes to the graph (batched).

        Args:
            files: List of FileEntity objects
        """
        if not files:
            return
        log.info(f"Writing {len(files)} file nodes...")

        written = 0
        for batch in self._batch_iter(files):
            rows = []
            for file in batch:
                file_dict = self._entity_to_dict(file)
                # NOTE(review): 'last_modified' is collected here but the
                # query below never sets it - confirm whether the File node
                # is meant to carry that property.
                rows.append({
                    'path': str(file_dict['path']),
                    'name': str(file_dict['name']),
                    'extension': file_dict.get('extension'),
                    'size_bytes': int(file_dict.get('size_bytes') or 0),
                    'lines_of_code': int(file_dict.get('lines_of_code') or 0),
                    'hash': file_dict.get('hash'),
                    'last_modified': file_dict.get('last_modified'),
                })
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (f:File {path: row.path})
                    SET f.name = row.name,
                        f.extension = row.extension,
                        f.size_bytes = row.size_bytes,
                        f.lines_of_code = row.lines_of_code,
                        f.hash = row.hash
                """, {"rows": rows})
                written += len(rows)
            except Exception as e:
                log.warning(f"Failed to write file batch: {e}")

        log.info(f"Wrote {written} file nodes")

    def write_classes(self, classes: List[ClassEntity]):
        """Write class nodes and CONTAINS_CLASS relationships (batched).

        Args:
            classes: List of ClassEntity objects
        """
        if not classes:
            return
        log.info(f"Writing {len(classes)} class nodes...")

        written = 0
        for batch in self._batch_iter(classes):
            rows = []
            for cls in batch:
                cls_dict = self._entity_to_dict(cls)
                rows.append({
                    'qualified_name': str(cls_dict['qualified_name']),
                    'name': str(cls_dict['name']),
                    'file_path': str(cls_dict['file_path']),
                    'line_start': int(cls_dict['line_start']),
                    'line_end': int(cls_dict['line_end']),
                    'docstring': cls_dict.get('docstring'),
                    'is_abstract': bool(cls_dict.get('is_abstract', False)),
                    'decorators': list(cls_dict.get('decorators') or []),
                    'base_classes': list(cls_dict.get('base_classes') or []),
                    'attributes': list(cls_dict.get('attributes') or []),
                    'methods': list(cls_dict.get('methods') or []),
                })
            try:
                # Batch create class nodes
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (c:Class {qualified_name: row.qualified_name})
                    SET c.name = row.name,
                        c.file_path = row.file_path,
                        c.line_start = row.line_start,
                        c.line_end = row.line_end,
                        c.docstring = row.docstring,
                        c.is_abstract = row.is_abstract,
                        c.decorators = row.decorators,
                        c.base_classes = row.base_classes,
                        c.attributes = row.attributes,
                        c.methods = row.methods
                """, {"rows": rows})
                # Nodes are in once the first statement returns, even if the
                # relationship statement below fails.
                written += len(rows)

                # Batch create CONTAINS_CLASS relationships
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (f:File {path: row.file_path})
                    MATCH (c:Class {qualified_name: row.qualified_name})
                    MERGE (f)-[:CONTAINS_CLASS {line_start: row.line_start}]->(c)
                """, {"rows": rows})
            except Exception as e:
                log.warning(f"Failed to write class batch: {e}")

        log.info(f"Wrote {written} class nodes")

    def write_functions(self, functions: List[FunctionEntity]):
        """Write function nodes and relationships (batched).

        For every batch this writes the Function nodes, then the
        CONTAINS_FUNCTION edges from their files, then HAS_METHOD edges for
        functions flagged as methods whose qualified name contains a dot
        (the part before the last dot is used as the class qualified name).

        Args:
            functions: List of FunctionEntity objects
        """
        if not functions:
            return
        log.info(f"Writing {len(functions)} function nodes...")

        written = 0
        for batch in self._batch_iter(functions):
            rows = []
            method_rows = []
            for func in batch:
                func_dict = self._entity_to_dict(func)
                row = {
                    'qualified_name': str(func_dict['qualified_name']),
                    'name': str(func_dict['name']),
                    'file_path': str(func_dict['file_path']),
                    'line_start': int(func_dict['line_start']),
                    'line_end': int(func_dict['line_end']),
                    'docstring': func_dict.get('docstring'),
                    'parameters': list(func_dict.get('parameters') or []),
                    'return_annotation': func_dict.get('return_annotation'),
                    'is_async': bool(func_dict.get('is_async', False)),
                    'is_method': bool(func_dict.get('is_method', False)),
                    'is_static': bool(func_dict.get('is_static', False)),
                    'is_classmethod': bool(func_dict.get('is_classmethod', False)),
                    'decorators': list(func_dict.get('decorators') or []),
                    'cyclomatic_complexity': int(func_dict.get('cyclomatic_complexity') or 1),
                    'calls': list(func_dict.get('calls') or []),
                }
                rows.append(row)

                # Collect method relationships. Read from the normalised row
                # rather than the entity's attributes so that dict entities
                # (accepted by _entity_to_dict) work here as well.
                if row['is_method']:
                    owner, dot, _ = row['qualified_name'].rpartition('.')
                    if dot:
                        method_rows.append({
                            'class_name': owner,
                            'func_name': row['qualified_name'],
                        })

            try:
                # Batch create function nodes
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (f:Function {qualified_name: row.qualified_name})
                    SET f.name = row.name,
                        f.file_path = row.file_path,
                        f.line_start = row.line_start,
                        f.line_end = row.line_end,
                        f.docstring = row.docstring,
                        f.parameters = row.parameters,
                        f.return_annotation = row.return_annotation,
                        f.is_async = row.is_async,
                        f.is_method = row.is_method,
                        f.is_static = row.is_static,
                        f.is_classmethod = row.is_classmethod,
                        f.decorators = row.decorators,
                        f.cyclomatic_complexity = row.cyclomatic_complexity,
                        f.calls = row.calls
                """, {"rows": rows})
                written += len(rows)

                # Batch create CONTAINS_FUNCTION relationships
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (file:File {path: row.file_path})
                    MATCH (f:Function {qualified_name: row.qualified_name})
                    MERGE (file)-[:CONTAINS_FUNCTION {line_start: row.line_start}]->(f)
                """, {"rows": rows})

                # Batch create HAS_METHOD relationships
                if method_rows:
                    self.connection.execute_write("""
                        UNWIND $rows AS row
                        MATCH (c:Class {qualified_name: row.class_name})
                        MATCH (f:Function {qualified_name: row.func_name})
                        MERGE (c)-[:HAS_METHOD]->(f)
                    """, {"rows": method_rows})
            except Exception as e:
                log.warning(f"Failed to write function batch: {e}")

        log.info(f"Wrote {written} function nodes")

    def write_inheritance(self, classes: List[ClassEntity]):
        """Write inheritance relationships between classes (batched).

        A base name matches a Class node by either its qualified name or its
        short name, as expressed in the query's WHERE clause.

        Args:
            classes: List of ClassEntity objects
        """
        # Collect all inheritance pairs
        rows = [
            {'child_name': cls.qualified_name, 'base_name': base_name}
            for cls in classes
            if cls.base_classes
            for base_name in cls.base_classes
        ]

        if not rows:
            log.info("No inheritance relationships to write")
            return

        log.info(f"Writing {len(rows)} inheritance relationships...")

        written = 0
        for batch in self._batch_iter(rows):
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (child:Class {qualified_name: row.child_name})
                    MATCH (parent:Class)
                    WHERE parent.qualified_name = row.base_name OR parent.name = row.base_name
                    MERGE (child)-[:INHERITS_FROM]->(parent)
                """, {"rows": batch})
                written += len(batch)
            except Exception as e:
                log.warning(f"Failed to write inheritance batch: {e}")

        log.info(f"Wrote {written} inheritance relationships")

    def write_calls(self, functions: List[FunctionEntity]):
        """Write CALLS relationships between functions (batched).

        A called name matches a Function node by either its qualified name
        or its short name, as expressed in the query's WHERE clause.

        Args:
            functions: List of FunctionEntity objects
        """
        # Collect all call pairs
        rows = [
            {'caller_name': func.qualified_name, 'called_name': called_name}
            for func in functions
            if func.calls
            for called_name in func.calls
        ]

        if not rows:
            log.info("No call relationships to write")
            return

        log.info(f"Writing {len(rows)} call relationships...")

        written = 0
        for batch in self._batch_iter(rows):
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (caller:Function {qualified_name: row.caller_name})
                    MATCH (callee:Function)
                    WHERE callee.qualified_name = row.called_name OR callee.name = row.called_name
                    MERGE (caller)-[:CALLS]->(callee)
                """, {"rows": batch})
                written += len(batch)
            except Exception as e:
                log.warning(f"Failed to write calls batch: {e}")

        log.info(f"Wrote {written} call relationships")

    def write_modules(self, modules: List[ModuleEntity]):
        """Write module nodes (batched).

        Args:
            modules: List of ModuleEntity objects
        """
        if not modules:
            return
        log.info(f"Writing {len(modules)} module nodes...")

        written = 0
        for batch in self._batch_iter(modules):
            rows = []
            for mod in batch:
                mod_dict = self._entity_to_dict(mod)
                rows.append({
                    'name': str(mod_dict['name']),
                    'import_path': mod_dict.get('import_path'),
                    'is_external': bool(mod_dict.get('is_external', False)),
                    'package': mod_dict.get('package'),
                })
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (m:Module {name: row.name})
                    SET m.import_path = row.import_path,
                        m.is_external = row.is_external,
                        m.package = row.package
                """, {"rows": rows})
                written += len(rows)
            except Exception as e:
                log.warning(f"Failed to write module batch: {e}")

        log.info(f"Wrote {written} module nodes")

    def write_imports(self, imports: List[ImportStatement]):
        """Write IMPORTS relationships from files to modules (batched).

        Args:
            imports: List of ImportStatement objects
        """
        if not imports:
            return
        log.info(f"Writing {len(imports)} import relationships...")

        written = 0
        for batch in self._batch_iter(imports):
            rows = []
            for imp in batch:
                imp_dict = self._entity_to_dict(imp)
                rows.append({
                    'file_path': str(imp_dict['file_path']),
                    'module': str(imp_dict['module']),
                    'import_type': imp_dict.get('import_type', 'import'),
                    'line_number': int(imp_dict.get('line_number') or 0),
                    'alias': imp_dict.get('alias'),
                })
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (f:File {path: row.file_path})
                    MATCH (m:Module {name: row.module})
                    MERGE (f)-[:IMPORTS {
                        import_type: row.import_type,
                        line_number: row.line_number,
                        alias: row.alias
                    }]->(m)
                """, {"rows": rows})
                written += len(rows)
            except Exception as e:
                log.warning(f"Failed to write imports batch: {e}")

        log.info(f"Wrote {written} import relationships")

    def write_commits(self, commits: List[CommitEntity]):
        """Write commit nodes, one statement per commit.

        Args:
            commits: List of CommitEntity objects
        """
        if not commits:
            return
        log.info(f"Writing {len(commits)} commit nodes...")

        written = 0
        for commit in commits:
            commit_dict = self._entity_to_dict(commit)
            try:
                self.connection.execute_write("""
                    MERGE (c:Commit {sha: $sha})
                    SET c.message = $message,
                        c.timestamp = timestamp($timestamp),
                        c.author_name = $author_name,
                        c.author_email = $author_email,
                        c.committer_name = $committer_name,
                        c.committer_email = $committer_email,
                        c.insertions = $insertions,
                        c.deletions = $deletions,
                        c.files_changed = $files_changed,
                        c.is_merge = $is_merge,
                        c.parent_shas = $parent_shas
                """, commit_dict)
                written += 1
            except Exception as e:
                log.warning(f"Failed to write commit {commit.sha}: {e}")

        log.info(f"Wrote {written} commit nodes")

    def write_authors(self, authors: List[AuthorEntity]):
        """Write author nodes, one statement per author.

        Args:
            authors: List of AuthorEntity objects
        """
        if not authors:
            return
        log.info(f"Writing {len(authors)} author nodes...")

        written = 0
        for author in authors:
            author_dict = self._entity_to_dict(author)
            try:
                self.connection.execute_write("""
                    MERGE (a:Author {email: $email})
                    SET a.name = $name,
                        a.first_commit = timestamp($first_commit),
                        a.last_commit = timestamp($last_commit),
                        a.total_commits = $total_commits,
                        a.total_lines_added = $total_lines_added,
                        a.total_lines_deleted = $total_lines_deleted
                """, author_dict)
                written += 1
            except Exception as e:
                log.warning(f"Failed to write author {author.email}: {e}")

        log.info(f"Wrote {written} author nodes")

    def write_file_modifications(self, modifications: List[FileModification]):
        """Write COMMIT_MODIFIES relationships from commits to files.

        Args:
            modifications: List of FileModification objects
        """
        log.info(f"Writing {len(modifications)} file modifications...")

        # Counts statements that ran without raising.
        count = 0

        for mod in modifications:
            mod_dict = self._entity_to_dict(mod)
            try:
                self.connection.execute_write("""
                    MATCH (c:Commit {sha: $commit_sha})
                    MATCH (f:File {path: $file_path})
                    MERGE (c)-[:COMMIT_MODIFIES {
                        change_type: $change_type,
                        insertions: $insertions,
                        deletions: $deletions,
                        old_path: $old_path
                    }]->(f)
                """, mod_dict)
                count += 1
            except Exception as e:
                log.debug(f"Could not create modification: {mod.commit_sha} -> {mod.file_path}: {e}")

        log.info(f"Wrote {count} file modification relationships")

    def write_commit_authorship(self, commits: List[CommitEntity]):
        """Write AUTHORED_BY relationships from commits to authors.

        Args:
            commits: List of CommitEntity objects
        """
        log.info("Writing commit authorship relationships...")

        # Counts statements that ran without raising.
        count = 0

        for commit in commits:
            try:
                self.connection.execute_write("""
                    MATCH (c:Commit {sha: $sha})
                    MATCH (a:Author {email: $author_email})
                    MERGE (c)-[:AUTHORED_BY]->(a)
                """, {"sha": commit.sha, "author_email": commit.author_email})
                count += 1
            except Exception as e:
                log.debug(f"Could not create authorship: {commit.sha} -> {commit.author_email}: {e}")

        log.info(f"Wrote {count} authorship relationships")

    def write_pull_requests(self, prs: List[PullRequestEntity]):
        """Write pull request nodes, one PR at a time.

        The ``created_at`` and ``merged_at`` timestamps are written in
        separate statements, and only when they are set.

        Args:
            prs: List of PullRequestEntity objects
        """
        if not prs:
            return
        log.info(f"Writing {len(prs)} pull request nodes...")

        written = 0
        for pr in prs:
            pr_dict = self._entity_to_dict(pr)
            try:
                # Handle nullable timestamps
                created_at = pr_dict.get('created_at')
                merged_at = pr_dict.get('merged_at')

                self.connection.execute_write("""
                    MERGE (p:PullRequest {number: $number})
                    SET p.title = $title,
                        p.description = $description,
                        p.state = $state,
                        p.author = $author,
                        p.reviewers = $reviewers,
                        p.labels = $labels,
                        p.additions = $additions,
                        p.deletions = $deletions,
                        p.files_changed = $files_changed,
                        p.base_branch = $base_branch,
                        p.head_branch = $head_branch,
                        p.embedding = $embedding
                """, pr_dict)
                # The node exists from here on, even if a timestamp
                # statement below fails.
                written += 1

                # Set timestamps separately if not null
                if created_at:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        SET p.created_at = timestamp($created_at)
                    """, {"number": pr.number, "created_at": created_at})

                if merged_at:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        SET p.merged_at = timestamp($merged_at)
                    """, {"number": pr.number, "merged_at": merged_at})

            except Exception as e:
                log.warning(f"Failed to write PR {pr.number}: {e}")

        log.info(f"Wrote {written} pull request nodes")

    def write_pr_commit_links(self, prs: List[PullRequestEntity]):
        """Write PR_CONTAINS relationships from PRs to their commits.

        Args:
            prs: List of PullRequestEntity objects with commit_shas
        """
        log.info("Writing PR-Commit relationships...")

        # Counts statements that ran without raising.
        count = 0

        for pr in prs:
            if not pr.commit_shas:
                continue

            for sha in pr.commit_shas:
                try:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        MATCH (c:Commit {sha: $sha})
                        MERGE (p)-[:PR_CONTAINS]->(c)
                    """, {"number": pr.number, "sha": sha})
                    count += 1
                except Exception as e:
                    log.debug(f"Could not link PR {pr.number} to commit {sha}: {e}")

        log.info(f"Wrote {count} PR-Commit relationships")

    def write_pr_file_links(self, prs: List[PullRequestEntity]):
        """Write PR_MODIFIES relationships from PRs to modified files.

        Files are matched by path suffix (``ENDS WITH``), so one changed
        path can link to every File node whose path ends with it.

        Args:
            prs: List of PullRequestEntity objects with files_changed
        """
        log.info("Writing PR-File relationships...")

        # Counts statements that ran without raising.
        count = 0

        for pr in prs:
            if not pr.files_changed:
                continue

            # files_changed could be a count or a list of paths
            if isinstance(pr.files_changed, (int, float)):
                continue

            for file_path in pr.files_changed:
                try:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        MATCH (f:File)
                        WHERE f.path ENDS WITH $file_path
                        MERGE (p)-[:PR_MODIFIES]->(f)
                    """, {"number": pr.number, "file_path": file_path})
                    count += 1
                except Exception as e:
                    log.debug(f"Could not link PR {pr.number} to file {file_path}: {e}")

        log.info(f"Wrote {count} PR-File relationships")

    def write_tasks(self, tasks: List[TaskEntity]):
        """Write task nodes and link to PRs.

        Args:
            tasks: List of TaskEntity objects
        """
        if not tasks:
            return
        log.info(f"Writing {len(tasks)} task nodes...")

        written = 0
        for task in tasks:
            # _entity_to_dict returns a fresh dict, so the rename below
            # never touches the caller's entity.
            task_dict = self._entity_to_dict(task)
            # Rename 'order' to 'task_order' to match schema
            if 'order' in task_dict:
                task_dict['task_order'] = task_dict.pop('order')

            try:
                self.connection.execute_write("""
                    MERGE (t:Task {id: $id})
                    SET t.pr_number = $pr_number,
                        t.description = $description,
                        t.is_completed = $is_completed,
                        t.task_order = $task_order
                """, task_dict)
                written += 1

                # Link to PR
                self.connection.execute_write("""
                    MATCH (pr:PullRequest {number: $pr_number})
                    MATCH (t:Task {id: $id})
                    MERGE (pr)-[:HAS_TASK]->(t)
                """, task_dict)
            except Exception as e:
                log.warning(f"Failed to write task {task.id}: {e}")

        log.info(f"Wrote {written} task nodes")

    def _entity_to_dict(self, entity) -> dict:
        """Convert an entity to a new dictionary for Kuzu.

        Dataclass instances are converted field by field: values exposing
        ``isoformat`` (for example ``datetime``) become ISO strings, and a
        ``None`` value on a field whose type hint mentions ``list`` becomes
        an empty list. Dicts and other objects are shallow-copied, so
        callers may mutate the result without affecting ``entity``.

        Args:
            entity: Entity object (dataclass), dict, or any object with a
                ``__dict__``

        Returns:
            Dictionary representation (always a fresh dict)
        """
        if hasattr(entity, '__dataclass_fields__'):
            # It's a dataclass
            result = {}
            for field_name, field in entity.__dataclass_fields__.items():
                type_hint = str(field.type).lower()
                try:
                    value = getattr(entity, field_name)
                except AttributeError as e:
                    log.warning(f"DEBUG: Missing attribute {field_name} on entity {type(entity)}: {e}")
                    # Set default based on type hint
                    if 'list' in type_hint:
                        result[field_name] = []
                    elif 'bool' in type_hint:
                        result[field_name] = False
                    elif 'int' in type_hint:
                        result[field_name] = 0
                    else:
                        result[field_name] = None
                    continue

                # Convert datetime-like values to ISO format strings for
                # Kuzu timestamp()
                if hasattr(value, 'isoformat'):
                    value = value.isoformat()

                # Convert None lists to empty lists for Kuzu arrays
                if value is None and 'list' in type_hint:
                    value = []

                result[field_name] = value

            return result

        if isinstance(entity, dict):
            # Already a dict - copy so callers cannot mutate the original
            return dict(entity)

        # Fallback to a copy of __dict__; returning the live __dict__ would
        # let a caller's pop() delete attributes from the entity itself.
        log.warning(f"DEBUG: Entity is not a dataclass: type={type(entity)}, hasattr __dict__={hasattr(entity, '__dict__')}")
        return dict(entity.__dict__)
@@ -0,0 +1,7 @@
1
+ """Ingestion module for EmDash."""
2
+
3
+ from .orchestrator import IngestionOrchestrator
4
+ from .repository import RepositoryManager
5
+ from .change_detector import ChangeDetector, ChangedFiles
6
+
7
+ __all__ = ["IngestionOrchestrator", "RepositoryManager", "ChangeDetector", "ChangedFiles"]