emdash_core-0.1.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
emdash_core/graph/writer.py
@@ -0,0 +1,667 @@
"""Batch write operations for Kuzu graph construction."""

from typing import List
from datetime import datetime

import kuzu

from ..core.models import (
    FileEntity,
    ClassEntity,
    FunctionEntity,
    ModuleEntity,
    ImportStatement,
    CommitEntity,
    AuthorEntity,
    FileModification,
    PullRequestEntity,
    TaskEntity,
)
from .connection import KuzuConnection
from ..utils.logger import log


class GraphWriter:
    """Handles batch writes to Kuzu database."""

    def __init__(self, connection: KuzuConnection, batch_size: int = 1000):
        """Initialize graph writer.

        Args:
            connection: Kuzu connection
            batch_size: Number of entities to write per batch
        """
        self.connection = connection
        self.batch_size = batch_size

    def _batch_iter(self, items: list):
        """Yield batches of items.

        Args:
            items: List of items to batch

        Yields:
            Batches of items
        """
        for i in range(0, len(items), self.batch_size):
            yield items[i:i + self.batch_size]

    def write_files(self, files: List[FileEntity]):
        """Write file nodes to the graph (batched).

        Args:
            files: List of FileEntity objects
        """
        if not files:
            return
        log.info(f"Writing {len(files)} file nodes...")

        for batch in self._batch_iter(files):
            rows = []
            for file in batch:
                file_dict = self._entity_to_dict(file)
                rows.append({
                    'path': str(file_dict['path']),
                    'name': str(file_dict['name']),
                    'extension': file_dict.get('extension'),
                    'size_bytes': int(file_dict.get('size_bytes') or 0),
                    'lines_of_code': int(file_dict.get('lines_of_code') or 0),
                    'hash': file_dict.get('hash'),
                    'last_modified': file_dict.get('last_modified'),
                })
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (f:File {path: row.path})
                    SET f.name = row.name,
                        f.extension = row.extension,
                        f.size_bytes = row.size_bytes,
                        f.lines_of_code = row.lines_of_code,
                        f.hash = row.hash
                """, {"rows": rows})
            except Exception as e:
                log.warning(f"Failed to write file batch: {e}")

        log.info(f"Wrote {len(files)} file nodes")

    def write_classes(self, classes: List[ClassEntity]):
        """Write class nodes and CONTAINS relationships (batched).

        Args:
            classes: List of ClassEntity objects
        """
        if not classes:
            return
        log.info(f"Writing {len(classes)} class nodes...")

        for batch in self._batch_iter(classes):
            rows = []
            for cls in batch:
                cls_dict = self._entity_to_dict(cls)
                rows.append({
                    'qualified_name': str(cls_dict['qualified_name']),
                    'name': str(cls_dict['name']),
                    'file_path': str(cls_dict['file_path']),
                    'line_start': int(cls_dict['line_start']),
                    'line_end': int(cls_dict['line_end']),
                    'docstring': cls_dict.get('docstring'),
                    'is_abstract': bool(cls_dict.get('is_abstract', False)),
                    'decorators': list(cls_dict.get('decorators') or []),
                    'base_classes': list(cls_dict.get('base_classes') or []),
                    'attributes': list(cls_dict.get('attributes') or []),
                    'methods': list(cls_dict.get('methods') or []),
                })
            try:
                # Batch create class nodes
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (c:Class {qualified_name: row.qualified_name})
                    SET c.name = row.name,
                        c.file_path = row.file_path,
                        c.line_start = row.line_start,
                        c.line_end = row.line_end,
                        c.docstring = row.docstring,
                        c.is_abstract = row.is_abstract,
                        c.decorators = row.decorators,
                        c.base_classes = row.base_classes,
                        c.attributes = row.attributes,
                        c.methods = row.methods
                """, {"rows": rows})

                # Batch create CONTAINS_CLASS relationships
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (f:File {path: row.file_path})
                    MATCH (c:Class {qualified_name: row.qualified_name})
                    MERGE (f)-[:CONTAINS_CLASS {line_start: row.line_start}]->(c)
                """, {"rows": rows})
            except Exception as e:
                log.warning(f"Failed to write class batch: {e}")

        log.info(f"Wrote {len(classes)} class nodes")

    def write_functions(self, functions: List[FunctionEntity]):
        """Write function nodes and relationships (batched).

        Args:
            functions: List of FunctionEntity objects
        """
        if not functions:
            return
        log.info(f"Writing {len(functions)} function nodes...")

        for batch in self._batch_iter(functions):
            rows = []
            method_rows = []
            for func in batch:
                func_dict = self._entity_to_dict(func)
                row = {
                    'qualified_name': str(func_dict['qualified_name']),
                    'name': str(func_dict['name']),
                    'file_path': str(func_dict['file_path']),
                    'line_start': int(func_dict['line_start']),
                    'line_end': int(func_dict['line_end']),
                    'docstring': func_dict.get('docstring'),
                    'parameters': list(func_dict.get('parameters') or []),
                    'return_annotation': func_dict.get('return_annotation'),
                    'is_async': bool(func_dict.get('is_async', False)),
                    'is_method': bool(func_dict.get('is_method', False)),
                    'is_static': bool(func_dict.get('is_static', False)),
                    'is_classmethod': bool(func_dict.get('is_classmethod', False)),
                    'decorators': list(func_dict.get('decorators') or []),
                    'cyclomatic_complexity': int(func_dict.get('cyclomatic_complexity') or 1),
                    'calls': list(func_dict.get('calls') or []),
                }
                rows.append(row)

                # Collect method relationships
                if func.is_method:
                    parts = func.qualified_name.rsplit('.', 1)
                    if len(parts) > 1:
                        method_rows.append({
                            'class_name': parts[0],
                            'func_name': func.qualified_name,
                        })

            try:
                # Batch create function nodes
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (f:Function {qualified_name: row.qualified_name})
                    SET f.name = row.name,
                        f.file_path = row.file_path,
                        f.line_start = row.line_start,
                        f.line_end = row.line_end,
                        f.docstring = row.docstring,
                        f.parameters = row.parameters,
                        f.return_annotation = row.return_annotation,
                        f.is_async = row.is_async,
                        f.is_method = row.is_method,
                        f.is_static = row.is_static,
                        f.is_classmethod = row.is_classmethod,
                        f.decorators = row.decorators,
                        f.cyclomatic_complexity = row.cyclomatic_complexity,
                        f.calls = row.calls
                """, {"rows": rows})

                # Batch create CONTAINS_FUNCTION relationships
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (file:File {path: row.file_path})
                    MATCH (f:Function {qualified_name: row.qualified_name})
                    MERGE (file)-[:CONTAINS_FUNCTION {line_start: row.line_start}]->(f)
                """, {"rows": rows})

                # Batch create HAS_METHOD relationships
                if method_rows:
                    self.connection.execute_write("""
                        UNWIND $rows AS row
                        MATCH (c:Class {qualified_name: row.class_name})
                        MATCH (f:Function {qualified_name: row.func_name})
                        MERGE (c)-[:HAS_METHOD]->(f)
                    """, {"rows": method_rows})
            except Exception as e:
                log.warning(f"Failed to write function batch: {e}")

        log.info(f"Wrote {len(functions)} function nodes")

    def write_inheritance(self, classes: List[ClassEntity]):
        """Write inheritance relationships between classes (batched).

        Args:
            classes: List of ClassEntity objects
        """
        # Collect all inheritance pairs
        rows = []
        for cls in classes:
            if not cls.base_classes:
                continue
            for base_name in cls.base_classes:
                rows.append({
                    'child_name': cls.qualified_name,
                    'base_name': base_name,
                })

        if not rows:
            log.info("No inheritance relationships to write")
            return

        log.info(f"Writing {len(rows)} inheritance relationships...")

        for batch in self._batch_iter(rows):
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (child:Class {qualified_name: row.child_name})
                    MATCH (parent:Class)
                    WHERE parent.qualified_name = row.base_name OR parent.name = row.base_name
                    MERGE (child)-[:INHERITS_FROM]->(parent)
                """, {"rows": batch})
            except Exception as e:
                log.warning(f"Failed to write inheritance batch: {e}")

        log.info(f"Wrote {len(rows)} inheritance relationships")

    def write_calls(self, functions: List[FunctionEntity]):
        """Write CALLS relationships between functions (batched).

        Args:
            functions: List of FunctionEntity objects
        """
        # Collect all call pairs
        rows = []
        for func in functions:
            if not func.calls:
                continue
            for called_name in func.calls:
                rows.append({
                    'caller_name': func.qualified_name,
                    'called_name': called_name,
                })

        if not rows:
            log.info("No call relationships to write")
            return

        log.info(f"Writing {len(rows)} call relationships...")

        for batch in self._batch_iter(rows):
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (caller:Function {qualified_name: row.caller_name})
                    MATCH (callee:Function)
                    WHERE callee.qualified_name = row.called_name OR callee.name = row.called_name
                    MERGE (caller)-[:CALLS]->(callee)
                """, {"rows": batch})
            except Exception as e:
                log.warning(f"Failed to write calls batch: {e}")

        log.info(f"Wrote {len(rows)} call relationships")

    def write_modules(self, modules: List[ModuleEntity]):
        """Write module nodes (batched).

        Args:
            modules: List of ModuleEntity objects
        """
        if not modules:
            return
        log.info(f"Writing {len(modules)} module nodes...")

        for batch in self._batch_iter(modules):
            rows = []
            for mod in batch:
                mod_dict = self._entity_to_dict(mod)
                rows.append({
                    'name': str(mod_dict['name']),
                    'import_path': mod_dict.get('import_path'),
                    'is_external': bool(mod_dict.get('is_external', False)),
                    'package': mod_dict.get('package'),
                })
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MERGE (m:Module {name: row.name})
                    SET m.import_path = row.import_path,
                        m.is_external = row.is_external,
                        m.package = row.package
                """, {"rows": rows})
            except Exception as e:
                log.warning(f"Failed to write module batch: {e}")

        log.info(f"Wrote {len(modules)} module nodes")

    def write_imports(self, imports: List[ImportStatement]):
        """Write IMPORTS relationships from files to modules (batched).

        Args:
            imports: List of ImportStatement objects
        """
        if not imports:
            return
        log.info(f"Writing {len(imports)} import relationships...")

        for batch in self._batch_iter(imports):
            rows = []
            for imp in batch:
                imp_dict = self._entity_to_dict(imp)
                rows.append({
                    'file_path': str(imp_dict['file_path']),
                    'module': str(imp_dict['module']),
                    'import_type': imp_dict.get('import_type', 'import'),
                    'line_number': int(imp_dict.get('line_number') or 0),
                    'alias': imp_dict.get('alias'),
                })
            try:
                self.connection.execute_write("""
                    UNWIND $rows AS row
                    MATCH (f:File {path: row.file_path})
                    MATCH (m:Module {name: row.module})
                    MERGE (f)-[:IMPORTS {
                        import_type: row.import_type,
                        line_number: row.line_number,
                        alias: row.alias
                    }]->(m)
                """, {"rows": rows})
            except Exception as e:
                log.warning(f"Failed to write imports batch: {e}")

        log.info(f"Wrote {len(imports)} import relationships")

    def write_commits(self, commits: List[CommitEntity]):
        """Write commit nodes.

        Args:
            commits: List of CommitEntity objects
        """
        log.info(f"Writing {len(commits)} commit nodes...")

        for commit in commits:
            commit_dict = self._entity_to_dict(commit)
            try:
                self.connection.execute_write("""
                    MERGE (c:Commit {sha: $sha})
                    SET c.message = $message,
                        c.timestamp = timestamp($timestamp),
                        c.author_name = $author_name,
                        c.author_email = $author_email,
                        c.committer_name = $committer_name,
                        c.committer_email = $committer_email,
                        c.insertions = $insertions,
                        c.deletions = $deletions,
                        c.files_changed = $files_changed,
                        c.is_merge = $is_merge,
                        c.parent_shas = $parent_shas
                """, commit_dict)
            except Exception as e:
                log.warning(f"Failed to write commit {commit.sha}: {e}")

        log.info(f"Wrote {len(commits)} commit nodes")

    def write_authors(self, authors: List[AuthorEntity]):
        """Write author nodes.

        Args:
            authors: List of AuthorEntity objects
        """
        log.info(f"Writing {len(authors)} author nodes...")

        for author in authors:
            author_dict = self._entity_to_dict(author)
            try:
                self.connection.execute_write("""
                    MERGE (a:Author {email: $email})
                    SET a.name = $name,
                        a.first_commit = timestamp($first_commit),
                        a.last_commit = timestamp($last_commit),
                        a.total_commits = $total_commits,
                        a.total_lines_added = $total_lines_added,
                        a.total_lines_deleted = $total_lines_deleted
                """, author_dict)
            except Exception as e:
                log.warning(f"Failed to write author {author.email}: {e}")

        log.info(f"Wrote {len(authors)} author nodes")

    def write_file_modifications(self, modifications: List[FileModification]):
        """Write file modification relationships.

        Args:
            modifications: List of FileModification objects
        """
        log.info(f"Writing {len(modifications)} file modifications...")

        count = 0

        for mod in modifications:
            mod_dict = self._entity_to_dict(mod)
            try:
                self.connection.execute_write("""
                    MATCH (c:Commit {sha: $commit_sha})
                    MATCH (f:File {path: $file_path})
                    MERGE (c)-[:COMMIT_MODIFIES {
                        change_type: $change_type,
                        insertions: $insertions,
                        deletions: $deletions,
                        old_path: $old_path
                    }]->(f)
                """, mod_dict)
                count += 1
            except Exception as e:
                log.debug(f"Could not create modification: {mod.commit_sha} -> {mod.file_path}: {e}")

        log.info(f"Wrote {count} file modification relationships")

    def write_commit_authorship(self, commits: List[CommitEntity]):
        """Write AUTHORED_BY relationships from commits to authors.

        Args:
            commits: List of CommitEntity objects
        """
        log.info("Writing commit authorship relationships...")

        count = 0

        for commit in commits:
            try:
                self.connection.execute_write("""
                    MATCH (c:Commit {sha: $sha})
                    MATCH (a:Author {email: $author_email})
                    MERGE (c)-[:AUTHORED_BY]->(a)
                """, {"sha": commit.sha, "author_email": commit.author_email})
                count += 1
            except Exception as e:
                log.debug(f"Could not create authorship: {commit.sha} -> {commit.author_email}: {e}")

        log.info(f"Wrote {count} authorship relationships")

    def write_pull_requests(self, prs: List[PullRequestEntity]):
        """Write pull request nodes.

        Args:
            prs: List of PullRequestEntity objects
        """
        log.info(f"Writing {len(prs)} pull request nodes...")

        for pr in prs:
            pr_dict = self._entity_to_dict(pr)
            try:
                # Handle nullable timestamps
                created_at = pr_dict.get('created_at')
                merged_at = pr_dict.get('merged_at')

                self.connection.execute_write("""
                    MERGE (p:PullRequest {number: $number})
                    SET p.title = $title,
                        p.description = $description,
                        p.state = $state,
                        p.author = $author,
                        p.reviewers = $reviewers,
                        p.labels = $labels,
                        p.additions = $additions,
                        p.deletions = $deletions,
                        p.files_changed = $files_changed,
                        p.base_branch = $base_branch,
                        p.head_branch = $head_branch,
                        p.embedding = $embedding
                """, pr_dict)

                # Set timestamps separately if not null
                if created_at:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        SET p.created_at = timestamp($created_at)
                    """, {"number": pr.number, "created_at": created_at})

                if merged_at:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        SET p.merged_at = timestamp($merged_at)
                    """, {"number": pr.number, "merged_at": merged_at})

            except Exception as e:
                log.warning(f"Failed to write PR {pr.number}: {e}")

        log.info(f"Wrote {len(prs)} pull request nodes")

    def write_pr_commit_links(self, prs: List[PullRequestEntity]):
        """Write relationships from PRs to their commits.

        Args:
            prs: List of PullRequestEntity objects with commit_shas
        """
        log.info("Writing PR-Commit relationships...")

        count = 0

        for pr in prs:
            if not pr.commit_shas:
                continue

            for sha in pr.commit_shas:
                try:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        MATCH (c:Commit {sha: $sha})
                        MERGE (p)-[:PR_CONTAINS]->(c)
                    """, {"number": pr.number, "sha": sha})
                    count += 1
                except Exception as e:
                    log.debug(f"Could not link PR {pr.number} to commit {sha}: {e}")

        log.info(f"Wrote {count} PR-Commit relationships")

    def write_pr_file_links(self, prs: List[PullRequestEntity]):
        """Write relationships from PRs to modified files.

        Args:
            prs: List of PullRequestEntity objects with files_changed
        """
        log.info("Writing PR-File relationships...")

        count = 0

        for pr in prs:
            if not pr.files_changed:
                continue

            # files_changed could be a count or a list of paths
            if isinstance(pr.files_changed, (int, float)):
                continue

            for file_path in pr.files_changed:
                try:
                    self.connection.execute_write("""
                        MATCH (p:PullRequest {number: $number})
                        MATCH (f:File)
                        WHERE f.path ENDS WITH $file_path
                        MERGE (p)-[:PR_MODIFIES]->(f)
                    """, {"number": pr.number, "file_path": file_path})
                    count += 1
                except Exception as e:
                    log.debug(f"Could not link PR {pr.number} to file {file_path}: {e}")

        log.info(f"Wrote {count} PR-File relationships")

    def write_tasks(self, tasks: List[TaskEntity]):
        """Write task nodes and link to PRs.

        Args:
            tasks: List of TaskEntity objects
        """
        log.info(f"Writing {len(tasks)} task nodes...")

        for task in tasks:
            task_dict = self._entity_to_dict(task)
            # Rename 'order' to 'task_order' to match schema
            if 'order' in task_dict:
                task_dict['task_order'] = task_dict.pop('order')

            try:
                self.connection.execute_write("""
                    MERGE (t:Task {id: $id})
                    SET t.pr_number = $pr_number,
                        t.description = $description,
                        t.is_completed = $is_completed,
                        t.task_order = $task_order
                """, task_dict)

                # Link to PR
                self.connection.execute_write("""
                    MATCH (pr:PullRequest {number: $pr_number})
                    MATCH (t:Task {id: $id})
                    MERGE (pr)-[:HAS_TASK]->(t)
                """, task_dict)
            except Exception as e:
                log.warning(f"Failed to write task {task.id}: {e}")

        log.info(f"Wrote {len(tasks)} task nodes")

    def _entity_to_dict(self, entity) -> dict:
        """Convert an entity to a dictionary for Kuzu.

        Args:
            entity: Entity object (dataclass)

        Returns:
            Dictionary representation
        """
        if hasattr(entity, '__dataclass_fields__'):
            # It's a dataclass
            result = {}
            for field_name, field in entity.__dataclass_fields__.items():
                try:
                    value = getattr(entity, field_name)

                    # Convert datetime to ISO format string for Kuzu timestamp()
                    if isinstance(value, datetime):
                        value = value.isoformat()
                    elif hasattr(value, 'isoformat'):
                        value = value.isoformat()

                    # Convert None lists to empty lists for Kuzu arrays
                    if value is None and 'list' in str(field.type).lower():
                        value = []

                    result[field_name] = value
                except AttributeError as e:
                    log.warning(f"DEBUG: Missing attribute {field_name} on entity {type(entity)}: {e}")
                    # Set default based on type hint
                    if 'list' in str(field.type).lower():
                        result[field_name] = []
                    elif 'bool' in str(field.type).lower():
                        result[field_name] = False
                    elif 'int' in str(field.type).lower():
                        result[field_name] = 0
                    else:
                        result[field_name] = None

            return result
        elif isinstance(entity, dict):
            # Already a dict - return it directly
            return entity
        else:
            # Fallback to __dict__
            log.warning(f"DEBUG: Entity is not a dataclass: type={type(entity)}, hasattr __dict__={hasattr(entity, '__dict__')}")
            return entity.__dict__
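Editor's note: a minimal usage sketch of the writer above, not part of the package. It assumes KuzuConnection accepts a database path and that FileEntity is a dataclass whose fields match the keys built in write_files(); both are assumptions inferred from this diff, not confirmed signatures.

    # Usage sketch (illustrative only; constructor arguments are assumptions)
    from emdash_core.core.models import FileEntity
    from emdash_core.graph.connection import KuzuConnection
    from emdash_core.graph.writer import GraphWriter

    connection = KuzuConnection("./.emdash/graph.kuzu")  # assumed constructor signature
    writer = GraphWriter(connection, batch_size=500)

    files = [
        FileEntity(  # field names taken from the rows built in write_files()
            path="emdash_core/server.py",
            name="server.py",
            extension=".py",
            size_bytes=4096,
            lines_of_code=153,
            hash="deadbeef",
            last_modified=None,
        ),
    ]
    writer.write_files(files)  # issues one UNWIND ... MERGE statement per 500-row batch

The batching pattern matters here: each UNWIND statement writes a whole batch of rows in a single parsed query, instead of one round-trip per entity, and each batch's failure is isolated by the try/except so one bad row set does not abort the whole ingestion.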
emdash_core/ingestion/__init__.py
@@ -0,0 +1,7 @@
"""Ingestion module for EmDash."""

from .orchestrator import IngestionOrchestrator
from .repository import RepositoryManager
from .change_detector import ChangeDetector, ChangedFiles

__all__ = ["IngestionOrchestrator", "RepositoryManager", "ChangeDetector", "ChangedFiles"]
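Editor's note: these re-exports define the subpackage's public surface, so consumers can import from emdash_core.ingestion directly rather than from the individual submodules, e.g.:

    # Usage sketch: imports exactly as exposed by __all__ above
    from emdash_core.ingestion import IngestionOrchestrator, ChangeDetector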