emdash-core 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""Build comprehensive planning context for feature implementation."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from ..graph.connection import KuzuConnection, get_connection
|
|
7
|
+
from .similarity import SimilaritySearch
|
|
8
|
+
from ..analytics.engine import AnalyticsEngine
|
|
9
|
+
from ..utils.logger import log
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class PlanningContext:
    """Complete context for feature planning."""

    query: str
    similar_prs: list[dict] = field(default_factory=list)
    affected_communities: list[dict] = field(default_factory=list)
    key_entry_points: list[dict] = field(default_factory=list)
    domain_experts: list[dict] = field(default_factory=list)
    related_files: list[str] = field(default_factory=list)
    similar_code: list[dict] = field(default_factory=list)
    suggested_tasks: list[dict] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Convert to dictionary for serialization."""
        # Built field-by-field; every attribute is passed through as-is
        # (no copying), so callers receive the same underlying lists.
        payload: dict = {"query": self.query}
        payload["similar_prs"] = self.similar_prs
        payload["affected_communities"] = self.affected_communities
        payload["key_entry_points"] = self.key_entry_points
        payload["domain_experts"] = self.domain_experts
        payload["related_files"] = self.related_files
        payload["similar_code"] = self.similar_code
        payload["suggested_tasks"] = self.suggested_tasks
        return payload
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ContextBuilder:
    """Builds comprehensive planning context for features."""

    def __init__(
        self,
        connection: Optional[KuzuConnection] = None,
        similarity_search: Optional[SimilaritySearch] = None,
    ):
        """Initialize context builder.

        Args:
            connection: Kuzu connection. If None, uses global connection.
            similarity_search: Similarity search service. If None, one is
                created on top of the connection.
        """
        self.connection = connection or get_connection()
        self.similarity = similarity_search or SimilaritySearch(self.connection)

    def build_context(
        self,
        feature_description: str,
        similar_pr_limit: int = 5,
        code_limit: int = 10,
        expert_limit: int = 5,
    ) -> PlanningContext:
        """Build complete planning context for a feature.

        Args:
            feature_description: Description of the feature to implement
            similar_pr_limit: Max number of similar PRs to include
            code_limit: Max number of similar code entities to include
            expert_limit: Max number of domain experts to include

        Returns:
            PlanningContext with all relevant information
        """
        log.info(f"Building planning context for: {feature_description}")

        context = PlanningContext(query=feature_description)

        # 1. Find similar PRs
        context.similar_prs = self.similarity.find_similar_prs(
            feature_description,
            limit=similar_pr_limit,
        )
        log.info(f"Found {len(context.similar_prs)} similar PRs")

        # 2. Find similar code
        context.similar_code = self.similarity.find_similar_code(
            feature_description,
            limit=code_limit,
        )
        log.info(f"Found {len(context.similar_code)} similar code entities")

        # 3. Collect related files from similar PRs and code (set dedupes)
        related_files = set()
        for pr in context.similar_prs:
            if pr.get("files_changed"):
                related_files.update(pr["files_changed"])
        for code in context.similar_code:
            if code.get("file_path"):
                related_files.add(code["file_path"])
        context.related_files = list(related_files)

        # 4. Find affected communities
        if context.related_files:
            context.affected_communities = self._find_affected_communities(
                context.related_files
            )

        # 5. Find key entry points
        context.key_entry_points = self._find_entry_points(context.similar_prs)

        # 6. Find domain experts
        if context.related_files:
            context.domain_experts = self._find_domain_experts(
                context.related_files,
                limit=expert_limit,
            )

        # 7. Extract suggested tasks from similar PRs
        context.suggested_tasks = self._extract_tasks_from_prs(context.similar_prs)

        return context

    def _find_affected_communities(self, file_paths: list[str]) -> list[dict]:
        """Find communities that contain the given files.

        CONTAINS_CLASS and CONTAINS_FUNCTION are queried separately because
        Kuzu does not support the `REL_A|REL_B` alternation syntax in
        relationship patterns (same limitation noted in _find_entry_points);
        member aggregation happens in Python instead of in Cypher.

        Args:
            file_paths: List of file paths

        Returns:
            List of community summaries (at most 10, largest first), each
            with community_id, member_count and up to 5 sample_members.
        """
        if not file_paths:
            return []

        query_template = """
                UNWIND $file_paths as fp
                MATCH (f:File)-[:{rel_type}]->(entity)
                WHERE f.path ENDS WITH fp
                AND entity.community IS NOT NULL
                RETURN entity.community as community_id, entity.name as member
            """

        members_by_community: dict = {}
        with self.connection.session() as session:
            for rel_type in ("CONTAINS_CLASS", "CONTAINS_FUNCTION"):
                result = session.run(
                    query_template.format(rel_type=rel_type),
                    file_paths=file_paths,
                )
                for record in result:
                    row = dict(record)
                    # Skip null names so the sample list stays sortable.
                    if row["member"] is not None:
                        members_by_community.setdefault(
                            row["community_id"], set()
                        ).add(row["member"])

        communities = [
            {
                "community_id": community_id,
                "member_count": len(members),
                "sample_members": sorted(members)[:5],
            }
            for community_id, members in members_by_community.items()
        ]
        communities.sort(key=lambda c: c["member_count"], reverse=True)
        return communities[:10]

    def _find_entry_points(self, similar_prs: list[dict]) -> list[dict]:
        """Find key functions that were modified in similar PRs.

        Args:
            similar_prs: List of similar PRs

        Returns:
            Top 10 entry points (functions/classes) ranked by PageRank.
        """
        # Collect all files from similar PRs
        all_files = set()
        for pr in similar_prs:
            if pr.get("files_changed"):
                all_files.update(pr["files_changed"])

        if not all_files:
            return []

        with self.connection.session() as session:
            # Query functions and classes separately (Kuzu doesn't support | in rel types)
            functions_result = session.run("""
                UNWIND $file_paths as fp
                MATCH (f:File)-[:CONTAINS_FUNCTION]->(entity:Function)
                WHERE f.path ENDS WITH fp
                AND entity.pagerank IS NOT NULL
                RETURN 'Function' as type,
                       entity.name as name,
                       entity.qualified_name as qualified_name,
                       entity.pagerank as pagerank,
                       f.path as file_path
            """, file_paths=list(all_files))

            classes_result = session.run("""
                UNWIND $file_paths as fp
                MATCH (f:File)-[:CONTAINS_CLASS]->(entity:Class)
                WHERE f.path ENDS WITH fp
                AND entity.pagerank IS NOT NULL
                RETURN 'Class' as type,
                       entity.name as name,
                       entity.qualified_name as qualified_name,
                       entity.pagerank as pagerank,
                       f.path as file_path
            """, file_paths=list(all_files))

            # Combine and sort by pagerank (treat null pagerank as 0)
            all_results = [dict(r) for r in functions_result] + [dict(r) for r in classes_result]
            all_results.sort(key=lambda x: x.get("pagerank", 0) or 0, reverse=True)
            return all_results[:10]

    def _find_domain_experts(
        self,
        file_paths: list[str],
        limit: int = 5,
    ) -> list[dict]:
        """Find authors with most commits to the given files.

        Args:
            file_paths: List of file paths
            limit: Maximum number of experts to return

        Returns:
            List of domain experts with commit counts, most active first.
        """
        if not file_paths:
            return []

        with self.connection.session() as session:
            result = session.run("""
                UNWIND $file_paths as fp
                MATCH (f:File)<-[:COMMIT_MODIFIES]-(c:GitCommit)-[:AUTHORED_BY]->(a:Author)
                WHERE f.path ENDS WITH fp
                WITH a.name as author_name,
                     a.email as author_email,
                     count(DISTINCT c) as commit_count,
                     collect(DISTINCT f.path) as files_touched
                RETURN author_name,
                       author_email,
                       commit_count,
                       size(files_touched) as files_count,
                       files_touched[0:5] as sample_files
                ORDER BY commit_count DESC
                LIMIT $limit
            """, file_paths=file_paths, limit=limit)

            return [dict(record) for record in result]

    def _extract_tasks_from_prs(self, similar_prs: list[dict]) -> list[dict]:
        """Extract tasks from similar PRs.

        Args:
            similar_prs: List of similar PRs

        Returns:
            List of tasks from the PRs, ordered by PR number and task order.
        """
        if not similar_prs:
            return []

        pr_numbers = [pr["number"] for pr in similar_prs if pr.get("number")]

        if not pr_numbers:
            return []

        with self.connection.session() as session:
            result = session.run("""
                UNWIND $pr_numbers as pr_num
                MATCH (pr:PullRequest {number: pr_num})-[:HAS_TASK]->(t:Task)
                RETURN pr.number as pr_number,
                       pr.title as pr_title,
                       t.description as task_description,
                       t.is_completed as is_completed
                ORDER BY pr.number, t.order
            """, pr_numbers=pr_numbers)

            return [dict(record) for record in result]
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Feature context builder using semantic search and AST expansion."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from ..graph.connection import KuzuConnection, get_connection
|
|
7
|
+
from .similarity import SimilaritySearch
|
|
8
|
+
from .feature_expander import FeatureExpander, FeatureGraph
|
|
9
|
+
from ..utils.logger import log
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class FeatureContext:
    """Complete context for a feature query."""

    query: str
    root_node: dict = field(default_factory=dict)
    feature_graph: FeatureGraph = field(default_factory=FeatureGraph)
    similar_nodes: list[dict] = field(default_factory=list)
    related_prs: list[dict] = field(default_factory=list)
    authors: list[dict] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Convert to dictionary."""
        # The graph is the only field needing explicit serialization; the
        # rest are already plain dicts/lists and pass through unchanged.
        serialized: dict = {"query": self.query, "root_node": self.root_node}
        serialized["feature_graph"] = self.feature_graph.to_dict()
        serialized["similar_nodes"] = self.similar_nodes
        serialized["related_prs"] = self.related_prs
        serialized["authors"] = self.authors
        return serialized
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class FeatureContextBuilder:
    """Builds complete feature context from a query."""

    def __init__(
        self,
        connection: Optional[KuzuConnection] = None,
        similarity_search: Optional[SimilaritySearch] = None,
        feature_expander: Optional[FeatureExpander] = None,
    ):
        """Initialize feature context builder.

        Args:
            connection: Kuzu connection. If None, uses global connection.
            similarity_search: Similarity search instance. If None, creates new one.
            feature_expander: Feature expander instance. If None, creates new one.
        """
        self.connection = connection or get_connection()
        self.similarity_search = similarity_search or SimilaritySearch(self.connection)
        self.expander = feature_expander or FeatureExpander(self.connection)

    def build_context(
        self,
        query: str,
        max_hops: int = 2,
        include_prs: bool = True,
        include_authors: bool = True,
        use_importance_ranking: bool = True,
    ) -> FeatureContext:
        """Build full feature context.

        1. Semantic search for most relevant node (optionally importance-weighted)
        2. Expand AST graph from that node
        3. Find related PRs and authors

        Args:
            query: Feature description to search for
            max_hops: Maximum relationship hops to traverse
            include_prs: Whether to include related PRs
            include_authors: Whether to include authors
            use_importance_ranking: If True, use importance-weighted search
                (combines semantic + activity + PageRank)

        Returns:
            FeatureContext with expanded AST graph; an empty context when
            neither semantic nor text search finds a relevant node.
        """
        log.info(f"Building feature context for: {query}")

        # Step 1: Find most relevant node via semantic search
        if use_importance_ranking:
            log.info("Using importance-weighted search (semantic + activity + PageRank)")
            similar_code = self.similarity_search.importance_weighted_search(
                query, limit=5, min_score=0.3
            )
        else:
            similar_code = self.similarity_search.find_similar_code(
                query, limit=5, min_score=0.3
            )

        if not similar_code:
            log.warning("No relevant code found via semantic search, trying text search")
            # Fall back to text search
            similar_code = self._fallback_text_search(query)

        if not similar_code:
            log.warning("No relevant code found")
            return FeatureContext(query=query)

        root = similar_code[0]
        if use_importance_ranking and "combined_score" in root:
            log.info(
                f"Found root node: {root.get('type')} - {root.get('name')} "
                f"(combined: {root.get('combined_score', 0):.2f}, "
                f"semantic: {root.get('norm_semantic', 0):.2f}, "
                f"importance: {root.get('norm_importance', 0):.2f}, "
                f"pagerank: {root.get('norm_pagerank', 0):.2f})"
            )
        else:
            log.info(f"Found root node: {root.get('type')} - {root.get('name')} (score: {root.get('score', 'N/A')})")

        # Step 2: Expand AST graph based on node type.
        # `or ""` also guards against an explicit None value for "type"
        # (dict.get's default only covers a *missing* key), which would
        # otherwise crash on .lower().
        node_type = (root.get("type") or "").lower()
        qualified_name = root.get("qualified_name", "")
        file_path = root.get("file_path", "")

        if node_type == "function":
            graph = self.expander.expand_from_function(qualified_name, max_hops)
        elif node_type == "class":
            graph = self.expander.expand_from_class(qualified_name, max_hops)
        else:
            graph = self.expander.expand_from_file(file_path, max_hops)

        # Update root node in graph
        graph.root_node = root

        log.info(f"Expanded graph: {len(graph.functions)} functions, {len(graph.classes)} classes")

        # Step 3: Enrich with PR/author data
        related_prs = []
        if include_prs:
            related_prs = self._find_related_prs(graph)

        authors = []
        if include_authors:
            authors = self._find_authors(graph)

        return FeatureContext(
            query=query,
            root_node=root,
            feature_graph=graph,
            similar_nodes=similar_code[1:],
            related_prs=related_prs,
            authors=authors,
        )

    def _fallback_text_search(self, query: str) -> list[dict]:
        """Fall back to substring search on names/docstrings when vector
        search returns nothing (e.g. embeddings unavailable).

        Functions are tried first; classes only if no function matched.
        All hits get a fixed score of 1.0 since there is no similarity
        metric to rank by.
        """
        with self.connection.session() as session:
            # Search functions by name or docstring
            result = session.run("""
                MATCH (f:Function)
                WHERE toLower(f.name) CONTAINS toLower($query)
                   OR toLower(f.docstring) CONTAINS toLower($query)
                RETURN 'Function' as type,
                       f.name as name,
                       f.qualified_name as qualified_name,
                       f.file_path as file_path,
                       f.docstring as docstring,
                       1.0 as score
                LIMIT 5
            """, query=query)

            results = [dict(r) for r in result]

            if not results:
                # Try class search
                result = session.run("""
                    MATCH (c:Class)
                    WHERE toLower(c.name) CONTAINS toLower($query)
                       OR toLower(c.docstring) CONTAINS toLower($query)
                    RETURN 'Class' as type,
                           c.name as name,
                           c.qualified_name as qualified_name,
                           c.file_path as file_path,
                           c.docstring as docstring,
                           1.0 as score
                    LIMIT 5
                """, query=query)
                results = [dict(r) for r in result]

        return results

    def _find_related_prs(self, graph: FeatureGraph) -> list[dict]:
        """Find PRs that modified files in the feature graph.

        Returns:
            Up to 5 PRs, ordered by how many graph files each touched.
        """
        if not graph.files:
            return []

        file_paths = [f.get("path") for f in graph.files if f.get("path")]
        if not file_paths:
            return []

        with self.connection.session() as session:
            result = session.run("""
                UNWIND $file_paths as fp
                MATCH (pr:PullRequest)-[:PR_MODIFIES]->(f:File)
                WHERE f.path ENDS WITH fp OR f.path = fp
                RETURN DISTINCT pr.number as number,
                       pr.title as title,
                       pr.author as author,
                       pr.state as state,
                       count(DISTINCT f) as files_touched
                ORDER BY files_touched DESC
                LIMIT 5
            """, file_paths=file_paths)

            return [dict(r) for r in result]

    def _find_authors(self, graph: FeatureGraph) -> list[dict]:
        """Find authors who worked on files in the feature graph.

        Returns:
            Up to 5 authors, ordered by commit count on the graph's files.
        """
        if not graph.files:
            return []

        file_paths = [f.get("path") for f in graph.files if f.get("path")]
        if not file_paths:
            return []

        with self.connection.session() as session:
            result = session.run("""
                UNWIND $file_paths as fp
                MATCH (a:Author)<-[:AUTHORED_BY]-(c:GitCommit)-[:COMMIT_MODIFIES]->(f:File)
                WHERE f.path ENDS WITH fp OR f.path = fp
                RETURN DISTINCT a.name as name,
                       a.email as email,
                       count(DISTINCT c) as commit_count
                ORDER BY commit_count DESC
                LIMIT 5
            """, file_paths=file_paths)

            return [dict(r) for r in result]
|