emdash-core 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""ExploredAreasProvider - Context from agent exploration with tool-based relevance."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict
|
|
4
|
+
from typing import Optional, Union
|
|
5
|
+
|
|
6
|
+
from ..models import ContextItem, ContextProviderSpec
|
|
7
|
+
from .base import ContextProvider
|
|
8
|
+
from ..registry import ContextProviderRegistry
|
|
9
|
+
from ...graph.connection import KuzuConnection
|
|
10
|
+
from ...utils.logger import log
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ExploredAreasProvider(ContextProvider):
    """Context provider that extracts entities from agent exploration.

    Walks the steps recorded during an agent session and scores every
    discovered entity by the tool that surfaced it. An entity seen several
    times keeps its highest score, and type / file information learned from
    any sighting is retained.

    High relevance: deliberate investigation (expand_node, get_callers, etc.)
    Medium relevance: targeted search (semantic_search, text_search)
    Low relevance: broad search (grep, get_top_pagerank)
    """

    # Tool-based relevance scores
    TOOL_RELEVANCE = {
        # High relevance - deliberate investigation
        "expand_node": 1.0,
        "get_callers": 0.9,
        "get_callees": 0.9,
        "get_class_hierarchy": 0.9,
        "get_neighbors": 0.85,
        "get_impact_analysis": 0.85,
        "read_file": 0.8,  # Reading a file is deliberate investigation
        # Medium relevance - targeted search
        "semantic_search": 0.7,
        "text_search": 0.6,
        "get_file_dependencies": 0.6,
        "find_entity": 0.6,
        # Lower relevance - broad search/modification
        "grep": 0.4,
        "write_to_file": 0.4,
        "apply_diff": 0.4,
        "get_top_pagerank": 0.3,
        "get_communities": 0.3,
        "list_files": 0.2,
        "execute_command": 0.1,
    }

    # Score applied to tools that are not listed in TOOL_RELEVANCE
    DEFAULT_RELEVANCE = 0.2

    # Only top N results from search tools are considered highly relevant
    TOP_RESULTS_LIMIT = 3

    # Multiplier applied to search results ranked below TOP_RESULTS_LIMIT
    NON_TOP_RESULT_FACTOR = 0.5

    # Tools where we limit to top results
    SEARCH_TOOLS = {"semantic_search", "text_search", "grep", "find_entity"}

    # Marker that identifies file entities; stripped for display
    FILE_PREFIX = "file:"

    def __init__(self, connection: KuzuConnection, config: Optional[dict] = None):
        super().__init__(connection, config)

    @property
    def spec(self) -> ContextProviderSpec:
        return ContextProviderSpec(
            name="explored_areas",
            description="Context from agent exploration with tool-based relevance",
            requires_graph=False,  # Uses session data, not graph queries
        )

    def extract_context(self, exploration_steps: list) -> list[ContextItem]:
        """Extract context items from exploration steps.

        Args:
            exploration_steps: List of ExplorationStep objects or dicts from
                AgentSession. Objects are read through their ``tool_name`` and
                ``entities_discovered`` attributes, dicts through the keys of
                the same names.

        Returns:
            One context item per distinct entity, scored by the most relevant
            tool that discovered it.
        """
        if not exploration_steps:
            return []

        # qualified name -> (best score, entity type, file path)
        entity_scores: dict[str, tuple[float, Optional[str], Optional[str]]] = {}

        for step in exploration_steps:
            tool_name, entities = self._step_fields(step)
            base_score = self.TOOL_RELEVANCE.get(tool_name, self.DEFAULT_RELEVANCE)
            ranked = tool_name in self.SEARCH_TOOLS

            for position, entity in enumerate(entities):
                qname = self._extract_qualified_name(entity)
                if not qname:
                    continue

                # Search tools return ranked lists: only the leading results
                # keep the full score, the tail is down-weighted.
                score = base_score
                if ranked and position >= self.TOP_RESULTS_LIMIT:
                    score = base_score * self.NON_TOP_RESULT_FACTOR

                self._update_entity_score(entity_scores, qname, score, entity)

        prefix = self.FILE_PREFIX
        items = []
        for qname, (score, entity_type, file_path) in entity_scores.items():
            # File entities are shown without their "file:" marker
            display_name = qname[len(prefix):] if qname.startswith(prefix) else qname
            items.append(
                ContextItem(
                    qualified_name=display_name,
                    entity_type=entity_type or "Unknown",
                    file_path=file_path,
                    score=score,
                    neighbors=[],  # Could fetch from graph if needed
                )
            )

        log.info(
            f"ExploredAreasProvider: extracted {len(items)} context items "
            f"from {len(exploration_steps)} exploration steps"
        )
        return items

    @staticmethod
    def _step_fields(step) -> tuple:
        """Return ``(tool_name, entities)`` for a step object or step dict."""
        if hasattr(step, "tool_name"):
            tool_name = step.tool_name
            entities = step.entities_discovered
        else:
            tool_name = step.get("tool_name", "")
            entities = step.get("entities_discovered", [])
        # A step may carry None instead of an empty list
        return tool_name, entities or []

    def _extract_qualified_name(self, entity: Union[str, dict]) -> Optional[str]:
        """Extract qualified name from entity (string or dict)."""
        if isinstance(entity, str):
            return entity
        if isinstance(entity, dict):
            return entity.get("qualified_name")
        return None

    def _update_entity_score(
        self,
        entity_scores: dict,
        qname: str,
        score: float,
        entity: Union[str, dict],
    ) -> None:
        """Record a sighting of ``qname``, keeping the highest score.

        Type and file path are merged rather than overwritten: a
        higher-scored sighting that carries no metadata (for example a bare
        string) does not erase what an earlier sighting provided, and a
        lower-scored sighting may fill in fields that are still missing.
        """
        new_type = self._infer_type(entity)
        new_file = self._infer_file(entity)

        current = entity_scores.get(qname)
        if current is None:
            entity_scores[qname] = (score, new_type, new_file)
            return

        best_score, known_type, known_file = current
        if score > best_score:
            entity_scores[qname] = (score, new_type or known_type, new_file or known_file)
        else:
            entity_scores[qname] = (best_score, known_type or new_type, known_file or new_file)

    def _infer_type(self, entity: Union[str, dict]) -> Optional[str]:
        """Infer entity type from entity data.

        Dicts report their own ``type`` / ``entity_type``. For plain strings
        the type is guessed from the name: a ``file:`` prefix means a file,
        otherwise a dotted name whose last segment starts with an uppercase
        letter is taken to be a class and any other dotted name a function.
        """
        if isinstance(entity, dict):
            return entity.get("type") or entity.get("entity_type")
        if not isinstance(entity, str):
            return None
        # Must be checked before the dotted-name heuristic, otherwise the
        # dot of the file extension makes every file look like a function.
        if entity.startswith(self.FILE_PREFIX):
            return "File"
        if "." not in entity:
            return None
        last_segment = entity.rsplit(".", 1)[-1]
        if last_segment[:1].isupper():
            return "Class"
        return "Function"  # Default assumption

    def _infer_file(self, entity: Union[str, dict]) -> Optional[str]:
        """Infer file path from entity data (only dicts carry one)."""
        if isinstance(entity, dict):
            return entity.get("file_path") or entity.get("path")
        return None
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# Auto-register provider
|
|
183
|
+
# Runs at import time: makes the provider retrievable under the "explored_areas" key.
ContextProviderRegistry.register("explored_areas", ExploredAreasProvider)
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"""TouchedAreasProvider - Context from AST neighbors of modified code."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from ..models import ContextItem, ContextProviderSpec
|
|
7
|
+
from .base import ContextProvider
|
|
8
|
+
from ..registry import ContextProviderRegistry
|
|
9
|
+
from ...graph.connection import KuzuConnection
|
|
10
|
+
from ...utils.logger import log
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TouchedAreasProvider(ContextProvider):
    """Context provider that extracts AST neighbors of touched code.

    When files are modified, this provider:
    1. Finds functions/classes in those files from the Kuzu AST
    2. Gets neighbors up to two hops away (callers, callees, parent
       classes, etc.)
    3. Returns context items with descriptions and relationships
    """

    # Directory names whose contents are never tracked
    _SKIP_DIRS = frozenset({
        "logs", "log",
        "node_modules", "__pycache__",
        "dist", "build", ".git",
        "coverage", ".nyc_output",
    })

    # File-name suffixes that mark logs, binaries and lock files
    _SKIP_SUFFIXES = frozenset({".log", ".pyc", ".pyo", ".so", ".dll", ".lock"})

    # Extensions of files worth tracking
    _CODE_EXTENSIONS = (
        ".py", ".ts", ".tsx", ".js", ".jsx",
        ".java", ".go", ".rs", ".rb", ".php",
        ".c", ".cpp", ".h", ".hpp", ".cs",
        ".swift", ".kt", ".scala", ".vue",
        ".json", ".yaml", ".yml", ".toml",
        ".md", ".sql", ".graphql",
    )

    def __init__(self, connection: KuzuConnection, config: Optional[dict] = None):
        """Create the provider.

        The traversal depth comes from ``config["neighbor_depth"]`` when
        present, otherwise from the ``CONTEXT_NEIGHBOR_DEPTH`` environment
        variable, otherwise it is 2.
        """
        super().__init__(connection, config)
        depth = config.get("neighbor_depth") if config else None
        if depth is None:
            # Also reached when a config is given without "neighbor_depth";
            # previously that case ended in int(None).
            depth = os.getenv("CONTEXT_NEIGHBOR_DEPTH", "2")
        self._neighbor_depth = int(depth)

    @property
    def spec(self) -> ContextProviderSpec:
        return ContextProviderSpec(
            name="touched_areas",
            description="AST-based context from modified code and neighbors",
            requires_graph=True,
        )

    def extract_context(self, modified_files: list[str]) -> list[ContextItem]:
        """Extract context items from modified files.

        Args:
            modified_files: List of file paths that were modified

        Returns:
            One context item per distinct function/class found in the given
            files, each listing the qualified names of its neighbors
            (de-duplicated, without the entity itself).
        """
        if not modified_files:
            return []

        items = []
        seen_qualified_names = set()

        for file_path in modified_files:
            # Skip non-code files
            if not self._is_code_file(file_path):
                continue

            # Look the file up under the path format used in the database
            entities = self._get_file_entities(self._normalize_path(file_path))

            for entity in entities:
                qname = entity.get("qualified_name")
                if not qname or qname in seen_qualified_names:
                    continue
                seen_qualified_names.add(qname)

                neighbors = self._get_neighbors(entity, depth=self._neighbor_depth)
                # The 1-hop and 2-hop queries can return the same entity, and
                # a recursive function is its own caller; dict.fromkeys
                # removes duplicates while keeping first-seen order.
                neighbor_names = list(
                    dict.fromkeys(
                        name
                        for name in (n.get("qualified_name") for n in neighbors)
                        if name and name != qname
                    )
                )

                items.append(
                    ContextItem(
                        qualified_name=qname,
                        entity_type=entity.get("type", "Unknown"),
                        description=entity.get("docstring"),
                        file_path=file_path,
                        neighbors=neighbor_names,
                    )
                )

        log.info(f"TouchedAreasProvider: extracted {len(items)} context items from {len(modified_files)} files")
        return items

    def _normalize_path(self, file_path: str) -> str:
        """Normalize file path to match database format."""
        # Relative paths are resolved against the current working directory
        if not os.path.isabs(file_path):
            file_path = os.path.abspath(file_path)
        return file_path

    def _is_code_file(self, file_path: str) -> bool:
        """Check if file is a code file worth tracking.

        Filters out logs, build artifacts, and other non-code files.
        Directory patterns are compared against whole path components and
        suffix patterns against the dot-separated parts of the file name, so
        names that merely contain a pattern (``user.login.tsx``,
        ``foo.sorting.py``) are still accepted, and relative or
        backslash-separated paths are filtered like absolute POSIX ones.
        """
        if not file_path:
            return False

        *directories, filename = file_path.lower().replace("\\", "/").split("/")

        if any(part in self._SKIP_DIRS for part in directories):
            return False

        # Environment files and directories: ".env", ".env.local", ...
        if any(part == ".env" or part.startswith(".env.") for part in (*directories, filename)):
            return False

        # "debug.log.json" -> {".log", ".json"}
        suffixes = {"." + piece for piece in filename.split(".")[1:]}
        if suffixes & self._SKIP_SUFFIXES:
            return False

        return filename.endswith(self._CODE_EXTENSIONS)

    def _query_entities(
        self,
        pattern: str,
        alias: str,
        label: str,
        params: dict,
        where: Optional[str] = None,
        distinct: bool = False,
        limit: Optional[int] = None,
    ) -> list[dict]:
        """Run a MATCH query returning the standard entity columns.

        Only fixed fragments written in this class are interpolated into the
        query text; caller-supplied values always travel through ``params``.

        Args:
            pattern: Graph pattern placed after MATCH
            alias: Variable in ``pattern`` whose properties are returned
            label: Constant returned in the ``type`` column
            params: Query parameters
            where: Optional WHERE condition
            distinct: Whether to return DISTINCT rows
            limit: Optional maximum number of rows

        Returns:
            Rows with qualified_name, type, docstring and name
        """
        clauses = [f"MATCH {pattern}"]
        if where:
            clauses.append(f"WHERE {where}")
        head = "RETURN DISTINCT" if distinct else "RETURN"
        clauses.append(
            f"{head} {alias}.qualified_name as qualified_name, "
            f"'{label}' as type, "
            f"{alias}.docstring as docstring, "
            f"{alias}.name as name"
        )
        if limit is not None:
            clauses.append(f"LIMIT {int(limit)}")
        return self.connection.execute("\n".join(clauses), params)

    def _entities_in_file(self, where: str, params: dict) -> list[dict]:
        """Return the functions and classes of the files selected by ``where``."""
        found = []
        found.extend(
            self._query_entities(
                "(f:File)-[:CONTAINS_FUNCTION]->(fn:Function)", "fn", "Function", params, where=where
            )
        )
        found.extend(
            self._query_entities(
                "(f:File)-[:CONTAINS_CLASS]->(c:Class)", "c", "Class", params, where=where
            )
        )
        return found

    def _get_file_entities(self, file_path: str) -> list[dict]:
        """Query Kuzu for functions and classes in a file.

        Args:
            file_path: Absolute path to the file

        Returns:
            List of entity dictionaries; empty when nothing matches or the
            query fails.
        """
        try:
            # Try exact match first
            found = self._entities_in_file("f.path = $path", {"path": file_path})

            if not found:
                # Fall back to matching on the file name alone. The match is
                # anchored on a path separator so "utils.py" does not pick up
                # "test_utils.py". Same-named files in other directories still
                # match.
                filename = os.path.basename(file_path)
                found = self._entities_in_file(
                    "f.path = $filename "
                    "OR f.path ENDS WITH $posix_suffix "
                    "OR f.path ENDS WITH $nt_suffix",
                    {
                        "filename": filename,
                        "posix_suffix": "/" + filename,
                        "nt_suffix": "\\" + filename,
                    },
                )

            return found

        except Exception as e:
            # Best effort: an unreadable graph yields no context, but the
            # failure is reported at the same level as neighbor failures.
            log.warning(f"Failed to get file entities for {file_path}: {e}")
            return []

    def _get_neighbors(self, entity: dict, depth: int = 2) -> list[dict]:
        """Get neighbors of an entity.

        Traverses CALLS, INHERITS_FROM, and HAS_METHOD relationships
        to find related code entities.

        Args:
            entity: Entity dictionary with qualified_name and type
            depth: Number of hops to traverse; values above 1 add the
                second hop, larger values have no further effect

        Returns:
            List of neighbor entity dictionaries; empty for entities that are
            neither functions nor classes, or when the lookup fails.
        """
        qname = entity.get("qualified_name")
        if not qname:
            return []

        entity_type = entity.get("type")
        try:
            if entity_type == "Function":
                return list(self._get_function_neighbors(qname, depth))
            if entity_type == "Class":
                return list(self._get_class_neighbors(qname, depth))
        except Exception as e:
            log.warning(f"Failed to get neighbors for {qname}: {e}")

        return []

    def _get_function_neighbors(self, qualified_name: str, depth: int) -> list[dict]:
        """Get neighbors of a function (callers, callees)."""
        params = {"qname": qualified_name}
        target = "(f:Function {qualified_name: $qname})"
        neighbors = []

        # Direct callers (functions that call this one)
        neighbors.extend(
            self._query_entities(
                f"(caller:Function)-[:CALLS]->{target}", "caller", "Function", params, limit=20
            )
        )

        # Direct callees (functions this one calls)
        neighbors.extend(
            self._query_entities(
                f"{target}-[:CALLS]->(callee:Function)", "callee", "Function", params, limit=20
            )
        )

        if depth > 1:
            # 2-hop callers (who calls my callers), excluding the function itself
            neighbors.extend(
                self._query_entities(
                    f"(caller2:Function)-[:CALLS]->(caller:Function)-[:CALLS]->{target}",
                    "caller2",
                    "Function",
                    params,
                    where="caller2.qualified_name <> $qname",
                    distinct=True,
                    limit=10,
                )
            )

            # 2-hop callees (who my callees call), excluding the function itself
            neighbors.extend(
                self._query_entities(
                    f"{target}-[:CALLS]->(callee:Function)-[:CALLS]->(callee2:Function)",
                    "callee2",
                    "Function",
                    params,
                    where="callee2.qualified_name <> $qname",
                    distinct=True,
                    limit=10,
                )
            )

        return neighbors

    def _get_class_neighbors(self, qualified_name: str, depth: int) -> list[dict]:
        """Get neighbors of a class (parents, children, methods)."""
        params = {"qname": qualified_name}
        target = "(c:Class {qualified_name: $qname})"
        neighbors = []

        # Parent classes
        neighbors.extend(
            self._query_entities(
                f"{target}-[:INHERITS_FROM]->(parent:Class)", "parent", "Class", params, limit=10
            )
        )

        # Child classes
        neighbors.extend(
            self._query_entities(
                f"(child:Class)-[:INHERITS_FROM]->{target}", "child", "Class", params, limit=10
            )
        )

        # Methods of this class
        neighbors.extend(
            self._query_entities(
                f"{target}-[:HAS_METHOD]->(m:Function)", "m", "Function", params, limit=20
            )
        )

        if depth > 1:
            # Second hop covers grandparent classes only
            neighbors.extend(
                self._query_entities(
                    f"{target}-[:INHERITS_FROM]->(:Class)-[:INHERITS_FROM]->(gp:Class)",
                    "gp",
                    "Class",
                    params,
                    distinct=True,
                    limit=5,
                )
            )

        return neighbors
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
# Auto-register provider
|
|
360
|
+
# Runs at import time: makes the provider retrievable under the "touched_areas" key.
ContextProviderRegistry.register("touched_areas", TouchedAreasProvider)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Registry for context providers."""
|
|
2
|
+
|
|
3
|
+
from typing import Type, Optional
|
|
4
|
+
|
|
5
|
+
from ..graph.connection import KuzuConnection
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ContextProviderRegistry:
    """Name-to-class lookup table for context providers.

    The table lives on the class itself and is reached only through
    class methods, so no registry instance is ever created. Provider
    modules add themselves when they are imported, which lets new
    providers be plugged in without touching existing code.

    Example:
        ContextProviderRegistry.register("my_provider", MyProvider)
        provider = ContextProviderRegistry.get_provider("my_provider", connection)
    """

    # Shared by every caller: provider name -> provider class
    _providers: dict[str, Type] = {}

    @classmethod
    def register(cls, name: str, provider_class: Type):
        """Store ``provider_class`` under ``name``.

        An existing entry with the same name is replaced.

        Args:
            name: Unique name for the provider
            provider_class: Provider class (must subclass ContextProvider)
        """
        cls._providers.update({name: provider_class})

    @classmethod
    def get_provider(cls, name: str, connection: KuzuConnection, config: Optional[dict] = None):
        """Build the provider registered under ``name``.

        Args:
            name: Provider name
            connection: Kuzu database connection
            config: Optional provider-specific configuration

        Returns:
            Instantiated provider

        Raises:
            ValueError: If provider name not found
        """
        registry = cls._providers
        if name in registry:
            provider_class = registry[name]
            return provider_class(connection, config)

        # List the known names so a typo is easy to spot
        available = ", ".join(registry) or "none"
        raise ValueError(f"Unknown context provider: '{name}'. Available: {available}")

    @classmethod
    def list_providers(cls) -> list[str]:
        """Return the registered provider names in registration order."""
        return [*cls._providers]

    @classmethod
    def is_registered(cls, name: str) -> bool:
        """Tell whether a provider has been registered under ``name``."""
        registry = cls._providers
        return name in registry
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_provider(name: str, connection: KuzuConnection, config: Optional[dict] = None):
    """Module-level shortcut for ``ContextProviderRegistry.get_provider``.

    Args:
        name: Provider name
        connection: Kuzu database connection
        config: Optional provider-specific configuration

    Returns:
        Instantiated provider
    """
    return ContextProviderRegistry.get_provider(
        name=name,
        connection=connection,
        config=config,
    )
|