codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,260 @@
1
+ """Configuration settings for Semantic Code Intelligence."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
+
12
# Directory names that the indexing defaults exclude from scanning.
# NOTE(review): "egg-info" only helps if the scanner matches by suffix or
# substring; real directories are usually named "<pkg>.egg-info" — confirm
# against the scanner implementation.
DEFAULT_IGNORE_DIRS: set[str] = {
    ".git", "node_modules", "build", "dist",
    "venv", ".venv", "__pycache__",
    ".tox", ".mypy_cache", ".pytest_cache",
    "egg-info", ".eggs",
    ".idea", ".vscode",
    "target", "bin", "obj",
}

# File suffixes (including the leading dot) that are indexed by default.
DEFAULT_EXTENSIONS: set[str] = {
    ".py",
    ".js", ".ts", ".jsx", ".tsx",
    ".java", ".go", ".rs",
    ".c", ".cpp", ".h", ".hpp",
    ".rb", ".php", ".cs", ".swift", ".kt", ".scala",
    ".sh", ".bash", ".sql", ".r", ".lua", ".dart",
    ".ex", ".exs",
}

# Layout of the per-project state directory: <root>/.codexa/config.json and
# <root>/.codexa/index/.
CONFIG_DIR_NAME = ".codexa"
CONFIG_FILE_NAME = "config.json"
INDEX_DIR_NAME = "index"
66
+
67
+
68
class EmbeddingConfig(BaseModel):
    """Configuration for the embedding engine."""

    # Each field's default is passed as Field's first positional argument.
    # No range validation is declared here, so nothing in this model enforces
    # chunk_overlap < chunk_size.
    model_name: str = Field(
        "all-MiniLM-L6-v2",
        description="Sentence-transformers model name for embedding generation.",
    )
    chunk_size: int = Field(
        512,
        description="Maximum number of characters per code chunk.",
    )
    chunk_overlap: int = Field(
        64,
        description="Number of overlapping characters between consecutive chunks.",
    )
83
+
84
+
85
class SearchConfig(BaseModel):
    """Configuration for the search engine."""

    # Result-count cap and score cut-off used by similarity search.
    top_k: int = Field(
        10,
        description="Number of top results to return from similarity search.",
    )
    similarity_threshold: float = Field(
        0.3,
        description="Minimum similarity score threshold for results.",
    )
96
+
97
+
98
class IndexConfig(BaseModel):
    """Configuration for the indexing system."""

    # Every instance receives its own copy of the module-level default sets,
    # so mutating one config never alters the shared defaults.
    ignore_dirs: set[str] = Field(default_factory=lambda: set(DEFAULT_IGNORE_DIRS))
    extensions: set[str] = Field(default_factory=lambda: set(DEFAULT_EXTENSIONS))
    use_incremental: bool = Field(
        True,
        description="Enable incremental indexing using file hashes.",
    )
107
+
108
+
109
class LLMConfig(BaseModel):
    """Configuration for LLM provider integration."""

    # --- provider selection -------------------------------------------------
    provider: str = Field(
        "mock",
        description="LLM provider name: 'openai', 'ollama', or 'mock'.",
    )
    model: str = Field(
        "gpt-3.5-turbo",
        description="Model name to use with the provider.",
    )
    # NOTE(review): save_config() serialises the whole model, so a non-empty
    # key is written to config.json in plain text.
    api_key: str = Field(
        "",
        description="API key for remote providers (e.g. OpenAI).",
    )
    base_url: str = Field(
        "",
        description="Custom base URL for the LLM API endpoint.",
    )

    # --- generation parameters ----------------------------------------------
    temperature: float = Field(
        0.2,
        description="Sampling temperature for LLM responses.",
    )
    max_tokens: int = Field(
        2048,
        description="Maximum tokens for LLM response generation.",
    )

    # --- response cache -----------------------------------------------------
    cache_enabled: bool = Field(
        True,
        description="Enable LLM response caching.",
    )
    cache_ttl_hours: int = Field(
        24,
        description="Time-to-live for cached LLM responses in hours.",
    )
    cache_max_entries: int = Field(
        1000,
        description="Maximum number of cached LLM responses.",
    )

    # --- rate limiting (0 disables the respective limit) --------------------
    rate_limit_rpm: int = Field(
        0,
        description="Max requests per minute (0 = unlimited).",
    )
    rate_limit_tpm: int = Field(
        0,
        description="Max tokens per minute (0 = unlimited).",
    )
156
+
157
+
158
class QualityConfig(BaseModel):
    """Configuration for code quality metrics and gate enforcement."""

    # --- thresholds ---------------------------------------------------------
    complexity_threshold: int = Field(
        10,
        description="Minimum cyclomatic complexity to flag.",
    )
    min_maintainability: float = Field(
        40.0,
        description="Minimum maintainability index for quality gates.",
    )
    max_issues: int = Field(
        20,
        description="Maximum allowed quality issues for gates.",
    )

    # --- snapshot history ---------------------------------------------------
    snapshot_on_index: bool = Field(
        False,
        description="Automatically save a quality snapshot on indexing.",
    )
    history_limit: int = Field(
        50,
        description="Maximum number of snapshots to retain.",
    )
181
+
182
+
183
class AppConfig(BaseModel):
    """Top-level application configuration."""

    # Aggregates the per-subsystem models; each nested section is built from
    # its own defaults when it is absent from the input data.
    project_root: str = Field(
        ".",
        description="Root path of the project being indexed.",
    )
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    search: SearchConfig = Field(default_factory=SearchConfig)
    index: IndexConfig = Field(default_factory=IndexConfig)
    llm: LLMConfig = Field(default_factory=LLMConfig)
    quality: QualityConfig = Field(default_factory=QualityConfig)
    verbose: bool = Field(False, description="Enable verbose output.")

    @classmethod
    def config_dir(cls, project_root: str | Path) -> Path:
        """Return ``<resolved project_root>/.codexa``.

        The root is resolved to an absolute path first; nothing is created
        on disk.
        """
        absolute_root = Path(project_root).resolve()
        return absolute_root.joinpath(CONFIG_DIR_NAME)

    @classmethod
    def config_path(cls, project_root: str | Path) -> Path:
        """Return the location of ``config.json`` inside the config directory."""
        return cls.config_dir(project_root).joinpath(CONFIG_FILE_NAME)

    @classmethod
    def index_dir(cls, project_root: str | Path) -> Path:
        """Return the location of the index directory inside the config directory."""
        return cls.config_dir(project_root).joinpath(INDEX_DIR_NAME)
211
+
212
+
213
def load_config(project_root: str | Path = ".") -> AppConfig:
    """Read the project's ``.codexa/config.json`` into an ``AppConfig``.

    Args:
        project_root: Directory whose ``.codexa`` folder should be consulted.

    Returns:
        The validated configuration stored on disk, or — when no config file
        exists — a default ``AppConfig`` whose ``project_root`` is the
        resolved absolute form of *project_root*.

    Note:
        When the file exists, ``project_root`` is taken from the file as-is;
        it is not replaced by the *project_root* argument. Malformed JSON or
        data that fails validation propagates as an exception.
    """
    cfg_file = AppConfig.config_path(project_root)

    # No stored config: hand back defaults anchored at the resolved root.
    if not cfg_file.exists():
        return AppConfig(project_root=str(Path(project_root).resolve()))

    raw_text = cfg_file.read_text(encoding="utf-8")
    return AppConfig.model_validate(json.loads(raw_text))
223
+
224
+
225
def save_config(config: AppConfig, project_root: Optional[str | Path] = None) -> Path:
    """Write *config* as indented JSON to the project's ``.codexa/config.json``.

    Args:
        config: Configuration to persist.
        project_root: Target project directory. When omitted (or otherwise
            falsy, e.g. an empty string), ``config.project_root`` is used.

    Returns:
        Path of the file that was written. The ``.codexa`` directory is
        created first if it is missing; an existing file is overwritten.
    """
    target_root = project_root if project_root else config.project_root

    AppConfig.config_dir(target_root).mkdir(parents=True, exist_ok=True)

    destination = AppConfig.config_path(target_root)
    serialized = config.model_dump_json(indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return destination
241
+
242
+
243
def init_project(project_root: str | Path = ".") -> tuple[AppConfig, Path]:
    """Set up the ``.codexa`` directory tree and write a default config.

    Args:
        project_root: Directory to initialise; resolved to an absolute path.

    Returns:
        ``(config, config_path)`` — the freshly built default configuration
        and the path of the file it was saved to.

    Note:
        The default configuration is written unconditionally, so a
        ``config.json`` that already exists at the target is replaced.
    """
    resolved_root = Path(project_root).resolve()
    default_config = AppConfig(project_root=str(resolved_root))

    # Config directory first, then the index directory nested inside it.
    for directory in (
        AppConfig.config_dir(resolved_root),
        AppConfig.index_dir(resolved_root),
    ):
        directory.mkdir(parents=True, exist_ok=True)

    written_to = save_config(default_config, resolved_root)
    return default_config, written_to
@@ -0,0 +1,19 @@
1
+ """Context engine package — code context building, call graphs, and dependency tracking."""
2
+
3
+ from semantic_code_intelligence.context.engine import (
4
+ CallEdge,
5
+ CallGraph,
6
+ ContextBuilder,
7
+ ContextWindow,
8
+ DependencyMap,
9
+ FileDependency,
10
+ )
11
+
12
+ __all__ = [
13
+ "CallEdge",
14
+ "CallGraph",
15
+ "ContextBuilder",
16
+ "ContextWindow",
17
+ "DependencyMap",
18
+ "FileDependency",
19
+ ]
@@ -0,0 +1,429 @@
1
+ """Context engine — builds rich code context from parsed symbols.
2
+
3
+ Provides:
4
+ - ContextBuilder: assembles context windows around symbols
5
+ - CallGraph: AST-based call/reference graph (tree-sitter powered)
6
+ - DependencyMap: file-level dependency tracking from imports
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import tree_sitter
17
+
18
+ from semantic_code_intelligence.parsing.parser import (
19
+ Symbol,
20
+ detect_language,
21
+ extract_imports,
22
+ get_language,
23
+ parse_file,
24
+ )
25
+ from semantic_code_intelligence.utils.logging import get_logger
26
+
27
+ logger = get_logger("context")
28
+
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Context Builder
32
+ # ---------------------------------------------------------------------------
33
+
34
@dataclass
class ContextWindow:
    """A focal symbol bundled with the imports and sibling symbols around it."""

    focal_symbol: Symbol
    related_symbols: list[Symbol] = field(default_factory=list)
    imports: list[Symbol] = field(default_factory=list)
    file_content: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return a dictionary form of the window.

        Only the focal symbol, related symbols and imports are included;
        ``file_content`` is left out.
        """
        payload: dict[str, Any] = {"focal_symbol": self.focal_symbol.to_dict()}
        payload["related_symbols"] = [rel.to_dict() for rel in self.related_symbols]
        payload["imports"] = [imp.to_dict() for imp in self.imports]
        return payload

    def render(self, max_lines: int = 50) -> str:
        """Produce a plain-text summary of the window.

        Args:
            max_lines: Maximum number of lines of the focal symbol's body to
                print; the remainder is summarised as a count.

        Returns:
            A newline-joined report with a header, up to 5 imports, the
            (possibly truncated) source, and up to 10 related symbols.
        """
        focal = self.focal_symbol
        report: list[str] = [
            f"=== {focal.kind}: {focal.name} ===",
            f"File: {focal.file_path}",
            f"Lines: {focal.start_line}-{focal.end_line}",
            "",
        ]

        if self.imports:
            report.append("-- Imports --")
            report.extend(f" {imp.body.strip()}" for imp in self.imports[:5])
            report.append("")

        report.append("-- Source --")
        source_lines = focal.body.split("\n")
        report.extend(f" {text}" for text in source_lines[:max_lines])
        hidden = len(source_lines) - max_lines
        if hidden > 0:
            report.append(f" ... ({hidden} more lines)")

        if self.related_symbols:
            report.extend(("", "-- Related symbols --"))
            report.extend(
                f" {rel.kind} {rel.name} (L{rel.start_line})"
                for rel in self.related_symbols[:10]
            )

        return "\n".join(report)
79
+
80
+
81
class ContextBuilder:
    """Caches parsed symbols per file and assembles context windows from them."""

    def __init__(self) -> None:
        # Both caches are keyed by the file path string given to index_file().
        self._file_symbols: dict[str, list[Symbol]] = {}
        self._file_contents: dict[str, str] = {}

    def index_file(self, file_path: str, content: str | None = None) -> list[Symbol]:
        """Parse *file_path* and cache its symbols and text.

        Args:
            file_path: Path of the file; also used as the cache key.
            content: Source text. When ``None`` the file is read from disk
                as UTF-8 with undecodable bytes replaced.

        Returns:
            The parsed symbols, or an empty list when the file cannot be
            read (in which case nothing is cached).
        """
        if content is None:
            try:
                content = Path(file_path).read_text(encoding="utf-8", errors="replace")
            except OSError:  # PermissionError is a subclass of OSError
                return []

        parsed = parse_file(file_path, content)
        self._file_symbols[file_path] = parsed
        self._file_contents[file_path] = content
        return parsed

    def get_symbols(self, file_path: str) -> list[Symbol]:
        """Return the cached symbols of *file_path* (empty if not indexed)."""
        return self._file_symbols.get(file_path, [])

    def get_all_symbols(self) -> list[Symbol]:
        """Return every cached symbol, files in insertion order."""
        return [sym for per_file in self._file_symbols.values() for sym in per_file]

    def find_symbol(self, name: str, kind: str | None = None) -> list[Symbol]:
        """Return cached symbols named exactly *name*.

        When *kind* is given, only symbols of that kind are returned.
        """
        return [
            sym
            for per_file in self._file_symbols.values()
            for sym in per_file
            if sym.name == name and (kind is None or sym.kind == kind)
        ]

    def build_context(self, symbol: Symbol) -> ContextWindow:
        """Build the context window for *symbol* from its file's cached data.

        Imports are all ``kind == "import"`` symbols of the same file;
        related symbols are the remaining non-import symbols other than
        *symbol* itself (compared by identity).
        """
        path = symbol.file_path
        import_syms: list[Symbol] = []
        neighbours: list[Symbol] = []
        for candidate in self._file_symbols.get(path, []):
            if candidate.kind == "import":
                import_syms.append(candidate)
            elif candidate is not symbol:
                neighbours.append(candidate)

        return ContextWindow(
            focal_symbol=symbol,
            related_symbols=neighbours,
            imports=import_syms,
            file_content=self._file_contents.get(path, ""),
        )

    def build_context_for_name(self, name: str) -> list[ContextWindow]:
        """Build one context window per cached symbol named *name*."""
        return [self.build_context(match) for match in self.find_symbol(name)]
148
+
149
+
150
+ # ---------------------------------------------------------------------------
151
+ # Call Graph (AST-based, with a regex fallback)
152
+ # ---------------------------------------------------------------------------
153
+
154
@dataclass
class CallEdge:
    """One caller -> callee relationship in the call graph."""

    caller: str  # "file:name" or just "name"
    callee: str
    file_path: str
    line: int

    def to_dict(self) -> dict[str, Any]:
        """Return the edge's four fields as a plain dictionary."""
        field_names = ("caller", "callee", "file_path", "line")
        return {key: getattr(self, key) for key in field_names}
171
+
172
+
173
class CallGraph:
    """AST-based call graph built from tree-sitter function-call nodes.

    Walks the AST of each callable symbol's body to find call nodes
    (function/method invocations). The callee name is resolved from the
    AST node (``identifier`` / ``attribute`` / ``field_expression`` ...)
    and matched against the names of the indexed callable symbols.

    The regex heuristic is used instead whenever no tree-sitter language
    object is available for a file (``detect_language`` / ``get_language``
    yield nothing for it).

    Edges are name-based: a callee is identified by its bare name only,
    while a caller is identified by ``"<file_path>:<name>"``.
    """

    # Node types that represent a function/method call across languages
    _CALL_NODE_TYPES: set[str] = {
        "call",                   # Python, Ruby, PHP
        "call_expression",        # JS, TS, Go, Rust, C#, C++, Java
        "method_invocation",      # Java
        "invocation_expression",  # C#
    }

    def __init__(self) -> None:
        self._edges: list[CallEdge] = []
        self._callers: dict[str, list[CallEdge]] = {}  # callee -> list of callers
        self._callees: dict[str, list[CallEdge]] = {}  # caller -> list of callees

    # ----- public API -----

    def build(self, symbols: list[Symbol]) -> None:
        """Rebuild the graph from *symbols*, discarding any previous edges.

        Only symbols of kind ``function``, ``method`` or ``class`` take part,
        both as callers and as possible callees. Each produced edge records
        the *caller's* start line, not the line of the call site.
        """
        self._edges.clear()
        self._callers.clear()
        self._callees.clear()

        callable_symbols = [
            s for s in symbols if s.kind in ("function", "method", "class")
        ]
        callee_names: set[str] = {s.name for s in callable_symbols}

        # Group symbols by file so the language is resolved once per file.
        # (Each symbol body is still parsed on its own.)
        file_symbols: dict[str, list[Symbol]] = {}
        for sym in callable_symbols:
            file_symbols.setdefault(sym.file_path, []).append(sym)

        for file_path, syms in file_symbols.items():
            lang_name = detect_language(file_path)
            language_obj = get_language(lang_name) if lang_name else None

            for sym in syms:
                caller_key = f"{sym.file_path}:{sym.name}"
                if language_obj is not None:
                    call_names = self._extract_calls_ast(
                        sym.body, language_obj, callee_names, sym.name,
                    )
                else:
                    # Fallback: regex heuristic for unsupported languages
                    call_names = self._extract_calls_regex(
                        sym.body, callee_names, sym.name,
                    )

                for callee_name in call_names:
                    edge = CallEdge(
                        caller=caller_key,
                        callee=callee_name,
                        file_path=sym.file_path,
                        line=sym.start_line,
                    )
                    self._edges.append(edge)
                    self._callers.setdefault(callee_name, []).append(edge)
                    self._callees.setdefault(caller_key, []).append(edge)

    # ----- AST-based extraction -----

    def _extract_calls_ast(
        self,
        body: str,
        language: tree_sitter.Language,
        known_names: set[str],
        self_name: str,
    ) -> set[str]:
        """Extract function/method call names from *body* via tree-sitter AST.

        Returns the set of *known* callee names that appear as call
        targets in the AST (excluding self-references).
        """
        source = body.encode("utf-8")
        parser = tree_sitter.Parser(language)
        tree = parser.parse(source)

        found: set[str] = set()
        self._walk_calls(tree.root_node, source, known_names, self_name, found)
        return found

    def _walk_calls(
        self,
        node: tree_sitter.Node,
        source: bytes,
        known_names: set[str],
        self_name: str,
        found: set[str],
    ) -> None:
        """Collect call-target names from the subtree rooted at *node*.

        Names are added to *found* in place. The traversal uses an explicit
        stack rather than recursion so that deeply nested syntax trees
        cannot exhaust Python's recursion limit. Visit order is irrelevant
        because results accumulate in a set.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type in self._CALL_NODE_TYPES:
                name = self._resolve_call_name(current, source)
                if name and name != self_name and name in known_names:
                    found.add(name)
            pending.extend(current.children)

    @staticmethod
    def _resolve_call_name(call_node: tree_sitter.Node, source: bytes) -> str | None:
        """Resolve the callee name from a call/call_expression node.

        Handles:
        - ``foo()``: direct identifier call
        - ``obj.method()``: attribute/member access — returns ``method``
        - ``pkg::func()``: scoped identifier (Rust/C++) — returns ``func``

        Returns ``None`` when the call target is any other kind of node.
        """
        # Locate the call target: "function" field, then "name" field
        # (Java method_invocation), then simply the first child.
        func = call_node.child_by_field_name("function")
        if func is None:
            func = call_node.child_by_field_name("name")
        if func is None and call_node.children:
            func = call_node.children[0]
        if func is None:
            return None

        # Drill through attribute access to get the final name
        if func.type in ("attribute", "member_expression", "field_expression",
                         "scoped_identifier", "member_access_expression"):
            # The method name is the field "attribute"/"field" when present...
            attr = func.child_by_field_name("attribute") or func.child_by_field_name("field")
            if attr is not None:
                return source[attr.start_byte:attr.end_byte].decode("utf-8", errors="replace")
            # ...otherwise fall back to the last named child.
            for ch in reversed(func.children):
                if ch.is_named:
                    return source[ch.start_byte:ch.end_byte].decode("utf-8", errors="replace")
            return None

        if func.type == "identifier":
            return source[func.start_byte:func.end_byte].decode("utf-8", errors="replace")

        return None

    # ----- regex fallback -----

    @staticmethod
    def _extract_calls_regex(
        body: str,
        known_names: set[str],
        self_name: str,
    ) -> set[str]:
        """Fallback regex heuristic for unsupported languages.

        A known name counts as referenced when it occurs in *body* at a word
        boundary, followed (after optional whitespace) by ``(`` or ``.``.
        """
        found: set[str] = set()
        for name in known_names:
            if name == self_name:
                continue
            if re.search(r"\b" + re.escape(name) + r"\s*[\(\.]", body):
                found.add(name)
        return found

    @property
    def edges(self) -> list[CallEdge]:
        """Return a shallow copy of all call-graph edges."""
        return list(self._edges)

    def callers_of(self, name: str) -> list[CallEdge]:
        """Get all edges where `name` is the callee.

        A new list is returned, so callers may modify it without affecting
        the graph's internal index.
        """
        return list(self._callers.get(name, ()))

    def callees_of(self, caller_key: str) -> list[CallEdge]:
        """Get all edges where `caller_key` is the caller.

        A new list is returned, so callers may modify it without affecting
        the graph's internal index.
        """
        return list(self._callees.get(caller_key, ()))

    def to_dict(self) -> dict[str, Any]:
        """Serialize the call graph to a summary dictionary.

        ``node_count`` is the number of distinct caller keys plus callee
        names appearing on any edge.
        """
        return {
            "edges": [e.to_dict() for e in self._edges],
            "node_count": len(
                {e.caller for e in self._edges} | {e.callee for e in self._edges}
            ),
            "edge_count": len(self._edges),
        }

    def __repr__(self) -> str:
        return f"CallGraph(edges={len(self._edges)})"
361
+
362
+
363
+ # ---------------------------------------------------------------------------
364
+ # Dependency Map (file-level imports)
365
+ # ---------------------------------------------------------------------------
366
+
367
@dataclass
class FileDependency:
    """A single import statement recorded for a source file."""

    source_file: str
    import_text: str
    line: int

    def to_dict(self) -> dict[str, Any]:
        """Return the dependency's three fields as a plain dictionary."""
        field_names = ("source_file", "import_text", "line")
        return {key: getattr(self, key) for key in field_names}
382
+
383
+
384
class DependencyMap:
    """Records, per file, the import statements found in that file."""

    def __init__(self) -> None:
        # file path -> dependencies extracted from that file
        self._dependencies: dict[str, list[FileDependency]] = {}

    def add_file(self, file_path: str, content: str | None = None) -> list[FileDependency]:
        """Extract the imports of *file_path* and store them.

        Any dependencies previously stored for the same path are replaced.

        Returns:
            The list of dependencies recorded for the file.
        """
        recorded = [
            FileDependency(
                source_file=file_path,
                import_text=imp.body.strip(),
                line=imp.start_line,
            )
            for imp in extract_imports(file_path, content)
        ]
        self._dependencies[file_path] = recorded
        return recorded

    def get_dependencies(self, file_path: str) -> list[FileDependency]:
        """Return the dependencies stored for *file_path* (empty if untracked)."""
        return self._dependencies.get(file_path, [])

    def get_all_files(self) -> list[str]:
        """Return the paths of all tracked files, in insertion order."""
        return [*self._dependencies]

    def get_dependents(self, module_name: str) -> list[FileDependency]:
        """Return every dependency whose import text contains *module_name*.

        The check is a plain substring test on the stripped import text, so
        ``"os"`` also matches an import of ``"cosmos"``.
        """
        return [
            dep
            for per_file in self._dependencies.values()
            for dep in per_file
            if module_name in dep.import_text
        ]

    def to_dict(self) -> dict[str, Any]:
        """Return ``{file path: [dependency dicts]}`` for all tracked files."""
        serialised: dict[str, Any] = {}
        for path, per_file in self._dependencies.items():
            serialised[path] = [dep.to_dict() for dep in per_file]
        return serialised

    def __repr__(self) -> str:
        return f"DependencyMap(files={len(self._dependencies)})"