codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""Configuration settings for Semantic Code Intelligence."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Directory names that are skipped by default during scanning.
DEFAULT_IGNORE_DIRS: set[str] = {
    # Version-control and editor metadata
    ".git",
    ".idea",
    ".vscode",
    # Virtual environments, caches and packaging leftovers
    "venv",
    ".venv",
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    "egg-info",
    ".eggs",
    # Dependency and build-output directories
    "node_modules",
    "build",
    "dist",
    "target",
    "bin",
    "obj",
}

# File extensions that are indexed by default.
DEFAULT_EXTENSIONS: set[str] = {
    # Scripting languages and shells
    ".py",
    ".rb",
    ".php",
    ".lua",
    ".r",
    ".sh",
    ".bash",
    # JavaScript / TypeScript
    ".js",
    ".jsx",
    ".ts",
    ".tsx",
    # JVM languages
    ".java",
    ".kt",
    ".scala",
    # C-family, Go and Rust
    ".c",
    ".h",
    ".cpp",
    ".hpp",
    ".go",
    ".rs",
    # Everything else
    ".cs",
    ".swift",
    ".dart",
    ".ex",
    ".exs",
    ".sql",
}

# Names used to build the per-project configuration and index locations.
CONFIG_DIR_NAME = ".codexa"
CONFIG_FILE_NAME = "config.json"
INDEX_DIR_NAME = "index"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class EmbeddingConfig(BaseModel):
    """Configuration for the embedding engine."""

    # Identifier of the embedding model.
    model_name: str = Field(
        "all-MiniLM-L6-v2",
        description="Sentence-transformers model name for embedding generation.",
    )
    # Upper bound on chunk length, in characters.
    chunk_size: int = Field(
        512,
        description="Maximum number of characters per code chunk.",
    )
    # Characters shared between one chunk and the next.
    chunk_overlap: int = Field(
        64,
        description="Number of overlapping characters between consecutive chunks.",
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SearchConfig(BaseModel):
    """Configuration for the search engine."""

    # How many results a similarity search returns.
    top_k: int = Field(
        10,
        description="Number of top results to return from similarity search.",
    )
    # Score cut-off applied to results.
    similarity_threshold: float = Field(
        0.3,
        description="Minimum similarity score threshold for results.",
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class IndexConfig(BaseModel):
    """Configuration for the indexing system."""

    # Each instance receives its own copy of the module-level defaults, so
    # mutating one config's sets does not affect the shared constants.
    ignore_dirs: set[str] = Field(default_factory=lambda: set(DEFAULT_IGNORE_DIRS))
    extensions: set[str] = Field(default_factory=lambda: set(DEFAULT_EXTENSIONS))
    # Toggle for hash-based incremental indexing.
    use_incremental: bool = Field(
        True,
        description="Enable incremental indexing using file hashes.",
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class LLMConfig(BaseModel):
    """Configuration for LLM provider integration."""

    # --- Provider selection -------------------------------------------------
    provider: str = Field(
        "mock",
        description="LLM provider name: 'openai', 'ollama', or 'mock'.",
    )
    model: str = Field(
        "gpt-3.5-turbo",
        description="Model name to use with the provider.",
    )
    # NOTE(review): held as a plain string; save_config() serializes the whole
    # AppConfig to config.json, so a key set here is presumably written to disk
    # in clear text — confirm this is intended.
    api_key: str = Field(
        "",
        description="API key for remote providers (e.g. OpenAI).",
    )
    base_url: str = Field(
        "",
        description="Custom base URL for the LLM API endpoint.",
    )

    # --- Generation parameters ----------------------------------------------
    temperature: float = Field(
        0.2,
        description="Sampling temperature for LLM responses.",
    )
    max_tokens: int = Field(
        2048,
        description="Maximum tokens for LLM response generation.",
    )

    # --- Response cache -----------------------------------------------------
    cache_enabled: bool = Field(
        True,
        description="Enable LLM response caching.",
    )
    cache_ttl_hours: int = Field(
        24,
        description="Time-to-live for cached LLM responses in hours.",
    )
    cache_max_entries: int = Field(
        1000,
        description="Maximum number of cached LLM responses.",
    )

    # --- Rate limiting (0 means unlimited, per the field descriptions) ------
    rate_limit_rpm: int = Field(
        0,
        description="Max requests per minute (0 = unlimited).",
    )
    rate_limit_tpm: int = Field(
        0,
        description="Max tokens per minute (0 = unlimited).",
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class QualityConfig(BaseModel):
    """Configuration for code quality metrics and gate enforcement."""

    # --- Thresholds ---------------------------------------------------------
    complexity_threshold: int = Field(
        10,
        description="Minimum cyclomatic complexity to flag.",
    )
    min_maintainability: float = Field(
        40.0,
        description="Minimum maintainability index for quality gates.",
    )
    max_issues: int = Field(
        20,
        description="Maximum allowed quality issues for gates.",
    )

    # --- Snapshot handling --------------------------------------------------
    snapshot_on_index: bool = Field(
        False,
        description="Automatically save a quality snapshot on indexing.",
    )
    history_limit: int = Field(
        50,
        description="Maximum number of snapshots to retain.",
    )
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class AppConfig(BaseModel):
    """Top-level application configuration."""

    project_root: str = Field(
        ".",
        description="Root path of the project being indexed.",
    )
    # Each section defaults to a freshly constructed sub-configuration.
    embedding: EmbeddingConfig = Field(default_factory=EmbeddingConfig)
    search: SearchConfig = Field(default_factory=SearchConfig)
    index: IndexConfig = Field(default_factory=IndexConfig)
    llm: LLMConfig = Field(default_factory=LLMConfig)
    quality: QualityConfig = Field(default_factory=QualityConfig)
    verbose: bool = Field(False, description="Enable verbose output.")

    @classmethod
    def config_dir(cls, project_root: str | Path) -> Path:
        """Return the resolved ``<project_root>/.codexa`` directory path."""
        resolved_root = Path(project_root).resolve()
        return resolved_root / CONFIG_DIR_NAME

    @classmethod
    def config_path(cls, project_root: str | Path) -> Path:
        """Return the location of ``config.json`` inside the config directory."""
        base = cls.config_dir(project_root)
        return base / CONFIG_FILE_NAME

    @classmethod
    def index_dir(cls, project_root: str | Path) -> Path:
        """Return the index storage directory inside the config directory."""
        base = cls.config_dir(project_root)
        return base / INDEX_DIR_NAME
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def load_config(project_root: str | Path = ".") -> AppConfig:
    """Read the project's ``.codexa/config.json`` into an ``AppConfig``.

    When the file is absent, a default configuration is returned whose
    ``project_root`` is the resolved form of *project_root*.  When the file
    is present, its parsed JSON is validated as-is.
    """
    cfg_file = AppConfig.config_path(project_root)

    # Guard clause: no file on disk means defaults.
    if not cfg_file.exists():
        resolved_root = Path(project_root).resolve()
        return AppConfig(project_root=str(resolved_root))

    raw_text = cfg_file.read_text(encoding="utf-8")
    return AppConfig.model_validate(json.loads(raw_text))
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def save_config(config: AppConfig, project_root: Optional[str | Path] = None) -> Path:
    """Write *config* to ``.codexa/config.json`` and return that file's path.

    The target project is *project_root* when it is truthy, otherwise
    ``config.project_root``.  The config directory is created (including
    parents) if it is missing.
    """
    target_root = project_root if project_root else config.project_root

    directory = AppConfig.config_dir(target_root)
    directory.mkdir(parents=True, exist_ok=True)

    destination = AppConfig.config_path(target_root)
    serialized = config.model_dump_json(indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return destination
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def init_project(project_root: str | Path = ".") -> tuple[AppConfig, Path]:
    """Set up a project: create its config and index directories, write defaults.

    Returns a ``(config, config_path)`` pair: the default configuration bound
    to the resolved project root, and the path of the file it was saved to.
    """
    resolved_root = Path(project_root).resolve()
    default_config = AppConfig(project_root=str(resolved_root))

    # Make sure both storage directories exist before writing anything.
    for directory in (
        AppConfig.config_dir(resolved_root),
        AppConfig.index_dir(resolved_root),
    ):
        directory.mkdir(parents=True, exist_ok=True)

    # Persist the default configuration.
    saved_path = save_config(default_config, resolved_root)
    return default_config, saved_path
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Context engine package — code context building, call graphs, and dependency tracking."""
|
|
2
|
+
|
|
3
|
+
from semantic_code_intelligence.context.engine import (
|
|
4
|
+
CallEdge,
|
|
5
|
+
CallGraph,
|
|
6
|
+
ContextBuilder,
|
|
7
|
+
ContextWindow,
|
|
8
|
+
DependencyMap,
|
|
9
|
+
FileDependency,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"CallEdge",
|
|
14
|
+
"CallGraph",
|
|
15
|
+
"ContextBuilder",
|
|
16
|
+
"ContextWindow",
|
|
17
|
+
"DependencyMap",
|
|
18
|
+
"FileDependency",
|
|
19
|
+
]
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
"""Context engine — builds rich code context from parsed symbols.
|
|
2
|
+
|
|
3
|
+
Provides:
|
|
4
|
+
- ContextBuilder: assembles context windows around symbols
|
|
5
|
+
- CallGraph: AST-based call/reference graph (tree-sitter powered)
|
|
6
|
+
- DependencyMap: file-level dependency tracking from imports
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import tree_sitter
|
|
17
|
+
|
|
18
|
+
from semantic_code_intelligence.parsing.parser import (
|
|
19
|
+
Symbol,
|
|
20
|
+
detect_language,
|
|
21
|
+
extract_imports,
|
|
22
|
+
get_language,
|
|
23
|
+
parse_file,
|
|
24
|
+
)
|
|
25
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
26
|
+
|
|
27
|
+
logger = get_logger("context")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
# Context Builder
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
@dataclass
class ContextWindow:
    """A focal symbol together with surrounding context from its file."""

    # The symbol this window is centred on.
    focal_symbol: Symbol
    # Other symbols to show alongside the focal one.
    related_symbols: list[Symbol] = field(default_factory=list)
    # Import symbols associated with the focal symbol.
    imports: list[Symbol] = field(default_factory=list)
    # Text of the containing file; not included in to_dict() or render().
    file_content: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Serialize the window to a plain dictionary.

        Returns:
            A dict with the keys ``focal_symbol``, ``related_symbols`` and
            ``imports``, each built from the symbols' own ``to_dict()``.
        """
        return {
            "focal_symbol": self.focal_symbol.to_dict(),
            "related_symbols": [s.to_dict() for s in self.related_symbols],
            "imports": [s.to_dict() for s in self.imports],
        }

    def render(self, max_lines: int = 50) -> str:
        """Render a human-readable context summary.

        Args:
            max_lines: Maximum number of body lines of the focal symbol to
                show.  Values below zero are treated as zero.

        Returns:
            A newline-joined string with a header, up to five imports, the
            (possibly truncated) source body, and up to ten related symbols.
        """
        # A negative limit would make the slice below drop lines from the END
        # of the body and overstate the "more lines" count, so clamp it at 0.
        limit = max(max_lines, 0)
        focal = self.focal_symbol

        lines: list[str] = [
            f"=== {focal.kind}: {focal.name} ===",
            f"File: {focal.file_path}",
            f"Lines: {focal.start_line}-{focal.end_line}",
            "",
        ]

        if self.imports:
            lines.append("-- Imports --")
            lines.extend(f" {imp.body.strip()}" for imp in self.imports[:5])
            lines.append("")

        lines.append("-- Source --")
        body_lines = focal.body.split("\n")
        lines.extend(f" {line}" for line in body_lines[:limit])
        hidden = len(body_lines) - limit
        if hidden > 0:
            lines.append(f" ... ({hidden} more lines)")

        if self.related_symbols:
            lines.append("")
            lines.append("-- Related symbols --")
            lines.extend(
                f" {sym.kind} {sym.name} (L{sym.start_line})"
                for sym in self.related_symbols[:10]
            )

        return "\n".join(lines)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ContextBuilder:
    """Indexes files into symbols and assembles context windows from them."""

    def __init__(self) -> None:
        # Both caches are keyed by the file path passed to index_file().
        self._file_symbols: dict[str, list[Symbol]] = {}
        self._file_contents: dict[str, str] = {}

    def index_file(self, file_path: str, content: str | None = None) -> list[Symbol]:
        """Parse *file_path*, cache its symbols and text, and return the symbols.

        When *content* is ``None`` the file is read from disk as UTF-8 with
        undecodable bytes replaced; if that read fails with an OS-level error
        an empty list is returned and nothing is cached.
        """
        text = content
        if text is None:
            try:
                text = Path(file_path).read_text(encoding="utf-8", errors="replace")
            except OSError:  # PermissionError is a subclass of OSError
                return []

        parsed = parse_file(file_path, text)
        self._file_symbols[file_path] = parsed
        self._file_contents[file_path] = text
        return parsed

    def get_symbols(self, file_path: str) -> list[Symbol]:
        """Return the cached symbols of *file_path*, or ``[]`` if not indexed."""
        return self._file_symbols.get(file_path, [])

    def get_all_symbols(self) -> list[Symbol]:
        """Return every cached symbol, file by file in indexing order."""
        return [
            sym
            for per_file in self._file_symbols.values()
            for sym in per_file
        ]

    def find_symbol(self, name: str, kind: str | None = None) -> list[Symbol]:
        """Return symbols called *name*; restrict to *kind* when it is given."""
        return [
            sym
            for per_file in self._file_symbols.values()
            for sym in per_file
            if sym.name == name and (kind is None or sym.kind == kind)
        ]

    def build_context(self, symbol: Symbol) -> ContextWindow:
        """Assemble the context window for *symbol* from its file's cache."""
        path = symbol.file_path
        siblings = self._file_symbols.get(path, [])

        # One pass: imports go to one bucket, every other symbol except the
        # focal one (compared by identity) goes to the other.
        import_symbols: list[Symbol] = []
        neighbours: list[Symbol] = []
        for candidate in siblings:
            if candidate.kind == "import":
                import_symbols.append(candidate)
            elif candidate is not symbol:
                neighbours.append(candidate)

        return ContextWindow(
            focal_symbol=symbol,
            related_symbols=neighbours,
            imports=import_symbols,
            file_content=self._file_contents.get(path, ""),
        )

    def build_context_for_name(self, name: str) -> list[ContextWindow]:
        """Build one context window per symbol whose name equals *name*."""
        return [self.build_context(match) for match in self.find_symbol(name)]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# ---------------------------------------------------------------------------
|
|
151
|
+
# Call Graph (AST-based via tree-sitter, with a regex fallback)
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
|
|
154
|
+
@dataclass
class CallEdge:
    """A single caller-to-callee edge of the call graph."""

    caller: str  # "file:name" or just "name"
    callee: str
    file_path: str
    line: int

    def to_dict(self) -> dict[str, Any]:
        """Return the edge's four fields as a plain dictionary."""
        field_names = ("caller", "callee", "file_path", "line")
        return {key: getattr(self, key) for key in field_names}
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class CallGraph:
|
|
174
|
+
"""AST-based call graph built from tree-sitter function-call nodes.
|
|
175
|
+
|
|
176
|
+
Walks the AST of each callable symbol's body to find ``call`` nodes
|
|
177
|
+
(function/method invocations). The callee name is resolved from the
|
|
178
|
+
AST node (``identifier`` / ``attribute`` / ``field_expression``) and
|
|
179
|
+
matched against indexed symbol names to produce precise edges.
|
|
180
|
+
|
|
181
|
+
Falls back to the regex heuristic only when tree-sitter cannot parse
|
|
182
|
+
the file (e.g. unsupported language).
|
|
183
|
+
"""
|
|
184
|
+
|
|
185
|
+
# Node types that represent a function/method call across languages
|
|
186
|
+
_CALL_NODE_TYPES: set[str] = {
|
|
187
|
+
"call", # Python, Ruby, PHP
|
|
188
|
+
"call_expression", # JS, TS, Go, Rust, C#, C++, Java
|
|
189
|
+
"method_invocation", # Java
|
|
190
|
+
"invocation_expression", # C#
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
def __init__(self) -> None:
|
|
194
|
+
self._edges: list[CallEdge] = []
|
|
195
|
+
self._callers: dict[str, list[CallEdge]] = {} # callee -> list of callers
|
|
196
|
+
self._callees: dict[str, list[CallEdge]] = {} # caller -> list of callees
|
|
197
|
+
|
|
198
|
+
# ----- public API -----
|
|
199
|
+
|
|
200
|
+
def build(self, symbols: list[Symbol]) -> None:
|
|
201
|
+
"""Build the call graph from a list of symbols using AST analysis."""
|
|
202
|
+
self._edges.clear()
|
|
203
|
+
self._callers.clear()
|
|
204
|
+
self._callees.clear()
|
|
205
|
+
|
|
206
|
+
callable_symbols = [
|
|
207
|
+
s for s in symbols if s.kind in ("function", "method", "class")
|
|
208
|
+
]
|
|
209
|
+
callee_names: set[str] = {s.name for s in callable_symbols}
|
|
210
|
+
|
|
211
|
+
# Group symbols by file so we parse each file once
|
|
212
|
+
file_symbols: dict[str, list[Symbol]] = {}
|
|
213
|
+
for sym in callable_symbols:
|
|
214
|
+
file_symbols.setdefault(sym.file_path, []).append(sym)
|
|
215
|
+
|
|
216
|
+
for file_path, syms in file_symbols.items():
|
|
217
|
+
lang_name = detect_language(file_path)
|
|
218
|
+
language_obj = get_language(lang_name) if lang_name else None
|
|
219
|
+
|
|
220
|
+
for sym in syms:
|
|
221
|
+
caller_key = f"{sym.file_path}:{sym.name}"
|
|
222
|
+
if language_obj is not None:
|
|
223
|
+
call_names = self._extract_calls_ast(
|
|
224
|
+
sym.body, language_obj, callee_names, sym.name,
|
|
225
|
+
)
|
|
226
|
+
else:
|
|
227
|
+
# Fallback: regex heuristic for unsupported languages
|
|
228
|
+
call_names = self._extract_calls_regex(
|
|
229
|
+
sym.body, callee_names, sym.name,
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
for callee_name in call_names:
|
|
233
|
+
edge = CallEdge(
|
|
234
|
+
caller=caller_key,
|
|
235
|
+
callee=callee_name,
|
|
236
|
+
file_path=sym.file_path,
|
|
237
|
+
line=sym.start_line,
|
|
238
|
+
)
|
|
239
|
+
self._edges.append(edge)
|
|
240
|
+
self._callers.setdefault(callee_name, []).append(edge)
|
|
241
|
+
self._callees.setdefault(caller_key, []).append(edge)
|
|
242
|
+
|
|
243
|
+
# ----- AST-based extraction -----
|
|
244
|
+
|
|
245
|
+
def _extract_calls_ast(
|
|
246
|
+
self,
|
|
247
|
+
body: str,
|
|
248
|
+
language: tree_sitter.Language,
|
|
249
|
+
known_names: set[str],
|
|
250
|
+
self_name: str,
|
|
251
|
+
) -> set[str]:
|
|
252
|
+
"""Extract function/method call names from *body* via tree-sitter AST.
|
|
253
|
+
|
|
254
|
+
Returns the set of *known* callee names that appear as call
|
|
255
|
+
targets in the AST (excluding self-references).
|
|
256
|
+
"""
|
|
257
|
+
source = body.encode("utf-8")
|
|
258
|
+
parser = tree_sitter.Parser(language)
|
|
259
|
+
tree = parser.parse(source)
|
|
260
|
+
|
|
261
|
+
found: set[str] = set()
|
|
262
|
+
self._walk_calls(tree.root_node, source, known_names, self_name, found)
|
|
263
|
+
return found
|
|
264
|
+
|
|
265
|
+
def _walk_calls(
    self,
    node: tree_sitter.Node,
    source: bytes,
    known_names: set[str],
    self_name: str,
    found: set[str],
) -> None:
    """Collect call-target names from the subtree rooted at *node*.

    Every node whose type is listed in ``self._CALL_NODE_TYPES`` is resolved
    through ``self._resolve_call_name``. The resolved name is added to
    *found* when it is non-empty, differs from *self_name*, and is a member
    of *known_names*.

    The traversal uses an explicit stack instead of recursion so that a
    deeply nested syntax tree cannot exhaust the interpreter's recursion
    limit.

    Args:
        node: Root of the subtree to inspect.
        source: Bytes the tree was parsed from; forwarded to the resolver.
        known_names: Names that are allowed to be reported.
        self_name: Name of the enclosing symbol; never reported.
        found: Output set, updated in place.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if current.type in self._CALL_NODE_TYPES:
            name = self._resolve_call_name(current, source)
            if name and name != self_name and name in known_names:
                found.add(name)
        # Push in reverse so children are popped (visited) in their
        # original order, matching a recursive pre-order walk.
        pending.extend(reversed(current.children))
|
|
281
|
+
|
|
282
|
+
@staticmethod
def _resolve_call_name(call_node: tree_sitter.Node, source: bytes) -> str | None:
    """Return the bare callee name for a call node, or ``None`` if unresolved.

    The call target is looked up through the ``function`` field, then the
    ``name`` field (used by e.g. Java ``method_invocation``), and finally
    the first child of *call_node*.

    Target shapes that resolve to a name:
    - ``identifier`` -- ``foo()`` yields ``foo``.
    - ``attribute`` / ``member_expression`` / ``field_expression`` /
      ``scoped_identifier`` / ``member_access_expression`` -- the text of
      the ``attribute`` or ``field`` field, otherwise of the last named
      child, so ``obj.method()`` yields ``method`` and ``pkg::func()``
      yields ``func``.

    Any other target type yields ``None``.
    """

    def text_of(piece):
        # Slice the node's byte span out of the parsed source.
        return source[piece.start_byte:piece.end_byte].decode("utf-8", errors="replace")

    target = call_node.child_by_field_name("function")
    if target is None:
        target = call_node.child_by_field_name("name")
    if target is None:
        if not call_node.children:
            return None
        target = call_node.children[0]

    if target.type == "identifier":
        return text_of(target)

    qualified_types = (
        "attribute",
        "member_expression",
        "field_expression",
        "scoped_identifier",
        "member_access_expression",
    )
    if target.type not in qualified_types:
        return None

    member = target.child_by_field_name("attribute") or target.child_by_field_name("field")
    if member is None:
        # No dedicated field on this node type: use the last named child.
        member = next(
            (child for child in reversed(target.children) if child.is_named),
            None,
        )
    return None if member is None else text_of(member)
|
|
318
|
+
|
|
319
|
+
# ----- regex fallback -----
|
|
320
|
+
|
|
321
|
+
@staticmethod
def _extract_calls_regex(
    body: str,
    known_names: set[str],
    self_name: str,
) -> set[str]:
    """Guess which known names are used in *body* without parsing it.

    A name is reported when it occurs in *body* at a word boundary and is
    followed, after optional whitespace, by ``(`` or ``.``. *self_name* is
    never reported. This is the heuristic used when no tree-sitter
    language is available for the file.
    """

    def is_referenced(candidate: str) -> bool:
        pattern = r"\b" + re.escape(candidate) + r"\s*[\(\.]"
        return re.search(pattern, body) is not None

    return {
        candidate
        for candidate in known_names
        if candidate != self_name and is_referenced(candidate)
    }
|
|
335
|
+
|
|
336
|
+
@property
def edges(self) -> list[CallEdge]:
    """Expose every recorded call edge as a new list (edge objects are shared)."""
    return [*self._edges]
|
|
340
|
+
|
|
341
|
+
def callers_of(self, name: str) -> list[CallEdge]:
    """Return all edges in which *name* is the callee.

    A new list is returned on every call, so mutating the result cannot
    alter the graph's internal caller index. A name with no recorded
    callers yields an empty list.
    """
    return list(self._callers.get(name, ()))
|
|
344
|
+
|
|
345
|
+
def callees_of(self, caller_key: str) -> list[CallEdge]:
    """Return all edges in which *caller_key* is the caller.

    A new list is returned on every call, so mutating the result cannot
    alter the graph's internal callee index. A key with no recorded
    callees yields an empty list.
    """
    return list(self._callees.get(caller_key, ()))
|
|
348
|
+
|
|
349
|
+
def to_dict(self) -> dict[str, Any]:
    """Build a summary dictionary describing the call graph.

    The result has three keys: ``edges`` (each edge's own ``to_dict()``
    output), ``node_count`` (number of distinct caller and callee values
    across all edges) and ``edge_count`` (number of edges).
    """
    serialized = []
    endpoints = set()
    for edge in self._edges:
        serialized.append(edge.to_dict())
        endpoints.add(edge.caller)
        endpoints.add(edge.callee)
    return {
        "edges": serialized,
        "node_count": len(endpoints),
        "edge_count": len(self._edges),
    }
|
|
358
|
+
|
|
359
|
+
def __repr__(self) -> str:
    """Return a short debug string reporting how many edges the graph holds."""
    edge_total = len(self._edges)
    return f"CallGraph(edges={edge_total})"
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# ---------------------------------------------------------------------------
|
|
364
|
+
# Dependency Map (file-level imports)
|
|
365
|
+
# ---------------------------------------------------------------------------
|
|
366
|
+
|
|
367
|
+
@dataclass
class FileDependency:
    """One import statement recorded for a source file."""

    source_file: str  # path of the file the import was extracted from
    import_text: str  # text of the import statement
    line: int  # line number recorded for the import

    def to_dict(self) -> dict[str, Any]:
        """Return the three fields as a plain ``dict`` keyed by field name."""
        return {
            key: getattr(self, key)
            for key in ("source_file", "import_text", "line")
        }
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
class DependencyMap:
    """Tracks file-level dependencies based on import statements.

    Dependencies are stored per source file; registering the same file
    again replaces its previous entries.
    """

    def __init__(self) -> None:
        # Maps each tracked file path to the imports extracted from it.
        self._dependencies: dict[str, list[FileDependency]] = {}

    def add_file(self, file_path: str, content: str | None = None) -> list[FileDependency]:
        """Extract the imports of *file_path* and record them as dependencies.

        Args:
            file_path: Path of the file; also the key the result is stored under.
            content: Forwarded unchanged to ``extract_imports``.

        Returns:
            A new list with one ``FileDependency`` per extracted import.
        """
        deps = [
            FileDependency(
                source_file=file_path,
                import_text=imp.body.strip(),
                line=imp.start_line,
            )
            for imp in extract_imports(file_path, content)
        ]
        self._dependencies[file_path] = deps
        # Hand back a copy so the caller cannot mutate the stored list.
        return list(deps)

    def get_dependencies(self, file_path: str) -> list[FileDependency]:
        """Return a copy of the dependencies recorded for *file_path*.

        An untracked file yields an empty list.
        """
        return list(self._dependencies.get(file_path, ()))

    def get_all_files(self) -> list[str]:
        """Return the paths of all tracked files, in insertion order."""
        return list(self._dependencies)

    def get_dependents(self, module_name: str) -> list[FileDependency]:
        """Find every recorded import that references *module_name*.

        The name must appear as a whole token: where it starts or ends
        with an identifier character, it may not be directly adjacent to
        another identifier character. ``"os"`` therefore matches
        ``import os`` and ``import os.path`` but not ``import cosmos``.
        An empty *module_name* matches nothing.
        """
        if not module_name:
            return []
        # Only guard the sides of the name that are identifier characters,
        # so names such as ".utils" still match inside "pkg.utils".
        left_guard = r"(?<!\w)" if re.match(r"\w", module_name) else ""
        right_guard = r"(?!\w)" if re.search(r"\w\Z", module_name) else ""
        token = re.compile(left_guard + re.escape(module_name) + right_guard)
        return [
            dep
            for deps in self._dependencies.values()
            for dep in deps
            if token.search(dep.import_text)
        ]

    def to_dict(self) -> dict[str, Any]:
        """Serialize all tracked file dependencies to a dictionary.

        Keys are file paths; values are lists of each dependency's
        ``to_dict()`` output.
        """
        return {
            file: [d.to_dict() for d in deps]
            for file, deps in self._dependencies.items()
        }

    def __repr__(self) -> str:
        return f"DependencyMap(files={len(self._dependencies)})"
|