cortex-loop 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +7 -0
- cortex/adapters.py +339 -0
- cortex/blocklist.py +51 -0
- cortex/challenges.py +210 -0
- cortex/cli.py +7 -0
- cortex/core.py +601 -0
- cortex/core_helpers.py +190 -0
- cortex/data/identity_preamble.md +5 -0
- cortex/data/layer1_part_a.md +65 -0
- cortex/data/layer1_part_b.md +17 -0
- cortex/executive.py +295 -0
- cortex/foundation.py +185 -0
- cortex/genome.py +348 -0
- cortex/graveyard.py +226 -0
- cortex/hooks/__init__.py +27 -0
- cortex/hooks/_shared.py +167 -0
- cortex/hooks/post_tool_use.py +13 -0
- cortex/hooks/pre_tool_use.py +13 -0
- cortex/hooks/session_start.py +13 -0
- cortex/hooks/stop.py +13 -0
- cortex/invariants.py +258 -0
- cortex/packs.py +118 -0
- cortex/repomap.py +6 -0
- cortex/requirements.py +497 -0
- cortex/retry.py +312 -0
- cortex/stop_contract.py +217 -0
- cortex/stop_payload.py +122 -0
- cortex/stop_policy.py +100 -0
- cortex/stop_runtime.py +400 -0
- cortex/stop_signals.py +75 -0
- cortex/store.py +793 -0
- cortex/templates/__init__.py +10 -0
- cortex/utils.py +58 -0
- cortex_loop-0.1.0a1.dist-info/METADATA +121 -0
- cortex_loop-0.1.0a1.dist-info/RECORD +52 -0
- cortex_loop-0.1.0a1.dist-info/WHEEL +5 -0
- cortex_loop-0.1.0a1.dist-info/entry_points.txt +3 -0
- cortex_loop-0.1.0a1.dist-info/licenses/LICENSE +21 -0
- cortex_loop-0.1.0a1.dist-info/top_level.txt +3 -0
- cortex_ops_cli/__init__.py +3 -0
- cortex_ops_cli/_adapter_validation.py +119 -0
- cortex_ops_cli/_check_report.py +454 -0
- cortex_ops_cli/_check_report_output.py +270 -0
- cortex_ops_cli/_openai_bridge_probe.py +241 -0
- cortex_ops_cli/_openai_bridge_protocol.py +469 -0
- cortex_ops_cli/_runtime_profile_templates.py +341 -0
- cortex_ops_cli/_runtime_profiles.py +445 -0
- cortex_ops_cli/gemini_hooks.py +301 -0
- cortex_ops_cli/main.py +911 -0
- cortex_ops_cli/openai_app_server_bridge.py +375 -0
- cortex_repomap/__init__.py +1 -0
- cortex_repomap/engine.py +1201 -0
cortex_repomap/engine.py
ADDED
|
@@ -0,0 +1,1201 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import importlib.util
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from datetime import UTC, datetime
|
|
11
|
+
from pathlib import Path, PurePosixPath
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
# Schema tag; used as the default for ``RepoMapArtifact.schema_version``.
SCHEMA_VERSION = "repomap_artifact_v1"
# NOTE(review): presumably the per-file size cap applied during discovery and
# the number of bytes sampled for text detection -- the consumers are not
# visible in this part of the file; confirm against ``_is_text_candidate``.
MAX_DISCOVER_FILE_BYTES = 512_000
READ_SAMPLE_BYTES = 8192
# Directory names always skipped; ``_discover_files`` unions this set with the
# caller-supplied ``ignored_dirs``.
DEFAULT_IGNORED_DIRS = set(
    ".git .hg .svn .venv venv .tox .mypy_cache .pytest_cache .ruff_cache .cortex "
    "node_modules dist build __pycache__ .next coverage".split()
)
# File suffixes for image/archive/native-binary/font/media/database/bytecode
# formats. NOTE(review): presumably excluded from text analysis -- confirm.
_BINARY_SUFFIXES = set(
    ".png .jpg .jpeg .gif .webp .bmp .ico .pdf .zip .gz .tgz .bz2 .xz .7z .tar .jar .war "
    ".so .dll .dylib .exe .bin .woff .woff2 .ttf .otf .mp3 .mp4 .mov .avi .wav .sqlite .db "
    ".pyc .pyo".split()
)
# Suffix sets for the "core" and "extended" language profiles.
# NOTE(review): selection between them happens in ``_active_code_like_suffixes``
# (defined elsewhere in the file) -- confirm how the two sets are combined.
_CORE_CODE_LIKE_SUFFIXES = set(
    ".astro .py .js .jsx .ts .tsx .mjs .cjs .toml .yaml .yml .json .md .html .css .scss".split()
)
_EXTENDED_CODE_LIKE_SUFFIXES = set(
    ".svelte .vue .pyi .java .kt .go .rs .rb .php .c .cc .cpp .h .hpp .cs .swift .scala .lua .sh .bash .zsh .ps1 .sql".split()
)
# Ranking weight tables keyed by suffix, exact (lower-case) filename, file-name
# stem, and path segment. NOTE(review): how each weight is applied is decided by
# ``_rank_files``, which is outside this view -- confirm before tuning values.
_RANK_SUFFIX_BOOSTS = {".astro": 0.9, ".tsx": 0.5, ".jsx": 0.4}
_RANK_EXACT_FILENAME_PENALTIES = {
    "package-lock.json": 1.8,
    "pnpm-lock.yaml": 1.8,
    "yarn.lock": 1.8,
    "poetry.lock": 1.4,
    "cargo.lock": 1.4,
    "composer.lock": 1.2,
}
# Directories probed, in this order, by ``_select_scope`` when the configured
# scope does not exist under the project root.
_FALLBACK_SCOPE_CANDIDATES = ("cortex", "src", "lib", "app", "packages", "tests")
_RANK_NAME_BOOSTS = {
    "core": 0.9, "main": 0.8, "app": 0.7, "index": 0.6, "server": 0.7, "client": 0.5,
    "api": 0.6, "router": 0.5, "service": 0.4, "model": 0.3, "store": 0.3,
}
_RANK_PATH_PENALTIES = {"tests": 0.85, "test": 0.85, "docs": 0.65, "examples": 0.7, "scripts": 0.8, "migrations": 0.8}
_RANK_PATH_BOOSTS = {"src": 0.15, "components": 0.2, "pages": 0.2, "layouts": 0.15}
# NOTE(review): presumably the suffixes tried when resolving an extension-less
# relative import to a file -- the resolver is not visible here; confirm.
_RELATIVE_IMPORT_SUFFIX_CANDIDATES = tuple(".py .ts .tsx .js .jsx .mjs .cjs .astro .vue .svelte".split())
|
|
49
|
+
_SYMBOL_PATTERNS: list[tuple[re.Pattern[str], str]] = [
|
|
50
|
+
(re.compile(r"^\s*class\s+([A-Za-z_]\w*)\b"), "class"),
|
|
51
|
+
(re.compile(r"^\s*def\s+([A-Za-z_]\w*)\s*\("), "def"),
|
|
52
|
+
(re.compile(r"^\s*(?:async\s+)?def\s+([A-Za-z_]\w*)\s*\("), "def"),
|
|
53
|
+
(re.compile(r"^\s*export\s+class\s+([A-Za-z_]\w*)\b"), "class"),
|
|
54
|
+
(re.compile(r"^\s*export\s+(?:async\s+)?function\s+([A-Za-z_]\w*)\s*\("), "function"),
|
|
55
|
+
(re.compile(r"^\s*(?:async\s+)?function\s+([A-Za-z_]\w*)\s*\("), "function"),
|
|
56
|
+
(re.compile(r"^\s*interface\s+([A-Za-z_]\w*)\b"), "interface"),
|
|
57
|
+
(re.compile(r"^\s*type\s+([A-Za-z_]\w*)\b"), "type"),
|
|
58
|
+
(re.compile(r"^\s*(?:const|let|var)\s+([A-Za-z_]\w*)\s*=\s*(?:async\s*)?\("), "const"),
|
|
59
|
+
(re.compile(r"^\s*(?:const|let|var)\s+([A-Za-z_]\w*)\s*=\s*function\b"), "const"),
|
|
60
|
+
(re.compile(r"^\s*([A-Za-z_]\w*)\s*\(\)\s*\{"), "function"),
|
|
61
|
+
# Go (type X struct/interface already matched by generic type pattern above)
|
|
62
|
+
(re.compile(r"^\s*func\s+(?:\([^)]*\)\s+)?([A-Za-z_]\w*)\s*\("), "func"),
|
|
63
|
+
# Rust
|
|
64
|
+
(re.compile(r"^\s*(?:pub\s+)?fn\s+([A-Za-z_]\w*)\s*[(<]"), "fn"),
|
|
65
|
+
(re.compile(r"^\s*(?:pub\s+)?struct\s+([A-Za-z_]\w*)"), "struct"),
|
|
66
|
+
(re.compile(r"^\s*(?:pub\s+)?enum\s+([A-Za-z_]\w*)"), "enum"),
|
|
67
|
+
(re.compile(r"^\s*(?:pub\s+)?trait\s+([A-Za-z_]\w*)"), "trait"),
|
|
68
|
+
(re.compile(r"^\s*impl(?:<[^>]*>)?\s+([A-Za-z_]\w*)"), "impl"),
|
|
69
|
+
# Java/Kotlin
|
|
70
|
+
(re.compile(r"^\s*(?:public|private|protected)?\s*(?:static\s+)?(?:abstract\s+)?class\s+([A-Za-z_]\w*)"), "class"),
|
|
71
|
+
(re.compile(r"^\s*(?:public|private|protected)?\s*interface\s+([A-Za-z_]\w*)"), "interface"),
|
|
72
|
+
# Ruby
|
|
73
|
+
(re.compile(r"^\s*module\s+([A-Za-z_]\w*)"), "module"),
|
|
74
|
+
]
|
|
75
|
+
# Line-level import detectors; each captures the import target in group 1.
# ``_extract_import_targets`` applies them with ``search`` to every line that is
# not part of a Go ``import ( ... )`` block (those are handled separately there).
_IMPORT_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"""^\s*import\s+.*?\s+from\s+["']([^"']+)["']"""),
    re.compile(r"""^\s*import\s+["']([^"']+)["']"""),
    re.compile(r"""^\s*export\s+.*?\s+from\s+["']([^"']+)["']"""),
    # Not anchored: matches require(...) anywhere in the line.
    re.compile(r"""require\(\s*["']([^"']+)["']\s*\)"""),
    # Go (standalone import)
    re.compile(r"""^\s*import\s+"([^"]+)"$"""),
    # Rust
    re.compile(r"""^\s*(?:pub\s+)?use\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)"""),
    # Java/Kotlin
    re.compile(r"""^\s*import\s+(?:static\s+)?([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*)"""),
    # Ruby
    re.compile(r"""^\s*require\s+["']([^"']+)["']"""),
    re.compile(r"""^\s*require_relative\s+["']([^"']+)["']"""),
]
# NOTE(review): these two maps look like {distribution name: importable module
# name}; presumably consumed by ``repomap_missing_dependencies`` and
# ``repomap_missing_parser_dependencies`` (defined elsewhere) -- confirm.
_RANKING_OPTIONAL_DEPENDENCIES = {
    "networkx": "networkx",
}
_PARSER_OPTIONAL_DEPENDENCIES = {
    "tree-sitter": "tree_sitter",
    "tree-sitter-language-pack": "tree_sitter_language_pack",
}
# File suffix -> tree-sitter language name for the core language profile.
# ``.astro`` is listed so the suffix is recognised, but
# ``_tree_sitter_parser_for_path`` parses only the Astro frontmatter and does so
# with the "typescript" parser rather than a parser named "astro".
_CORE_TREE_SITTER_LANG_BY_SUFFIX = {
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".astro": "astro",
}
# File suffix -> tree-sitter language name for the extended language profile.
# NOTE(review): presumably merged with the core map by
# ``_active_tree_sitter_lang_by_suffix`` when the extended profile is on -- confirm.
_EXTENDED_TREE_SITTER_LANG_BY_SUFFIX = {
    ".go": "go",
    ".rs": "rust",
    ".rb": "ruby",
    ".java": "java",
    ".kt": "kotlin",
    ".c": "c",
    ".h": "c",
    ".cc": "cpp",
    ".cpp": "cpp",
    ".hpp": "cpp",
    ".swift": "swift",
    ".scala": "scala",
    ".lua": "lua",
    ".php": "php",
    ".bash": "bash",
    ".sh": "bash",
}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass(slots=True)
|
|
128
|
+
class RepoMapRankingEntry:
|
|
129
|
+
path: str
|
|
130
|
+
score: float
|
|
131
|
+
symbols: list[str] = field(default_factory=list)
|
|
132
|
+
|
|
133
|
+
def to_dict(self) -> dict[str, Any]:
|
|
134
|
+
return {
|
|
135
|
+
"path": self.path,
|
|
136
|
+
"score": round(float(self.score), 6),
|
|
137
|
+
"symbols": list(self.symbols),
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass(slots=True)
|
|
142
|
+
class RepoMapFileAnalysis:
|
|
143
|
+
path: str
|
|
144
|
+
byte_size: int
|
|
145
|
+
line_count: int
|
|
146
|
+
symbols: list[str] = field(default_factory=list)
|
|
147
|
+
symbol_count: int = 0
|
|
148
|
+
imports: list[str] = field(default_factory=list)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass(slots=True)
class RepoMapArtifact:
    """Serializable result of a repo-map run (successful or failed)."""

    ok: bool
    generated_at: str
    provenance: dict[str, Any]
    stats: dict[str, Any]
    ranking: list[RepoMapRankingEntry]
    text: str
    error: dict[str, Any] | None = None
    schema_version: str = SCHEMA_VERSION

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-ready dictionary for this artifact.

        ``provenance``, ``stats`` and ``error`` are shallow-copied, ranking
        entries are converted via their own ``to_dict``, and the ``error`` key
        is present only when an error was recorded.
        """
        payload: dict[str, Any] = dict(
            schema_version=self.schema_version,
            ok=self.ok,
            generated_at=self.generated_at,
            provenance=dict(self.provenance),
            stats=dict(self.stats),
            ranking=[ranked.to_dict() for ranked in self.ranking],
            text=self.text,
        )
        if self.error is None:
            return payload
        payload["error"] = dict(self.error)
        return payload
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass(slots=True)
|
|
178
|
+
class RepoMapRunResult:
|
|
179
|
+
artifact: RepoMapArtifact
|
|
180
|
+
artifact_path: str | None = None
|
|
181
|
+
session_artifact_path: str | None = None
|
|
182
|
+
|
|
183
|
+
@property
|
|
184
|
+
def ok(self) -> bool:
|
|
185
|
+
return self.artifact.ok
|
|
186
|
+
|
|
187
|
+
def to_dict(self) -> dict[str, Any]:
|
|
188
|
+
data = self.artifact.to_dict()
|
|
189
|
+
if self.artifact_path:
|
|
190
|
+
data["artifact_path"] = self.artifact_path
|
|
191
|
+
if self.session_artifact_path:
|
|
192
|
+
data["session_artifact_path"] = self.session_artifact_path
|
|
193
|
+
return data
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def run_repomap(
    *,
    root: str | Path,
    repomap_config: Any | None = None,
    scope: list[str] | None = None,
    focus_files: list[str] | None = None,
    output_path: str | None = None,
    max_files: int | None = None,
    max_text_bytes: int | None = None,
    session_id: str | None = None,
    timeout_ms: int | None = None,
    parity_profile: bool | None = None,
) -> RepoMapRunResult:
    """Discover, analyze and rank files under ``root`` and write the artifact.

    Explicit keyword arguments override the matching ``repomap_config``
    attributes; built-in defaults apply when neither is given. Failures are
    reported through a failure artifact (``result.ok`` is False) rather than
    raised for the conditions handled here: invalid root, missing parser
    dependencies or invalid settings under the parity profile, timeout during
    discovery, ``OSError`` during discovery, and ``OSError`` while writing.

    Args:
        root: Project root; resolved to an absolute path.
        repomap_config: Optional settings object read via ``_get_attr``.
        scope: Paths to scan; when omitted the configured scope is used and
            ``_select_scope`` may substitute a fallback.
        focus_files: Paths passed to the ranking step as focus hints.
        output_path: Destination of the "latest" artifact JSON.
        max_files: Maximum ranked entries (clamped to at least 1).
        max_text_bytes: Budget for the rendered text (clamped to at least 256).
        session_id: When truthy, a second copy is written as
            ``<root>/.cortex/artifacts/repomap/<session_id>.json``. Path
            separators and NUL bytes in the id are replaced with ``_`` so the
            copy always stays inside that directory.
        timeout_ms: Optional time budget checked before and during discovery.
        parity_profile: Overrides the configured parity profile when not None.

    Returns:
        A ``RepoMapRunResult`` wrapping the artifact and, on success, the
        paths it was written to.
    """
    start = time.perf_counter()
    root_path = Path(root).resolve()
    config_scope = _get_attr(repomap_config, "watch_paths", ["src"])
    config_ignored = _get_attr(repomap_config, "ignored_dirs", [])
    config_artifact = _get_attr(repomap_config, "artifact_path", ".cortex/artifacts/repomap/latest.json")
    config_max_files = _get_attr(repomap_config, "max_ranked_files", 20)
    config_max_text_bytes = _get_attr(repomap_config, "max_text_bytes", 8192)
    config_prefer_ast = bool(_get_attr(repomap_config, "prefer_ast_graph", True))
    config_parity_profile = bool(_get_attr(repomap_config, "parity_profile", False))
    requested_scope = [str(v) for v in (scope or config_scope or ["src"])]
    selected_scope = _select_scope(
        root_path,
        requested_scope=requested_scope,
        user_scope_supplied=scope is not None,
    )
    selected_focus = [str(v) for v in (focus_files or [])]
    selected_output = output_path or str(config_artifact)
    selected_max_files = max(1, int(max_files if max_files is not None else config_max_files))
    selected_max_text_bytes = max(
        256, int(max_text_bytes if max_text_bytes is not None else config_max_text_bytes)
    )
    selected_timeout_ms = timeout_ms
    selected_parity_profile = config_parity_profile if parity_profile is None else bool(parity_profile)
    selected_extended_language_profile = bool(_get_attr(repomap_config, "extended_language_profile", False))
    code_like_suffixes = _active_code_like_suffixes(repomap_config)
    tree_sitter_lang_by_suffix = _active_tree_sitter_lang_by_suffix(repomap_config)
    parser_cache: dict[str, Any] = {}
    ranking_missing_deps = repomap_missing_dependencies()
    parser_missing_deps = repomap_missing_parser_dependencies()
    parser_backend = "tree_sitter" if (config_prefer_ast and not parser_missing_deps) else "builtin"
    # AST dependency-edge discovery is built-in; networkx is an optional quality boost.
    ast_mode_active = config_prefer_ast

    def _fail_result(
        *,
        code: str,
        message: str,
        failed_stage: str,
        include_parser_context: bool = False,
        parser_profile: str | None = None,
    ) -> RepoMapRunResult:
        """Wrap ``_failure_artifact`` with the run context captured above."""
        failure_kwargs: dict[str, Any] = {}
        if include_parser_context:
            failure_kwargs = {
                "parser_backend": parser_backend,
                "parser_deps_missing": parser_missing_deps,
                "parser_profile": parser_profile
                or ("parity" if selected_parity_profile else "operational"),
            }
        artifact = _failure_artifact(
            code=code,
            message=message,
            root=root_path,
            scope=selected_scope,
            focus_files=selected_focus,
            start=start,
            timeout_ms=selected_timeout_ms,
            failed_stage=failed_stage,
            **failure_kwargs,
        )
        return RepoMapRunResult(artifact=artifact)

    if not root_path.exists() or not root_path.is_dir():
        return _fail_result(
            code="scan_failed",
            message=f"Project root does not exist or is not a directory: {root_path}",
            failed_stage="discovery",
        )

    # The parity profile is strict: it needs the tree-sitter dependencies and
    # AST graph mode, and fails fast when either is unavailable.
    if selected_parity_profile and parser_missing_deps:
        return _fail_result(
            code="deps_missing",
            message=(
                "Parity profile requires tree-sitter parser dependencies; missing: "
                + ", ".join(parser_missing_deps)
                + ". Install with: pip install -e '.[repomap]'"
            ),
            failed_stage="analysis",
            include_parser_context=True,
            parser_profile="parity",
        )
    if selected_parity_profile and not config_prefer_ast:
        return _fail_result(
            code="parity_profile_invalid",
            message="Parity profile requires repomap.prefer_ast_graph=true.",
            failed_stage="analysis",
            include_parser_context=True,
            parser_profile="parity",
        )

    if _timed_out(start, selected_timeout_ms):
        return _fail_result(
            code="timeout",
            message="Repo-map generation timed out before discovery started.",
            failed_stage="discovery",
        )

    try:
        files = _discover_files(
            root=root_path,
            scope=selected_scope,
            ignored_dirs=[str(v) for v in config_ignored],
            timeout_check=lambda: _timed_out(start, selected_timeout_ms),
        )
    except TimeoutError:
        return _fail_result(
            code="timeout",
            message="Repo-map generation timed out during file discovery.",
            failed_stage="discovery",
        )
    except OSError as exc:
        return _fail_result(
            code="scan_failed",
            message=f"Failed during file discovery: {exc}",
            failed_stage="discovery",
        )

    analyses, parser_stats = _analyze_files(
        root_path,
        files,
        use_tree_sitter=parser_backend == "tree_sitter",
        parser_cache=parser_cache,
        tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
    )
    if selected_parity_profile and parser_stats["tree_sitter_files_parsed"] == 0:
        return _fail_result(
            code="parser_not_used",
            message=(
                "Parity profile requires tree-sitter-backed structural parsing, "
                "but no files were parsed with tree-sitter in this scope."
            ),
            failed_stage="analysis",
            include_parser_context=True,
            parser_profile="parity",
        )

    dependency_edges: list[tuple[str, str]] = []
    graph_scores: dict[str, float] = {}
    pagerank_backend = "none"
    method = "heuristic_fallback"
    if ast_mode_active:
        dependency_edges = _build_dependency_edges(analyses)
        graph_scores, pagerank_backend = _pagerank_scores_with_backend(
            [item.path for item in analyses], dependency_edges
        )
        method = "ast_pagerank"
    ranking = _rank_files(
        analyses,
        selected_focus,
        selected_max_files,
        code_like_suffixes=code_like_suffixes,
        graph_scores=graph_scores,
    )
    text = _render_text(ranking, selected_max_text_bytes)
    symbols_found = sum(item.symbol_count for item in analyses)
    artifact = RepoMapArtifact(
        ok=True,
        generated_at=_now_iso8601(),
        provenance={
            "method": method,
            "source_root": str(root_path),
            "scope": selected_scope,
            "focus_files": selected_focus,
            "duration_ms": _duration_ms(start),
            "timeout_ms": selected_timeout_ms,
            "ast_requested": config_prefer_ast,
            "ast_enabled": ast_mode_active,
            "missing_deps": ranking_missing_deps,
            "pagerank_backend": pagerank_backend,
            "parser_backend": parser_backend,
            "parser_profile": "parity" if selected_parity_profile else "operational",
            "language_profile": "extended" if selected_extended_language_profile else "core",
            "parser_deps_missing": parser_missing_deps,
            "parser_stats": parser_stats,
        },
        stats={
            "files_parsed": len(files),
            "symbols_found": symbols_found,
            "graph_edges": len(dependency_edges),
            "byte_count": len(text.encode("utf-8")),
        },
        ranking=ranking,
        text=text,
    )

    latest_path = _resolve_output_path(root_path, selected_output)
    # Serialize once; both destinations receive the identical payload.
    payload = json.dumps(artifact.to_dict(), indent=2, sort_keys=True)
    session_path: Path | None = None
    try:
        latest_path.parent.mkdir(parents=True, exist_ok=True)
        latest_path.write_text(payload, encoding="utf-8")
        if session_id:
            # session_id is caller-supplied: neutralize path separators (and
            # NUL, which would raise ValueError instead of OSError) so the
            # per-session copy cannot land outside the artifacts directory.
            safe_session_id = re.sub(r"[\\/\x00]+", "_", str(session_id))
            session_path = root_path / ".cortex" / "artifacts" / "repomap" / f"{safe_session_id}.json"
            session_path.parent.mkdir(parents=True, exist_ok=True)
            session_path.write_text(payload, encoding="utf-8")
    except OSError as exc:
        return _fail_result(
            code="write_failed",
            message=f"Failed to write repo-map artifact: {exc}",
            failed_stage="write",
        )

    return RepoMapRunResult(
        artifact=artifact,
        artifact_path=str(latest_path),
        session_artifact_path=str(session_path) if session_path is not None else None,
    )
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _discover_files(
    *,
    root: Path,
    scope: list[str],
    ignored_dirs: list[str],
    timeout_check: callable | None = None,
) -> list[str]:
    """Return the sorted, de-duplicated relative paths of candidate text files.

    Args:
        root: Project root; scope entries must resolve to a location inside it.
            Callers are expected to pass an already-resolved path, because
            containment is checked against the resolved scope target.
        scope: Files or directories to scan, relative to ``root`` or absolute.
            An empty list falls back to ``["src"]``.
        ignored_dirs: Extra names to skip in addition to ``DEFAULT_IGNORED_DIRS``.
        timeout_check: Optional zero-argument callable returning True once the
            time budget is exhausted; polled per scope entry and per directory.

    Raises:
        TimeoutError: When ``timeout_check`` reports the budget is exhausted.
        OSError: Propagated from filesystem access.
    """
    seen: set[str] = set()
    ignored_names = set(DEFAULT_IGNORED_DIRS) | {str(v) for v in ignored_dirs}

    for scope_entry in scope or ["src"]:
        if timeout_check and timeout_check():
            raise TimeoutError("repo-map discovery timed out")
        entry_path = Path(scope_entry)
        # Resolve absolute entries too: ``relative_to`` is purely lexical, so an
        # unresolved "/root/../elsewhere" would otherwise pass the containment
        # check below and let the scan leave the project root.
        target = (entry_path if entry_path.is_absolute() else root / entry_path).resolve()
        try:
            target.relative_to(root)
        except ValueError:
            continue
        if not target.exists():
            continue
        if target.is_file():
            rel = _norm_rel_path(target, root)
            if rel and rel not in seen and not _ignored(rel, ignored_names) and _is_text_candidate(target):
                seen.add(rel)
            continue
        for dirpath, dirnames, filenames in os.walk(target):
            if timeout_check and timeout_check():
                raise TimeoutError("repo-map discovery timed out")
            current_dir = Path(dirpath)
            # Prune in place so os.walk never descends into ignored directories.
            dirnames[:] = [d for d in dirnames if not _ignored(_norm_rel_path(current_dir / d, root), ignored_names)]
            for filename in filenames:
                path = current_dir / filename
                rel = _norm_rel_path(path, root)
                if not rel or rel in seen or _ignored(rel, ignored_names):
                    continue
                if not _is_text_candidate(path):
                    continue
                seen.add(rel)
    # The result is sorted and unique, so the set alone is sufficient.
    return sorted(seen)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def _select_scope(root: Path, requested_scope: list[str], user_scope_supplied: bool) -> list[str]:
|
|
465
|
+
normalized = [str(v) for v in requested_scope if str(v).strip()] or ["src"]
|
|
466
|
+
if user_scope_supplied or _scope_targets_exist(root, normalized):
|
|
467
|
+
return normalized
|
|
468
|
+
fallback = [name for name in _FALLBACK_SCOPE_CANDIDATES if (root / name).exists()]
|
|
469
|
+
return fallback or ["."]
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def _scope_targets_exist(root: Path, scope: list[str]) -> bool:
|
|
473
|
+
for entry in scope:
|
|
474
|
+
target = Path(entry)
|
|
475
|
+
if not target.is_absolute():
|
|
476
|
+
target = root / target
|
|
477
|
+
if target.exists():
|
|
478
|
+
return True
|
|
479
|
+
return False
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def _analyze_files(
|
|
483
|
+
root: Path,
|
|
484
|
+
files: list[str],
|
|
485
|
+
*,
|
|
486
|
+
use_tree_sitter: bool,
|
|
487
|
+
parser_cache: dict[str, Any] | None = None,
|
|
488
|
+
tree_sitter_lang_by_suffix: dict[str, str] | None = None,
|
|
489
|
+
) -> tuple[list[RepoMapFileAnalysis], dict[str, int]]:
|
|
490
|
+
analyses: list[RepoMapFileAnalysis] = []
|
|
491
|
+
tree_sitter_attempted = 0
|
|
492
|
+
tree_sitter_parsed = 0
|
|
493
|
+
for rel in files:
|
|
494
|
+
analysis, attempted, parsed = _analyze_file(
|
|
495
|
+
root,
|
|
496
|
+
rel,
|
|
497
|
+
use_tree_sitter=use_tree_sitter,
|
|
498
|
+
parser_cache=parser_cache,
|
|
499
|
+
tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
|
|
500
|
+
)
|
|
501
|
+
analyses.append(analysis)
|
|
502
|
+
tree_sitter_attempted += int(attempted)
|
|
503
|
+
tree_sitter_parsed += int(parsed)
|
|
504
|
+
return (
|
|
505
|
+
analyses,
|
|
506
|
+
{
|
|
507
|
+
"files_analyzed": len(files),
|
|
508
|
+
"tree_sitter_files_attempted": tree_sitter_attempted,
|
|
509
|
+
"tree_sitter_files_parsed": tree_sitter_parsed,
|
|
510
|
+
"tree_sitter_files_fallback": max(0, tree_sitter_attempted - tree_sitter_parsed),
|
|
511
|
+
},
|
|
512
|
+
)
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def _analyze_file(
    root: Path,
    rel_path: str,
    *,
    use_tree_sitter: bool,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
) -> tuple[RepoMapFileAnalysis, bool, bool]:
    """Read one file and summarize its size, line count, symbols and imports.

    Filesystem errors are tolerated: a failed ``stat`` yields a size of 0 and a
    failed read yields an empty analysis. Undecodable bytes are dropped.

    Returns:
        ``(analysis, tree_sitter_attempted, tree_sitter_parsed)``.
    """
    file_path = root / rel_path

    try:
        size_in_bytes = file_path.stat().st_size
    except OSError:
        size_in_bytes = 0

    try:
        content = file_path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        content = ""

    # Defaults describe an empty or unreadable file.
    total_lines = 0
    found_symbols: list[str] = []
    found_symbol_count = 0
    found_imports: list[str] = []
    attempted = False
    parsed = False

    if content:
        newline_total = content.count("\n")
        # A final line without a trailing newline still counts as a line.
        total_lines = newline_total if content.endswith("\n") else newline_total + 1
        extraction = _extract_symbols_and_imports(
            rel_path,
            content,
            use_tree_sitter=use_tree_sitter,
            parser_cache=parser_cache,
            tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
        )
        found_symbols, found_symbol_count, found_imports, attempted, parsed = extraction

    analysis = RepoMapFileAnalysis(
        path=rel_path,
        byte_size=size_in_bytes,
        line_count=total_lines,
        symbols=found_symbols,
        symbol_count=found_symbol_count,
        imports=found_imports,
    )
    return analysis, attempted, parsed
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def _extract_symbols_and_imports(
    rel_path: str,
    text: str,
    *,
    use_tree_sitter: bool,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
    max_symbols: int = 4,
) -> tuple[list[str], int, list[str], bool, bool]:
    """Extract symbol labels and import targets from one file's text.

    Tree-sitter, when enabled for the file's suffix, is only used to record
    whether parsing was attempted and succeeded. Python files are analyzed with
    the ``ast``-based extractor first; every other file, and any Python file for
    which that extractor finds nothing, goes through the regex extractors.

    Returns:
        ``(symbols, symbol_count, imports, tree_sitter_attempted,
        tree_sitter_parsed)``.
    """
    extension = Path(rel_path).suffix.lower()
    languages = _resolve_tree_sitter_lang_map(tree_sitter_lang_by_suffix)

    attempted, parsed = False, False
    if use_tree_sitter and extension in languages:
        attempted, parsed = _tree_sitter_parse_ok(
            rel_path,
            text,
            parser_cache=parser_cache,
            tree_sitter_lang_by_suffix=languages,
        )

    if extension == ".py":
        ast_symbols, ast_count, ast_imports = _extract_python_symbols_and_imports(
            text, max_symbols=max_symbols
        )
        if ast_count > 0 or ast_imports:
            return ast_symbols, ast_count, ast_imports, attempted, parsed

    regex_symbols, regex_count = _extract_symbol_summaries(text, max_symbols=max_symbols)
    regex_imports = _extract_import_targets(text)
    return regex_symbols, regex_count, regex_imports, attempted, parsed
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def _extract_python_symbols_and_imports(
|
|
592
|
+
text: str,
|
|
593
|
+
*,
|
|
594
|
+
max_symbols: int = 4,
|
|
595
|
+
) -> tuple[list[str], int, list[str]]:
|
|
596
|
+
try:
|
|
597
|
+
tree = ast.parse(text)
|
|
598
|
+
except SyntaxError:
|
|
599
|
+
return [], 0, []
|
|
600
|
+
|
|
601
|
+
seen: set[str] = set()
|
|
602
|
+
summaries: list[str] = []
|
|
603
|
+
count = 0
|
|
604
|
+
imports: list[str] = []
|
|
605
|
+
import_seen: set[str] = set()
|
|
606
|
+
|
|
607
|
+
for node in ast.walk(tree):
|
|
608
|
+
if isinstance(node, ast.ClassDef):
|
|
609
|
+
label = f"class {node.name}"
|
|
610
|
+
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
611
|
+
label = f"def {node.name}"
|
|
612
|
+
else:
|
|
613
|
+
label = ""
|
|
614
|
+
if label and label not in seen:
|
|
615
|
+
seen.add(label)
|
|
616
|
+
count += 1
|
|
617
|
+
if len(summaries) < max_symbols:
|
|
618
|
+
summaries.append(label)
|
|
619
|
+
|
|
620
|
+
if isinstance(node, ast.Import):
|
|
621
|
+
for alias in node.names:
|
|
622
|
+
target = alias.name.strip()
|
|
623
|
+
if target and target not in import_seen:
|
|
624
|
+
import_seen.add(target)
|
|
625
|
+
imports.append(target)
|
|
626
|
+
elif isinstance(node, ast.ImportFrom):
|
|
627
|
+
prefix = "." * max(0, int(node.level))
|
|
628
|
+
base = str(node.module or "").strip()
|
|
629
|
+
if base:
|
|
630
|
+
target = f"{prefix}{base}"
|
|
631
|
+
if target not in import_seen:
|
|
632
|
+
import_seen.add(target)
|
|
633
|
+
imports.append(target)
|
|
634
|
+
continue
|
|
635
|
+
for alias in node.names:
|
|
636
|
+
target = f"{prefix}{str(alias.name).strip()}"
|
|
637
|
+
if target and target not in import_seen:
|
|
638
|
+
import_seen.add(target)
|
|
639
|
+
imports.append(target)
|
|
640
|
+
|
|
641
|
+
return summaries, count, imports
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def _tree_sitter_parse_ok(
    rel_path: str,
    text: str,
    *,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
) -> tuple[bool, bool]:
    """Try to parse ``text`` with tree-sitter and report what happened.

    Returns:
        ``(attempted, parsed)``: ``(False, False)`` when no parser is available
        for the path, ``(True, False)`` when parsing raised, and otherwise
        ``(True, <whether the resulting tree exposes a root node>)``.
    """
    resolved = _tree_sitter_parser_for_path(
        rel_path,
        text,
        parser_cache=parser_cache,
        tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
    )
    if resolved is None:
        return False, False

    ts_parser, source_text = resolved
    encoded = source_text.encode("utf-8")
    try:
        tree = ts_parser.parse(encoded)
    except Exception:  # noqa: BLE001
        # Any parser failure counts as "attempted but not parsed".
        return True, False
    has_root = getattr(tree, "root_node", None) is not None
    return True, has_root
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _tree_sitter_parser_for_path(
    rel_path: str,
    text: str,
    *,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
) -> tuple[Any, str] | None:
    """Choose a tree-sitter parser and the source text it should parse.

    The language is looked up by lower-cased file suffix. Astro files are a
    special case: only their frontmatter is parsed, using the TypeScript parser.

    Returns:
        ``(parser, source)``, or ``None`` when the suffix is unmapped, an Astro
        file has no frontmatter, or the parser cannot be obtained.
    """
    extension = Path(rel_path).suffix.lower()
    language = _resolve_tree_sitter_lang_map(tree_sitter_lang_by_suffix).get(extension)
    if not language:
        return None

    if extension == ".astro":
        source = _extract_astro_frontmatter(text)
        if not source:
            return None
        language = "typescript"
    else:
        source = text

    ts_parser = _tree_sitter_parser(language, parser_cache=parser_cache)
    return None if ts_parser is None else (ts_parser, source)
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def _extract_astro_frontmatter(text: str) -> str:
|
|
696
|
+
lines = text.splitlines()
|
|
697
|
+
if not lines or lines[0].strip() != "---":
|
|
698
|
+
return ""
|
|
699
|
+
for idx in range(1, len(lines)):
|
|
700
|
+
if lines[idx].strip() == "---":
|
|
701
|
+
return "\n".join(lines[1:idx])
|
|
702
|
+
return ""
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def _tree_sitter_parser(language: str, *, parser_cache: dict[str, Any] | None = None) -> Any | None:
|
|
706
|
+
cache = parser_cache if parser_cache is not None else {}
|
|
707
|
+
cached = cache.get(language)
|
|
708
|
+
if cached is not None:
|
|
709
|
+
return cached
|
|
710
|
+
try:
|
|
711
|
+
from tree_sitter_language_pack import get_parser
|
|
712
|
+
|
|
713
|
+
parser = get_parser(language)
|
|
714
|
+
except Exception: # noqa: BLE001
|
|
715
|
+
return None
|
|
716
|
+
cache[language] = parser
|
|
717
|
+
return parser
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _extract_symbol_summaries(text: str, max_symbols: int = 4) -> tuple[list[str], int]:
    """Collect unique ``"<kind> <name>"`` labels matched by ``_SYMBOL_PATTERNS``.

    Each line is tried against the ``(pattern, kind)`` pairs in order, using
    ``pattern.match``; capture group 1 is the symbol name. The first pattern
    that yields a label not seen before wins for that line.

    Args:
        text: Source text to scan line by line.
        max_symbols: Maximum number of labels to return in the list.

    Returns:
        ``(labels, count)`` where ``labels`` holds the first ``max_symbols``
        unique labels in order of appearance and ``count`` is the total number
        of unique labels found.
    """
    # A dict is used as an insertion-ordered set of labels.
    labels: dict[str, None] = {}
    for row in text.splitlines():
        for regex, kind in _SYMBOL_PATTERNS:
            found = regex.match(row)
            name = found.group(1) if found else None
            if not name:
                continue
            label = f"{kind} {name}"
            if label in labels:
                # Already recorded: keep trying the remaining patterns on this line.
                continue
            labels[label] = None
            break
    return list(labels)[: max(0, max_symbols)], len(labels)
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def _extract_import_targets(text: str) -> list[str]:
    """Return the unique import targets found in ``text``, in first-seen order.

    Two mechanisms are used:

    * Inside a block opened by a line that is exactly ``import (`` and closed
      by a line that is exactly ``)``, every line that is wholly a
      double-quoted string contributes its unquoted, stripped content.
    * Every other line is searched with each regex in ``_IMPORT_PATTERNS``;
      capture group 1 of the first matching pattern is the target.
    """
    # A dict is used as an insertion-ordered set of targets.
    ordered: dict[str, None] = {}
    inside_go_block = False

    for raw_line in text.splitlines():
        bare = raw_line.strip()

        if inside_go_block:
            if bare == ")":
                inside_go_block = False
            elif len(bare) > 1 and bare.startswith('"') and bare.endswith('"'):
                quoted = bare[1:-1].strip()
                if quoted:
                    ordered.setdefault(quoted, None)
            # Lines inside the block are never fed to the regex patterns.
            continue

        if bare == "import (":
            inside_go_block = True
            continue

        # The first pattern that matches decides the line, even if its group is empty.
        hit = next(
            (found for found in (regex.search(raw_line) for regex in _IMPORT_PATTERNS) if found),
            None,
        )
        if hit is None:
            continue
        captured = str(hit.group(1) or "").strip()
        if captured:
            ordered.setdefault(captured, None)

    return list(ordered)
|
|
773
|
+
|
|
774
|
+
|
|
775
|
+
def _render_entry_chunk(entry: RepoMapRankingEntry) -> str:
|
|
776
|
+
lines = [f"{entry.path} ({entry.score:.3f})\n"]
|
|
777
|
+
for symbol in entry.symbols:
|
|
778
|
+
lines.append(f" - {symbol}\n")
|
|
779
|
+
return "".join(lines)
|
|
780
|
+
|
|
781
|
+
|
|
782
|
+
def _truncate_utf8(text: str, max_bytes: int) -> str:
|
|
783
|
+
if max_bytes <= 0:
|
|
784
|
+
return ""
|
|
785
|
+
raw = text.encode("utf-8")
|
|
786
|
+
if len(raw) <= max_bytes:
|
|
787
|
+
return text
|
|
788
|
+
clipped = raw[:max_bytes]
|
|
789
|
+
while clipped:
|
|
790
|
+
try:
|
|
791
|
+
return clipped.decode("utf-8")
|
|
792
|
+
except UnicodeDecodeError as exc:
|
|
793
|
+
clipped = clipped[: exc.start]
|
|
794
|
+
return ""
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def _build_dependency_edges(analyses: list[RepoMapFileAnalysis]) -> list[tuple[str, str]]:
    """Turn per-file import lists into a sorted list of ``(src, dst)`` edges.

    For every import of every analysed file, any ``?query`` or ``#fragment``
    tail is cut off and the remainder is resolved to another analysed file:
    ``.py`` sources try ``_resolve_python_import`` first, and anything still
    unresolved goes through ``_resolve_relative_import``. Self-references and
    targets outside the analysed set are dropped.

    Returns:
        Unique ``(source_path, target_path)`` pairs in sorted order.
    """
    known = {item.path for item in analyses}
    module_index = _build_python_module_index(known)
    found: set[tuple[str, str]] = set()

    for analysis in analyses:
        origin = analysis.path
        is_python = Path(origin).suffix.lower() == ".py"
        for raw_target in analysis.imports:
            cleaned = raw_target.partition("?")[0].partition("#")[0].strip()
            if not cleaned:
                continue
            resolved = _resolve_python_import(origin, cleaned, module_index) if is_python else None
            if resolved is None:
                resolved = _resolve_relative_import(origin, cleaned, known)
            if resolved and resolved != origin and resolved in known:
                found.add((origin, resolved))

    return sorted(found)
|
|
816
|
+
|
|
817
|
+
|
|
818
|
+
def _build_python_module_index(paths: set[str]) -> dict[str, str]:
|
|
819
|
+
index: dict[str, str] = {}
|
|
820
|
+
for rel in paths:
|
|
821
|
+
path = Path(rel)
|
|
822
|
+
if path.suffix.lower() != ".py":
|
|
823
|
+
continue
|
|
824
|
+
if path.name == "__init__.py":
|
|
825
|
+
module_name = ".".join(path.parts[:-1])
|
|
826
|
+
else:
|
|
827
|
+
module_name = ".".join(path.with_suffix("").parts)
|
|
828
|
+
if module_name and module_name not in index:
|
|
829
|
+
index[module_name] = rel
|
|
830
|
+
return index
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
def _resolve_python_import(source_path: str, target: str, index: dict[str, str]) -> str | None:
|
|
834
|
+
if not target:
|
|
835
|
+
return None
|
|
836
|
+
module_name = target
|
|
837
|
+
if target.startswith("."):
|
|
838
|
+
level = len(target) - len(target.lstrip("."))
|
|
839
|
+
module_tail = target[level:]
|
|
840
|
+
source = Path(source_path)
|
|
841
|
+
package_parts = (
|
|
842
|
+
list(source.parts[:-1]) if source.name == "__init__.py" else list(source.with_suffix("").parts[:-1])
|
|
843
|
+
)
|
|
844
|
+
trim = max(0, level - 1)
|
|
845
|
+
if trim > len(package_parts):
|
|
846
|
+
return None
|
|
847
|
+
base_parts = package_parts[: len(package_parts) - trim] if trim else package_parts
|
|
848
|
+
tail_parts = [part for part in module_tail.split(".") if part]
|
|
849
|
+
if not base_parts and not tail_parts:
|
|
850
|
+
return None
|
|
851
|
+
module_name = ".".join(base_parts + tail_parts)
|
|
852
|
+
|
|
853
|
+
return index.get(module_name)
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
def _resolve_relative_import(source_path: str, target: str, available_paths: set[str]) -> str | None:
    """Resolve a path-style import (``./x``, ``../x``, ``/x``) to a known file.

    Targets starting with ``.`` are joined onto the directory of
    ``source_path``; targets starting with ``/`` have the leading slashes
    stripped. The result is normalised with ``_normalize_rel``. An exact hit
    in ``available_paths`` wins. Otherwise, if the path has no suffix, each
    entry of ``_RELATIVE_IMPORT_SUFFIX_CANDIDATES`` is tried both appended to
    the path and as ``<path>/index<suffix>``.

    Returns:
        The matching path from ``available_paths``, or ``None``.
    """
    if target.startswith("."):
        joined = str(PurePosixPath(source_path).parent / target)
    elif target.startswith("/"):
        joined = target.lstrip("/")
    else:
        return None

    base = _normalize_rel(joined)
    if not base:
        return None
    if base in available_paths:
        return base
    if PurePosixPath(base).suffix:
        # An explicit extension that did not match exactly is not guessed at.
        return None

    for ext in _RELATIVE_IMPORT_SUFFIX_CANDIDATES:
        for guess in (f"{base}{ext}", str(PurePosixPath(base) / f"index{ext}")):
            cleaned = _normalize_rel(guess)
            if cleaned and cleaned in available_paths:
                return cleaned
    return None
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
def _normalize_rel(path: str) -> str:
|
|
881
|
+
parts: list[str] = []
|
|
882
|
+
for piece in path.replace("\\", "/").split("/"):
|
|
883
|
+
if piece in {"", "."}:
|
|
884
|
+
continue
|
|
885
|
+
if piece == "..":
|
|
886
|
+
if not parts:
|
|
887
|
+
return ""
|
|
888
|
+
parts.pop()
|
|
889
|
+
continue
|
|
890
|
+
parts.append(piece)
|
|
891
|
+
return "/".join(parts)
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
def _pagerank_scores_with_backend(
    paths: list[str], edges: list[tuple[str, str]]
) -> tuple[dict[str, float], str]:
    """Compute PageRank scores scaled to a peak of 1.0 and name the backend used.

    ``_pagerank_scores_networkx`` is tried first; if it returns nothing,
    ``_pagerank_scores_simple`` is used instead.

    Returns:
        ``(scores, backend)`` where ``backend`` is ``"none"`` for empty input,
        otherwise ``"networkx"`` or ``"simple"``. Every path gets a score,
        divided by the highest raw score; all scores are ``0.0`` when that
        peak is not positive.
    """
    if not paths:
        return {}, "none"

    backend = "networkx"
    raw = _pagerank_scores_networkx(paths, edges)
    if not raw:
        backend = "simple"
        raw = _pagerank_scores_simple(paths, edges)

    top = max(raw.values(), default=0.0)
    if top <= 0:
        return dict.fromkeys(paths, 0.0), backend

    normalized: dict[str, float] = {}
    for path in paths:
        normalized[path] = float(raw.get(path, 0.0) / top)
    return normalized, backend
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
def _pagerank_scores_networkx(paths: list[str], edges: list[tuple[str, str]]) -> dict[str, float]:
|
|
911
|
+
try:
|
|
912
|
+
import networkx as nx
|
|
913
|
+
except Exception: # noqa: BLE001
|
|
914
|
+
return {}
|
|
915
|
+
graph = nx.DiGraph()
|
|
916
|
+
graph.add_nodes_from(paths)
|
|
917
|
+
graph.add_edges_from(edges)
|
|
918
|
+
if graph.number_of_nodes() == 0:
|
|
919
|
+
return {}
|
|
920
|
+
try:
|
|
921
|
+
pagerank = nx.pagerank(graph, alpha=0.85)
|
|
922
|
+
except Exception: # noqa: BLE001
|
|
923
|
+
return {}
|
|
924
|
+
return {path: float(pagerank.get(path, 0.0)) for path in paths}
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
def _pagerank_scores_simple(
|
|
928
|
+
paths: list[str],
|
|
929
|
+
edges: list[tuple[str, str]],
|
|
930
|
+
*,
|
|
931
|
+
alpha: float = 0.85,
|
|
932
|
+
max_iter: int = 100,
|
|
933
|
+
tol: float = 1e-6,
|
|
934
|
+
) -> dict[str, float]:
|
|
935
|
+
nodes = list(paths)
|
|
936
|
+
n = len(nodes)
|
|
937
|
+
if n == 0:
|
|
938
|
+
return {}
|
|
939
|
+
node_set = set(nodes)
|
|
940
|
+
outgoing: dict[str, set[str]] = {node: set() for node in nodes}
|
|
941
|
+
incoming: dict[str, set[str]] = {node: set() for node in nodes}
|
|
942
|
+
for src, dst in edges:
|
|
943
|
+
if src not in node_set or dst not in node_set:
|
|
944
|
+
continue
|
|
945
|
+
outgoing[src].add(dst)
|
|
946
|
+
incoming[dst].add(src)
|
|
947
|
+
|
|
948
|
+
rank = {node: 1.0 / n for node in nodes}
|
|
949
|
+
base = (1.0 - alpha) / n
|
|
950
|
+
for _ in range(max_iter):
|
|
951
|
+
dangling = sum(rank[node] for node, outs in outgoing.items() if not outs)
|
|
952
|
+
next_rank: dict[str, float] = {}
|
|
953
|
+
diff = 0.0
|
|
954
|
+
for node in nodes:
|
|
955
|
+
inbound = 0.0
|
|
956
|
+
for src in incoming[node]:
|
|
957
|
+
outs = outgoing[src]
|
|
958
|
+
if outs:
|
|
959
|
+
inbound += rank[src] / len(outs)
|
|
960
|
+
value = base + alpha * (inbound + dangling / n)
|
|
961
|
+
next_rank[node] = value
|
|
962
|
+
diff += abs(value - rank[node])
|
|
963
|
+
rank = next_rank
|
|
964
|
+
if diff <= tol:
|
|
965
|
+
break
|
|
966
|
+
|
|
967
|
+
total = sum(rank.values())
|
|
968
|
+
if total <= 0:
|
|
969
|
+
return {node: 0.0 for node in nodes}
|
|
970
|
+
return {node: float(value / total) for node, value in rank.items()}
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
def _rank_files(
    analyses: list[RepoMapFileAnalysis],
    focus_files: list[str],
    max_files: int,
    *,
    code_like_suffixes: set[str] | None = None,
    graph_scores: dict[str, float] | None = None,
) -> list[RepoMapRankingEntry]:
    """Score every analysed file and return the best ``max_files`` entries.

    Each file starts at 1.0 and is adjusted by: a code-like suffix bonus,
    per-suffix boosts, focus-file bonuses (exact path, else matching
    basename), shallow-depth / symbol-count / line-count bonuses, stem and
    directory-name boosts and penalties from the ``_RANK_*`` tables, a
    penalty for "generated"/"snapshot" stems, and twice the file's graph
    score. The final score is clamped at zero.

    Args:
        analyses: Per-file analysis records (path, symbols, counts).
        focus_files: Paths that should be ranked up; backslashes are
            normalised to ``/``.
        max_files: Maximum number of entries returned (negative means none).
        code_like_suffixes: Suffixes earning the code bonus; a falsy value
            selects the union of the core and extended suffix sets.
        graph_scores: Optional per-path graph scores; negatives count as 0.

    Returns:
        Entries ordered by descending score, ties broken by path.
    """
    code_like_suffixes = code_like_suffixes or (set(_CORE_CODE_LIKE_SUFFIXES) | set(_EXTENDED_CODE_LIKE_SUFFIXES))
    graph_scores = graph_scores or {}
    focus_paths = {str(item).replace("\\", "/") for item in focus_files}
    focus_names = {Path(item).name for item in focus_paths}

    entries: list[RepoMapRankingEntry] = []
    for analysis in analyses:
        rel = analysis.path.replace("\\", "/")
        rel_path = Path(rel)
        ext = rel_path.suffix.lower()
        stem = rel_path.stem.lower()
        filename = rel_path.name

        # The additions below are kept in a fixed order so float results are stable.
        total = 1.0
        if ext in code_like_suffixes:
            total += 1.6
        total += _RANK_SUFFIX_BOOSTS.get(ext, 0.0)
        if rel in focus_paths:
            total += 25.0
        elif filename in focus_names:
            total += 4.0
        nesting = min(rel.count("/"), 10)
        total += max(0.0, 0.8 - nesting * 0.08)
        total += min(1.5, analysis.symbol_count * 0.2)
        total += min(0.8, analysis.line_count / 400.0)

        for keyword, bonus in _RANK_NAME_BOOSTS.items():
            if stem == keyword or stem.startswith(f"{keyword}_") or stem.endswith(f"_{keyword}"):
                total += bonus
        for directory in rel_path.parts[:-1]:
            folded = directory.lower()
            total += _RANK_PATH_BOOSTS.get(folded, 0.0)
            total -= _RANK_PATH_PENALTIES.get(folded, 0.0)
        total -= _RANK_EXACT_FILENAME_PENALTIES.get(filename.lower(), 0.0)
        if any(marker in stem for marker in ("generated", "snapshot")):
            total -= 0.4
        total += 2.0 * max(0.0, float(graph_scores.get(rel, 0.0)))

        entries.append(
            RepoMapRankingEntry(
                path=rel,
                score=max(0.0, total),
                symbols=list(analysis.symbols),
            )
        )

    ordered = sorted(entries, key=lambda entry: (-entry.score, entry.path))
    return ordered[: max(0, max_files)]
|
|
1024
|
+
|
|
1025
|
+
|
|
1026
|
+
def _render_text(ranking: list[RepoMapRankingEntry], max_text_bytes: int) -> str:
    """Render ranking entries as text within a UTF-8 byte budget.

    The budget is ``max_text_bytes`` but never less than 256. Entries are
    rendered in order until the next one would exceed the budget. When that
    happens a truncation note is appended, and already rendered entries are
    dropped from the end until the note fits too.

    Returns:
        The concatenated text, ``""`` for an empty ranking, or just the
        (possibly clipped) note if nothing else fits.
    """
    if not ranking:
        return ""

    budget = max(256, max_text_bytes)
    total = len(ranking)

    def truncation_note(shown: int) -> tuple[str, int]:
        # Build the note for ``shown`` entries together with its encoded size.
        message = f"... (truncated, showing {shown}/{total} files within {budget} bytes)\n"
        return message, len(message.encode("utf-8"))

    kept: list[str] = []
    sizes: list[int] = []
    used = 0
    overflowed = False
    for entry in ranking:
        rendered = _render_entry_chunk(entry)
        size = len(rendered.encode("utf-8"))
        if used + size > budget:
            overflowed = True
            break
        kept.append(rendered)
        sizes.append(size)
        used += size

    if not overflowed:
        return "".join(kept)

    note, note_size = truncation_note(len(kept))
    while kept and used + note_size > budget:
        # Make room for the note; its text changes with the number shown.
        kept.pop()
        used -= sizes.pop()
        note, note_size = truncation_note(len(kept))

    if used + note_size > budget:
        return _truncate_utf8(note, budget)
    kept.append(note)
    return "".join(kept)
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
def _resolve_output_path(root: Path, output_path: str) -> Path:
|
|
1061
|
+
candidate = Path(output_path)
|
|
1062
|
+
return candidate if candidate.is_absolute() else (root / candidate)
|
|
1063
|
+
|
|
1064
|
+
|
|
1065
|
+
def _failure_artifact(
    *,
    code: str,
    message: str,
    root: Path,
    scope: list[str],
    focus_files: list[str],
    start: float,
    timeout_ms: int | None,
    failed_stage: str,
    parser_backend: str = "none",
    parser_deps_missing: list[str] | None = None,
    parser_profile: str = "operational",
) -> RepoMapArtifact:
    """Build the ``RepoMapArtifact`` that reports a failed repo-map run.

    The artifact has ``ok=False``, an empty ranking and text, zeroed stats,
    and provenance describing the request. The error is flagged retryable for
    the codes ``deps_missing``, ``timeout``, ``scan_failed`` and
    ``write_failed``.

    Args:
        code: Error code.
        message: Error message.
        root: Source root, stored as a string in the provenance.
        scope: Requested scope entries (copied).
        focus_files: Requested focus files (copied).
        start: Start reference passed to ``_duration_ms``.
        timeout_ms: Timeout that applied to the run, if any.
        failed_stage: Name of the stage that failed.
        parser_backend: Parser backend recorded in the provenance.
        parser_deps_missing: Missing parser dependencies; ``None`` means none.
        parser_profile: Parser profile recorded in the provenance.
    """
    retryable_codes = {"deps_missing", "timeout", "scan_failed", "write_failed"}
    stamp = _now_iso8601()
    provenance = {
        "method": "none",
        "source_root": str(root),
        "scope": list(scope),
        "focus_files": list(focus_files),
        "duration_ms": _duration_ms(start),
        "timeout_ms": timeout_ms,
        "parser_backend": parser_backend,
        "parser_profile": parser_profile,
        "parser_deps_missing": list(parser_deps_missing or []),
    }
    empty_stats = {
        "files_parsed": 0,
        "symbols_found": 0,
        "graph_edges": 0,
        "byte_count": 0,
    }
    failure = {
        "code": code,
        "message": message,
        "retryable": code in retryable_codes,
        "failed_stage": failed_stage,
    }
    return RepoMapArtifact(
        ok=False,
        generated_at=stamp,
        provenance=provenance,
        stats=empty_stats,
        ranking=[],
        text="",
        error=failure,
    )
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
def _active_code_like_suffixes(repomap_config: Any) -> set[str]:
    """Return the code-like suffixes enabled by the config.

    The core suffixes are always included; the extended ones are added when
    the config's ``extended_language_profile`` attribute is truthy. A new set
    is returned on every call.
    """
    suffixes = set(_CORE_CODE_LIKE_SUFFIXES)
    if _get_attr(repomap_config, "extended_language_profile", False):
        suffixes.update(_EXTENDED_CODE_LIKE_SUFFIXES)
    return suffixes
|
|
1115
|
+
|
|
1116
|
+
|
|
1117
|
+
def _active_tree_sitter_lang_by_suffix(repomap_config: Any) -> dict[str, str]:
    """Return the suffix-to-language map enabled by the config.

    Starts from a copy of the core map; entries of the extended map are laid
    over it when the config's ``extended_language_profile`` attribute is
    truthy.
    """
    merged = dict(_CORE_TREE_SITTER_LANG_BY_SUFFIX)
    use_extended = _get_attr(repomap_config, "extended_language_profile", False)
    if not use_extended:
        return merged
    merged.update(_EXTENDED_TREE_SITTER_LANG_BY_SUFFIX)
    return merged
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
def _resolve_tree_sitter_lang_map(tree_sitter_lang_by_suffix: dict[str, str] | None) -> dict[str, str]:
|
|
1126
|
+
return tree_sitter_lang_by_suffix or _CORE_TREE_SITTER_LANG_BY_SUFFIX
|
|
1127
|
+
|
|
1128
|
+
|
|
1129
|
+
def _get_attr(config: Any, name: str, default: Any) -> Any:
|
|
1130
|
+
return getattr(config, name, default) if config is not None else default
|
|
1131
|
+
|
|
1132
|
+
|
|
1133
|
+
def repomap_missing_dependencies() -> list[str]:
    """List the optional ranking packages whose modules cannot be found.

    Iterates the ``package name -> module name`` pairs of
    ``_RANKING_OPTIONAL_DEPENDENCIES`` and reports each package for which
    ``importlib.util.find_spec`` returns ``None``.
    """
    return [
        package_name
        for package_name, module_name in _RANKING_OPTIONAL_DEPENDENCIES.items()
        if importlib.util.find_spec(module_name) is None
    ]
|
|
1139
|
+
|
|
1140
|
+
|
|
1141
|
+
def repomap_missing_parser_dependencies() -> list[str]:
    """List the optional parser packages whose modules cannot be found.

    Iterates the ``package name -> module name`` pairs of
    ``_PARSER_OPTIONAL_DEPENDENCIES`` and reports each package for which
    ``importlib.util.find_spec`` returns ``None``.
    """
    return [
        package_name
        for package_name, module_name in _PARSER_OPTIONAL_DEPENDENCIES.items()
        if importlib.util.find_spec(module_name) is None
    ]
|
|
1147
|
+
|
|
1148
|
+
|
|
1149
|
+
def _now_iso8601() -> str:
|
|
1150
|
+
return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
|
|
1151
|
+
|
|
1152
|
+
|
|
1153
|
+
def _duration_ms(start: float) -> int:
|
|
1154
|
+
return max(0, int((time.perf_counter() - start) * 1000))
|
|
1155
|
+
|
|
1156
|
+
|
|
1157
|
+
def _timed_out(start: float, timeout_ms: int | None) -> bool:
|
|
1158
|
+
if timeout_ms is None:
|
|
1159
|
+
return False
|
|
1160
|
+
if timeout_ms <= 0:
|
|
1161
|
+
return True
|
|
1162
|
+
return _duration_ms(start) > timeout_ms
|
|
1163
|
+
|
|
1164
|
+
|
|
1165
|
+
def _norm_rel_path(path: Path, root: Path) -> str:
|
|
1166
|
+
try:
|
|
1167
|
+
return path.resolve().relative_to(root.resolve()).as_posix()
|
|
1168
|
+
except (OSError, ValueError):
|
|
1169
|
+
return ""
|
|
1170
|
+
|
|
1171
|
+
|
|
1172
|
+
def _ignored(rel_path: str, ignored_dirs: set[str]) -> bool:
|
|
1173
|
+
if not rel_path:
|
|
1174
|
+
return True
|
|
1175
|
+
parts = [part for part in rel_path.replace("\\", "/").split("/") if part]
|
|
1176
|
+
for part in parts[:-1]:
|
|
1177
|
+
if part in ignored_dirs:
|
|
1178
|
+
return True
|
|
1179
|
+
filename = parts[-1]
|
|
1180
|
+
if filename.startswith(".") and filename not in {".env", ".gitignore"}:
|
|
1181
|
+
return True
|
|
1182
|
+
if filename.endswith((".min.js", ".min.css", ".map")):
|
|
1183
|
+
return True
|
|
1184
|
+
return False
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
def _is_text_candidate(path: Path) -> bool:
    """Cheaply decide whether ``path`` looks like a readable text file.

    A path qualifies when its lower-cased suffix is not in
    ``_BINARY_SUFFIXES``, it is a regular file no larger than
    ``MAX_DISCOVER_FILE_BYTES``, and its first ``READ_SAMPLE_BYTES`` bytes
    contain no NUL byte.

    Returns:
        ``True`` for a text candidate; ``False`` otherwise, including when
        any filesystem call raises ``OSError``.
    """
    # Pure string check first: it needs no filesystem access.
    if path.suffix.lower() in _BINARY_SUFFIXES:
        return False
    try:
        if not path.is_file():
            return False
        if path.stat().st_size > MAX_DISCOVER_FILE_BYTES:
            return False
        # Read only the sample prefix rather than loading the whole file and
        # slicing it afterwards.
        with path.open("rb") as handle:
            sample = handle.read(READ_SAMPLE_BYTES)
    except OSError:
        return False
    return b"\x00" not in sample
|