cortex-loop 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. cortex/__init__.py +7 -0
  2. cortex/adapters.py +339 -0
  3. cortex/blocklist.py +51 -0
  4. cortex/challenges.py +210 -0
  5. cortex/cli.py +7 -0
  6. cortex/core.py +601 -0
  7. cortex/core_helpers.py +190 -0
  8. cortex/data/identity_preamble.md +5 -0
  9. cortex/data/layer1_part_a.md +65 -0
  10. cortex/data/layer1_part_b.md +17 -0
  11. cortex/executive.py +295 -0
  12. cortex/foundation.py +185 -0
  13. cortex/genome.py +348 -0
  14. cortex/graveyard.py +226 -0
  15. cortex/hooks/__init__.py +27 -0
  16. cortex/hooks/_shared.py +167 -0
  17. cortex/hooks/post_tool_use.py +13 -0
  18. cortex/hooks/pre_tool_use.py +13 -0
  19. cortex/hooks/session_start.py +13 -0
  20. cortex/hooks/stop.py +13 -0
  21. cortex/invariants.py +258 -0
  22. cortex/packs.py +118 -0
  23. cortex/repomap.py +6 -0
  24. cortex/requirements.py +497 -0
  25. cortex/retry.py +312 -0
  26. cortex/stop_contract.py +217 -0
  27. cortex/stop_payload.py +122 -0
  28. cortex/stop_policy.py +100 -0
  29. cortex/stop_runtime.py +400 -0
  30. cortex/stop_signals.py +75 -0
  31. cortex/store.py +793 -0
  32. cortex/templates/__init__.py +10 -0
  33. cortex/utils.py +58 -0
  34. cortex_loop-0.1.0a1.dist-info/METADATA +121 -0
  35. cortex_loop-0.1.0a1.dist-info/RECORD +52 -0
  36. cortex_loop-0.1.0a1.dist-info/WHEEL +5 -0
  37. cortex_loop-0.1.0a1.dist-info/entry_points.txt +3 -0
  38. cortex_loop-0.1.0a1.dist-info/licenses/LICENSE +21 -0
  39. cortex_loop-0.1.0a1.dist-info/top_level.txt +3 -0
  40. cortex_ops_cli/__init__.py +3 -0
  41. cortex_ops_cli/_adapter_validation.py +119 -0
  42. cortex_ops_cli/_check_report.py +454 -0
  43. cortex_ops_cli/_check_report_output.py +270 -0
  44. cortex_ops_cli/_openai_bridge_probe.py +241 -0
  45. cortex_ops_cli/_openai_bridge_protocol.py +469 -0
  46. cortex_ops_cli/_runtime_profile_templates.py +341 -0
  47. cortex_ops_cli/_runtime_profiles.py +445 -0
  48. cortex_ops_cli/gemini_hooks.py +301 -0
  49. cortex_ops_cli/main.py +911 -0
  50. cortex_ops_cli/openai_app_server_bridge.py +375 -0
  51. cortex_repomap/__init__.py +1 -0
  52. cortex_repomap/engine.py +1201 -0
@@ -0,0 +1,1201 @@
1
from __future__ import annotations

import ast
import importlib.util
import json
import os
import re
import time
from collections.abc import Callable
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path, PurePosixPath
from typing import Any
13
+
14
# Default value of RepoMapArtifact.schema_version, emitted in every artifact.
SCHEMA_VERSION = "repomap_artifact_v1"
# NOTE(review): not referenced in the visible part of this file; presumably
# size/sampling limits for the text-candidate check -- confirm at the use site.
MAX_DISCOVER_FILE_BYTES = 512_000
READ_SAMPLE_BYTES = 8192
# Names that are always ignored during discovery; _discover_files unions
# this set with the caller-supplied ignored_dirs.
DEFAULT_IGNORED_DIRS = set(
    ".git .hg .svn .venv venv .tox .mypy_cache .pytest_cache .ruff_cache .cortex "
    "node_modules dist build __pycache__ .next coverage".split()
)
# NOTE(review): presumably consulted when deciding whether a file is a text
# candidate; the consumer is outside the visible source.
_BINARY_SUFFIXES = set(
    ".png .jpg .jpeg .gif .webp .bmp .ico .pdf .zip .gz .tgz .bz2 .xz .7z .tar .jar .war "
    ".so .dll .dylib .exe .bin .woff .woff2 .ttf .otf .mp3 .mp4 .mov .avi .wav .sqlite .db "
    ".pyc .pyo".split()
)
# Suffix sets for the "core" and "extended" language profiles.
# NOTE(review): presumably combined by _active_code_like_suffixes -- confirm.
_CORE_CODE_LIKE_SUFFIXES = set(
    ".astro .py .js .jsx .ts .tsx .mjs .cjs .toml .yaml .yml .json .md .html .css .scss".split()
)
_EXTENDED_CODE_LIKE_SUFFIXES = set(
    ".svelte .vue .pyi .java .kt .go .rs .rb .php .c .cc .cpp .h .hpp .cs .swift .scala .lua .sh .bash .zsh .ps1 .sql".split()
)
# Ranking weights keyed by suffix, exact file name, file stem, or path part.
# NOTE(review): the ranking code that applies them is not visible here.
_RANK_SUFFIX_BOOSTS = {".astro": 0.9, ".tsx": 0.5, ".jsx": 0.4}
_RANK_EXACT_FILENAME_PENALTIES = {
    "package-lock.json": 1.8,
    "pnpm-lock.yaml": 1.8,
    "yarn.lock": 1.8,
    "poetry.lock": 1.4,
    "cargo.lock": 1.4,
    "composer.lock": 1.2,
}
# Directories probed, in this order, by _select_scope when the configured
# scope does not exist under the project root.
_FALLBACK_SCOPE_CANDIDATES = ("cortex", "src", "lib", "app", "packages", "tests")
_RANK_NAME_BOOSTS = {
    "core": 0.9, "main": 0.8, "app": 0.7, "index": 0.6, "server": 0.7, "client": 0.5,
    "api": 0.6, "router": 0.5, "service": 0.4, "model": 0.3, "store": 0.3,
}
_RANK_PATH_PENALTIES = {"tests": 0.85, "test": 0.85, "docs": 0.65, "examples": 0.7, "scripts": 0.8, "migrations": 0.8}
_RANK_PATH_BOOSTS = {"src": 0.15, "components": 0.2, "pages": 0.2, "layouts": 0.15}
# Suffixes tried, in order, by _resolve_relative_import for extension-less
# import specifiers ("<base><suffix>" and "<base>/index<suffix>").
_RELATIVE_IMPORT_SUFFIX_CANDIDATES = tuple(".py .ts .tsx .js .jsx .mjs .cjs .astro .vue .svelte".split())
# (pattern, kind) pairs applied line by line with re.match by
# _extract_symbol_summaries; group 1 is the symbol name. Order matters: the
# first pattern that yields a not-yet-seen "<kind> <name>" label wins.
_SYMBOL_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    (re.compile(r"^\s*class\s+([A-Za-z_]\w*)\b"), "class"),
    # NOTE(review): every line this pattern matches is also matched by the
    # async-aware pattern on the next line with the same kind and name.
    (re.compile(r"^\s*def\s+([A-Za-z_]\w*)\s*\("), "def"),
    (re.compile(r"^\s*(?:async\s+)?def\s+([A-Za-z_]\w*)\s*\("), "def"),
    (re.compile(r"^\s*export\s+class\s+([A-Za-z_]\w*)\b"), "class"),
    (re.compile(r"^\s*export\s+(?:async\s+)?function\s+([A-Za-z_]\w*)\s*\("), "function"),
    (re.compile(r"^\s*(?:async\s+)?function\s+([A-Za-z_]\w*)\s*\("), "function"),
    (re.compile(r"^\s*interface\s+([A-Za-z_]\w*)\b"), "interface"),
    (re.compile(r"^\s*type\s+([A-Za-z_]\w*)\b"), "type"),
    (re.compile(r"^\s*(?:const|let|var)\s+([A-Za-z_]\w*)\s*=\s*(?:async\s*)?\("), "const"),
    (re.compile(r"^\s*(?:const|let|var)\s+([A-Za-z_]\w*)\s*=\s*function\b"), "const"),
    (re.compile(r"^\s*([A-Za-z_]\w*)\s*\(\)\s*\{"), "function"),
    # Go (type X struct/interface already matched by generic type pattern above)
    (re.compile(r"^\s*func\s+(?:\([^)]*\)\s+)?([A-Za-z_]\w*)\s*\("), "func"),
    # Rust
    (re.compile(r"^\s*(?:pub\s+)?fn\s+([A-Za-z_]\w*)\s*[(<]"), "fn"),
    (re.compile(r"^\s*(?:pub\s+)?struct\s+([A-Za-z_]\w*)"), "struct"),
    (re.compile(r"^\s*(?:pub\s+)?enum\s+([A-Za-z_]\w*)"), "enum"),
    (re.compile(r"^\s*(?:pub\s+)?trait\s+([A-Za-z_]\w*)"), "trait"),
    (re.compile(r"^\s*impl(?:<[^>]*>)?\s+([A-Za-z_]\w*)"), "impl"),
    # Java/Kotlin
    (re.compile(r"^\s*(?:public|private|protected)?\s*(?:static\s+)?(?:abstract\s+)?class\s+([A-Za-z_]\w*)"), "class"),
    (re.compile(r"^\s*(?:public|private|protected)?\s*interface\s+([A-Za-z_]\w*)"), "interface"),
    # Ruby
    (re.compile(r"^\s*module\s+([A-Za-z_]\w*)"), "module"),
]
# Patterns applied line by line with re.search by _extract_import_targets;
# group 1 is the import target.
_IMPORT_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"""^\s*import\s+.*?\s+from\s+["']([^"']+)["']"""),
    re.compile(r"""^\s*import\s+["']([^"']+)["']"""),
    re.compile(r"""^\s*export\s+.*?\s+from\s+["']([^"']+)["']"""),
    re.compile(r"""require\(\s*["']([^"']+)["']\s*\)"""),
    # Go (standalone import)
    # NOTE(review): any line this matches is also matched, with the same
    # captured target, by the quoted side-effect import pattern listed second.
    re.compile(r"""^\s*import\s+"([^"]+)"$"""),
    # Rust
    re.compile(r"""^\s*(?:pub\s+)?use\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)"""),
    # Java/Kotlin
    re.compile(r"""^\s*import\s+(?:static\s+)?([A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*)"""),
    # Ruby
    re.compile(r"""^\s*require\s+["']([^"']+)["']"""),
    re.compile(r"""^\s*require_relative\s+["']([^"']+)["']"""),
]
# Optional third-party packages, mapped from distribution name to import name.
# NOTE(review): presumably probed by repomap_missing_dependencies() and
# repomap_missing_parser_dependencies() -- both are defined outside this view.
_RANKING_OPTIONAL_DEPENDENCIES = {
    "networkx": "networkx",
}
_PARSER_OPTIONAL_DEPENDENCIES = {
    "tree-sitter": "tree_sitter",
    "tree-sitter-language-pack": "tree_sitter_language_pack",
}
# File suffix -> tree-sitter language name for the core profile. ".astro" is
# special-cased by _tree_sitter_parser_for_path, which parses only the
# frontmatter and does so with the "typescript" parser.
_CORE_TREE_SITTER_LANG_BY_SUFFIX = {
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".astro": "astro",
}
# Additional suffix -> language mappings for the extended language profile.
_EXTENDED_TREE_SITTER_LANG_BY_SUFFIX = {
    ".go": "go",
    ".rs": "rust",
    ".rb": "ruby",
    ".java": "java",
    ".kt": "kotlin",
    ".c": "c",
    ".h": "c",
    ".cc": "cpp",
    ".cpp": "cpp",
    ".hpp": "cpp",
    ".swift": "swift",
    ".scala": "scala",
    ".lua": "lua",
    ".php": "php",
    ".bash": "bash",
    ".sh": "bash",
}
125
+
126
+
127
+ @dataclass(slots=True)
128
+ class RepoMapRankingEntry:
129
+ path: str
130
+ score: float
131
+ symbols: list[str] = field(default_factory=list)
132
+
133
+ def to_dict(self) -> dict[str, Any]:
134
+ return {
135
+ "path": self.path,
136
+ "score": round(float(self.score), 6),
137
+ "symbols": list(self.symbols),
138
+ }
139
+
140
+
141
@dataclass(slots=True)
class RepoMapFileAnalysis:
    """Per-file analysis record produced by _analyze_file."""

    # Path of the file as passed to _analyze_file (relative to the scan root).
    path: str
    # Size from stat(); _analyze_file stores 0 when the stat call fails.
    byte_size: int
    # Number of lines in the decoded text; 0 when the file is empty or unreadable.
    line_count: int
    # Symbol labels such as "class Foo" / "def bar", capped by the extractor.
    symbols: list[str] = field(default_factory=list)
    # Count of distinct symbol labels found (may exceed len(symbols)).
    symbol_count: int = 0
    # Raw import targets in first-seen order.
    imports: list[str] = field(default_factory=list)
149
+
150
+
151
@dataclass(slots=True)
class RepoMapArtifact:
    """The repo-map result document that is serialised to JSON."""

    ok: bool
    generated_at: str
    provenance: dict[str, Any]
    stats: dict[str, Any]
    ranking: list[RepoMapRankingEntry]
    text: str
    error: dict[str, Any] | None = None
    schema_version: str = SCHEMA_VERSION

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view; the "error" key appears only when an error is set."""
        ranking_payload = [entry.to_dict() for entry in self.ranking]
        payload: dict[str, Any] = {}
        payload["schema_version"] = self.schema_version
        payload["ok"] = self.ok
        payload["generated_at"] = self.generated_at
        # Shallow copies keep callers from mutating the artifact's own dicts.
        payload["provenance"] = {**self.provenance}
        payload["stats"] = {**self.stats}
        payload["ranking"] = ranking_payload
        payload["text"] = self.text
        if self.error is None:
            return payload
        payload["error"] = {**self.error}
        return payload
175
+
176
+
177
+ @dataclass(slots=True)
178
+ class RepoMapRunResult:
179
+ artifact: RepoMapArtifact
180
+ artifact_path: str | None = None
181
+ session_artifact_path: str | None = None
182
+
183
+ @property
184
+ def ok(self) -> bool:
185
+ return self.artifact.ok
186
+
187
+ def to_dict(self) -> dict[str, Any]:
188
+ data = self.artifact.to_dict()
189
+ if self.artifact_path:
190
+ data["artifact_path"] = self.artifact_path
191
+ if self.session_artifact_path:
192
+ data["session_artifact_path"] = self.session_artifact_path
193
+ return data
194
+
195
+
196
def run_repomap(
    *,
    root: str | Path,
    repomap_config: Any | None = None,
    scope: list[str] | None = None,
    focus_files: list[str] | None = None,
    output_path: str | None = None,
    max_files: int | None = None,
    max_text_bytes: int | None = None,
    session_id: str | None = None,
    timeout_ms: int | None = None,
    parity_profile: bool | None = None,
) -> RepoMapRunResult:
    """Discover, analyse and rank files under ``root`` and write the artifact.

    Explicit keyword arguments take precedence over the matching attributes
    read from ``repomap_config``. Failures (bad root, missing parser
    dependencies in parity mode, timeout, discovery or write errors) are
    reported as a result whose artifact has ``ok=False``; they are not raised.

    Args:
        root: Project root; resolved to an absolute path.
        repomap_config: Optional config object read through ``_get_attr``.
        scope: Paths to scan; when given, no fallback scope is substituted.
        focus_files: Files passed on to the ranking step.
        output_path: Destination of the "latest" artifact.
        max_files: Cap on ranked files (minimum 1).
        max_text_bytes: Cap on the rendered text size (minimum 256).
        session_id: When truthy, a second copy is written to
            ``.cortex/artifacts/repomap/<session_id>.json`` under the root.
            Values that would place that file outside the directory are
            rejected with a ``write_failed`` result.
        timeout_ms: Budget checked before and during file discovery.
        parity_profile: Overrides ``repomap_config.parity_profile``.

    Returns:
        A ``RepoMapRunResult`` with the artifact and, on success, the paths
        that were written.
    """
    start = time.perf_counter()
    root_path = Path(root).resolve()
    config_scope = _get_attr(repomap_config, "watch_paths", ["src"])
    config_ignored = _get_attr(repomap_config, "ignored_dirs", [])
    config_artifact = _get_attr(repomap_config, "artifact_path", ".cortex/artifacts/repomap/latest.json")
    config_max_files = _get_attr(repomap_config, "max_ranked_files", 20)
    config_max_text_bytes = _get_attr(repomap_config, "max_text_bytes", 8192)
    config_prefer_ast = bool(_get_attr(repomap_config, "prefer_ast_graph", True))
    config_parity_profile = bool(_get_attr(repomap_config, "parity_profile", False))
    requested_scope = [str(v) for v in (scope or config_scope or ["src"])]
    selected_scope = _select_scope(
        root_path,
        requested_scope=requested_scope,
        user_scope_supplied=scope is not None,
    )
    selected_focus = [str(v) for v in (focus_files or [])]
    selected_output = output_path or str(config_artifact)
    selected_max_files = max(1, int(max_files if max_files is not None else config_max_files))
    selected_max_text_bytes = max(
        256, int(max_text_bytes if max_text_bytes is not None else config_max_text_bytes)
    )
    selected_timeout_ms = timeout_ms
    selected_parity_profile = config_parity_profile if parity_profile is None else bool(parity_profile)
    selected_extended_language_profile = bool(_get_attr(repomap_config, "extended_language_profile", False))
    code_like_suffixes = _active_code_like_suffixes(repomap_config)
    tree_sitter_lang_by_suffix = _active_tree_sitter_lang_by_suffix(repomap_config)
    parser_cache: dict[str, Any] = {}
    ranking_missing_deps = repomap_missing_dependencies()
    parser_missing_deps = repomap_missing_parser_dependencies()
    parser_backend = "tree_sitter" if (config_prefer_ast and not parser_missing_deps) else "builtin"
    # AST dependency-edge discovery is built-in; networkx is an optional quality boost.
    ast_mode_active = config_prefer_ast

    def _fail_result(
        *,
        code: str,
        message: str,
        failed_stage: str,
        include_parser_context: bool = False,
        parser_profile: str | None = None,
    ) -> RepoMapRunResult:
        """Build a failure result that carries the context of this run."""
        failure_kwargs: dict[str, Any] = {}
        if include_parser_context:
            failure_kwargs = {
                "parser_backend": parser_backend,
                "parser_deps_missing": parser_missing_deps,
                "parser_profile": parser_profile
                or ("parity" if selected_parity_profile else "operational"),
            }
        artifact = _failure_artifact(
            code=code,
            message=message,
            root=root_path,
            scope=selected_scope,
            focus_files=selected_focus,
            start=start,
            timeout_ms=selected_timeout_ms,
            failed_stage=failed_stage,
            **failure_kwargs,
        )
        return RepoMapRunResult(artifact=artifact)

    if not root_path.exists() or not root_path.is_dir():
        return _fail_result(
            code="scan_failed",
            message=f"Project root does not exist or is not a directory: {root_path}",
            failed_stage="discovery",
        )

    if selected_parity_profile and parser_missing_deps:
        return _fail_result(
            code="deps_missing",
            message=(
                "Parity profile requires tree-sitter parser dependencies; missing: "
                + ", ".join(parser_missing_deps)
                + ". Install with: pip install -e '.[repomap]'"
            ),
            failed_stage="analysis",
            include_parser_context=True,
            parser_profile="parity",
        )
    if selected_parity_profile and not config_prefer_ast:
        return _fail_result(
            code="parity_profile_invalid",
            message="Parity profile requires repomap.prefer_ast_graph=true.",
            failed_stage="analysis",
            include_parser_context=True,
            parser_profile="parity",
        )

    if _timed_out(start, selected_timeout_ms):
        return _fail_result(
            code="timeout",
            message="Repo-map generation timed out before discovery started.",
            failed_stage="discovery",
        )

    try:
        files = _discover_files(
            root=root_path,
            scope=selected_scope,
            ignored_dirs=[str(v) for v in config_ignored],
            timeout_check=lambda: _timed_out(start, selected_timeout_ms),
        )
    except TimeoutError:
        return _fail_result(
            code="timeout",
            message="Repo-map generation timed out during file discovery.",
            failed_stage="discovery",
        )
    except OSError as exc:
        return _fail_result(
            code="scan_failed",
            message=f"Failed during file discovery: {exc}",
            failed_stage="discovery",
        )

    analyses, parser_stats = _analyze_files(
        root_path,
        files,
        use_tree_sitter=parser_backend == "tree_sitter",
        parser_cache=parser_cache,
        tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
    )
    if selected_parity_profile and parser_stats["tree_sitter_files_parsed"] == 0:
        return _fail_result(
            code="parser_not_used",
            message=(
                "Parity profile requires tree-sitter-backed structural parsing, "
                "but no files were parsed with tree-sitter in this scope."
            ),
            failed_stage="analysis",
            include_parser_context=True,
            parser_profile="parity",
        )

    dependency_edges: list[tuple[str, str]] = []
    graph_scores: dict[str, float] = {}
    pagerank_backend = "none"
    method = "heuristic_fallback"
    if ast_mode_active:
        dependency_edges = _build_dependency_edges(analyses)
        graph_scores, pagerank_backend = _pagerank_scores_with_backend(
            [item.path for item in analyses], dependency_edges
        )
        method = "ast_pagerank"
    ranking = _rank_files(
        analyses,
        selected_focus,
        selected_max_files,
        code_like_suffixes=code_like_suffixes,
        graph_scores=graph_scores,
    )
    text = _render_text(ranking, selected_max_text_bytes)
    symbols_found = sum(item.symbol_count for item in analyses)
    artifact = RepoMapArtifact(
        ok=True,
        generated_at=_now_iso8601(),
        provenance={
            "method": method,
            "source_root": str(root_path),
            "scope": selected_scope,
            "focus_files": selected_focus,
            "duration_ms": _duration_ms(start),
            "timeout_ms": selected_timeout_ms,
            "ast_requested": config_prefer_ast,
            "ast_enabled": ast_mode_active,
            "missing_deps": ranking_missing_deps,
            "pagerank_backend": pagerank_backend,
            "parser_backend": parser_backend,
            "parser_profile": "parity" if selected_parity_profile else "operational",
            "language_profile": "extended" if selected_extended_language_profile else "core",
            "parser_deps_missing": parser_missing_deps,
            "parser_stats": parser_stats,
        },
        stats={
            "files_parsed": len(files),
            "symbols_found": symbols_found,
            "graph_edges": len(dependency_edges),
            "byte_count": len(text.encode("utf-8")),
        },
        ranking=ranking,
        text=text,
    )

    latest_path = _resolve_output_path(root_path, selected_output)
    # Both output files carry the same document, so serialise it only once.
    serialized = json.dumps(artifact.to_dict(), indent=2, sort_keys=True)
    session_path: Path | None = None
    try:
        if session_id:
            session_dir = root_path / ".cortex" / "artifacts" / "repomap"
            session_path = session_dir / f"{session_id}.json"
            # session_id is interpolated into a file name; refuse values such
            # as "../../x" that would land outside the session directory.
            # The check runs before anything is written.
            if not session_path.resolve().is_relative_to(session_dir.resolve()):
                raise OSError(
                    f"session artifact path escapes {session_dir}: {session_id!r}"
                )
        latest_path.parent.mkdir(parents=True, exist_ok=True)
        latest_path.write_text(serialized, encoding="utf-8")
        if session_path is not None:
            session_path.parent.mkdir(parents=True, exist_ok=True)
            session_path.write_text(serialized, encoding="utf-8")
    except OSError as exc:
        return _fail_result(
            code="write_failed",
            message=f"Failed to write repo-map artifact: {exc}",
            failed_stage="write",
        )

    return RepoMapRunResult(
        artifact=artifact,
        artifact_path=str(latest_path),
        session_artifact_path=str(session_path) if session_path is not None else None,
    )
418
+
419
+
420
def _discover_files(
    *,
    root: Path,
    scope: list[str],
    ignored_dirs: list[str],
    timeout_check: Callable[[], bool] | None = None,
) -> list[str]:
    """Collect root-relative paths of text-candidate files under ``scope``.

    Args:
        root: Scan root; expected to be resolved already, because every scope
            target is resolved before being compared against it.
        scope: Files or directories, relative to ``root`` or absolute. An
            empty list falls back to ``["src"]``.
        ignored_dirs: Extra names to ignore on top of ``DEFAULT_IGNORED_DIRS``.
        timeout_check: Optional zero-argument callable returning True once the
            time budget is spent; polled per scope entry and per directory.

    Returns:
        Sorted, de-duplicated relative paths.

    Raises:
        TimeoutError: When ``timeout_check`` reports that the budget is spent.
    """
    discovered: list[str] = []
    seen: set[str] = set()
    ignored_names = set(DEFAULT_IGNORED_DIRS) | {str(v) for v in ignored_dirs}

    for scope_entry in scope or ["src"]:
        if timeout_check and timeout_check():
            raise TimeoutError("repo-map discovery timed out")
        # Resolve relative and absolute entries alike (joining an absolute
        # path onto root yields that absolute path). relative_to() below is a
        # purely lexical test, so an unresolved "<root>/../x" or a symlink
        # pointing elsewhere would otherwise pass it and escape the root.
        target = (root / scope_entry).resolve()
        try:
            target.relative_to(root)
        except ValueError:
            continue
        if not target.exists():
            continue
        if target.is_file():
            rel = _norm_rel_path(target, root)
            if rel and rel not in seen and not _ignored(rel, ignored_names) and _is_text_candidate(target):
                discovered.append(rel)
                seen.add(rel)
            continue
        for dirpath, dirnames, filenames in os.walk(target):
            if timeout_check and timeout_check():
                raise TimeoutError("repo-map discovery timed out")
            current_dir = Path(dirpath)
            # Prune in place so os.walk never descends into ignored directories.
            dirnames[:] = [d for d in dirnames if not _ignored(_norm_rel_path(current_dir / d, root), ignored_names)]
            for filename in filenames:
                path = current_dir / filename
                rel = _norm_rel_path(path, root)
                if not rel or rel in seen or _ignored(rel, ignored_names):
                    continue
                if not _is_text_candidate(path):
                    continue
                discovered.append(rel)
                seen.add(rel)
    return sorted(discovered)
462
+
463
+
464
+ def _select_scope(root: Path, requested_scope: list[str], user_scope_supplied: bool) -> list[str]:
465
+ normalized = [str(v) for v in requested_scope if str(v).strip()] or ["src"]
466
+ if user_scope_supplied or _scope_targets_exist(root, normalized):
467
+ return normalized
468
+ fallback = [name for name in _FALLBACK_SCOPE_CANDIDATES if (root / name).exists()]
469
+ return fallback or ["."]
470
+
471
+
472
+ def _scope_targets_exist(root: Path, scope: list[str]) -> bool:
473
+ for entry in scope:
474
+ target = Path(entry)
475
+ if not target.is_absolute():
476
+ target = root / target
477
+ if target.exists():
478
+ return True
479
+ return False
480
+
481
+
482
+ def _analyze_files(
483
+ root: Path,
484
+ files: list[str],
485
+ *,
486
+ use_tree_sitter: bool,
487
+ parser_cache: dict[str, Any] | None = None,
488
+ tree_sitter_lang_by_suffix: dict[str, str] | None = None,
489
+ ) -> tuple[list[RepoMapFileAnalysis], dict[str, int]]:
490
+ analyses: list[RepoMapFileAnalysis] = []
491
+ tree_sitter_attempted = 0
492
+ tree_sitter_parsed = 0
493
+ for rel in files:
494
+ analysis, attempted, parsed = _analyze_file(
495
+ root,
496
+ rel,
497
+ use_tree_sitter=use_tree_sitter,
498
+ parser_cache=parser_cache,
499
+ tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
500
+ )
501
+ analyses.append(analysis)
502
+ tree_sitter_attempted += int(attempted)
503
+ tree_sitter_parsed += int(parsed)
504
+ return (
505
+ analyses,
506
+ {
507
+ "files_analyzed": len(files),
508
+ "tree_sitter_files_attempted": tree_sitter_attempted,
509
+ "tree_sitter_files_parsed": tree_sitter_parsed,
510
+ "tree_sitter_files_fallback": max(0, tree_sitter_attempted - tree_sitter_parsed),
511
+ },
512
+ )
513
+
514
+
515
def _analyze_file(
    root: Path,
    rel_path: str,
    *,
    use_tree_sitter: bool,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
) -> tuple[RepoMapFileAnalysis, bool, bool]:
    """Read one file and summarise its size, lines, symbols and imports.

    I/O errors are tolerated: a failed stat yields size 0 and a failed read
    yields empty text, in which case no extraction is attempted. The two
    booleans report whether tree-sitter was attempted and whether it parsed.
    """
    file_path = root / rel_path
    try:
        size_on_disk = file_path.stat().st_size
    except OSError:
        size_on_disk = 0
    try:
        content = file_path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        content = ""

    analysis = RepoMapFileAnalysis(path=rel_path, byte_size=size_on_disk, line_count=0)
    attempted = False
    parsed = False
    if content:
        # A final line without a trailing newline still counts as a line.
        analysis.line_count = content.count("\n") + int(not content.endswith("\n"))
        (
            analysis.symbols,
            analysis.symbol_count,
            analysis.imports,
            attempted,
            parsed,
        ) = _extract_symbols_and_imports(
            rel_path,
            content,
            use_tree_sitter=use_tree_sitter,
            parser_cache=parser_cache,
            tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
        )
    return analysis, attempted, parsed
560
+
561
+
562
def _extract_symbols_and_imports(
    rel_path: str,
    text: str,
    *,
    use_tree_sitter: bool,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
    max_symbols: int = 4,
) -> tuple[list[str], int, list[str], bool, bool]:
    """Extract symbol labels and import targets from one file's text.

    Tree-sitter, when enabled for the suffix, is used only as a parse check
    whose outcome is reported in the last two tuple items. Symbols and
    imports come from the ``ast`` module for ``.py`` files, and from the
    regex extractors for everything else or when ``ast`` finds nothing.
    """
    suffix = Path(rel_path).suffix.lower()
    lang_map = _resolve_tree_sitter_lang_map(tree_sitter_lang_by_suffix)
    attempted, parsed = False, False
    if use_tree_sitter and suffix in lang_map:
        attempted, parsed = _tree_sitter_parse_ok(
            rel_path,
            text,
            parser_cache=parser_cache,
            tree_sitter_lang_by_suffix=lang_map,
        )

    if suffix == ".py":
        ast_result = _extract_python_symbols_and_imports(text, max_symbols=max_symbols)
        py_symbols, py_count, py_imports = ast_result
        found_something = py_count > 0 or bool(py_imports)
        if found_something:
            return py_symbols, py_count, py_imports, attempted, parsed

    regex_symbols, regex_count = _extract_symbol_summaries(text, max_symbols=max_symbols)
    regex_imports = _extract_import_targets(text)
    return regex_symbols, regex_count, regex_imports, attempted, parsed
589
+
590
+
591
+ def _extract_python_symbols_and_imports(
592
+ text: str,
593
+ *,
594
+ max_symbols: int = 4,
595
+ ) -> tuple[list[str], int, list[str]]:
596
+ try:
597
+ tree = ast.parse(text)
598
+ except SyntaxError:
599
+ return [], 0, []
600
+
601
+ seen: set[str] = set()
602
+ summaries: list[str] = []
603
+ count = 0
604
+ imports: list[str] = []
605
+ import_seen: set[str] = set()
606
+
607
+ for node in ast.walk(tree):
608
+ if isinstance(node, ast.ClassDef):
609
+ label = f"class {node.name}"
610
+ elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
611
+ label = f"def {node.name}"
612
+ else:
613
+ label = ""
614
+ if label and label not in seen:
615
+ seen.add(label)
616
+ count += 1
617
+ if len(summaries) < max_symbols:
618
+ summaries.append(label)
619
+
620
+ if isinstance(node, ast.Import):
621
+ for alias in node.names:
622
+ target = alias.name.strip()
623
+ if target and target not in import_seen:
624
+ import_seen.add(target)
625
+ imports.append(target)
626
+ elif isinstance(node, ast.ImportFrom):
627
+ prefix = "." * max(0, int(node.level))
628
+ base = str(node.module or "").strip()
629
+ if base:
630
+ target = f"{prefix}{base}"
631
+ if target not in import_seen:
632
+ import_seen.add(target)
633
+ imports.append(target)
634
+ continue
635
+ for alias in node.names:
636
+ target = f"{prefix}{str(alias.name).strip()}"
637
+ if target and target not in import_seen:
638
+ import_seen.add(target)
639
+ imports.append(target)
640
+
641
+ return summaries, count, imports
642
+
643
+
644
def _tree_sitter_parse_ok(
    rel_path: str,
    text: str,
    *,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
) -> tuple[bool, bool]:
    """Try a tree-sitter parse and report ``(attempted, parsed)``.

    ``attempted`` is False when no parser is available for the path.
    ``parsed`` is True only when parsing did not raise and the resulting tree
    exposes a non-None ``root_node``.
    """
    resolved = _tree_sitter_parser_for_path(
        rel_path,
        text,
        parser_cache=parser_cache,
        tree_sitter_lang_by_suffix=tree_sitter_lang_by_suffix,
    )
    if resolved is None:
        return False, False

    parser, parser_source = resolved
    encoded = parser_source.encode("utf-8")
    try:
        tree = parser.parse(encoded)
    except Exception:  # noqa: BLE001
        # Any parser failure counts as "attempted but not parsed".
        return True, False
    has_root = getattr(tree, "root_node", None) is not None
    return True, has_root
667
+
668
+
669
def _tree_sitter_parser_for_path(
    rel_path: str,
    text: str,
    *,
    parser_cache: dict[str, Any] | None = None,
    tree_sitter_lang_by_suffix: dict[str, str] | None = None,
) -> tuple[Any, str] | None:
    """Return ``(parser, source_to_parse)`` for a file, or None if unavailable.

    ``.astro`` files are handled specially: only the frontmatter is parsed,
    and it is parsed with the "typescript" parser. A missing frontmatter, an
    unmapped suffix or an unavailable parser all yield None.
    """
    suffix = Path(rel_path).suffix.lower()
    language = _resolve_tree_sitter_lang_map(tree_sitter_lang_by_suffix).get(suffix)
    if not language:
        return None

    source = text
    if suffix == ".astro":
        source = _extract_astro_frontmatter(text)
        if not source:
            return None
        language = "typescript"

    parser = _tree_sitter_parser(language, parser_cache=parser_cache)
    return None if parser is None else (parser, source)
693
+
694
+
695
+ def _extract_astro_frontmatter(text: str) -> str:
696
+ lines = text.splitlines()
697
+ if not lines or lines[0].strip() != "---":
698
+ return ""
699
+ for idx in range(1, len(lines)):
700
+ if lines[idx].strip() == "---":
701
+ return "\n".join(lines[1:idx])
702
+ return ""
703
+
704
+
705
+ def _tree_sitter_parser(language: str, *, parser_cache: dict[str, Any] | None = None) -> Any | None:
706
+ cache = parser_cache if parser_cache is not None else {}
707
+ cached = cache.get(language)
708
+ if cached is not None:
709
+ return cached
710
+ try:
711
+ from tree_sitter_language_pack import get_parser
712
+
713
+ parser = get_parser(language)
714
+ except Exception: # noqa: BLE001
715
+ return None
716
+ cache[language] = parser
717
+ return parser
718
+
719
+
720
+ def _extract_symbol_summaries(text: str, max_symbols: int = 4) -> tuple[list[str], int]:
721
+ seen: set[str] = set()
722
+ summaries: list[str] = []
723
+ count = 0
724
+ for line in text.splitlines():
725
+ for pattern, kind in _SYMBOL_PATTERNS:
726
+ match = pattern.match(line)
727
+ if not match:
728
+ continue
729
+ name = match.group(1)
730
+ if not name:
731
+ continue
732
+ label = f"{kind} {name}"
733
+ if label in seen:
734
+ continue
735
+ seen.add(label)
736
+ count += 1
737
+ if len(summaries) < max_symbols:
738
+ summaries.append(label)
739
+ break
740
+ return summaries, count
741
+
742
+
743
+ def _extract_import_targets(text: str) -> list[str]:
744
+ targets: list[str] = []
745
+ seen: set[str] = set()
746
+ in_go_import_block = False
747
+
748
+ for line in text.splitlines():
749
+ stripped = line.strip()
750
+ if in_go_import_block:
751
+ if stripped == ")":
752
+ in_go_import_block = False
753
+ continue
754
+ if stripped.startswith('"') and stripped.endswith('"') and len(stripped) > 1:
755
+ target = stripped[1:-1].strip()
756
+ if target and target not in seen:
757
+ seen.add(target)
758
+ targets.append(target)
759
+ continue
760
+ if stripped == "import (":
761
+ in_go_import_block = True
762
+ continue
763
+ for pattern in _IMPORT_PATTERNS:
764
+ match = pattern.search(line)
765
+ if not match:
766
+ continue
767
+ target = str(match.group(1) or "").strip()
768
+ if target and target not in seen:
769
+ seen.add(target)
770
+ targets.append(target)
771
+ break
772
+ return targets
773
+
774
+
775
+ def _render_entry_chunk(entry: RepoMapRankingEntry) -> str:
776
+ lines = [f"{entry.path} ({entry.score:.3f})\n"]
777
+ for symbol in entry.symbols:
778
+ lines.append(f" - {symbol}\n")
779
+ return "".join(lines)
780
+
781
+
782
+ def _truncate_utf8(text: str, max_bytes: int) -> str:
783
+ if max_bytes <= 0:
784
+ return ""
785
+ raw = text.encode("utf-8")
786
+ if len(raw) <= max_bytes:
787
+ return text
788
+ clipped = raw[:max_bytes]
789
+ while clipped:
790
+ try:
791
+ return clipped.decode("utf-8")
792
+ except UnicodeDecodeError as exc:
793
+ clipped = clipped[: exc.start]
794
+ return ""
795
+
796
+
797
def _build_dependency_edges(analyses: list[RepoMapFileAnalysis]) -> list[tuple[str, str]]:
    """Resolve each file's imports to other analysed files.

    Returns sorted, de-duplicated ``(importer, imported)`` pairs. Self-edges
    and targets outside the analysed set are dropped.
    """
    known_paths = {item.path for item in analyses}
    module_index = _build_python_module_index(known_paths)
    edges: set[tuple[str, str]] = set()

    for analysis in analyses:
        origin = analysis.path
        is_python = Path(origin).suffix.lower() == ".py"
        for raw_target in analysis.imports:
            # Strip "?query" and "#fragment" decorations from the specifier.
            cleaned = raw_target.partition("?")[0].partition("#")[0].strip()
            if not cleaned:
                continue
            resolved = _resolve_python_import(origin, cleaned, module_index) if is_python else None
            if resolved is None:
                resolved = _resolve_relative_import(origin, cleaned, known_paths)
            if resolved and resolved != origin and resolved in known_paths:
                edges.add((origin, resolved))

    return sorted(edges)
816
+
817
+
818
+ def _build_python_module_index(paths: set[str]) -> dict[str, str]:
819
+ index: dict[str, str] = {}
820
+ for rel in paths:
821
+ path = Path(rel)
822
+ if path.suffix.lower() != ".py":
823
+ continue
824
+ if path.name == "__init__.py":
825
+ module_name = ".".join(path.parts[:-1])
826
+ else:
827
+ module_name = ".".join(path.with_suffix("").parts)
828
+ if module_name and module_name not in index:
829
+ index[module_name] = rel
830
+ return index
831
+
832
+
833
+ def _resolve_python_import(source_path: str, target: str, index: dict[str, str]) -> str | None:
834
+ if not target:
835
+ return None
836
+ module_name = target
837
+ if target.startswith("."):
838
+ level = len(target) - len(target.lstrip("."))
839
+ module_tail = target[level:]
840
+ source = Path(source_path)
841
+ package_parts = (
842
+ list(source.parts[:-1]) if source.name == "__init__.py" else list(source.with_suffix("").parts[:-1])
843
+ )
844
+ trim = max(0, level - 1)
845
+ if trim > len(package_parts):
846
+ return None
847
+ base_parts = package_parts[: len(package_parts) - trim] if trim else package_parts
848
+ tail_parts = [part for part in module_tail.split(".") if part]
849
+ if not base_parts and not tail_parts:
850
+ return None
851
+ module_name = ".".join(base_parts + tail_parts)
852
+
853
+ return index.get(module_name)
854
+
855
+
856
+ def _resolve_relative_import(source_path: str, target: str, available_paths: set[str]) -> str | None:
857
+ if not target.startswith((".", "/")):
858
+ return None
859
+ source = PurePosixPath(source_path)
860
+ raw = str(source.parent / target) if target.startswith(".") else target.lstrip("/")
861
+ base = _normalize_rel(raw)
862
+ if not base:
863
+ return None
864
+ if base in available_paths:
865
+ return base
866
+ base_path = PurePosixPath(base)
867
+ if base_path.suffix:
868
+ return None
869
+ candidates: list[str] = []
870
+ for suffix in _RELATIVE_IMPORT_SUFFIX_CANDIDATES:
871
+ candidates.append(f"{base}{suffix}")
872
+ candidates.append(str(PurePosixPath(base) / f"index{suffix}"))
873
+ for candidate in candidates:
874
+ normalized = _normalize_rel(candidate)
875
+ if normalized and normalized in available_paths:
876
+ return normalized
877
+ return None
878
+
879
+
880
+ def _normalize_rel(path: str) -> str:
881
+ parts: list[str] = []
882
+ for piece in path.replace("\\", "/").split("/"):
883
+ if piece in {"", "."}:
884
+ continue
885
+ if piece == "..":
886
+ if not parts:
887
+ return ""
888
+ parts.pop()
889
+ continue
890
+ parts.append(piece)
891
+ return "/".join(parts)
892
+
893
+
894
+ def _pagerank_scores_with_backend(
895
+ paths: list[str], edges: list[tuple[str, str]]
896
+ ) -> tuple[dict[str, float], str]:
897
+ if not paths:
898
+ return {}, "none"
899
+ scores = _pagerank_scores_networkx(paths, edges)
900
+ backend = "networkx"
901
+ if not scores:
902
+ scores = _pagerank_scores_simple(paths, edges)
903
+ backend = "simple"
904
+ peak = max(scores.values(), default=0.0)
905
+ if peak <= 0:
906
+ return ({path: 0.0 for path in paths}, backend)
907
+ return ({path: float(scores.get(path, 0.0) / peak) for path in paths}, backend)
908
+
909
+
910
+ def _pagerank_scores_networkx(paths: list[str], edges: list[tuple[str, str]]) -> dict[str, float]:
911
+ try:
912
+ import networkx as nx
913
+ except Exception: # noqa: BLE001
914
+ return {}
915
+ graph = nx.DiGraph()
916
+ graph.add_nodes_from(paths)
917
+ graph.add_edges_from(edges)
918
+ if graph.number_of_nodes() == 0:
919
+ return {}
920
+ try:
921
+ pagerank = nx.pagerank(graph, alpha=0.85)
922
+ except Exception: # noqa: BLE001
923
+ return {}
924
+ return {path: float(pagerank.get(path, 0.0)) for path in paths}
925
+
926
+
927
+ def _pagerank_scores_simple(
928
+ paths: list[str],
929
+ edges: list[tuple[str, str]],
930
+ *,
931
+ alpha: float = 0.85,
932
+ max_iter: int = 100,
933
+ tol: float = 1e-6,
934
+ ) -> dict[str, float]:
935
+ nodes = list(paths)
936
+ n = len(nodes)
937
+ if n == 0:
938
+ return {}
939
+ node_set = set(nodes)
940
+ outgoing: dict[str, set[str]] = {node: set() for node in nodes}
941
+ incoming: dict[str, set[str]] = {node: set() for node in nodes}
942
+ for src, dst in edges:
943
+ if src not in node_set or dst not in node_set:
944
+ continue
945
+ outgoing[src].add(dst)
946
+ incoming[dst].add(src)
947
+
948
+ rank = {node: 1.0 / n for node in nodes}
949
+ base = (1.0 - alpha) / n
950
+ for _ in range(max_iter):
951
+ dangling = sum(rank[node] for node, outs in outgoing.items() if not outs)
952
+ next_rank: dict[str, float] = {}
953
+ diff = 0.0
954
+ for node in nodes:
955
+ inbound = 0.0
956
+ for src in incoming[node]:
957
+ outs = outgoing[src]
958
+ if outs:
959
+ inbound += rank[src] / len(outs)
960
+ value = base + alpha * (inbound + dangling / n)
961
+ next_rank[node] = value
962
+ diff += abs(value - rank[node])
963
+ rank = next_rank
964
+ if diff <= tol:
965
+ break
966
+
967
+ total = sum(rank.values())
968
+ if total <= 0:
969
+ return {node: 0.0 for node in nodes}
970
+ return {node: float(value / total) for node, value in rank.items()}
971
+
972
+
973
+ def _rank_files(
974
+ analyses: list[RepoMapFileAnalysis],
975
+ focus_files: list[str],
976
+ max_files: int,
977
+ *,
978
+ code_like_suffixes: set[str] | None = None,
979
+ graph_scores: dict[str, float] | None = None,
980
+ ) -> list[RepoMapRankingEntry]:
981
+ code_like_suffixes = code_like_suffixes or (set(_CORE_CODE_LIKE_SUFFIXES) | set(_EXTENDED_CODE_LIKE_SUFFIXES))
982
+ graph_scores = graph_scores or {}
983
+ focus = {str(v).replace("\\", "/") for v in focus_files}
984
+ focus_basenames = {Path(path).name for path in focus}
985
+ scored: list[RepoMapRankingEntry] = []
986
+ for analysis in analyses:
987
+ path = analysis.path.replace("\\", "/")
988
+ path_obj = Path(path)
989
+ suffix = path_obj.suffix.lower()
990
+ depth = path.count("/")
991
+ score = 1.0
992
+ if suffix in code_like_suffixes:
993
+ score += 1.6
994
+ score += _RANK_SUFFIX_BOOSTS.get(suffix, 0.0)
995
+ if path in focus:
996
+ score += 25.0
997
+ elif path_obj.name in focus_basenames:
998
+ score += 4.0
999
+ score += max(0.0, 0.8 - min(depth, 10) * 0.08)
1000
+ score += min(1.5, analysis.symbol_count * 0.2)
1001
+ score += min(0.8, analysis.line_count / 400.0)
1002
+
1003
+ stem = path_obj.stem.lower()
1004
+ for name, boost in _RANK_NAME_BOOSTS.items():
1005
+ if stem == name or stem.startswith(f"{name}_") or stem.endswith(f"_{name}"):
1006
+ score += boost
1007
+ for part in path_obj.parts[:-1]:
1008
+ score += _RANK_PATH_BOOSTS.get(part.lower(), 0.0)
1009
+ score -= _RANK_PATH_PENALTIES.get(part.lower(), 0.0)
1010
+ score -= _RANK_EXACT_FILENAME_PENALTIES.get(path_obj.name.lower(), 0.0)
1011
+ if "generated" in stem or "snapshot" in stem:
1012
+ score -= 0.4
1013
+ score += 2.0 * max(0.0, float(graph_scores.get(path, 0.0)))
1014
+
1015
+ scored.append(
1016
+ RepoMapRankingEntry(
1017
+ path=path,
1018
+ score=max(0.0, score),
1019
+ symbols=list(analysis.symbols),
1020
+ )
1021
+ )
1022
+ scored.sort(key=lambda item: (-item.score, item.path))
1023
+ return scored[: max(0, max_files)]
1024
+
1025
+
1026
+ def _render_text(ranking: list[RepoMapRankingEntry], max_text_bytes: int) -> str:
1027
+ if not ranking:
1028
+ return ""
1029
+ budget = max(256, max_text_bytes)
1030
+ chunks: list[tuple[str, int]] = []
1031
+ used = 0
1032
+ truncated = False
1033
+ total = len(ranking)
1034
+
1035
+ for entry in ranking:
1036
+ chunk = _render_entry_chunk(entry)
1037
+ chunk_bytes = len(chunk.encode("utf-8"))
1038
+ if used + chunk_bytes > budget:
1039
+ truncated = True
1040
+ break
1041
+ chunks.append((chunk, chunk_bytes))
1042
+ used += chunk_bytes
1043
+
1044
+ if truncated:
1045
+ note = f"... (truncated, showing {len(chunks)}/{total} files within {budget} bytes)\n"
1046
+ note_bytes = len(note.encode("utf-8"))
1047
+ while chunks and used + note_bytes > budget:
1048
+ _, removed_bytes = chunks.pop()
1049
+ used -= removed_bytes
1050
+ note = f"... (truncated, showing {len(chunks)}/{total} files within {budget} bytes)\n"
1051
+ note_bytes = len(note.encode("utf-8"))
1052
+ if used + note_bytes <= budget:
1053
+ chunks.append((note, note_bytes))
1054
+ else:
1055
+ return _truncate_utf8(note, budget)
1056
+
1057
+ return "".join(chunk for chunk, _ in chunks)
1058
+
1059
+
1060
+ def _resolve_output_path(root: Path, output_path: str) -> Path:
1061
+ candidate = Path(output_path)
1062
+ return candidate if candidate.is_absolute() else (root / candidate)
1063
+
1064
+
1065
def _failure_artifact(
    *,
    code: str,
    message: str,
    root: Path,
    scope: list[str],
    focus_files: list[str],
    start: float,
    timeout_ms: int | None,
    failed_stage: str,
    parser_backend: str = "none",
    parser_deps_missing: list[str] | None = None,
    parser_profile: str = "operational",
) -> RepoMapArtifact:
    """Build the ``ok=False`` artifact describing a failed repo-map run.

    Stats are all zero, ranking and text are empty, and the error is flagged
    retryable for the codes ``deps_missing``, ``timeout``, ``scan_failed``
    and ``write_failed``. List arguments are copied into the artifact.
    """
    retryable_codes = frozenset({"deps_missing", "timeout", "scan_failed", "write_failed"})

    generated_at = _now_iso8601()
    provenance = {
        "method": "none",
        "source_root": str(root),
        "scope": list(scope),
        "focus_files": list(focus_files),
        "duration_ms": _duration_ms(start),
        "timeout_ms": timeout_ms,
        "parser_backend": parser_backend,
        "parser_profile": parser_profile,
        "parser_deps_missing": list(parser_deps_missing) if parser_deps_missing else [],
    }
    stats = {
        "files_parsed": 0,
        "symbols_found": 0,
        "graph_edges": 0,
        "byte_count": 0,
    }
    error = {
        "code": code,
        "message": message,
        "retryable": code in retryable_codes,
        "failed_stage": failed_stage,
    }
    return RepoMapArtifact(
        ok=False,
        generated_at=generated_at,
        provenance=provenance,
        stats=stats,
        ranking=[],
        text="",
        error=error,
    )
1108
+
1109
+
1110
def _active_code_like_suffixes(repomap_config: Any) -> set[str]:
    """Return a fresh set of code-like suffixes for the configured language profile.

    The core suffixes are always included; the extended ones are added when
    ``repomap_config.extended_language_profile`` is truthy.
    """
    suffixes = set(_CORE_CODE_LIKE_SUFFIXES)
    if _get_attr(repomap_config, "extended_language_profile", False):
        suffixes |= set(_EXTENDED_CODE_LIKE_SUFFIXES)
    return suffixes
1115
+
1116
+
1117
def _active_tree_sitter_lang_by_suffix(repomap_config: Any) -> dict[str, str]:
    """Return a fresh suffix-to-tree-sitter-language mapping for the configured profile.

    Starts from a copy of the core mapping and overlays the extended mapping
    when ``repomap_config.extended_language_profile`` is truthy.
    """
    lang_by_suffix = dict(_CORE_TREE_SITTER_LANG_BY_SUFFIX)
    use_extended = bool(_get_attr(repomap_config, "extended_language_profile", False))
    if not use_extended:
        return lang_by_suffix
    lang_by_suffix.update(_EXTENDED_TREE_SITTER_LANG_BY_SUFFIX)
    return lang_by_suffix
1123
+
1124
+
1125
+ def _resolve_tree_sitter_lang_map(tree_sitter_lang_by_suffix: dict[str, str] | None) -> dict[str, str]:
1126
+ return tree_sitter_lang_by_suffix or _CORE_TREE_SITTER_LANG_BY_SUFFIX
1127
+
1128
+
1129
+ def _get_attr(config: Any, name: str, default: Any) -> Any:
1130
+ return getattr(config, name, default) if config is not None else default
1131
+
1132
+
1133
def repomap_missing_dependencies() -> list[str]:
    """Return the package names of optional ranking dependencies that cannot be found.

    Each entry of ``_RANKING_OPTIONAL_DEPENDENCIES`` maps a package name to
    the module name probed with ``importlib.util.find_spec``. A module counts
    as missing when no spec is found *or* when the probe itself fails:
    ``find_spec`` imports the parent of a dotted name and so raises
    ``ModuleNotFoundError`` if that parent is absent, and raises ``ValueError``
    for an already-imported module without a usable ``__spec__``.
    """
    missing: list[str] = []
    for package_name, module_name in _RANKING_OPTIONAL_DEPENDENCIES.items():
        try:
            spec = importlib.util.find_spec(module_name)
        except (ImportError, ValueError):
            spec = None
        if spec is None:
            missing.append(package_name)
    return missing
1139
+
1140
+
1141
def repomap_missing_parser_dependencies() -> list[str]:
    """Return the package names of optional parser dependencies that cannot be found.

    Each entry of ``_PARSER_OPTIONAL_DEPENDENCIES`` maps a package name to the
    module name probed with ``importlib.util.find_spec``. A module counts as
    missing when no spec is found *or* when the probe itself fails:
    ``find_spec`` imports the parent of a dotted name and so raises
    ``ModuleNotFoundError`` if that parent is absent, and raises ``ValueError``
    for an already-imported module without a usable ``__spec__``.
    """
    missing: list[str] = []
    for package_name, module_name in _PARSER_OPTIONAL_DEPENDENCIES.items():
        try:
            spec = importlib.util.find_spec(module_name)
        except (ImportError, ValueError):
            spec = None
        if spec is None:
            missing.append(package_name)
    return missing
1147
+
1148
+
1149
+ def _now_iso8601() -> str:
1150
+ return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z")
1151
+
1152
+
1153
+ def _duration_ms(start: float) -> int:
1154
+ return max(0, int((time.perf_counter() - start) * 1000))
1155
+
1156
+
1157
+ def _timed_out(start: float, timeout_ms: int | None) -> bool:
1158
+ if timeout_ms is None:
1159
+ return False
1160
+ if timeout_ms <= 0:
1161
+ return True
1162
+ return _duration_ms(start) > timeout_ms
1163
+
1164
+
1165
+ def _norm_rel_path(path: Path, root: Path) -> str:
1166
+ try:
1167
+ return path.resolve().relative_to(root.resolve()).as_posix()
1168
+ except (OSError, ValueError):
1169
+ return ""
1170
+
1171
+
1172
+ def _ignored(rel_path: str, ignored_dirs: set[str]) -> bool:
1173
+ if not rel_path:
1174
+ return True
1175
+ parts = [part for part in rel_path.replace("\\", "/").split("/") if part]
1176
+ for part in parts[:-1]:
1177
+ if part in ignored_dirs:
1178
+ return True
1179
+ filename = parts[-1]
1180
+ if filename.startswith(".") and filename not in {".env", ".gitignore"}:
1181
+ return True
1182
+ if filename.endswith((".min.js", ".min.css", ".map")):
1183
+ return True
1184
+ return False
1185
+
1186
+
1187
+ def _is_text_candidate(path: Path) -> bool:
1188
+ try:
1189
+ if not path.is_file():
1190
+ return False
1191
+ if path.stat().st_size > MAX_DISCOVER_FILE_BYTES:
1192
+ return False
1193
+ except OSError:
1194
+ return False
1195
+ if path.suffix.lower() in _BINARY_SUFFIXES:
1196
+ return False
1197
+ try:
1198
+ sample = path.read_bytes()[:READ_SAMPLE_BYTES]
1199
+ except OSError:
1200
+ return False
1201
+ return b"\x00" not in sample