@pmaddire/gcie 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/AGENT.md +256 -0
  2. package/AGENT_USAGE.md +231 -0
  3. package/ARCHITECTURE.md +151 -0
  4. package/CLAUDE.md +69 -0
  5. package/DEBUGGING_PLAYBOOK.md +160 -0
  6. package/KNOWLEDGE_INDEX.md +154 -0
  7. package/POTENTIAL_UPDATES +130 -0
  8. package/PROJECT.md +141 -0
  9. package/README.md +371 -0
  10. package/REPO_DIGITAL_TWIN.md +98 -0
  11. package/ROADMAP.md +301 -0
  12. package/SETUP_ANY_REPO.md +85 -0
  13. package/bin/gcie-init.js +20 -0
  14. package/bin/gcie.js +45 -0
  15. package/cli/__init__.py +1 -0
  16. package/cli/app.py +163 -0
  17. package/cli/commands/__init__.py +1 -0
  18. package/cli/commands/cache.py +35 -0
  19. package/cli/commands/context.py +2426 -0
  20. package/cli/commands/context_slices.py +617 -0
  21. package/cli/commands/debug.py +24 -0
  22. package/cli/commands/index.py +17 -0
  23. package/cli/commands/query.py +20 -0
  24. package/cli/commands/setup.py +73 -0
  25. package/config/__init__.py +1 -0
  26. package/config/scanner_config.py +82 -0
  27. package/context/__init__.py +1 -0
  28. package/context/architecture_bootstrap.py +170 -0
  29. package/context/architecture_index.py +185 -0
  30. package/context/architecture_parser.py +170 -0
  31. package/context/architecture_slicer.py +308 -0
  32. package/context/context_router.py +70 -0
  33. package/context/fallback_evaluator.py +21 -0
  34. package/coverage_integration/__init__.py +1 -0
  35. package/coverage_integration/coverage_loader.py +55 -0
  36. package/debugging/__init__.py +12 -0
  37. package/debugging/bug_localizer.py +81 -0
  38. package/debugging/execution_path_analyzer.py +42 -0
  39. package/embeddings/__init__.py +6 -0
  40. package/embeddings/encoder.py +45 -0
  41. package/embeddings/faiss_index.py +72 -0
  42. package/git_integration/__init__.py +1 -0
  43. package/git_integration/git_miner.py +78 -0
  44. package/graphs/__init__.py +17 -0
  45. package/graphs/call_graph.py +70 -0
  46. package/graphs/code_graph.py +81 -0
  47. package/graphs/execution_graph.py +35 -0
  48. package/graphs/git_graph.py +43 -0
  49. package/graphs/graph_store.py +25 -0
  50. package/graphs/node_factory.py +21 -0
  51. package/graphs/test_graph.py +65 -0
  52. package/graphs/validators.py +28 -0
  53. package/graphs/variable_graph.py +51 -0
  54. package/knowledge_index/__init__.py +1 -0
  55. package/knowledge_index/index_builder.py +60 -0
  56. package/knowledge_index/models.py +35 -0
  57. package/knowledge_index/query_api.py +38 -0
  58. package/knowledge_index/store.py +23 -0
  59. package/llm_context/__init__.py +6 -0
  60. package/llm_context/context_builder.py +67 -0
  61. package/llm_context/snippet_selector.py +57 -0
  62. package/package.json +14 -0
  63. package/parser/__init__.py +18 -0
  64. package/parser/ast_parser.py +216 -0
  65. package/parser/call_resolver.py +52 -0
  66. package/parser/models.py +75 -0
  67. package/parser/tree_sitter_adapter.py +56 -0
  68. package/parser/variable_extractor.py +31 -0
  69. package/retrieval/__init__.py +17 -0
  70. package/retrieval/cache.py +22 -0
  71. package/retrieval/hybrid_retriever.py +249 -0
  72. package/retrieval/query_parser.py +38 -0
  73. package/retrieval/ranking.py +43 -0
  74. package/retrieval/semantic_retriever.py +39 -0
  75. package/retrieval/symbolic_retriever.py +80 -0
  76. package/scanner/__init__.py +5 -0
  77. package/scanner/file_filters.py +37 -0
  78. package/scanner/models.py +44 -0
  79. package/scanner/repository_scanner.py +55 -0
  80. package/scripts/bootstrap_from_github.ps1 +41 -0
  81. package/tracing/__init__.py +1 -0
  82. package/tracing/runtime_tracer.py +60 -0
@@ -0,0 +1,2426 @@
1
+ """CLI command: context."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+
10
+ import networkx as nx
11
+
12
+ from config.scanner_config import ScannerConfig
13
+ from graphs.call_graph import build_call_graph
14
+ from graphs.code_graph import build_code_structure_graph
15
+ from graphs.variable_graph import build_variable_graph
16
+ from llm_context.context_builder import build_context
17
+ from llm_context.snippet_selector import RankedSnippet, estimate_tokens
18
+ from parser.ast_parser import parse_python_file, parse_python_source
19
+ from retrieval.hybrid_retriever import hybrid_retrieve
20
+ from scanner.repository_scanner import scan_repository
21
+
22
+ # Simple in-process cache for repo-wide context builds
23
+ _REPO_CACHE: dict[str, tuple[nx.DiGraph, dict[str, str], dict[str, str], dict[str, str]]] = {}
24
+
25
+ _FRONTEND_EXTENSIONS = {
26
+ ".js",
27
+ ".jsx",
28
+ ".ts",
29
+ ".tsx",
30
+ ".css",
31
+ ".scss",
32
+ ".sass",
33
+ ".less",
34
+ ".html",
35
+ ".vue",
36
+ }
37
+
38
+ _CODE_EXTENSIONS = {
39
+ ".py",
40
+ ".pyi",
41
+ ".js",
42
+ ".jsx",
43
+ ".ts",
44
+ ".tsx",
45
+ ".json",
46
+ ".yaml",
47
+ ".yml",
48
+ ".toml",
49
+ ".ini",
50
+ ".cfg",
51
+ *sorted(_FRONTEND_EXTENSIONS),
52
+ }
53
+
54
+ _DOC_EXTENSIONS = {".md", ".txt", ".rst"}
55
+
56
+ _ALL_CONTEXT_EXTENSIONS = _CODE_EXTENSIONS | _DOC_EXTENSIONS
57
+
58
+ _EXCLUDE_GLOBS = (
59
+ "get-shit-done/docs/ja-JP/**",
60
+ "get-shit-done/docs/zh-CN/**",
61
+ )
62
+
63
+ _FRONTEND_KEYWORDS = {
64
+ "frontend",
65
+ "ui",
66
+ "ux",
67
+ "component",
68
+ "react",
69
+ "vue",
70
+ "svelte",
71
+ "angular",
72
+ "css",
73
+ "style",
74
+ "layout",
75
+ "toolbar",
76
+ "canvas",
77
+ "page",
78
+ "view",
79
+ "hook",
80
+ "hooks",
81
+ }
82
+
83
+ _STOPWORDS = {
84
+ "how",
85
+ "does",
86
+ "when",
87
+ "what",
88
+ "why",
89
+ "where",
90
+ "which",
91
+ "the",
92
+ "this",
93
+ "that",
94
+ "into",
95
+ "from",
96
+ "with",
97
+ "files",
98
+ "file",
99
+ "help",
100
+ "doesnt",
101
+ "using",
102
+ "used",
103
+ }
104
+
105
+ _OPERATIONAL_DOC_NAMES = {
106
+ "agent.md",
107
+ "agent_usage.md",
108
+ "architecture.md",
109
+ "project.md",
110
+ "roadmap.md",
111
+ "debugging_playbook.md",
112
+ "readme.md",
113
+ "skill.md",
114
+ "claude.md",
115
+ "contextgrabber.md",
116
+ }
117
+
118
+ _OPERATIONAL_PATH_HINTS = (
119
+ ".planning/",
120
+ ".gcie/",
121
+ "/skills/",
122
+ "get-shit-done/workflows/",
123
+ "get-shit-done/commands/",
124
+ "get-shit-done/templates/",
125
+ )
126
+
127
+ _QUERY_ALIASES = {
128
+ "routing": ("router", "route"),
129
+ "router": ("routing", "route"),
130
+ "bootstrapped": ("bootstrap", "init", "initialize", "managed"),
131
+ "bootstrapping": ("bootstrap", "init", "initialize", "managed"),
132
+ "bootstrap": ("init", "initialize", "managed", "index", "architecture"),
133
+ "managed": ("index", "architecture"),
134
+ "command": ("cli", "handler", "run", "context"),
135
+ "commands": ("cli", "handler", "run", "context"),
136
+ "context": ("builder", "command", "cli"),
137
+ "pipeline": ("retrieval", "hybrid", "symbolic", "semantic", "ranking"),
138
+ "retrieval": ("pipeline", "hybrid", "symbolic", "semantic", "ranking"),
139
+ "hybrid": ("retrieval", "symbolic", "semantic", "ranking"),
140
+ "builder": ("build", "index", "context"),
141
+ "builders": ("builder", "build", "index", "context"),
142
+ "build": ("builder", "context"),
143
+ "plan": ("planner", "pipeline", "stage"),
144
+ "planner": ("plan", "pipeline", "stage"),
145
+ "convert": ("conversion", "api", "route"),
146
+ "conversion": ("convert", "api", "route"),
147
+ "analyze": ("analysis", "pipeline", "stage"),
148
+ "analysis": ("analyze", "pipeline", "stage"),
149
+ "extract": ("extraction", "pipeline", "stage"),
150
+ "extraction": ("extract", "pipeline", "stage"),
151
+ "stage": ("pipeline", "plan", "build"),
152
+ "stages": ("stage", "pipeline", "plan", "build"),
153
+ "scanning": ("scanner", "scan", "repository"),
154
+ "scanner": ("scanning", "scan", "repository"),
155
+ "tracing": ("trace", "tracer", "execution"),
156
+ "represented": ("representation", "represent", "execution"),
157
+ "generate": ("generation", "agent", "model", "stream"),
158
+ "refine": ("refinement", "patch", "chat", "model"),
159
+ "wiring": ("app", "main", "entry", "route", "router"),
160
+ }
161
+
162
+ _GENERIC_ENTRYPOINT_STEMS = {"main", "index", "app"}
163
+ _GENERIC_ENTRYPOINT_PATHS = {
164
+ "frontend/src/main.jsx",
165
+ "frontend/index.html",
166
+ }
167
+ _BACKEND_PATH_HINTS = ("backend/", "server/", "api/", "services/", "service/", "workers/", "worker/")
168
+ _BACKEND_FILE_HINTS = (
169
+ "client",
170
+ "service",
171
+ "worker",
172
+ "controller",
173
+ "handler",
174
+ "router",
175
+ "route",
176
+ "config",
177
+ "settings",
178
+ "pipeline",
179
+ "plan",
180
+ "build",
181
+ "extract",
182
+ "analyze",
183
+ )
184
+ _CHAIN_TERMS = {
185
+ "stage",
186
+ "stages",
187
+ "pipeline",
188
+ "plan",
189
+ "planner",
190
+ "build",
191
+ "builder",
192
+ "convert",
193
+ "analyze",
194
+ "extract",
195
+ "workflow",
196
+ }
197
+ _COMMON_FAMILY_TOKENS = {
198
+ "src",
199
+ "tests",
200
+ "test",
201
+ "commands",
202
+ "command",
203
+ "context",
204
+ "cli",
205
+ "core",
206
+ "app",
207
+ "file",
208
+ "files",
209
+ "index",
210
+ "init",
211
+ "main",
212
+ }
213
+
214
+ _SYSTEM_QUERY_TERMS = {
215
+ "architecture",
216
+ "bootstrap",
217
+ "command",
218
+ "commands",
219
+ "context",
220
+ "pipeline",
221
+ "retrieval",
222
+ "workflow",
223
+ "builder",
224
+ "builders",
225
+ "graph",
226
+ "routing",
227
+ "router",
228
+ "generate",
229
+ "refine",
230
+ "wiring",
231
+ }
232
+
233
+ _SUPPORT_ROLE_TOKENS = {
234
+ "app",
235
+ "main",
236
+ "index",
237
+ "entry",
238
+ "router",
239
+ "route",
240
+ "context",
241
+ "builder",
242
+ "hook",
243
+ "hooks",
244
+ "provider",
245
+ "service",
246
+ "client",
247
+ "store",
248
+ "state",
249
+ "handler",
250
+ "controller",
251
+ "bootstrap",
252
+ "command",
253
+ "commands",
254
+ "retriever",
255
+ "selector",
256
+ "evaluator",
257
+ "parser",
258
+ "scanner",
259
+ "generate",
260
+ "refine",
261
+ }
262
+
263
+ _SUPPORT_PROMOTION_TERMS = {
264
+ "routing",
265
+ "router",
266
+ "fallback",
267
+ "bootstrap",
268
+ "managed",
269
+ "index",
270
+ "builder",
271
+ "build",
272
+ "command",
273
+ "commands",
274
+ "context",
275
+ "orchestration",
276
+ "workflow",
277
+ }
278
+
279
+
280
+ def _snippet_from_lines(lines: list[str], max_lines: int) -> str:
281
+ if not lines:
282
+ return ""
283
+ return "\n".join(lines[:max_lines]).strip()
284
+
285
+
286
+ def _repo_signature(repo_path: Path, manifest_files) -> str:
287
+ parts: list[str] = [repo_path.as_posix()]
288
+ for entry in manifest_files:
289
+ try:
290
+ stat = (repo_path / entry.relative_path).stat()
291
+ except OSError:
292
+ continue
293
+ parts.append(f"{entry.relative_path.as_posix()}:{stat.st_mtime_ns}:{stat.st_size}")
294
+ return "|".join(parts)
295
+
296
+
297
+ def _cache_dir(repo_path: Path) -> Path:
298
+ return repo_path / ".gcie" / "cache"
299
+
300
+
301
def _cache_path(repo_path: Path) -> Path:
    """Return the path of the JSON context cache file for *repo_path*."""
    return _cache_dir(repo_path).joinpath("context_cache.json")
303
+
304
+
305
def _load_disk_cache(cache_path: Path) -> tuple[str, nx.DiGraph, dict[str, str], dict[str, str], dict[str, str]] | None:
    """Load a previously saved context cache from disk.

    Returns ``(signature, graph, file_text, function_snippets,
    class_snippets)`` on success, or ``None`` when the cache file is
    missing, unreadable, or lacks the mandatory fields. The caller is
    expected to compare ``signature`` against a freshly computed repo
    signature before trusting the payload.
    """
    if not cache_path.exists():
        return None
    try:
        payload = json.loads(cache_path.read_text(encoding="utf-8"))
    except Exception:
        # Best-effort cache: any read/parse failure means "no cache".
        return None

    signature = payload.get("signature")
    graph_data = payload.get("graph")
    if signature is None or graph_data is None:
        return None

    # Rebuild the directed graph from node-link JSON (see _save_disk_cache).
    graph = nx.node_link_graph(graph_data, directed=True)
    file_text = payload.get("file_text", {})
    function_snippets = payload.get("function_snippets", {})
    class_snippets = payload.get("class_snippets", {})
    return signature, graph, file_text, function_snippets, class_snippets
323
+
324
+
325
def _save_disk_cache(
    cache_path: Path,
    *,
    signature: str,
    graph: nx.DiGraph,
    file_text: dict[str, str],
    function_snippets: dict[str, str],
    class_snippets: dict[str, str],
) -> None:
    """Persist the repo context (graph + snippets) as JSON at *cache_path*.

    The graph is serialized with networkx node-link encoding, which
    ``_load_disk_cache`` reverses. Parent directories are created on
    demand.

    NOTE(review): ``nx.node_link_data``'s default ``edges``/``link`` key
    changed across networkx releases — confirm the pinned networkx
    version round-trips with ``_load_disk_cache``.
    """
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "signature": signature,
        "graph": nx.node_link_data(graph),
        "file_text": file_text,
        "function_snippets": function_snippets,
        "class_snippets": class_snippets,
    }
    cache_path.write_text(json.dumps(payload), encoding="utf-8")
343
+
344
+
345
+ def _effective_intent(query: str, intent: str | None) -> str:
346
+ if intent:
347
+ return intent
348
+ text = query.lower()
349
+ if any(word in text for word in ("debug", "why", "error", "fail", "bug", "trace")):
350
+ return "debug"
351
+ if any(word in text for word in ("refactor", "rewrite", "migrate", "restructure")):
352
+ return "refactor"
353
+ if any(word in text for word in ("add", "change", "update", "extend", "modify", "remove", "rename")):
354
+ return "edit"
355
+ return "explore"
356
+
357
+
358
def _query_terms(query: str) -> set[str]:
    """Extract normalized search terms (plus configured aliases) from *query*.

    Identifier-like tokens are split on underscores; pieces shorter than
    three characters or in the stopword list are dropped, and each kept
    piece pulls in its ``_QUERY_ALIASES`` expansions under the same rules.
    """
    terms: set[str] = set()
    for token in re.findall(r"[a-zA-Z_][a-zA-Z0-9_]*", query.lower()):
        for piece in token.split("_"):
            if len(piece) < 3 or piece in _STOPWORDS:
                continue
            terms.add(piece)
            terms.update(
                alias
                for alias in _QUERY_ALIASES.get(piece, ())
                if len(alias) >= 3 and alias not in _STOPWORDS
            )
    return terms
369
+
370
+
371
def _is_system_query(query: str) -> bool:
    """True when the query mentions any architecture/system-level term."""
    overlap = _query_terms(query) & _SYSTEM_QUERY_TERMS
    return len(overlap) > 0
373
+
374
+
375
def _classify_path(path: str) -> str:
    """Classify a repo-relative path as code, operational doc, or general doc.

    Code extensions win outright; among doc extensions, well-known
    operational file names, operational path hints, and plan/context
    naming patterns mark a file as "operational_doc". Everything else
    (including unknown extensions) is "general_doc".
    """
    p = Path(path)
    ext = p.suffix.lower()
    full = p.as_posix().lower()
    base = p.name.lower()

    if ext in _CODE_EXTENSIONS:
        return "code"
    if ext not in _DOC_EXTENSIONS:
        return "general_doc"
    if base in _OPERATIONAL_DOC_NAMES:
        return "operational_doc"
    if any(hint in full for hint in _OPERATIONAL_PATH_HINTS):
        return "operational_doc"
    if "/plans/" in full or full.endswith(("-plan.md", "-context.md")):
        return "operational_doc"
    return "general_doc"
392
+
393
+
394
+ def _normalized_query_text(query: str) -> str:
395
+ return query.lower().replace('\\', '/')
396
+
397
+
398
def _explicit_file_mention_score(path: str, query: str) -> float:
    """Score how explicitly *query* mentions *path* (0.0 .. 1.6).

    A full relative-path substring scores highest, the bare file name
    (with extension) adds less, and a whole-word match on the stem adds
    a small residual; the total is capped at 1.6.
    """
    normalized_query = _normalized_query_text(query)
    normalized_path = path.lower().replace('\\', '/')
    candidate = Path(path)
    name = candidate.name.lower()
    stem = candidate.stem.lower()
    score = 0.0
    if normalized_path and normalized_path in normalized_query:
        score += 1.2  # exact relative path quoted in the query
    if name and name in normalized_query:
        score += 0.75  # file name with extension
    if stem and re.search(rf"\b{re.escape(stem)}\b", normalized_query):
        score += 0.2  # bare stem as a whole word
    return min(1.6, score)  # cap so a single file cannot dominate ranking
412
+
413
+
414
def _mentioned_file_paths(file_text: dict[str, str], query: str) -> list[tuple[float, str]]:
    """Return (score, path) pairs for files explicitly mentioned in *query*.

    Only positive scores are kept; results are ordered best-first, with
    path as a deterministic tie-breaker.
    """
    scored = (
        (_explicit_file_mention_score(rel_path, query), rel_path)
        for rel_path in file_text
    )
    matches = [pair for pair in scored if pair[0] > 0]
    return sorted(matches, key=lambda pair: (-pair[0], pair[1]))
423
+
424
+
425
+ def _layer_bucket(path: str | None) -> str:
426
+ if not path:
427
+ return "unknown"
428
+ normalized = path.lower().replace("\\", "/")
429
+ if normalized.startswith(("frontend/", "ui/", "web/")):
430
+ return "frontend"
431
+ if normalized.startswith(("backend/", "server/", "api/")):
432
+ return "backend"
433
+ if normalized.startswith(("tests/", "test/")):
434
+ return "test"
435
+ if normalized.startswith(("docs/", ".gcie/")) or normalized.endswith(".md"):
436
+ return "docs"
437
+ if any(token in normalized for token in ("build", "theme", "pptx", "worker", "job")):
438
+ return "build"
439
+ candidate = Path(path)
440
+ if candidate.parts:
441
+ return candidate.parts[0].lower()
442
+ return candidate.stem.lower()
443
+
444
+
445
+
446
def _is_edit_like_query(query: str, intent: str | None) -> bool:
    """True when the (derived) intent implies the user wants to change code."""
    if _effective_intent(query, intent) in {"edit", "refactor"}:
        return True
    text = query.lower()
    for marker in ("fix", "modify", "patch", "rename", "update", "change"):
        if marker in text:
            return True
    return False
452
+
453
+
454
def _is_backend_path(path: str) -> bool:
    """Heuristic: does *path* look like backend/service-side code?

    Matches either a known backend directory prefix or an overlap
    between the path's family tokens (plus its stem) and the backend
    file-name hints.
    """
    normalized = path.lower().replace("\\", "/")
    if normalized.startswith(_BACKEND_PATH_HINTS):
        return True
    candidate_tokens = _family_tokens(path) | {Path(path).stem.lower()}
    return len(candidate_tokens & set(_BACKEND_FILE_HINTS)) > 0
460
+
461
+
462
def _query_shape(query: str, intent: str | None, explicit_paths: set[str]) -> str:
    """Categorize the query into a coarse retrieval "shape".

    Possible return values, checked in priority order:
    ``multi_hop_chain``, ``cross_layer_ui_api``, ``builder_orchestrator``,
    ``backend_config_pair``, ``same_layer_pair``, and the default
    ``single_file``. Downstream scoring functions branch on this label.
    """
    terms = _query_terms(query)
    effective = _effective_intent(query, intent)
    lowered = query.lower()
    explicit_layers = {_layer_bucket(path) for path in explicit_paths}
    explicit_count = len(explicit_paths)

    # Layer signals come either from explicitly named files or from
    # tell-tale substrings in the query text itself.
    has_frontend = any(layer == "frontend" for layer in explicit_layers) or "frontend/" in lowered
    has_backend = any(layer in {"backend", "api"} for layer in explicit_layers) or any(
        hint in lowered for hint in ("/api/", "app.py", "main.py", "backend")
    )
    has_chain_terms = bool(terms & _CHAIN_TERMS)

    # Many explicit files (or several plus pipeline vocabulary) means the
    # user is tracing a chain of calls across modules.
    if explicit_count >= 4 or (explicit_count >= 3 and has_chain_terms):
        return "multi_hop_chain"
    if has_frontend and has_backend:
        return "cross_layer_ui_api"
    if has_chain_terms and ("build" in terms or "planner" in terms or "stage" in terms):
        return "builder_orchestrator"

    backend_explicit = [path for path in explicit_paths if _is_backend_path(path)]
    if len(backend_explicit) >= 2 and effective in {"explore", "debug", "edit", "refactor"}:
        return "backend_config_pair"

    if explicit_count == 1:
        return "single_file"

    # Two-plus explicit files from one "family" behave as a peer pair;
    # mixed families fall through to the single-file default.
    if explicit_count >= 2:
        families = {_candidate_family(path) for path in explicit_paths}
        if len(families) == 1:
            return "same_layer_pair"

    return "single_file"
495
+
496
+
497
def _is_generic_entrypoint(path: str) -> bool:
    """True for generic entrypoint files (main/index/app and known paths)."""
    normalized = path.lower().replace("\\", "/")
    return (
        normalized in _GENERIC_ENTRYPOINT_PATHS
        or Path(path).stem.lower() in _GENERIC_ENTRYPOINT_STEMS
    )
505
+
506
+
507
def _candidate_role(
    path: str | None,
    query: str,
    query_shape: str,
    explicit_targets: set[str],
    strong_paths: list[str],
) -> str:
    """Assign a ranking role to a candidate file.

    Returns one of: ``explicit_target``, ``generic_entrypoint``,
    ``caller_or_entry``, ``intermediate_pipeline``, ``sibling_module``,
    or ``support_config``. The role feeds ``_role_adjustment``.

    NOTE(review): the ``query`` parameter is unused here — presumably
    kept for signature symmetry with the other scorers; confirm before
    removing.
    """
    if not path:
        return "support_config"

    normalized = path.lower().replace("\\", "/")
    candidate = Path(path)
    stem = candidate.stem.lower()
    # _file_role is defined elsewhere in this module; assumed to return a
    # coarse role string such as "app"/"router" — TODO confirm.
    role = _file_role(path)

    if path in explicit_targets:
        return "explicit_target"
    if _is_generic_entrypoint(path):
        return "generic_entrypoint"
    if role in {"app", "main", "index", "router", "route", "entry", "command"}:
        return "caller_or_entry"

    # For chain-tracing queries, pipeline-flavored modules (that are not
    # generic entrypoints) are promoted as intermediate hops.
    if query_shape == "multi_hop_chain":
        tokens = _family_tokens(path)
        if tokens & {"plan", "build", "stage", "pipeline", "extract", "analyze"} and stem not in _GENERIC_ENTRYPOINT_STEMS:
            return "intermediate_pipeline"

    # Backend files sharing a directory with a strong backend anchor are
    # treated as sibling modules.
    if _is_backend_path(path):
        for anchor in strong_paths:
            if not _is_backend_path(anchor):
                continue
            anchor_path = Path(anchor)
            if candidate.parent == anchor_path.parent and path != anchor:
                return "sibling_module"

    # Pure config/markup extensions are support material.
    suffix = candidate.suffix.lower()
    if suffix in {".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".css", ".scss", ".sass", ".less", ".html"}:
        return "support_config"

    if "config" in normalized or "settings" in normalized or any(hint in normalized for hint in ("tailwind", "vite", "postcss", "vercel", "package.json")):
        return "support_config"

    return "sibling_module"
550
+
551
+
552
def _role_adjustment(role: str, query_shape: str, query: str, intent: str | None) -> float:
    """Translate a candidate role into a score delta, shaped by the query.

    Positive values boost the candidate, negative values demote it.
    The magnitudes are hand-tuned weights; unknown roles adjust by 0.0.
    """
    effective = _effective_intent(query, intent)
    if role == "explicit_target":
        # Files the user named get a big boost, even bigger for edits.
        if _is_edit_like_query(query, intent):
            return 0.95
        return 0.55
    if role == "generic_entrypoint":
        # main/index/app files are rarely the answer, especially for edits.
        if _is_edit_like_query(query, intent):
            return -0.4
        if query_shape in {"multi_hop_chain", "builder_orchestrator"}:
            return -0.12
        return -0.24
    if role == "sibling_module":
        if query_shape in {"backend_config_pair", "same_layer_pair"}:
            return 0.18
        return 0.08
    if role == "caller_or_entry":
        # Entry/router files matter more when the query spans layers or hops.
        if query_shape in {"cross_layer_ui_api", "multi_hop_chain", "builder_orchestrator"}:
            return 0.16
        return 0.04
    if role == "intermediate_pipeline":
        if query_shape == "multi_hop_chain":
            return 0.3
        if query_shape == "builder_orchestrator":
            return 0.16
        return 0.08
    if role == "support_config":
        # Config/support files are demoted hardest when debugging.
        if effective == "debug":
            return -0.28
        if effective in {"edit", "refactor"}:
            return -0.16
        return -0.12
    return 0.0
585
+
586
+
587
+
588
def _subtree_locality_adjustment(path: str | None, explicit_targets: set[str], query_shape: str) -> float:
    """Score delta for how close *path* sits to the explicitly named files.

    Proximity is measured at increasing granularity: exact target, same
    parent directory, same family, same top-level root. Returns 0.0 when
    there is no path or no explicit targets to compare against.
    """
    if not path or not explicit_targets:
        return 0.0
    explicit_roots = {Path(item).parts[0].lower() for item in explicit_targets if Path(item).parts}
    explicit_parents = {
        Path(item).parent.as_posix().lower()
        for item in explicit_targets
        if Path(item).parent.as_posix() not in {"", "."}
    }
    explicit_families = {_candidate_family(item) for item in explicit_targets}
    # Deeply nested explicit targets (>= 3 path components) signal the
    # user cares about a specific subtree, which sharpens the deltas.
    path_local_focus = any(len(Path(item).parts) >= 3 for item in explicit_targets)
    candidate = Path(path)
    candidate_root = candidate.parts[0].lower() if candidate.parts else ""
    candidate_family = _candidate_family(path)
    candidate_parent = candidate.parent.as_posix().lower()

    if path in explicit_targets:
        return 0.18
    if candidate_parent in explicit_parents and path_local_focus:
        return 0.22
    if candidate_family in explicit_families:
        return 0.14 if path_local_focus else 0.12
    if query_shape in {"cross_layer_ui_api", "backend_config_pair"}:
        if candidate_root in explicit_roots:
            return 0.04
        return -0.08
    # All explicit targets under one root: penalize foreign roots.
    if len(explicit_roots) == 1:
        if candidate_root not in explicit_roots:
            return -0.18 if path_local_focus else -0.14
        return 0.12
    if path_local_focus and candidate_root in explicit_roots:
        return 0.02
    return -0.04
621
+
622
+
623
def _support_config_penalty(path: str | None, role: str, explicit_targets: set[str]) -> float:
    """Penalty for support/config/doc candidates unrelated to the targets.

    Support material in the same family as an explicit target is only
    mildly penalized; unrelated support material is demoted strongly.

    NOTE(review): the role set includes "general_doc"/"operational_doc",
    which the visible ``_candidate_role`` never returns — presumably
    another call site passes ``_classify_path`` output here; confirm.
    """
    if not path or not explicit_targets:
        return 0.0
    if role not in {"support_config", "general_doc", "operational_doc"}:
        return 0.0
    candidate_family = _candidate_family(path)
    explicit_families = {_candidate_family(item) for item in explicit_targets}
    if candidate_family in explicit_families:
        return -0.08
    return -0.26
633
+
634
+
635
def _promote_priority_first(
    ranked: list[RankedSnippet],
    explicit_priority_ids: set[str],
    linked_priority_ids: set[str],
    chain_priority_ids: set[str],
    explicit_targets: set[str] | None = None,
) -> list[RankedSnippet]:
    """Stable-reorder ranked snippets so priority nodes come first.

    Sort keys, in precedence order: explicit priority ids, linked
    priority ids, chain priority ids, file-level nodes before symbol
    nodes, locality tier relative to the explicit targets, descending
    score, and finally node id as a deterministic tie-breaker.
    """
    explicit_priority_ids = set(explicit_priority_ids)
    linked_priority_ids = set(linked_priority_ids)
    chain_priority_ids = set(chain_priority_ids)
    explicit_targets = set(explicit_targets or set())
    explicit_roots = {Path(path).parts[0].lower() for path in explicit_targets if Path(path).parts}
    explicit_families = {_candidate_family(path) for path in explicit_targets}
    explicit_parents = {
        Path(path).parent.as_posix().lower()
        for path in explicit_targets
        if Path(path).parent.as_posix() not in {"", "."}
    }

    def _locality_tier(node_id: str) -> int:
        # Lower tier = closer to the explicit targets (0 exact .. 4 far,
        # 6 for nodes with no resolvable file path).
        path = _node_file_path(node_id)
        if not path:
            return 6
        if path in explicit_targets:
            return 0
        candidate = Path(path)
        candidate_parent = candidate.parent.as_posix().lower()
        if candidate_parent in explicit_parents:
            return 1
        if _candidate_family(path) in explicit_families:
            return 2
        candidate_root = candidate.parts[0].lower() if candidate.parts else ""
        if candidate_root in explicit_roots:
            return 3
        return 4

    return sorted(
        ranked,
        key=lambda item: (
            0 if item.node_id in explicit_priority_ids else 1,
            0 if item.node_id in linked_priority_ids else 1,
            0 if item.node_id in chain_priority_ids else 1,
            0 if item.node_id.startswith("file:") else 1,
            _locality_tier(item.node_id),
            -item.score,
            item.node_id,
        ),
    )
683
+
684
def _family_competition_adjustment(path: str | None, explicit_targets: set[str], query_shape: str) -> float:
    """Boost candidates sharing a family with the explicit targets.

    Candidates in a target family get a positive delta (larger when the
    targets are deeply nested); everything else gets a small penalty
    whose size depends on the query shape.
    """
    if not path or not explicit_targets:
        return 0.0
    explicit_roots = {Path(item).parts[0].lower() for item in explicit_targets if Path(item).parts}
    path_local_focus = any(len(Path(item).parts) >= 3 for item in explicit_targets)
    explicit_families = {_candidate_family(item) for item in explicit_targets}
    family = _candidate_family(path)
    if family in explicit_families:
        return 0.24 if path_local_focus else 0.16
    if query_shape in {"cross_layer_ui_api", "same_layer_pair", "backend_config_pair"}:
        return -0.06
    if path_local_focus and len(explicit_roots) == 1:
        return -0.08
    return -0.02
698
+
699
def _entrypoint_penalty(path: str, explicit_targets: set[str]) -> float:
    """Penalty magnitude for generic entrypoints shadowed by better peers.

    A generic entrypoint (main/index/app) is penalized only when some
    explicit target in the same layer or family is itself not a generic
    entrypoint — i.e. a more specific file already represents the area.

    NOTE(review): despite the name, this returns a positive magnitude —
    presumably the caller subtracts it from the score; confirm.
    """
    if not explicit_targets:
        return 0.0
    if not _is_generic_entrypoint(path):
        return 0.0

    candidate = Path(path)
    candidate_layer = _layer_bucket(path)
    candidate_family = _candidate_family(path)
    has_stronger_peer = any(
        target != path
        and (_layer_bucket(target) == candidate_layer or _candidate_family(target) == candidate_family)
        and not _is_generic_entrypoint(target)
        for target in explicit_targets
    )
    if has_stronger_peer:
        # main/app stems are the most generic and penalized hardest.
        if candidate.stem.lower() in {"main", "app"}:
            return 0.34
        return 0.28
    return 0.0
719
+
720
+
721
def _explicit_priority_ids(file_text: dict[str, str], query: str, intent: str | None = None) -> set[str]:
    """Node ids for files the query names explicitly.

    Edit-like queries use a lower mention-score cutoff (0.5 vs 0.75).
    """
    cutoff = 0.5 if _is_edit_like_query(query, intent) else 0.75
    ids: set[str] = set()
    for score, path in _mentioned_file_paths(file_text, query):
        if score >= cutoff:
            ids.add(f"file:{path}")
    return ids
724
+
725
+
726
def _layer_priority_ids(ranked: list[RankedSnippet], query: str, intent: str | None, explicit_priority_ids: set[str]) -> set[str]:
    """For multi-layer edit queries, pick the best code file per layer.

    Only applies when the effective intent is "edit" and the explicit
    priority files span at least two layers; returns the node ids of the
    top-scoring code file in each of those layers (empty set otherwise).
    """
    if _effective_intent(query, intent) != "edit":
        return set()
    # Strip the "file:" prefix (5 chars) to recover relative paths.
    explicit_paths = [node_id[5:] for node_id in explicit_priority_ids if node_id.startswith("file:")]
    explicit_layers = {_layer_bucket(path) for path in explicit_paths}
    if len(explicit_layers) < 2:
        return set()

    best_by_layer: dict[str, tuple[float, str]] = {}
    for item in ranked:
        if not item.node_id.startswith("file:"):
            continue
        path = item.node_id[5:]
        if _classify_path(path) != "code":
            continue
        layer = _layer_bucket(path)
        if layer not in explicit_layers:
            continue
        current = best_by_layer.get(layer)
        # Retrieval score plus explicit-mention bonus; ties break on the
        # lexicographically smaller node id for determinism.
        score = item.score + _explicit_file_mention_score(path, query)
        if current is None or score > current[0] or (score == current[0] and item.node_id < current[1]):
            best_by_layer[layer] = (score, item.node_id)
    return {node_id for _, node_id in best_by_layer.values()}
749
+
750
+
751
def _family_tokens(path: str) -> set[str]:
    """Split a path into lowercase "family" tokens.

    Tokens shorter than three characters or listed in the common-token
    set are dropped; the file stem contributes tokens as well.
    """
    def _keep(piece: str) -> bool:
        return len(piece) >= 3 and piece not in _COMMON_FAMILY_TOKENS

    p = Path(path)
    tokens: set[str] = set()
    for part in p.parts:
        for token in re.split(r"[^a-zA-Z0-9_]+", part.lower()):
            tokens.update(piece for piece in token.split("_") if _keep(piece))
    tokens.update(piece for piece in p.stem.lower().split("_") if _keep(piece))
    return tokens
764
+
765
+
766
def _path_match_score(path: str, query: str) -> float:
    """Score how well *path* lexically matches the query terms.

    With no usable query terms, falls back to the explicit-mention
    score. Otherwise accumulates: 0.2 per term appearing as a substring
    of the path, 0.15 per term matching a path component exactly, and
    0.08 per term overlapping the path's family tokens; test files are
    slightly demoted unless the query asks about tests.
    """
    terms = _query_terms(query)
    if not terms:
        # Fix: the original computed this score unconditionally and then
        # discarded it (dead store) whenever terms existed; compute it
        # only on the branch that actually uses it.
        return _explicit_file_mention_score(path, query)
    lowered = path.lower()
    score = 0.0
    for term in terms:
        if term in lowered:
            score += 0.2
    parts = {part for part in re.split(r"[^a-zA-Z0-9_]+", lowered) if part}
    overlap = terms & parts
    if overlap:
        score += 0.15 * len(overlap)
    family_overlap = terms & _family_tokens(path)
    if family_overlap:
        score += 0.08 * len(family_overlap)
    # Demote test files unless the user explicitly asked about tests.
    if lowered.startswith("tests/") and "test" not in terms and "tests" not in terms:
        score -= 0.1
    return score
786
+
787
+
788
def _content_match_score(content: str, query: str) -> float:
    """Small bonus for query terms appearing in file content.

    0.07 per matching term, capped at 0.35; 0.0 when there are no terms
    or no hits.
    """
    terms = _query_terms(query)
    if not terms:
        return 0.0
    haystack = content.lower()
    hits = sum(term in haystack for term in terms)
    return min(0.35, hits * 0.07) if hits else 0.0
797
+
798
+
799
def _class_weight(path: str, query: str, intent: str | None) -> float:
    """Weight a file by its class (code / operational doc / general doc).

    The weight matrix is keyed on the effective intent: code is always
    boosted (most for debug), vendored "get-shit-done/" content and
    templates are demoted, operational docs get a modest boost except
    when debugging templates, and general docs are mildly demoted.
    """
    file_class = _classify_path(path)
    lowered_path = path.lower().replace("\\", "/")
    effective_intent = _effective_intent(query, intent)
    if effective_intent == "debug":
        if file_class == "code":
            return 0.4
        if "get-shit-done/" in lowered_path:
            return -0.45
        if file_class == "operational_doc":
            if "/templates/" in lowered_path:
                return -0.28
            return 0.12
        return -0.35
    if effective_intent == "explore":
        if file_class == "code":
            return 0.18
        if "get-shit-done/" in lowered_path:
            return -0.32
        if file_class == "operational_doc":
            if "/templates/" in lowered_path:
                return -0.2
            return 0.22
        return -0.05
    if effective_intent == "refactor":
        if file_class == "code":
            return 0.3
        if "get-shit-done/" in lowered_path:
            return -0.3
        if file_class == "operational_doc":
            if "/templates/" in lowered_path:
                return -0.18
            return 0.1
        return -0.15
    # Default (e.g. "edit") weights.
    if file_class == "code":
        return 0.25
    if file_class == "operational_doc":
        return 0.1
    return -0.1
838
+
839
+
840
def _strong_path_matches(ranked: list[RankedSnippet], query: str, intent: str | None) -> list[str]:
    """Collect up to 8 file paths that strongly match the query.

    Extracts the file path from file/function/class node ids, scores it
    with path-match plus class weight, and keeps paths whose combined
    strength reaches 0.55 (in ranked order, duplicates not removed).
    """
    strong: list[str] = []
    for item in ranked:
        if item.node_id.startswith("file:"):
            path = item.node_id[5:]
        elif item.node_id.startswith(("function:", "class:")):
            # "function:pkg/mod.py::name" -> "pkg/mod.py"
            path = item.node_id.split(":", 1)[1].split("::", 1)[0]
        else:
            continue
        strength = _path_match_score(path, query) + _class_weight(path, query, intent)
        if strength >= 0.55:
            strong.append(path)
    return strong[:8]
853
+
854
+
855
+ def _reference_tokens(path: str) -> set[str]:
856
+ candidate = Path(path)
857
+ dotted = ".".join(candidate.with_suffix("").parts)
858
+ tokens = {candidate.stem.lower(), dotted.lower()}
859
+ if candidate.parent.parts:
860
+ tokens.add(f"{candidate.parent.name.lower()}.{candidate.stem.lower()}")
861
+ return {token for token in tokens if token}
862
+
863
+
864
def _adjacency_boost(
    path: str,
    query: str,
    intent: str | None,
    strong_paths: list[str],
    file_text: dict[str, str],
) -> float:
    """Bonus for candidates adjacent to strongly matching files.

    Accumulates small bonuses per strong path: shared family tokens,
    shared top-level root, same directory, backend co-location with a
    term overlap, textual references to this module from the strong
    file, and extra same-directory/token credit for system queries.
    Capped at 0.45.

    NOTE(review): the ``intent`` parameter is unused in this body —
    presumably retained for signature symmetry; confirm before removal.
    """
    if not strong_paths:
        return 0.0
    current = Path(path)
    current_tokens = _family_tokens(path)
    current_class = _classify_path(path)
    terms = _query_terms(query)
    bonus = 0.0
    reference_tokens = _reference_tokens(path)
    for matched in strong_paths:
        if matched == path:
            continue
        matched_path = Path(matched)
        shared_tokens = current_tokens & _family_tokens(matched)
        if shared_tokens:
            bonus += min(0.24, 0.08 * len(shared_tokens))
        if current.parts and matched_path.parts and current.parts[0] == matched_path.parts[0]:
            bonus += 0.06
        if current.parent == matched_path.parent:
            bonus += 0.08
        # Backend siblings in the same directory with a query-term hit
        # are likely collaborators.
        if _is_backend_path(path) and _is_backend_path(matched) and current.parent == matched_path.parent and (terms & current_tokens):
            bonus += 0.18
        # Does the strong file's text mention this module by name?
        matched_text = file_text.get(matched, "").lower()
        if matched_text and any(token in matched_text for token in reference_tokens):
            bonus += 0.16
        if _is_system_query(query) and current_class == "code" and _classify_path(matched) == "code":
            if shared_tokens:
                bonus += 0.05
            if current.parent == matched_path.parent:
                bonus += 0.04
    return min(0.45, bonus)
901
+
902
+
903
def _support_role_bonus(path: str, query: str, strong_paths: list[str], file_text: dict[str, str]) -> float:
    """Bonus for files that look like support modules near the strong matches.

    Capped at 0.35.
    """
    current = Path(path)
    tokens = _family_tokens(path)
    score = 0.0
    if tokens & _SUPPORT_ROLE_TOKENS:
        score += 0.14  # some family token marks a support role
    if current.stem.lower() in _SUPPORT_ROLE_TOKENS:
        score += 0.12  # the filename itself is a support-role name
    if _frontend_bias(query) and current.stem.lower() in {"app", "main", "index"}:
        score += 0.1  # frontend-ish queries favour conventional entry files
    for matched in strong_paths:
        matched_path = Path(matched)
        matched_text = file_text.get(matched, "").lower()
        if current == matched_path:
            continue
        if current.parts and matched_path.parts and current.parts[0] == matched_path.parts[0]:
            if tokens & _family_tokens(matched):
                score += 0.08  # same top-level family with shared tokens
        if matched_text and any(token in matched_text for token in _reference_tokens(path)):
            score += 0.16  # the strong file's text references this file
    return min(0.35, score)
924
+
925
+
926
def _mandatory_node_ids(
    ranked: list[RankedSnippet],
    query: str,
    intent: str | None,
    *,
    support_priority_ids: set[str] | None = None,
    explicit_priority_ids: set[str] | None = None,
) -> set[str]:
    """Collect node ids that must survive later trimming.

    Seeds the result with the support/explicit priority sets, then adds ranked
    file nodes that either match the query strongly (path+class score >= 0.45)
    or are well-scored support-role code files while promotion is enabled.

    Fix: the original rebuilt ``explicit_priority_ids or set()`` and re-ran
    ``_effective_intent(...)`` on every loop iteration and called
    ``_classify_path`` twice per item; those loop invariants are hoisted here.
    Behavior is unchanged.
    """
    explicit_ids = set(explicit_priority_ids or set())
    mandatory: set[str] = set(support_priority_ids or set()) | explicit_ids
    support_enabled = _support_promotion_enabled(query, intent)
    debugging = _effective_intent(query, intent) == "debug"
    for item in ranked:
        if not item.node_id.startswith("file:"):
            continue
        path = item.node_id[len("file:") :]
        file_class = _classify_path(path)
        if file_class == "general_doc" and debugging:
            continue  # general docs never become mandatory while debugging
        if file_class != "code" and item.node_id not in explicit_ids:
            continue  # non-code files qualify only when explicitly prioritised
        if _path_match_score(path, query) + _class_weight(path, query, intent) >= 0.45:
            mandatory.add(item.node_id)
            continue
        support_tokens = _family_tokens(path) | {Path(path).stem.lower()}
        if support_enabled and file_class == "code" and support_tokens & _SUPPORT_ROLE_TOKENS and item.score >= 1.25:
            mandatory.add(item.node_id)
    return mandatory
952
+
953
+
954
+
955
def _linked_file_priority_ids(
    ranked: list[RankedSnippet],
    explicit_priority_ids: set[str],
    query_shape: str,
    query: str,
    intent: str | None,
) -> set[str]:
    """Keep up to three explicit-priority ids still present in *ranked* for
    edit-like queries whose shape implies linked files.

    Fix: the original sorted *ranked* by score before feeding a set
    comprehension, but the set discarded that order — the final pick was always
    the lexicographically-first three ids. The dead sort is removed; behavior
    is unchanged (selection remains alphabetical by node id).
    """
    if not explicit_priority_ids:
        return set()
    if not _is_edit_like_query(query, intent):
        return set()
    if query_shape not in {"same_layer_pair", "cross_layer_ui_api", "backend_config_pair", "builder_orchestrator"}:
        return set()

    keep = explicit_priority_ids & {item.node_id for item in ranked}
    # NOTE(review): if a score-based top-3 was intended, this should sort by
    # snippet score instead of node id — confirm against callers.
    return set(sorted(keep)[:3])
975
+
976
+
977
def _chain_quota_priority_ids(
    ranked: list[RankedSnippet],
    query: str,
    intent: str | None,
    explicit_targets: set[str],
) -> set[str]:
    """For chain-shaped queries, reserve up to three code files covering the chain.

    Picks the best-scored caller/entry file, then a pipeline/sibling "middle"
    file, then any remaining downstream file. Empty set unless the query shape
    is multi_hop_chain or builder_orchestrator.
    """
    query_shape = _query_shape(query, intent, explicit_targets)
    if query_shape not in {"multi_hop_chain", "builder_orchestrator"}:
        return set()

    # Only file-level code nodes participate in the quota.
    file_candidates = [item for item in ranked if item.node_id.startswith("file:") and _classify_path(item.node_id[5:]) == "code"]
    if not file_candidates:
        return set()

    sorted_candidates = sorted(file_candidates, key=lambda item: (-item.score, item.node_id))
    # Best-scored candidate playing an entry/caller role.
    caller = next(
        (
            item.node_id
            for item in sorted_candidates
            if _candidate_role(item.node_id[5:], query, query_shape, explicit_targets, []) in {"caller_or_entry", "generic_entrypoint"}
        ),
        None,
    )

    # Best-scored pipeline/sibling candidate that is not the caller.
    middle = next(
        (
            item.node_id
            for item in sorted_candidates
            if _candidate_role(item.node_id[5:], query, query_shape, explicit_targets, []) in {"intermediate_pipeline", "sibling_module"}
            and item.node_id != caller
        ),
        None,
    )

    # Any remaining candidate that is an explicit target or not a bare entrypoint.
    downstream = next(
        (
            item.node_id
            for item in sorted_candidates
            if item.node_id != caller and item.node_id != middle and (
                item.node_id[5:] in explicit_targets or _candidate_role(item.node_id[5:], query, query_shape, explicit_targets, []) != "generic_entrypoint"
            )
        ),
        None,
    )

    return {node_id for node_id in (caller, middle, downstream) if node_id}
1023
+
1024
def _collect_repo_modules(repo_path: Path) -> tuple[list, dict, dict, dict, str, nx.DiGraph]:
    """Scan *repo_path* into (modules, file_text, function_snippets, class_snippets, signature, graph).

    Results are cached in memory (_REPO_CACHE, keyed by a content signature)
    and on disk. Cache hits return an empty ``modules`` list — parsed modules
    themselves are not cached, only their derived artifacts.
    """
    config = ScannerConfig.from_extensions(
        sorted(_ALL_CONTEXT_EXTENSIONS),
        include_hidden=False,
    )
    config.exclude_globs = _EXCLUDE_GLOBS
    manifest = scan_repository(repo_path, config=config)
    signature = _repo_signature(repo_path, manifest.files)

    # Fast path: in-memory cache keyed by the repo signature.
    cache_hit = _REPO_CACHE.get(signature)
    if cache_hit is not None:
        graph, file_text, function_snippets, class_snippets = cache_hit
        return [], file_text, function_snippets, class_snippets, signature, graph

    # Second chance: disk cache, promoted into the in-memory cache when fresh.
    disk_cache = _load_disk_cache(_cache_path(repo_path))
    if disk_cache is not None:
        cached_sig, graph, file_text, function_snippets, class_snippets = disk_cache
        if cached_sig == signature:
            _REPO_CACHE.clear()
            _REPO_CACHE[signature] = (graph, file_text, function_snippets, class_snippets)
            return [], file_text, function_snippets, class_snippets, signature, graph

    modules = []
    file_text: dict[str, str] = {}
    function_snippets: dict[str, str] = {}
    class_snippets: dict[str, str] = {}

    graph = nx.DiGraph()

    for entry in manifest.files:
        file_rel = entry.relative_path.as_posix()
        file_path = repo_path / entry.relative_path

        try:
            source = file_path.read_text(encoding="utf-8")
        except Exception:
            # Best-effort: unreadable or non-UTF-8 files are skipped entirely.
            continue

        file_text[file_rel] = source
        file_node_id = f"file:{file_rel}"
        graph.add_node(
            file_node_id,
            type="file",
            label=file_rel,
            path=file_rel,
            file_class=_classify_path(file_rel),
        )

        # Python sources additionally get parsed into function/class snippets.
        if entry.suffix in {".py", ".pyi"}:
            module = parse_python_source(source, file=Path(file_rel))
            modules.append(module)

            lines = source.splitlines()
            for fn in module.functions:
                # Clamp line ranges defensively before slicing the source.
                start = max(fn.start_line, 1)
                end = max(fn.end_line, start)
                snippet = "\n".join(lines[start - 1 : end]).strip()
                node_id = f"function:{Path(fn.file).as_posix()}::{fn.name}"
                if snippet:
                    function_snippets[node_id] = snippet

            for cls in module.classes:
                start = max(cls.start_line, 1)
                end = max(cls.end_line, start)
                snippet = "\n".join(lines[start - 1 : end]).strip()
                node_id = f"class:{Path(cls.file).as_posix()}::{cls.name}"
                if snippet:
                    class_snippets[node_id] = snippet

    # Overlay call/variable/structure graphs onto the plain file-node graph.
    if modules:
        graph = nx.compose(
            graph,
            nx.compose(
                nx.compose(build_call_graph(modules), build_variable_graph(modules)),
                build_code_structure_graph(modules),
            ),
        )

    # Single-entry in-memory cache: clear before storing the fresh result.
    _REPO_CACHE.clear()
    _REPO_CACHE[signature] = (graph, file_text, function_snippets, class_snippets)
    _save_disk_cache(
        _cache_path(repo_path),
        signature=signature,
        graph=graph,
        file_text=file_text,
        function_snippets=function_snippets,
        class_snippets=class_snippets,
    )
    return modules, file_text, function_snippets, class_snippets, signature, graph
1113
+
1114
+
1115
def _frontend_bias(query: str) -> bool:
    """True when the lowercased query mentions any configured frontend keyword."""
    lowered = query.lower()
    for keyword in _FRONTEND_KEYWORDS:
        if keyword in lowered:
            return True
    return False
1118
+
1119
+
1120
def _boost_score(
    node_id: str,
    base_score: float,
    query: str,
    intent: str | None,
    strong_paths: list[str] | None = None,
    file_text: dict[str, str] | None = None,
) -> float:
    """Add path/class/adjacency/support boosts to *base_score* for file and symbol nodes.

    Node ids that are not file/function/class pass through unchanged.
    """
    boosted = base_score
    strong_paths = strong_paths or []
    file_text = file_text or {}
    if node_id.startswith("file:"):
        path = node_id[len("file:") :]
        suffix = Path(path).suffix.lower()
        boosted += _path_match_score(path, query)
        boosted += _class_weight(path, query, intent)
        boosted += _adjacency_boost(path, query, intent, strong_paths, file_text)
        boosted += _support_role_bonus(path, query, strong_paths, file_text)
        if _frontend_bias(query) and suffix in _FRONTEND_EXTENSIONS:
            boosted += 0.2  # frontend queries favour frontend file types
        if suffix in {".py", ".pyi", ".js", ".jsx", ".ts", ".tsx"}:
            boosted += 0.05  # small generic preference for source files
        return boosted

    if node_id.startswith(("function:", "class:")):
        # Symbol nodes are boosted via their containing file's path.
        file_path = node_id.split(":", 1)[1].split("::", 1)[0]
        boosted += _path_match_score(file_path, query)
        boosted += _class_weight(file_path, query, intent)
        boosted += _adjacency_boost(file_path, query, intent, strong_paths, file_text)
        boosted += _support_role_bonus(file_path, query, strong_paths, file_text)
        return boosted + 0.2  # flat extra bump for symbol-level granularity

    return boosted
1153
+
1154
+
1155
def _supplemental_file_snippets(
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    strong_paths: list[str],
    *,
    limit: int = 12,
) -> list[RankedSnippet]:
    """Score every file directly against the query and return extra candidates.

    Debug-intent queries hold general docs to a slightly higher bar; system-wide
    queries additionally cap results at three files per top-level family.
    """
    supplemental: list[RankedSnippet] = []
    effective_intent = _effective_intent(query, intent)
    system_query = _is_system_query(query)
    for path, text in file_text.items():
        path_score = _path_match_score(path, query)
        content_score = _content_match_score(text[:4000], query)
        class_score = _class_weight(path, query, intent)
        adjacency = _adjacency_boost(path, query, intent, strong_paths, file_text)
        support_bonus = _support_role_bonus(path, query, strong_paths, file_text)
        total = path_score + content_score + class_score + adjacency + support_bonus
        if effective_intent == "debug" and _classify_path(path) == "general_doc":
            # Stricter threshold for general docs while debugging.
            if total < 0.2:
                continue
        elif total <= 0.18:
            continue
        supplemental.append(
            RankedSnippet(
                node_id=f"file:{path}",
                content=_snippet_from_lines(text.splitlines(), max_lines=120),
                score=total,
            )
        )
    supplemental.sort(key=lambda item: (-item.score, item.node_id))
    if not system_query:
        return supplemental[:limit]

    # System queries: enforce per-family diversity (max 3 per top-level dir).
    selected: list[RankedSnippet] = []
    family_counts: dict[str, int] = {}
    for item in supplemental:
        family = Path(item.node_id[5:]).parts[0] if Path(item.node_id[5:]).parts else item.node_id
        if family_counts.get(family, 0) >= 3:
            continue
        selected.append(item)
        family_counts[family] = family_counts.get(family, 0) + 1
        if len(selected) >= limit:
            break
    return selected
1200
+
1201
+
1202
def _top_anchor_paths(ranked: list[RankedSnippet], *, limit: int = 6) -> list[str]:
    """Collect up to *limit* distinct code-file paths from the best-scored snippets."""
    ordered = sorted(ranked, key=lambda snippet: (-snippet.score, snippet.node_id))
    anchors: list[str] = []
    known: set[str] = set()
    for snippet in ordered:
        path = _node_file_path(snippet.node_id)
        if path is None or path in known:
            continue
        if _classify_path(path) != "code":
            continue
        known.add(path)
        anchors.append(path)
        if len(anchors) >= limit:
            break
    return anchors
1221
+
1222
+
1223
def _seed_anchor_paths(file_text: dict[str, str], query: str, intent: str | None, *, limit: int = 4) -> list[str]:
    """Bootstrap anchor paths directly from file contents when ranking found none."""
    scored: list[tuple[float, str]] = []
    for path, text in file_text.items():
        if _classify_path(path) != "code":
            continue
        total = (
            _path_match_score(path, query)
            + _content_match_score(text[:2000], query)
            + _class_weight(path, query, intent)
            + _support_role_bonus(path, query, [], file_text)
        )
        if total >= 0.35:
            scored.append((total, path))
    scored.sort(key=lambda entry: (-entry[0], entry[1]))
    return [path for _, path in scored[:limit]]
1237
+
1238
+
1239
def _repair_candidate_bonus(path: str, query: str, intent: str | None, anchor_paths: list[str], file_text: dict[str, str]) -> float:
    """Extra credit for repair candidates that support or sit near the anchors (capped at 0.55)."""
    if not anchor_paths:
        return 0.0
    total = _support_role_bonus(path, query, anchor_paths, file_text)
    total += _adjacency_boost(path, query, intent, anchor_paths, file_text)
    if _is_system_query(query) and _family_tokens(path) & _SUPPORT_ROLE_TOKENS:
        total += 0.08
    here = Path(path)
    for anchor in anchor_paths:
        other = Path(anchor)
        if here.parent == other.parent:
            total += 0.08
        if here.parts and other.parts and here.parts[0] == other.parts[0]:
            total += 0.05
    return min(0.55, total)
1255
+
1256
+
1257
def _support_promotion_enabled(query: str, intent: str | None) -> bool:
    """Support-file promotion applies only to debug/explore/edit/refactor intents."""
    if _effective_intent(query, intent) not in {"debug", "explore", "edit", "refactor"}:
        return False
    if _query_terms(query) & _SUPPORT_PROMOTION_TERMS:
        return True
    return _is_system_query(query)
1262
+
1263
+
1264
def _support_promotion_score(path: str, query: str, anchor_paths: list[str], file_text: dict[str, str]) -> float:
    """Score how strongly *path* deserves promotion as a support file near the anchors.

    Capped at 0.7; returns 0.0 when no anchors exist.
    """
    if not anchor_paths:
        return 0.0
    candidate = Path(path)
    tokens = _family_tokens(path)
    score = 0.0
    if tokens & _SUPPORT_ROLE_TOKENS:
        score += 0.14  # any family token marks a support role
    if candidate.stem.lower() in _SUPPORT_ROLE_TOKENS:
        score += 0.12  # the filename itself is a support-role name
    # Direct query relevance of the path, capped so it cannot dominate.
    score += min(0.22, _path_match_score(path, query))

    for anchor in anchor_paths:
        anchor_path = Path(anchor)
        anchor_text = file_text.get(anchor, "").lower()
        if candidate.parent == anchor_path.parent:
            score += 0.16  # same directory as an anchor
        if candidate.parts and anchor_path.parts and candidate.parts[0] == anchor_path.parts[0]:
            score += 0.06  # same top-level family
        if tokens & _family_tokens(anchor):
            score += 0.08  # shared family tokens with the anchor
        if anchor_text and any(token in anchor_text for token in _reference_tokens(path)):
            score += 0.18  # the anchor's text references this file
    return min(0.7, score)
1288
+
1289
+
1290
def _promoted_support_file_snippets(
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    anchor_paths: list[str],
    existing_ids: set[str],
    *,
    limit: int = 2,
) -> list[RankedSnippet]:
    """Promote up to *limit* unseen support code files whose promotion score clears 0.38."""
    if not _support_promotion_enabled(query, intent):
        return []

    promoted: list[RankedSnippet] = []
    for path, text in file_text.items():
        node_id = f"file:{path}"
        if node_id in existing_ids:
            continue
        if _classify_path(path) != "code":
            continue
        promotion_score = _support_promotion_score(path, query, anchor_paths, file_text)
        if promotion_score < 0.38:
            continue
        # Final score adds direct content relevance on top of the promotion score.
        total = promotion_score + _content_match_score(text[:3000], query)
        promoted.append(
            RankedSnippet(
                node_id=node_id,
                content=_snippet_from_lines(text.splitlines(), max_lines=120),
                score=total,
            )
        )

    promoted.sort(key=lambda item: (-item.score, item.node_id))
    return promoted[:limit]
1323
+
1324
+
1325
def _support_priority_ids(ranked: list[RankedSnippet], query: str, intent: str | None) -> set[str]:
    """Pick up to two already-ranked support code files to protect during selection."""
    if not _support_promotion_enabled(query, intent):
        return set()
    anchor_paths = _top_anchor_paths(ranked)
    if not anchor_paths:
        return set()

    candidates: list[tuple[float, str]] = []
    for item in ranked:
        if not item.node_id.startswith("file:"):
            continue
        path = item.node_id[5:]
        if _classify_path(path) != "code":
            continue
        # NOTE(review): file_text is passed empty here, so reference-text bonuses
        # inside _support_promotion_score cannot fire — confirm this is intended.
        support_score = _support_promotion_score(path, query, anchor_paths, {})
        support_tokens = _family_tokens(path) | {Path(path).stem.lower()}
        if not (support_tokens & _SUPPORT_ROLE_TOKENS):
            continue
        # Weak support score is tolerated only for already-high-ranked files.
        if support_score < 0.2 and item.score < 1.2:
            continue
        candidates.append((item.score + support_score, item.node_id))

    candidates.sort(key=lambda item: (-item[0], item[1]))
    return {node_id for _, node_id in candidates[:2]}
1349
+
1350
+
1351
def _collapse_support_query_snippets(
    ranked: list[RankedSnippet],
    query: str,
    intent: str | None,
    file_text: dict[str, str],
) -> tuple[list[RankedSnippet], set[str]]:
    """Collapse symbol entries of protected support files and demote general markdown.

    Returns the filtered+reordered list plus the protected node ids; a no-op
    (original list, empty set) when no support files qualify.
    """
    support_priority_ids = _support_priority_ids(ranked, query, intent)
    if not support_priority_ids:
        return ranked, set()

    retained_files = {node_id[5:] for node_id in support_priority_ids if node_id.startswith("file:")}
    filtered: list[RankedSnippet] = []
    for item in ranked:
        if item.node_id.startswith(("function:", "class:")):
            parent_path = item.node_id.split(":", 1)[1].split("::", 1)[0]
            # Drop symbol entries whose whole file is already retained.
            if parent_path in retained_files:
                continue
        if item.node_id.startswith("file:"):
            path = item.node_id[5:]
            # With support files retained, only operational markdown survives.
            if path.endswith(".md") and retained_files:
                if _classify_path(path) != "operational_doc":
                    continue
        filtered.append(item)

    # Order: protected support files first, then other files, then symbols, by score.
    filtered.sort(
        key=lambda item: (
            0 if item.node_id in support_priority_ids else 1,
            0 if item.node_id.startswith("file:") else 1,
            -item.score,
            item.node_id,
        )
    )
    return filtered, support_priority_ids
1384
+
1385
+
1386
def _repair_file_snippets(
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    ranked: list[RankedSnippet],
    existing_ids: set[str],
    *,
    limit: int = 4,
) -> list[RankedSnippet]:
    """Recover up to *limit* extra files clustered around the anchors.

    Anchors come from the ranked list, falling back to content-seeded anchors;
    with no anchors at all nothing is recovered.
    """
    anchor_paths = _top_anchor_paths(ranked)
    if not anchor_paths:
        # No ranked anchors: try to seed some straight from file contents.
        anchor_paths = _seed_anchor_paths(file_text, query, intent)
    if not anchor_paths:
        return []

    repair: list[RankedSnippet] = []
    promoted_ids: set[str] = set()
    # Support-file promotions claim up to two of the slots first.
    for item in _promoted_support_file_snippets(file_text, query, intent, anchor_paths, existing_ids, limit=min(2, limit)):
        repair.append(item)
        promoted_ids.add(item.node_id)

    for path, text in file_text.items():
        node_id = f"file:{path}"
        if node_id in existing_ids or node_id in promoted_ids:
            continue
        if _classify_path(path) == "general_doc":
            continue  # general docs are never repair candidates
        base = _path_match_score(path, query) + _class_weight(path, query, intent)
        content = _content_match_score(text[:4000], query)
        repair_bonus = _repair_candidate_bonus(path, query, intent, anchor_paths, file_text)
        total = base + content + repair_bonus
        if total < 0.45:
            continue
        repair.append(
            RankedSnippet(
                node_id=node_id,
                content=_snippet_from_lines(text.splitlines(), max_lines=120),
                score=total,
            )
        )

    repair.sort(key=lambda item: (-item.score, item.node_id))
    return repair[:limit]
1429
+
1430
+
1431
def _selected_anchor_paths(selected: tuple[RankedSnippet, ...], query: str, intent: str | None) -> list[str]:
    """List selected code files that carry support-role tokens, in selection order."""
    if not _support_promotion_enabled(query, intent):
        return []
    found: list[str] = []
    visited: set[str] = set()
    for snippet in selected:
        path = _node_file_path(snippet.node_id)
        if path is None or path in visited:
            continue
        if _classify_path(path) != "code":
            continue
        role_tokens = _family_tokens(path) | {Path(path).stem.lower()}
        if role_tokens & _SUPPORT_ROLE_TOKENS:
            visited.add(path)
            found.append(path)
    return found
1453
+
1454
+
1455
def _reference_fallback_snippets(
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    selected: tuple[RankedSnippet, ...],
    existing_ids: set[str],
    *,
    limit: int = 2,
) -> list[RankedSnippet]:
    """Recover support files the selected anchors textually reference but ranking missed.

    A candidate must be referenced by at least one anchor's text and clear a
    combined score of 0.72; at most *limit* files are returned.
    """
    anchors = _selected_anchor_paths(selected, query, intent)
    if not anchors:
        return []

    recovered: list[RankedSnippet] = []
    for path, text in file_text.items():
        node_id = f"file:{path}"
        if node_id in existing_ids:
            continue
        if _classify_path(path) != "code":
            continue
        tokens = _family_tokens(path) | {Path(path).stem.lower()}
        if not (tokens & _SUPPORT_ROLE_TOKENS):
            continue  # only support-role files qualify for this fallback
        candidate = Path(path)
        reference_hits = 0
        same_dir_hits = 0
        family_hits = 0
        for anchor in anchors:
            anchor_path = Path(anchor)
            anchor_text = file_text.get(anchor, "").lower()
            if candidate.parent == anchor_path.parent:
                same_dir_hits += 1
            if candidate.parts and anchor_path.parts and candidate.parts[0] == anchor_path.parts[0]:
                family_hits += 1
            if anchor_text and any(token in anchor_text for token in _reference_tokens(path)):
                reference_hits += 1
        if reference_hits == 0:
            # Being referenced by an anchor's text is mandatory.
            continue
        total = (
            0.5 * reference_hits
            + 0.12 * same_dir_hits
            + 0.08 * family_hits
            + _path_match_score(path, query)
            + _content_match_score(text[:3000], query)
            + _class_weight(path, query, intent)
        )
        if total < 0.72:
            continue
        recovered.append(
            RankedSnippet(
                node_id=node_id,
                content=_snippet_from_lines(text.splitlines(), max_lines=120),
                score=total,
            )
        )

    recovered.sort(key=lambda item: (-item.score, item.node_id))
    return recovered[:limit]
1513
+
1514
+
1515
def _apply_reference_fallback(
    ranked: list[RankedSnippet],
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    payload,
) -> tuple[list[RankedSnippet], set[str], str | None]:
    """Append reference-search recoveries to *ranked* (mutated in place) and float them up."""
    known = {snippet.node_id for snippet in ranked}
    extras = _reference_fallback_snippets(file_text, query, intent, payload.snippets, known)
    if not extras:
        return ranked, set(), None

    extra_ids = {snippet.node_id for snippet in extras}

    def order(snippet: RankedSnippet) -> tuple[int, int, float, str]:
        recovered_first = 0 if snippet.node_id in extra_ids else 1
        files_first = 0 if snippet.node_id.startswith("file:") else 1
        return (recovered_first, files_first, -snippet.score, snippet.node_id)

    ranked.extend(extras)
    ranked.sort(key=order)
    return ranked, extra_ids, "reference_search"
1538
+
1539
+
1540
@dataclass(frozen=True, slots=True)
class _ChannelCandidate:
    """One retrieval candidate produced by a single channel (target/lex/vec/expand/adj)."""

    node_id: str  # graph node id, e.g. "file:path" or "function:path::name"
    channel: str  # which retrieval channel produced this candidate
    score: float  # channel-local relevance score
    content: str  # snippet text attached to the candidate
    rationale: str  # human-readable scoring breakdown for debugging
1547
+
1548
+
1549
@dataclass(frozen=True, slots=True)
class _AdaptiveCompanion:
    """A companion file considered for inclusion alongside a primary match."""

    path: str  # repo-relative file path
    score: float  # combined companion score
    path_score: float  # path-vs-query match component
    same_family: bool  # shares a top-level family with the primary match
    role: str  # role label — presumably a support token or "module"; confirm against producer
1556
+
1557
+
1558
+ def _node_file_path(node_id: str) -> str | None:
1559
+ if node_id.startswith("file:"):
1560
+ return node_id[5:]
1561
+ if node_id.startswith(("function:", "class:")):
1562
+ return node_id.split(":", 1)[1].split("::", 1)[0]
1563
+ return None
1564
+
1565
+
1566
def _file_role(path: str | None) -> str:
    """Name the role a file plays: a support token, a doc class, or plain 'module'."""
    if not path:
        return "unknown"
    stem = Path(path).stem.lower()
    if stem in _SUPPORT_ROLE_TOKENS:
        return stem
    support_matches = [token for token in sorted(_family_tokens(path)) if token in _SUPPORT_ROLE_TOKENS]
    if support_matches:
        return support_matches[0]
    kind = _classify_path(path)
    if kind in {"operational_doc", "general_doc"}:
        return kind
    return "module"
1582
+
1583
+
1584
+ def _candidate_family(path: str | None) -> str:
1585
+ if not path:
1586
+ return "unknown"
1587
+ candidate = Path(path)
1588
+ if len(candidate.parts) >= 2:
1589
+ return "/".join(candidate.parts[:2])
1590
+ if candidate.parts:
1591
+ return candidate.parts[0]
1592
+ return candidate.stem
1593
+
1594
+
1595
def _candidate_content(
    node_id: str,
    file_text: dict[str, str],
    function_snippets: dict[str, str],
    class_snippets: dict[str, str],
) -> str:
    """Best available text for a node: function body, then class body, then file head."""
    for table in (function_snippets, class_snippets):
        found = table.get(node_id)
        if found is not None:
            return found or ""
    rel = _node_file_path(node_id)
    if rel:
        source = file_text.get(rel, "")
        if source:
            return _snippet_from_lines(source.splitlines(), max_lines=120) or ""
    return ""
1611
+
1612
+
1613
def _query_variant(query: str, channel: str) -> str:
    """For the 'expand' channel, append the sorted query terms; otherwise pass through."""
    if channel == "expand":
        terms = sorted(_query_terms(query))
        if terms:
            return f"{query} {' '.join(terms)}"
    return query
1618
+
1619
+
1620
def _file_channel_candidates(
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    *,
    channel: str,
    anchor_paths: list[str] | None = None,
    limit: int = 16,
) -> list[_ChannelCandidate]:
    """Score all files for one retrieval channel ("lex", "expand", or "adj").

    Each channel uses its own query variant, bonus mix, and acceptance
    threshold; the "adj" channel requires anchors and only considers code files.
    Returns at most *limit* candidates, best first.
    """
    if channel == "adj" and not anchor_paths:
        return []

    variant = _query_variant(query, channel)
    out: list[_ChannelCandidate] = []
    for path, text in file_text.items():
        file_class = _classify_path(path)
        if channel == "adj" and file_class != "code":
            continue
        path_score = _path_match_score(path, variant)
        content_score = _content_match_score(text[:4000], variant)
        class_score = _class_weight(path, query, intent)
        adjacency = 0.0
        support_bonus = 0.0
        threshold = 0.28

        if channel == "lex":
            threshold = 0.22  # lexical channel is the most permissive
        elif channel == "expand":
            # Expanded queries lean partly on support-role proximity.
            support_bonus = 0.6 * _support_role_bonus(path, variant, anchor_paths or [], file_text)
            threshold = 0.3
        elif channel == "adj":
            adjacency = _repair_candidate_bonus(path, query, intent, anchor_paths or [], file_text)
            support_bonus = _support_role_bonus(path, query, anchor_paths or [], file_text)
            threshold = 0.46  # adjacency channel is the strictest

        total = path_score + content_score + class_score + adjacency + support_bonus
        if file_class == "general_doc" and _effective_intent(query, intent) == "debug":
            total -= 0.2  # general docs are demoted while debugging
        if total < threshold:
            continue

        out.append(
            _ChannelCandidate(
                node_id=f"file:{path}",
                channel=channel,
                score=total,
                content=_snippet_from_lines(text.splitlines(), max_lines=120),
                rationale=f"path={path_score:.2f},content={content_score:.2f},adj={adjacency:.2f},support={support_bonus:.2f}",
            )
        )

    out.sort(key=lambda item: (-item.score, item.node_id))
    return out[:limit]
1673
+
1674
+
1675
def _explicit_file_channel_candidates(
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    *,
    limit: int = 8,
) -> list[_ChannelCandidate]:
    """Turn files explicitly named in the query into high-priority 'target' candidates."""
    max_lines = 220 if _is_edit_like_query(query, intent) else 120
    results: list[_ChannelCandidate] = []
    for score, rel_path in _mentioned_file_paths(file_text, query)[:limit]:
        body = file_text.get(rel_path, "")
        if not body:
            continue
        results.append(
            _ChannelCandidate(
                node_id=f"file:{rel_path}",
                channel="target",
                score=score + 1.0,
                content=_snippet_from_lines(body.splitlines(), max_lines=max_lines),
                rationale=f"explicit_file={score:.2f}",
            )
        )
    return results
1697
+
1698
+
1699
def _vector_channel_candidates(
    graph: nx.DiGraph,
    query: str,
    file_text: dict[str, str],
    function_snippets: dict[str, str],
    class_snippets: dict[str, str],
    *,
    top_k: int,
) -> list[_ChannelCandidate]:
    """Wrap hybrid-retrieval hits as 'vec' channel candidates, skipping empty bodies."""
    results: list[_ChannelCandidate] = []
    for hit in hybrid_retrieve(graph, query, top_k=top_k):
        body = _candidate_content(hit.node_id, file_text, function_snippets, class_snippets)
        if not body:
            continue
        results.append(
            _ChannelCandidate(
                node_id=hit.node_id,
                channel="vec",
                score=hit.score,
                content=body,
                rationale=hit.rationale,
            )
        )
    return results
1723
+
1724
+
1725
+ def _fuse_context_channels(
1726
+ channel_map: dict[str, list[_ChannelCandidate]],
1727
+ query: str,
1728
+ intent: str | None,
1729
+ file_text: dict[str, str],
1730
+ *,
1731
+ explicit_targets: set[str] | None = None,
1732
+ query_shape: str | None = None,
1733
+ limit: int = 48,
1734
+ ) -> tuple[list[RankedSnippet], dict[str, dict[str, object]]]:
1735
+ channel_weights = {
1736
+ "target": 1.5,
1737
+ "lex": 1.0,
1738
+ "vec": 1.15,
1739
+ "expand": 0.9,
1740
+ "adj": 0.95,
1741
+ "fallback": 1.05,
1742
+ }
1743
+ merged: dict[str, dict[str, object]] = {}
1744
+ explicit_targets = explicit_targets or set()
1745
+ resolved_query_shape = query_shape or _query_shape(query, intent, explicit_targets)
1746
+
1747
+ for channel_name in ("target", "lex", "vec", "expand", "adj", "fallback"):
1748
+ candidates = channel_map.get(channel_name, [])
1749
+ candidates = sorted(candidates, key=lambda item: (-item.score, item.node_id))
1750
+ for rank, item in enumerate(candidates, start=1):
1751
+ entry = merged.setdefault(
1752
+ item.node_id,
1753
+ {
1754
+ "rrf": 0.0,
1755
+ "best_score": item.score,
1756
+ "content": item.content,
1757
+ "channels": set(),
1758
+ "rationales": [],
1759
+ },
1760
+ )
1761
+ entry["rrf"] = float(entry["rrf"]) + channel_weights.get(channel_name, 1.0) / (50.0 + rank)
1762
+ entry["best_score"] = max(float(entry["best_score"]), item.score)
1763
+ entry["content"] = entry["content"] or item.content
1764
+ cast_channels = entry["channels"]
1765
+ assert isinstance(cast_channels, set)
1766
+ cast_channels.add(channel_name)
1767
+ cast_rationales = entry["rationales"]
1768
+ assert isinstance(cast_rationales, list)
1769
+ cast_rationales.append(f"{channel_name}:{item.rationale}")
1770
+
1771
+ preliminary = sorted(
1772
+ merged.items(),
1773
+ key=lambda pair: (-float(pair[1]["rrf"]), -float(pair[1]["best_score"]), pair[0]),
1774
+ )
1775
+ strong_paths: list[str] = []
1776
+ seen_paths: set[str] = set()
1777
+ for node_id, _ in preliminary:
1778
+ path = _node_file_path(node_id)
1779
+ if not path or _classify_path(path) != "code":
1780
+ continue
1781
+ if path in seen_paths:
1782
+ continue
1783
+ strong_paths.append(path)
1784
+ seen_paths.add(path)
1785
+ if len(strong_paths) >= 8:
1786
+ break
1787
+
1788
+ ranked: list[RankedSnippet] = []
1789
+ attached: dict[str, dict[str, object]] = {}
1790
+ for node_id, entry in preliminary:
1791
+ path = _node_file_path(node_id)
1792
+ channels = tuple(sorted(entry["channels"]))
1793
+ final_score = float(entry["rrf"]) * 16.0 + _boost_score(
1794
+ node_id,
1795
+ float(entry["best_score"]),
1796
+ query,
1797
+ intent,
1798
+ strong_paths,
1799
+ file_text,
1800
+ )
1801
+ final_score += 0.05 * len(channels)
1802
+ if "target" in channels:
1803
+ final_score += 0.55
1804
+ if path:
1805
+ final_score -= _entrypoint_penalty(path, explicit_targets)
1806
+ if "lex" in channels and "vec" in channels:
1807
+ final_score += 0.18
1808
+ if "adj" in channels:
1809
+ final_score += 0.14
1810
+ if "expand" in channels:
1811
+ final_score += 0.08
1812
+ base_role = _file_role(path)
1813
+ candidate_role = "ranked"
1814
+ if explicit_targets:
1815
+ candidate_role = _candidate_role(path, query, resolved_query_shape, explicit_targets, strong_paths)
1816
+ final_score += _role_adjustment(candidate_role, resolved_query_shape, query, intent)
1817
+ final_score += _family_competition_adjustment(path, explicit_targets, resolved_query_shape)
1818
+ final_score += _subtree_locality_adjustment(path, explicit_targets, resolved_query_shape)
1819
+ final_score += _support_config_penalty(path, candidate_role, explicit_targets)
1820
+ if base_role in _SUPPORT_ROLE_TOKENS and _support_promotion_enabled(query, intent):
1821
+ final_score += 0.1
1822
+
1823
+ ranked.append(
1824
+ RankedSnippet(
1825
+ node_id=node_id,
1826
+ content=str(entry["content"]),
1827
+ score=final_score,
1828
+ )
1829
+ )
1830
+ attached[node_id] = {
1831
+ "channels": list(channels[:4]),
1832
+ "family": _candidate_family(path),
1833
+ "file_role": base_role,
1834
+ "candidate_role": candidate_role,
1835
+ "query_shape": resolved_query_shape,
1836
+ "file_class": _classify_path(path) if path else "unknown",
1837
+ "why_included": "+".join(channels) if channels else "ranked",
1838
+ }
1839
+
1840
+ ranked.sort(key=lambda item: (-item.score, item.node_id))
1841
+ return ranked[:limit], attached
1842
+
1843
+
1844
def _selected_file_paths(selected: tuple[RankedSnippet, ...]) -> list[str]:
    """Return the unique file paths behind *selected*, preserving first-seen order.

    Snippets whose node id does not resolve to a file path are skipped.
    """
    # A dict doubles as an ordered set: insertion order is the selection order.
    ordered: dict[str, None] = {}
    for snippet in selected:
        file_path = _node_file_path(snippet.node_id)
        if file_path and file_path not in ordered:
            ordered[file_path] = None
    return list(ordered)
1854
+
1855
+
1856
def _referenced_companion_paths(
    anchor_paths: list[str],
    file_text: dict[str, str],
    selected_paths: set[str],
) -> list[str]:
    """Return code files that an anchor file textually references but that were not selected.

    A candidate file counts as referenced when any of its reference tokens
    (from `_reference_tokens`) occurs in the lower-cased body of any anchor file.
    Candidates are iterated in sorted path order, so the result is deterministic.

    Fix vs previous version: the anchor bodies were lower-cased inside the inner
    loop, i.e. once per (candidate, anchor) pair — O(paths * anchors) lowering of
    potentially large file contents. They are now lower-cased once up front;
    the returned list is unchanged.
    """
    # Hoist the loop-invariant lower-casing; drop anchors with no text, matching
    # the old `if anchor_text and ...` guard.
    anchor_bodies = [
        body
        for body in (file_text.get(anchor, "").lower() for anchor in anchor_paths)
        if body
    ]
    referenced: list[str] = []
    for path in sorted(file_text):
        if path in selected_paths or _classify_path(path) != "code":
            continue
        tokens = _reference_tokens(path)
        if any(
            any(token in body for token in tokens)
            for body in anchor_bodies
        ):
            referenced.append(path)
    return referenced
1872
+
1873
+
1874
def _context_fallback_reason(
    payload,
    query: str,
    intent: str | None,
    file_text: dict[str, str],
    attached: dict[str, dict[str, object]],
) -> str | None:
    """Judge whether the packed context is weak enough to warrant a fallback search.

    Returns one of "insufficient_context_coverage", "support_family_missing",
    "low_context_confidence", or None when the context looks acceptable.
    """
    chosen_paths = _selected_file_paths(payload.snippets)
    code_files = [p for p in chosen_paths if _classify_path(p) == "code"]
    if not code_files:
        # Nothing classified as code made it in at all.
        return "insufficient_context_coverage"

    # Files referenced from the selected code but absent from the selection.
    if _referenced_companion_paths(code_files, file_text, set(chosen_paths)):
        return "support_family_missing"

    family_set = {_candidate_family(p) for p in code_files}

    # Count selected code snippets corroborated by at least two retrieval channels.
    multi_channel_hits = 0
    for snippet in payload.snippets:
        meta = attached.get(snippet.node_id, {})
        channel_list = meta.get("channels", []) if isinstance(meta, dict) else []
        resolved = _node_file_path(snippet.node_id) or ""
        if resolved in code_files and len(channel_list) >= 2:
            multi_channel_hits += 1

    if _is_system_query(query) and (len(code_files) < 2 or len(family_set) < 2):
        # System-wide questions need breadth across files and families.
        return "insufficient_context_coverage"
    if _support_promotion_enabled(query, intent) and multi_channel_hits < 2:
        return "low_context_confidence"
    return None
1903
+
1904
+
1905
def _normal_search_fallback_snippets(
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    selected: tuple[RankedSnippet, ...],
    existing_ids: set[str],
    *,
    limit: int = 4,
) -> list[_ChannelCandidate]:
    """Score every not-yet-selected file as a fallback candidate; keep the top *limit*.

    Each candidate's score is the sum of path/content/class/adjacency/support
    signals, with a flat bonus for files the current anchors already reference.
    Candidates scoring below 0.5 are dropped.
    """
    chosen_paths = _selected_file_paths(selected)
    code_anchors = [p for p in chosen_paths if _classify_path(p) == "code"]
    companion_set = set(_referenced_companion_paths(code_anchors, file_text, set(chosen_paths)))

    scored: list[_ChannelCandidate] = []
    for path, text in file_text.items():
        candidate_id = f"file:{path}"
        if candidate_id in existing_ids:
            continue
        # General documentation is noise for debugging-style queries.
        if _classify_path(path) == "general_doc" and _effective_intent(query, intent) == "debug":
            continue
        path_score = _path_match_score(path, query)
        content_score = _content_match_score(text[:5000], query)
        total = (
            path_score
            + content_score
            + _class_weight(path, query, intent)
            + _adjacency_boost(path, query, intent, code_anchors, file_text)
            + _support_role_bonus(path, query, code_anchors, file_text)
        )
        is_companion = path in companion_set
        if is_companion:
            total += 0.75
        if total < 0.5:
            continue
        scored.append(
            _ChannelCandidate(
                node_id=candidate_id,
                channel="fallback",
                score=total,
                content=_snippet_from_lines(text.splitlines(), max_lines=120),
                rationale=f"fallback:path={path_score:.2f},content={content_score:.2f},referenced={is_companion}",
            )
        )

    scored.sort(key=lambda candidate: (-candidate.score, candidate.node_id))
    return scored[:limit]
1948
+
1949
+
1950
def _apply_normal_search_fallback(
    ranked: list[RankedSnippet],
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    payload,
    attached: dict[str, dict[str, object]],
) -> tuple[list[RankedSnippet], dict[str, dict[str, object]], set[str], str | None, bool]:
    """Inject fallback candidates when the packed context looks weak.

    Returns (ranked, attached, fallback_ids, reason, fallback_used). When the
    context is judged acceptable, the inputs come back unchanged with an empty
    id set. `ranked` and `attached` are mutated in place.
    """
    reason = _context_fallback_reason(payload, query, intent, file_text, attached)
    if reason is None:
        return ranked, attached, set(), None, False

    known_ids = {snippet.node_id for snippet in ranked}
    candidates = _normal_search_fallback_snippets(file_text, query, intent, payload.snippets, known_ids)
    if not candidates:
        # Weak context but nothing better to offer; still report the reason.
        return ranked, attached, set(), reason, True

    fused, fused_meta = _fuse_context_channels(
        {"fallback": candidates},
        query,
        intent,
        file_text,
        limit=len(candidates),
    )
    fallback_ids = {snippet.node_id for snippet in fused}
    attached.update(fused_meta)
    ranked += fused

    def _fallback_order(snippet: RankedSnippet) -> tuple:
        # Fallback snippets first, then file-level nodes, then score/id.
        return (
            snippet.node_id not in fallback_ids,
            not snippet.node_id.startswith("file:"),
            -snippet.score,
            snippet.node_id,
        )

    ranked.sort(key=_fallback_order)
    return ranked, attached, fallback_ids, reason, True
1981
+
1982
+
1983
+
1984
+ def _skeletonize_content(content: str, max_lines: int = 60) -> str:
1985
+ lines = content.splitlines()
1986
+ if len(lines) <= max_lines:
1987
+ return content
1988
+
1989
+ signature_pattern = re.compile(r"^\s*(def\s+|class\s+|export\s+|function\s+|const\s+|let\s+|var\s+|@app\.route|if\s+__name__)", re.IGNORECASE)
1990
+ selected: list[str] = []
1991
+ for line in lines:
1992
+ if signature_pattern.search(line):
1993
+ selected.append(line)
1994
+ if len(selected) >= max_lines:
1995
+ break
1996
+
1997
+ if len(selected) < min(20, max_lines):
1998
+ selected = lines[:max_lines]
1999
+
2000
+ return "\n".join(selected).strip()
2001
+
2002
+
2003
def _packaging_sets(
    ranked: list[RankedSnippet],
    attached: dict[str, dict[str, object]],
    *,
    explicit_priority_ids: set[str],
    linked_priority_ids: set[str],
    chain_priority_ids: set[str],
    mandatory_node_ids: set[str],
) -> tuple[set[str], set[str]]:
    """Partition ranked nodes into pivots (kept in full) and skeleton candidates.

    Pivots are the union of the three priority id sets; without any priority
    signal, the top two file-level snippets stand in. File nodes that are
    neither pivot nor mandatory and carry a support-ish candidate role are
    marked for skeletonization.
    """
    pivots = explicit_priority_ids | linked_priority_ids | chain_priority_ids
    if not pivots:
        # Fall back to the two best-ranked file snippets as anchors.
        for snippet in ranked:
            if snippet.node_id.startswith("file:"):
                pivots.add(snippet.node_id)
                if len(pivots) >= 2:
                    break

    demotable_roles = {"support_config", "sibling_module", "caller_or_entry", "generic_entrypoint"}
    skeletons: set[str] = set()
    for snippet in ranked:
        node_id = snippet.node_id
        if not node_id.startswith("file:"):
            continue
        if node_id in pivots or node_id in mandatory_node_ids:
            continue
        meta = attached.get(node_id, {})
        if isinstance(meta, dict) and meta.get("candidate_role") in demotable_roles:
            skeletons.add(node_id)
    return pivots, skeletons
2031
+
2032
+
2033
def _apply_packaging(
    ranked: list[RankedSnippet],
    pivot_ids: set[str],
    skeleton_ids: set[str],
    *,
    max_skeleton_lines: int = 60,
) -> list[RankedSnippet]:
    """Condense skeleton-marked snippets; every other snippet passes through unchanged.

    NOTE(review): `pivot_ids` is accepted for signature symmetry with
    `_packaging_sets` but is not consulted here — only `skeleton_ids` drives
    condensation.
    """
    return [
        RankedSnippet(
            node_id=snippet.node_id,
            score=snippet.score,
            content=_skeletonize_content(snippet.content, max_lines=max_skeleton_lines),
        )
        if snippet.node_id in skeleton_ids
        else snippet
        for snippet in ranked
    ]
2053
+
2054
def run_context(path: str, query: str, budget: int | None, intent: str | None, top_k: int = 40) -> dict:
    """Build a ranked, budget-packed context payload for *query* over *path*.

    Pipeline: collect file/function/class snippets and a code graph, fuse
    multiple retrieval channels (target/vec/lex/expand, then adj), promote
    priority snippets, skeletonize support files, pack to the token budget,
    and optionally run one normal-search fallback pass when the packed
    context looks weak.

    Args:
        path: Repository directory or a single Python file.
        query: Natural-language retrieval query.
        budget: Token budget for packing, or None for no limit.
        intent: Optional intent hint (e.g. "debug") influencing scoring.
        top_k: Candidate count for the vector channel.

    Returns:
        Dict with "query", "tokens", "snippets" (each carrying
        "attached_context" metadata including a "packaging_role"),
        "fallback_search_used", and "fallback_reason".
    """
    target = Path(path)

    # --- Corpus assembly: directory scan vs single-file parse. ---
    if target.is_dir():
        _, file_text, function_snippets, class_snippets, _, graph = _collect_repo_modules(target)
    else:
        module = parse_python_file(target)
        source = target.read_text(encoding="utf-8").splitlines()
        # Union of call, variable, and structure graphs for the one module.
        graph = nx.compose(
            nx.compose(build_call_graph((module,)), build_variable_graph((module,))),
            build_code_structure_graph((module,)),
        )
        file_rel = target.as_posix()
        file_text = {file_rel: "\n".join(source)}
        function_snippets = {}
        class_snippets = {}
        lines = source
        # Extract per-function / per-class source snippets by line range
        # (start/end are clamped to stay within a sane range).
        for fn in module.functions:
            start = max(fn.start_line, 1)
            end = max(fn.end_line, start)
            snippet = "\n".join(lines[start - 1 : end]).strip()
            node_id = f"function:{Path(fn.file).as_posix()}::{fn.name}"
            if snippet:
                function_snippets[node_id] = snippet
        for cls in module.classes:
            start = max(cls.start_line, 1)
            end = max(cls.end_line, start)
            snippet = "\n".join(lines[start - 1 : end]).strip()
            node_id = f"class:{Path(cls.file).as_posix()}::{cls.name}"
            if snippet:
                class_snippets[node_id] = snippet

    # Files the query names explicitly, and the derived query shape.
    explicit_priority_ids = _explicit_priority_ids(file_text, query, intent)
    explicit_target_paths = {node_id[5:] for node_id in explicit_priority_ids if node_id.startswith("file:")}
    query_shape = _query_shape(query, intent, explicit_target_paths)

    # --- Channel retrieval and fusion (first pass without adjacency). ---
    channels: dict[str, list[_ChannelCandidate]] = {
        "target": _explicit_file_channel_candidates(file_text, query, intent),
        "vec": _vector_channel_candidates(graph, query, file_text, function_snippets, class_snippets, top_k=top_k),
        "lex": _file_channel_candidates(file_text, query, intent, channel="lex", limit=18),
        "expand": _file_channel_candidates(file_text, query, intent, channel="expand", limit=14),
    }
    ranked, attached = _fuse_context_channels(
        channels,
        query,
        intent,
        file_text,
        explicit_targets=explicit_target_paths,
        query_shape=query_shape,
    )
    # Second fusion pass: adjacency channel needs the first pass's top anchors.
    anchor_paths = _top_anchor_paths(ranked)
    channels["adj"] = _file_channel_candidates(file_text, query, intent, channel="adj", anchor_paths=anchor_paths, limit=10)
    ranked, attached = _fuse_context_channels(
        channels,
        query,
        intent,
        file_text,
        explicit_targets=explicit_target_paths,
        query_shape=query_shape,
    )

    # --- Priority promotion. ---
    ranked, support_priority_ids = _collapse_support_query_snippets(ranked, query, intent, file_text)
    # NOTE(review): explicit_priority_ids is recomputed here from the target
    # channel, discarding the earlier _explicit_priority_ids() result (which is
    # only used above to derive explicit_target_paths) — presumably intentional.
    explicit_priority_ids = {candidate.node_id for candidate in channels.get("target", [])}
    explicit_priority_ids |= _layer_priority_ids(ranked, query, intent, explicit_priority_ids)
    linked_priority_ids = _linked_file_priority_ids(ranked, explicit_priority_ids, query_shape, query, intent)
    chain_priority_ids = _chain_quota_priority_ids(ranked, query, intent, explicit_target_paths)
    ranked = _promote_priority_first(
        ranked,
        explicit_priority_ids,
        linked_priority_ids,
        chain_priority_ids,
        explicit_target_paths,
    )

    mandatory_node_ids = _mandatory_node_ids(
        ranked,
        query,
        intent,
        support_priority_ids=support_priority_ids | linked_priority_ids | chain_priority_ids,
        explicit_priority_ids=explicit_priority_ids | linked_priority_ids | chain_priority_ids,
    )

    # --- Packaging: choose pivots, skeletonize support files, pack to budget. ---
    pivot_node_ids, skeleton_node_ids = _packaging_sets(
        ranked,
        attached,
        explicit_priority_ids=explicit_priority_ids,
        linked_priority_ids=linked_priority_ids,
        chain_priority_ids=chain_priority_ids,
        mandatory_node_ids=mandatory_node_ids,
    )
    packed_ranked = _apply_packaging(ranked, pivot_node_ids, skeleton_node_ids)

    payload = build_context(
        query,
        packed_ranked,
        token_budget=budget,
        intent=intent,
        mandatory_node_ids=mandatory_node_ids,
    )

    # --- Optional fallback pass when the packed context looks weak. ---
    ranked, attached, fallback_priority_ids, fallback_reason, fallback_search_used = _apply_normal_search_fallback(
        ranked,
        file_text,
        query,
        intent,
        payload,
        attached,
    )
    if fallback_priority_ids:
        # Fallback snippets were injected: redo promotion, mandatory-id
        # selection, packaging, and packing over the augmented ranking.
        combined_priority_ids = support_priority_ids | fallback_priority_ids | linked_priority_ids | chain_priority_ids
        ranked, support_priority_ids = _collapse_support_query_snippets(ranked, query, intent, file_text)
        combined_priority_ids |= support_priority_ids
        ranked = _promote_priority_first(
            ranked,
            explicit_priority_ids,
            linked_priority_ids,
            chain_priority_ids,
            explicit_target_paths,
        )
        mandatory_node_ids = _mandatory_node_ids(
            ranked,
            query,
            intent,
            support_priority_ids=combined_priority_ids,
            explicit_priority_ids=explicit_priority_ids | linked_priority_ids | chain_priority_ids,
        )
        pivot_node_ids, skeleton_node_ids = _packaging_sets(
            ranked,
            attached,
            explicit_priority_ids=explicit_priority_ids,
            linked_priority_ids=linked_priority_ids,
            chain_priority_ids=chain_priority_ids,
            mandatory_node_ids=mandatory_node_ids,
        )
        packed_ranked = _apply_packaging(ranked, pivot_node_ids, skeleton_node_ids)
        payload = build_context(
            query,
            packed_ranked,
            token_budget=budget,
            intent=intent,
            mandatory_node_ids=mandatory_node_ids,
        )

    # --- Serialize snippets with attached metadata and packaging roles. ---
    snippets_out: list[dict[str, object]] = []
    for snippet in payload.snippets:
        # Nodes missing from `attached` (e.g. mandatory additions) get a
        # synthesized default metadata record.
        base_meta = attached.get(
            snippet.node_id,
            {
                "channels": [],
                "family": _candidate_family(_node_file_path(snippet.node_id)),
                "file_role": _file_role(_node_file_path(snippet.node_id)),
                "candidate_role": "ranked",
                "query_shape": query_shape,
                "file_class": _classify_path(_node_file_path(snippet.node_id) or ""),
                "why_included": "selected",
            },
        )
        # Copy so the shared `attached` entries are not mutated.
        meta = dict(base_meta)
        if snippet.node_id in pivot_node_ids:
            meta["packaging_role"] = "pivot"
        elif snippet.node_id in skeleton_node_ids:
            meta["packaging_role"] = "adjacent_support"
        else:
            meta["packaging_role"] = "full"

        snippets_out.append(
            {
                "node_id": snippet.node_id,
                "score": snippet.score,
                "content": snippet.content,
                "attached_context": meta,
            }
        )

    return {
        "query": payload.query,
        "tokens": payload.total_tokens_estimate,
        "snippets": snippets_out,
        "fallback_search_used": fallback_search_used,
        "fallback_reason": fallback_reason,
    }
2235
+
2236
def _adaptive_companion_candidates(
    payload: dict,
    file_text: dict[str, str],
    query: str,
    intent: str | None,
) -> list[_AdaptiveCompanion]:
    """Score referenced-but-missing code files as adaptive companions, best first.

    Returns an empty list when the payload selected no code files at all.
    """
    as_snippets = tuple(
        RankedSnippet(
            node_id=item["node_id"],
            content=item.get("content", ""),
            score=float(item.get("score", 0.0)),
        )
        for item in payload.get("snippets", [])
    )
    chosen = _selected_file_paths(as_snippets)
    chosen_set = set(chosen)
    anchors = [p for p in chosen if _classify_path(p) == "code"]
    if not anchors:
        return []

    anchor_families = {_candidate_family(p) for p in anchors}
    scored: list[_AdaptiveCompanion] = []
    for rel_path in _referenced_companion_paths(anchors, file_text, chosen_set):
        if _classify_path(rel_path) != "code":
            continue
        body = file_text.get(rel_path, "")
        path_score = _path_match_score(rel_path, query)
        family_match = _candidate_family(rel_path) in anchor_families
        role = _file_role(rel_path)
        total = (
            path_score
            + _content_match_score(body[:4000], query)
            + _class_weight(rel_path, query, intent)
            + _support_role_bonus(rel_path, query, anchors, file_text)
            + _adjacency_boost(rel_path, query, intent, anchors, file_text)
        )
        if family_match:
            total += 0.18
        if role in _SUPPORT_ROLE_TOKENS:
            total += 0.08
        scored.append(
            _AdaptiveCompanion(
                path=rel_path,
                score=total,
                path_score=path_score,
                same_family=family_match,
                role=role,
            )
        )

    scored.sort(key=lambda companion: (-companion.score, companion.path))
    return scored
2287
+
2288
+
2289
def _adaptive_missing_companions(
    payload: dict,
    file_text: dict[str, str],
    query: str,
    intent: str | None,
    *,
    limit: int = 1,
) -> list[str]:
    """Pick up to *limit* companion file paths worth pinning into the context.

    Only fires when the payload reported "support_family_missing" and the
    query is system-level or support promotion applies; candidates must clear
    a score floor and show at least one strong affinity signal.
    """
    if payload.get("fallback_reason") != "support_family_missing":
        return []
    if not (_is_system_query(query) or _support_promotion_enabled(query, intent)):
        return []

    qualifying = [
        companion.path
        for companion in _adaptive_companion_candidates(payload, file_text, query, intent)
        if companion.score >= 1.05
        and (
            companion.same_family
            or companion.path_score >= 0.35
            or companion.role in _SUPPORT_ROLE_TOKENS
        )
    ]
    return qualifying[:limit]
2308
+
2309
+
2310
+ def _adaptive_replace_index(snippets: list[dict], family: str) -> int | None:
2311
+ candidates: list[tuple[float, int]] = []
2312
+ for idx, item in enumerate(snippets):
2313
+ attached = item.get("attached_context", {})
2314
+ if attached.get("why_included") == "adaptive_pin":
2315
+ continue
2316
+ if attached.get("family") != family:
2317
+ continue
2318
+ candidates.append((float(item.get("score", 0.0)), idx))
2319
+ if not candidates:
2320
+ return None
2321
+ candidates.sort(key=lambda item: (item[0], item[1]))
2322
+ return candidates[0][1]
2323
+
2324
+
2325
def run_context_adaptive(
    path: str,
    query: str,
    budget: int | None,
    intent: str | None,
    top_k: int = 40,
    *,
    completion_limit: int = 1,
) -> dict:
    """Run `run_context`, then adaptively pin referenced-but-missing companion files.

    When the base payload reports "support_family_missing", up to
    *completion_limit* companion files are pinned into the snippet list. Each
    pin may first evict the weakest same-family snippet (existing adaptive
    pins are never evicted); the token budget, when given, is enforced on the
    net change.

    Bug fix vs previous version: the eviction used to happen *before* the
    budget check — when the companion then failed to fit, the evicted snippet
    was silently lost (and its tokens already subtracted) without any
    replacement being added. The budget is now checked on the net change
    before anything is mutated, so a companion that does not fit leaves the
    existing selection intact.

    Returns the payload with "adaptive_completion_used",
    "adaptive_completion_reason", and "adaptive_missing_files" added.
    """
    payload = run_context(path, query, budget, intent, top_k=top_k)
    target = Path(path)
    repo_root = target if target.is_dir() else target.parent
    _, file_text, _, _, _, _ = _collect_repo_modules(repo_root)

    missing_paths = _adaptive_missing_companions(payload, file_text, query, intent, limit=completion_limit)
    if not missing_paths:
        payload["adaptive_completion_used"] = False
        payload["adaptive_completion_reason"] = None
        payload["adaptive_missing_files"] = []
        return payload

    remaining_budget = budget
    used_tokens = int(payload.get("tokens", 0))
    snippets = list(payload.get("snippets", []))
    existing_ids = {item["node_id"] for item in snippets}
    added: list[str] = []
    for rel_path in missing_paths:
        node_id = f"file:{rel_path}"
        if node_id in existing_ids:
            continue
        content = _snippet_from_lines(file_text.get(rel_path, "").splitlines(), max_lines=60)
        if not content:
            continue
        token_cost = estimate_tokens(content)
        family = _candidate_family(rel_path)
        replace_idx = _adaptive_replace_index(snippets, family)
        # Tokens freed if the weakest same-family snippet is evicted.
        freed_tokens = (
            estimate_tokens(snippets[replace_idx].get("content", ""))
            if replace_idx is not None
            else 0
        )
        # Budget check on the NET change, before any mutation (see docstring).
        if remaining_budget is not None and used_tokens - freed_tokens + token_cost > remaining_budget:
            continue
        if replace_idx is not None:
            replaced = snippets.pop(replace_idx)
            used_tokens -= freed_tokens
            existing_ids.discard(replaced["node_id"])
        snippets.append(
            {
                "node_id": node_id,
                "score": 99.0,
                "content": content,
                "attached_context": {
                    "channels": ["adaptive_pin"],
                    "family": family,
                    "file_role": _file_role(rel_path),
                    "file_class": _classify_path(rel_path),
                    "why_included": "adaptive_pin",
                },
            }
        )
        used_tokens += token_cost
        existing_ids.add(node_id)
        added.append(rel_path)

    # Adaptive pins sort first, then by score descending, then by node id.
    snippets.sort(
        key=lambda item: (
            0 if item.get("attached_context", {}).get("why_included") == "adaptive_pin" else 1,
            -float(item.get("score", 0.0)),
            item["node_id"],
        )
    )
    payload["snippets"] = snippets
    payload["tokens"] = used_tokens
    payload["adaptive_completion_used"] = bool(added)
    payload["adaptive_completion_reason"] = "missing_referenced_companion" if added else None
    payload["adaptive_missing_files"] = added
    return payload
2392
+
2393
+
2394
+
2395
+
2396
+
2397
+
2398
+
2399
+
2400
+
2401
+
2402
+
2403
+
2404
+
2405
+
2406
+
2407
+
2408
+
2409
+
2410
+
2411
+
2412
+
2413
+
2414
+
2415
+
2416
+
2417
+
2418
+
2419
+
2420
+
2421
+
2422
+
2423
+
2424
+
2425
+
2426
+