@pmaddire/gcie 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GCIE_USAGE.md +58 -1
- package/cli/app.py +18 -1
- package/cli/commands/adaptation.py +821 -553
- package/cli/commands/setup.py +53 -2
- package/package.json +1 -1
|
@@ -1,576 +1,844 @@
|
|
|
1
|
-
"""Post-initialization adaptation pipeline (accuracy rounds first, then efficiency rounds)."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from dataclasses import asdict, dataclass
|
|
6
|
-
from datetime import datetime, timezone
|
|
7
|
-
import json
|
|
8
|
-
import re
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
|
|
11
|
-
from .context import run_context
|
|
12
|
-
from .context_slices import _classify_query_family, run_context_slices
|
|
13
|
-
from .index import run_index
|
|
14
|
-
|
|
15
|
-
try:
|
|
16
|
-
from performance.context_benchmark import BENCHMARK_CASES
|
|
17
|
-
except Exception: # pragma: no cover
|
|
18
|
-
BENCHMARK_CASES = ()
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@dataclass(frozen=True, slots=True)
|
|
22
|
-
class CaseResult:
|
|
23
|
-
name: str
|
|
24
|
-
family: str
|
|
25
|
-
mode: str
|
|
26
|
-
tokens: int
|
|
27
|
-
expected_hits: int
|
|
28
|
-
expected_total: int
|
|
29
|
-
missing_expected: tuple[str, ...]
|
|
30
|
-
context_complete: bool
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@dataclass(frozen=True, slots=True)
|
|
34
|
-
class AdaptCase:
|
|
35
|
-
name: str
|
|
36
|
-
query: str
|
|
37
|
-
intent: str
|
|
38
|
-
baseline_files: tuple[str, ...]
|
|
39
|
-
expected_files: tuple[str, ...]
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
_WORD_RE = re.compile(r"[A-Za-z0-9_./-]+")
|
|
43
|
-
_SOURCE_EXTS = {".py", ".js", ".jsx", ".ts", ".tsx", ".java", ".go", ".rs", ".cs", ".cpp", ".c", ".h"}
|
|
44
|
-
_IGNORED_DIRS = {
|
|
45
|
-
".git",
|
|
46
|
-
".gcie",
|
|
47
|
-
".planning",
|
|
48
|
-
".venv",
|
|
49
|
-
"node_modules",
|
|
50
|
-
"__pycache__",
|
|
51
|
-
"dist",
|
|
52
|
-
"build",
|
|
53
|
-
"coverage",
|
|
54
|
-
}
|
|
55
|
-
_METHOD_ORDER = ["plain", "plain_gapfill", "plain_rescue", "slices"]
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def _query_keywords(text: str) -> list[str]:
|
|
59
|
-
return [t for t in _WORD_RE.findall(text.lower()) if len(t) >= 4][:8]
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
1
|
+
"""Post-initialization adaptation pipeline (accuracy rounds first, then efficiency rounds)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import asdict, dataclass
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .context import run_context
|
|
12
|
+
from .context_slices import _classify_query_family, run_context_slices
|
|
13
|
+
from .index import run_index
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from performance.context_benchmark import BENCHMARK_CASES
|
|
17
|
+
except Exception: # pragma: no cover
|
|
18
|
+
BENCHMARK_CASES = ()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True, slots=True)
class CaseResult:
    """Immutable record of how one benchmark case fared under one retrieval mode."""

    # Benchmark case identifier (mirrors AdaptCase.name).
    name: str
    # Query-family label produced by _classify_query_family.
    family: str
    # Retrieval strategy actually used (e.g. "plain_context_workflow", "slices_low").
    mode: str
    # Total token cost accumulated across the initial pass and any follow-up passes.
    tokens: int
    # Number of expected files that appeared in the retrieved context.
    expected_hits: int
    # Total number of expected files for the case.
    expected_total: int
    # Expected files that were never retrieved.
    missing_expected: tuple[str, ...]
    # True when every expected file was retrieved (missing_expected is empty).
    context_complete: bool
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True, slots=True)
class AdaptCase:
    """Benchmark case used to adapt retrieval settings after repository initialization."""

    # Unique, sanitized case identifier.
    name: str
    # Free-text retrieval query evaluated against the repository index.
    query: str
    # Retrieval intent hint forwarded to the context runners.
    intent: str
    # Files the baseline retrieval is expected to surface.
    # NOTE(review): generated cases appear to set this equal to expected_files — confirm intended.
    baseline_files: tuple[str, ...]
    # Files that must appear in context for the case to count as passing.
    expected_files: tuple[str, ...]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
_WORD_RE = re.compile(r"[A-Za-z0-9_./-]+")
|
|
43
|
+
_SOURCE_EXTS = {".py", ".js", ".jsx", ".ts", ".tsx", ".java", ".go", ".rs", ".cs", ".cpp", ".c", ".h"}
|
|
44
|
+
_IGNORED_DIRS = {
|
|
45
|
+
".git",
|
|
46
|
+
".gcie",
|
|
47
|
+
".planning",
|
|
48
|
+
".venv",
|
|
49
|
+
"node_modules",
|
|
50
|
+
"__pycache__",
|
|
51
|
+
"dist",
|
|
52
|
+
"build",
|
|
53
|
+
"coverage",
|
|
54
|
+
}
|
|
55
|
+
_METHOD_ORDER = ["plain", "plain_gapfill", "plain_rescue", "slices"]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _query_keywords(text: str) -> list[str]:
|
|
59
|
+
return [t for t in _WORD_RE.findall(text.lower()) if len(t) >= 4][:8]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_query_cues_for_file(repo_path: Path, rel: str) -> list[str]:
|
|
63
|
+
path = repo_path / rel
|
|
64
|
+
try:
|
|
65
|
+
text = path.read_text(encoding='utf-8', errors='ignore')
|
|
66
|
+
except Exception:
|
|
67
|
+
return [Path(rel).stem.lower()]
|
|
68
|
+
|
|
69
|
+
body = text[:12000]
|
|
70
|
+
cues: list[str] = [Path(rel).stem.lower()]
|
|
71
|
+
|
|
72
|
+
patterns = [
|
|
73
|
+
r"^\s*def\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
74
|
+
r"^\s*class\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
75
|
+
r"^\s*(?:async\s+)?function\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
76
|
+
r"^\s*const\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:async\s*)?(?:\(|function\b)",
|
|
77
|
+
r"^\s*export\s+function\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
78
|
+
]
|
|
79
|
+
for pat in patterns:
|
|
80
|
+
for name in re.findall(pat, body, flags=re.MULTILINE):
|
|
81
|
+
token = str(name).lower()
|
|
82
|
+
if len(token) >= 4:
|
|
83
|
+
cues.append(token)
|
|
84
|
+
if len(cues) >= 8:
|
|
85
|
+
break
|
|
86
|
+
if len(cues) >= 8:
|
|
87
|
+
break
|
|
88
|
+
|
|
89
|
+
for route in re.findall(r"['\"](/api/[A-Za-z0-9_/{}/-]+)['\"]", body):
|
|
90
|
+
cues.append(route.lower())
|
|
91
|
+
if len(cues) >= 10:
|
|
92
|
+
break
|
|
93
|
+
|
|
94
|
+
for key in re.findall(r"\b[A-Z][A-Z0-9_]{3,}\b", body):
|
|
95
|
+
cues.append(key.lower())
|
|
96
|
+
if len(cues) >= 12:
|
|
97
|
+
break
|
|
98
|
+
|
|
99
|
+
dedup: list[str] = []
|
|
100
|
+
seen: set[str] = set()
|
|
101
|
+
for cue in cues:
|
|
102
|
+
if cue in seen:
|
|
103
|
+
continue
|
|
104
|
+
seen.add(cue)
|
|
105
|
+
dedup.append(cue)
|
|
106
|
+
if len(dedup) >= 8:
|
|
107
|
+
break
|
|
108
|
+
return dedup
|
|
109
|
+
|
|
110
|
+
def _node_to_file(node_id: str) -> str | None:
|
|
111
|
+
if node_id.startswith("file:"):
|
|
112
|
+
return node_id[5:]
|
|
113
|
+
if node_id.startswith("function:"):
|
|
114
|
+
return node_id[9:].split("::", 1)[0]
|
|
115
|
+
if node_id.startswith("class:"):
|
|
116
|
+
return node_id[6:].split("::", 1)[0]
|
|
117
|
+
return None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
|
|
121
|
+
normalized = rel_path.replace("\\", "/").lstrip("./")
|
|
122
|
+
if not plan_path or plan_path in {".", "./"}:
|
|
123
|
+
return normalized
|
|
124
|
+
base = Path(plan_path).as_posix().strip("/")
|
|
125
|
+
if normalized.startswith(base + "/") or normalized == base:
|
|
126
|
+
return normalized
|
|
127
|
+
return f"{base}/{normalized}"
|
|
128
|
+
|
|
129
|
+
|
|
82
130
|
def _family_path(expected_files: tuple[str, ...]) -> str:
|
|
83
131
|
if not expected_files:
|
|
84
132
|
return "."
|
|
133
|
+
parent_parts: list[tuple[str, ...]] = []
|
|
134
|
+
for rel in expected_files:
|
|
135
|
+
parent = Path(rel).parent
|
|
136
|
+
if str(parent) in {"", "."}:
|
|
137
|
+
parent_parts.append(tuple())
|
|
138
|
+
else:
|
|
139
|
+
parent_parts.append(tuple(parent.parts))
|
|
140
|
+
|
|
141
|
+
common: list[str] = []
|
|
142
|
+
if parent_parts:
|
|
143
|
+
shortest = min(len(parts) for parts in parent_parts)
|
|
144
|
+
for idx in range(shortest):
|
|
145
|
+
token = parent_parts[0][idx]
|
|
146
|
+
if all(parts[idx] == token for parts in parent_parts):
|
|
147
|
+
common.append(token)
|
|
148
|
+
else:
|
|
149
|
+
break
|
|
150
|
+
if common:
|
|
151
|
+
return Path(*common).as_posix()
|
|
152
|
+
|
|
85
153
|
heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
|
|
86
154
|
return next(iter(heads)) if len(heads) == 1 else "."
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
candidate
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
155
|
+
|
|
156
|
+
def _safe_scope(path: str) -> str:
|
|
157
|
+
if not path or path in {".", "./"}:
|
|
158
|
+
return "."
|
|
159
|
+
candidate = Path(path)
|
|
160
|
+
if candidate.exists() and candidate.is_dir():
|
|
161
|
+
return candidate.as_posix()
|
|
162
|
+
return "."
|
|
163
|
+
|
|
164
|
+
|
|
98
165
|
def _plan_query(case) -> tuple[str, str, int | None]:
|
|
99
166
|
path = _family_path(case.expected_files)
|
|
100
167
|
if getattr(case, "name", "") == "cli_context_command":
|
|
101
168
|
return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950
|
|
102
|
-
keywords = " ".join(_query_keywords(case.query)[:4])
|
|
103
|
-
file_terms = " ".join(case.expected_files)
|
|
104
|
-
query = f"{file_terms} {keywords}".strip()
|
|
105
|
-
budget = 1000 if len(case.expected_files) >= 2 else None
|
|
106
|
-
if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
|
|
107
|
-
budget = 800
|
|
108
|
-
return path, query, budget
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
|
|
112
|
-
path, query, budget = _plan_query(case)
|
|
113
|
-
path = _safe_scope(path)
|
|
114
|
-
payload = run_context(path, query, budget=budget, intent=case.intent)
|
|
115
|
-
files = {
|
|
116
|
-
_normalize_scoped_path(path, rel)
|
|
117
|
-
for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
|
|
118
|
-
if rel
|
|
119
|
-
}
|
|
120
|
-
expected = tuple(case.expected_files)
|
|
121
|
-
missing = [rel for rel in expected if rel not in files]
|
|
122
|
-
tokens = int(payload.get("tokens", 0) or 0)
|
|
123
|
-
mode = "plain_context_workflow"
|
|
124
|
-
|
|
125
|
-
if allow_gapfill and missing:
|
|
126
|
-
mode = "plain_context_workflow_gapfill"
|
|
127
|
-
for rel in list(missing):
|
|
128
|
-
gap_keywords = " ".join(_query_keywords(case.query)[:4])
|
|
129
|
-
gap_query = f"{rel} {gap_keywords}".strip()
|
|
130
|
-
scopes = [_safe_scope(_family_path((rel,)))]
|
|
131
|
-
budgets = [500 if rel.endswith('/main.py') or rel == 'main.py' else 900]
|
|
132
|
-
if aggressive_gapfill:
|
|
133
|
-
scopes.append('.')
|
|
134
|
-
budgets.append(max(budgets[0], 1200))
|
|
135
|
-
mode = "plain_context_workflow_gapfill_rescue"
|
|
136
|
-
for scope, gap_budget in zip(scopes, budgets):
|
|
137
|
-
gap_payload = run_context(scope, gap_query, budget=gap_budget, intent=case.intent)
|
|
138
|
-
tokens += int(gap_payload.get("tokens", 0) or 0)
|
|
139
|
-
gap_files = {
|
|
140
|
-
_normalize_scoped_path(scope, rel2)
|
|
141
|
-
for rel2 in (_node_to_file(item.get("node_id", "")) for item in gap_payload.get("snippets", []))
|
|
142
|
-
if rel2
|
|
143
|
-
}
|
|
144
|
-
files.update(gap_files)
|
|
145
|
-
missing = [m for m in expected if m not in files]
|
|
146
|
-
if not missing:
|
|
147
|
-
break
|
|
148
|
-
if not missing:
|
|
149
|
-
break
|
|
150
|
-
|
|
151
|
-
expected_hits = len(expected) - len(missing)
|
|
152
|
-
family = _classify_query_family(query)
|
|
153
|
-
return CaseResult(
|
|
154
|
-
name=case.name,
|
|
155
|
-
family=family,
|
|
156
|
-
mode=mode,
|
|
157
|
-
tokens=tokens,
|
|
158
|
-
expected_hits=expected_hits,
|
|
159
|
-
expected_total=len(expected),
|
|
160
|
-
missing_expected=tuple(missing),
|
|
161
|
-
context_complete=not missing,
|
|
162
|
-
)
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def _evaluate_slices_case(case) -> CaseResult:
|
|
166
|
-
payload = run_context_slices(
|
|
167
|
-
repo='.',
|
|
168
|
-
query=case.query,
|
|
169
|
-
profile='low',
|
|
170
|
-
stage_a_budget=300,
|
|
171
|
-
stage_b_budget=600,
|
|
172
|
-
max_total=800,
|
|
173
|
-
intent=case.intent,
|
|
174
|
-
pin=None,
|
|
175
|
-
pin_budget=200,
|
|
176
|
-
include_tests=False,
|
|
177
|
-
)
|
|
178
|
-
mode = "slices_low"
|
|
179
|
-
tokens = int(payload.get("token_estimate", payload.get("tokens", 0)) or 0)
|
|
180
|
-
files = {f for f in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", [])) if f}
|
|
181
|
-
expected = tuple(case.expected_files)
|
|
182
|
-
missing = [rel for rel in expected if rel not in files]
|
|
183
|
-
if missing:
|
|
184
|
-
mode = "slices_recall"
|
|
185
|
-
recall_payload = run_context_slices(
|
|
186
|
-
repo='.',
|
|
187
|
-
query=case.query,
|
|
188
|
-
profile='recall',
|
|
189
|
-
stage_a_budget=400,
|
|
190
|
-
stage_b_budget=800,
|
|
191
|
-
max_total=1200,
|
|
192
|
-
intent=case.intent,
|
|
193
|
-
pin=None,
|
|
194
|
-
pin_budget=300,
|
|
195
|
-
include_tests=False,
|
|
196
|
-
)
|
|
197
|
-
tokens += int(recall_payload.get("token_estimate", recall_payload.get("tokens", 0)) or 0)
|
|
198
|
-
files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in recall_payload.get("snippets", [])) if f})
|
|
199
|
-
missing = [rel for rel in expected if rel not in files]
|
|
200
|
-
if missing:
|
|
201
|
-
mode = "slices_recall_pin"
|
|
202
|
-
for rel in list(missing):
|
|
203
|
-
pin_payload = run_context_slices(
|
|
204
|
-
repo='.',
|
|
205
|
-
query=case.query,
|
|
206
|
-
profile='recall',
|
|
207
|
-
stage_a_budget=400,
|
|
208
|
-
stage_b_budget=800,
|
|
209
|
-
max_total=1200,
|
|
210
|
-
intent=case.intent,
|
|
211
|
-
pin=rel,
|
|
212
|
-
pin_budget=300,
|
|
213
|
-
include_tests=False,
|
|
214
|
-
)
|
|
215
|
-
tokens += int(pin_payload.get("token_estimate", pin_payload.get("tokens", 0)) or 0)
|
|
216
|
-
files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in pin_payload.get("snippets", [])) if f})
|
|
217
|
-
missing = [m for m in expected if m not in files]
|
|
218
|
-
if not missing:
|
|
219
|
-
break
|
|
220
169
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
)
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
def _evaluate_case_with_method(case, method: str) -> CaseResult:
|
|
236
|
-
if method == "plain":
|
|
237
|
-
return _evaluate_plain_case(case, allow_gapfill=False)
|
|
238
|
-
if method == "plain_gapfill":
|
|
239
|
-
return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=False)
|
|
240
|
-
if method == "plain_rescue":
|
|
241
|
-
return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=True)
|
|
242
|
-
return _evaluate_slices_case(case)
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
def _summarize(label: str, rows: list[CaseResult]) -> dict:
|
|
246
|
-
case_count = len(rows)
|
|
247
|
-
pass_count = sum(1 for row in rows if row.context_complete)
|
|
248
|
-
total_tokens = sum(row.tokens for row in rows)
|
|
249
|
-
hit_count = sum(row.expected_hits for row in rows)
|
|
250
|
-
hit_total = sum(row.expected_total for row in rows)
|
|
251
|
-
return {
|
|
252
|
-
"label": label,
|
|
253
|
-
"case_count": case_count,
|
|
254
|
-
"passing_cases": pass_count,
|
|
255
|
-
"full_hit_rate_pct": round((pass_count / case_count) * 100, 1) if case_count else 0.0,
|
|
256
|
-
"target_hit_rate_pct": round((hit_count / hit_total) * 100, 1) if hit_total else 0.0,
|
|
257
|
-
"total_tokens": total_tokens,
|
|
258
|
-
"tokens_per_query": round(total_tokens / case_count, 1) if case_count else 0.0,
|
|
259
|
-
"tokens_per_expected_hit": round(total_tokens / hit_count, 2) if hit_count else None,
|
|
260
|
-
"results": [asdict(row) for row in rows],
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
def _collect_source_files(repo_path: Path) -> list[str]:
|
|
265
|
-
files: list[str] = []
|
|
266
|
-
for path in repo_path.rglob('*'):
|
|
267
|
-
if not path.is_file():
|
|
170
|
+
repo_path = Path('.').resolve()
|
|
171
|
+
cue_terms: list[str] = []
|
|
172
|
+
for rel in case.expected_files:
|
|
173
|
+
cue_terms.extend(_extract_query_cues_for_file(repo_path, rel)[:3])
|
|
174
|
+
cue_terms.extend(_query_keywords(case.query)[:4])
|
|
175
|
+
|
|
176
|
+
dedup: list[str] = []
|
|
177
|
+
seen: set[str] = set()
|
|
178
|
+
for token in [*case.expected_files, *cue_terms]:
|
|
179
|
+
key = token.lower()
|
|
180
|
+
if key in seen:
|
|
268
181
|
continue
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
if path.suffix.lower() not in _SOURCE_EXTS:
|
|
273
|
-
continue
|
|
274
|
-
files.append(rel.as_posix())
|
|
275
|
-
return sorted(files)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
|
|
279
|
-
files = _collect_source_files(repo_path)
|
|
280
|
-
if not files:
|
|
281
|
-
return []
|
|
282
|
-
|
|
283
|
-
by_dir: dict[str, list[str]] = {}
|
|
284
|
-
for rel in files:
|
|
285
|
-
parent = str(Path(rel).parent).replace('\\', '/')
|
|
286
|
-
by_dir.setdefault(parent, []).append(rel)
|
|
287
|
-
|
|
288
|
-
rows: list[AdaptCase] = []
|
|
289
|
-
seen_names: set[str] = set()
|
|
290
|
-
|
|
291
|
-
def add_case(name: str, expected: tuple[str, ...], intent: str = 'explore') -> None:
|
|
292
|
-
if len(rows) >= needed:
|
|
293
|
-
return
|
|
294
|
-
safe_name = re.sub(r"[^a-zA-Z0-9_]+", "_", name).strip("_").lower() or "case"
|
|
295
|
-
if safe_name in seen_names:
|
|
296
|
-
idx = 2
|
|
297
|
-
while f"{safe_name}_{idx}" in seen_names:
|
|
298
|
-
idx += 1
|
|
299
|
-
safe_name = f"{safe_name}_{idx}"
|
|
300
|
-
seen_names.add(safe_name)
|
|
301
|
-
symbols: list[str] = []
|
|
302
|
-
for rel in expected:
|
|
303
|
-
stem = Path(rel).stem.lower()
|
|
304
|
-
symbols.extend([stem, 'flow', 'wiring'])
|
|
305
|
-
query = f"{' '.join(expected)} {' '.join(symbols[:6])}".strip()
|
|
306
|
-
rows.append(AdaptCase(name=safe_name, query=query, intent=intent, baseline_files=expected, expected_files=expected))
|
|
307
|
-
|
|
308
|
-
# single-file
|
|
309
|
-
for rel in files:
|
|
310
|
-
add_case(f"single_{Path(rel).stem}", (rel,), intent='explore')
|
|
311
|
-
if len(rows) >= max(needed // 2, 1):
|
|
182
|
+
seen.add(key)
|
|
183
|
+
dedup.append(token)
|
|
184
|
+
if len(dedup) >= 14:
|
|
312
185
|
break
|
|
186
|
+
query = " ".join(dedup).strip()
|
|
313
187
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
for idx in range(len(group) - 1):
|
|
320
|
-
add_case(f"pair_{parent}_{idx}", (group[idx], group[idx + 1]), intent='explore')
|
|
321
|
-
if len(rows) >= needed:
|
|
322
|
-
return rows[:needed]
|
|
323
|
-
|
|
324
|
-
# cross-dir pairs fallback
|
|
325
|
-
tops: dict[str, str] = {}
|
|
326
|
-
for rel in files:
|
|
327
|
-
top = Path(rel).parts[0] if Path(rel).parts else rel
|
|
328
|
-
tops.setdefault(top, rel)
|
|
329
|
-
top_files = list(tops.values())
|
|
330
|
-
for idx in range(len(top_files) - 1):
|
|
331
|
-
add_case(f"cross_{idx}", (top_files[idx], top_files[idx + 1]), intent='explore')
|
|
332
|
-
if len(rows) >= needed:
|
|
333
|
-
break
|
|
334
|
-
|
|
335
|
-
return rows[:needed]
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list[AdaptCase], str]:
|
|
339
|
-
benchmark_size = max(1, int(benchmark_size))
|
|
340
|
-
generated = _generated_cases_for_repo(repo_path, benchmark_size)
|
|
341
|
-
if generated:
|
|
342
|
-
return generated[:benchmark_size], 'generated_repo_local'
|
|
343
|
-
return [], 'none_available'
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
def _next_method(method: str) -> str:
|
|
347
|
-
try:
|
|
348
|
-
idx = _METHOD_ORDER.index(method)
|
|
349
|
-
except ValueError:
|
|
350
|
-
return _METHOD_ORDER[0]
|
|
351
|
-
return _METHOD_ORDER[min(idx + 1, len(_METHOD_ORDER) - 1)]
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
def _cheaper_method(method: str) -> str | None:
|
|
355
|
-
try:
|
|
356
|
-
idx = _METHOD_ORDER.index(method)
|
|
357
|
-
except ValueError:
|
|
358
|
-
return None
|
|
359
|
-
if idx <= 0:
|
|
360
|
-
return None
|
|
361
|
-
return _METHOD_ORDER[idx - 1]
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) -> tuple[list[CaseResult], dict, dict[str, dict]]:
|
|
365
|
-
rows: list[CaseResult] = []
|
|
366
|
-
for case in cases:
|
|
367
|
-
family = _classify_query_family(case.query)
|
|
368
|
-
method = family_policy.get(family, 'plain')
|
|
369
|
-
rows.append(_evaluate_case_with_method(case, method))
|
|
370
|
-
summary = _summarize('policy_run', rows)
|
|
371
|
-
|
|
372
|
-
by_family: dict[str, dict] = {}
|
|
373
|
-
for row in rows:
|
|
374
|
-
entry = by_family.setdefault(row.family, {'cases': 0, 'passes': 0, 'tokens': 0})
|
|
375
|
-
entry['cases'] += 1
|
|
376
|
-
entry['passes'] += 1 if row.context_complete else 0
|
|
377
|
-
entry['tokens'] += row.tokens
|
|
378
|
-
for fam, entry in by_family.items():
|
|
379
|
-
entry['pass_rate'] = round(entry['passes'] / max(1, entry['cases']), 3)
|
|
380
|
-
entry['tokens_per_case'] = round(entry['tokens'] / max(1, entry['cases']), 1)
|
|
381
|
-
|
|
382
|
-
return rows, summary, by_family
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
|
|
386
|
-
cfg_path = repo_path / '.gcie' / 'context_config.json'
|
|
387
|
-
if cfg_path.exists():
|
|
388
|
-
try:
|
|
389
|
-
cfg = json.loads(cfg_path.read_text(encoding='utf-8'))
|
|
390
|
-
if not isinstance(cfg, dict):
|
|
391
|
-
cfg = {}
|
|
392
|
-
except Exception:
|
|
393
|
-
cfg = {}
|
|
188
|
+
expected_count = len(case.expected_files)
|
|
189
|
+
if expected_count >= 3:
|
|
190
|
+
budget = 1100
|
|
191
|
+
elif expected_count == 2:
|
|
192
|
+
budget = 950
|
|
394
193
|
else:
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
cfg['adaptation_pipeline'] = {
|
|
398
|
-
'status': pipeline_status,
|
|
399
|
-
'best_label': best.get('label'),
|
|
400
|
-
'full_hit_rate_pct': best.get('full_hit_rate_pct'),
|
|
401
|
-
'tokens_per_query': best.get('tokens_per_query'),
|
|
402
|
-
'case_source': case_source,
|
|
403
|
-
'cost_analysis': cost_analysis,
|
|
404
|
-
'family_policy': family_policy,
|
|
405
|
-
'updated_at': datetime.now(timezone.utc).isoformat(),
|
|
406
|
-
}
|
|
407
|
-
cfg_path.parent.mkdir(parents=True, exist_ok=True)
|
|
408
|
-
cfg_path.write_text(json.dumps(cfg, indent=2), encoding='utf-8')
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
def run_post_init_adaptation(
|
|
412
|
-
repo: str = '.',
|
|
413
|
-
*,
|
|
414
|
-
benchmark_size: int = 10,
|
|
415
|
-
efficiency_iterations: int = 5,
|
|
416
|
-
clear_profile: bool = False,
|
|
417
|
-
) -> dict:
|
|
418
|
-
repo_path = Path(repo).resolve()
|
|
419
|
-
run_index(repo_path.as_posix())
|
|
420
|
-
|
|
421
|
-
if clear_profile:
|
|
422
|
-
from .context_slices import clear_adaptive_profile
|
|
423
|
-
|
|
424
|
-
clear_adaptive_profile(repo_path.as_posix())
|
|
425
|
-
|
|
426
|
-
cases, case_source = _select_adaptation_cases(repo_path, benchmark_size)
|
|
427
|
-
if not cases:
|
|
428
|
-
return {
|
|
429
|
-
'status': 'no_benchmark_cases',
|
|
430
|
-
'repo': repo_path.as_posix(),
|
|
431
|
-
'case_source': case_source,
|
|
432
|
-
'message': 'No repo-usable adaptation cases available.',
|
|
433
|
-
}
|
|
194
|
+
budget = 850
|
|
434
195
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
196
|
+
if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
|
|
197
|
+
budget = 800
|
|
198
|
+
return path, query, budget
|
|
199
|
+
|
|
200
|
+
def _case_family(case) -> str:
    """Classify *case* by the query family of its planned retrieval query."""
    planned = _plan_query(case)
    return _classify_query_family(planned[1])
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _build_gapfill_query(case, missing_rel: str) -> str:
    """Build a focused recovery query for one expected file the first pass missed.

    The query leads with the missing path, adds up to two sibling expected
    files as anchors, then per-file content cues and the case's own keywords,
    deduplicated case-insensitively and capped at fourteen terms.
    """
    anchors = [rel for rel in case.expected_files if rel != missing_rel][:2]
    repo_path = Path('.').resolve()

    terms: list[str] = [missing_rel, *anchors]
    for rel in (missing_rel, *anchors):
        terms.extend(_extract_query_cues_for_file(repo_path, rel)[:4])
    terms.extend(_query_keywords(case.query)[:4])

    seen: set[str] = set()
    deduped: list[str] = []
    for term in terms:
        lowered = term.lower()
        if lowered not in seen:
            seen.add(lowered)
            deduped.append(term)
            if len(deduped) >= 14:
                break

    return " ".join(deduped)
|
|
231
|
+
|
|
232
|
+
def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
    """Evaluate *case* via the plain context workflow, optionally gap-filling misses.

    Runs one scoped run_context pass; when *allow_gapfill* is set, each
    still-missing expected file triggers targeted follow-up passes (direct
    file scope when the file exists, then the file's family directory, then —
    with *aggressive_gapfill* — a whole-repo rescue at a larger budget).
    Token costs accumulate across all passes.  Returns a CaseResult with the
    hit/miss breakdown and total token spend.
    """
    path, query, budget = _plan_query(case)
    path = _safe_scope(path)
    payload = run_context(path, query, budget=budget, intent=case.intent)
    # Files surfaced by the first pass, normalized back to repo-relative paths.
    files = {
        _normalize_scoped_path(path, rel)
        for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
        if rel
    }
    expected = tuple(case.expected_files)
    missing = [rel for rel in expected if rel not in files]
    tokens = int(payload.get("tokens", 0) or 0)
    mode = "plain_context_workflow"

    if allow_gapfill and missing:
        mode = "plain_context_workflow_gapfill"
        for rel in list(missing):
            gap_query = _build_gapfill_query(case, rel)

            # Prefer direct file-targeted recovery when possible to avoid expensive broad rescues.
            direct_scope = rel if (Path(rel).exists() and Path(rel).is_file()) else None
            base_scope = _safe_scope(_family_path((rel,)))
            scopes: list[str] = []
            if direct_scope:
                scopes.append(direct_scope)
            if base_scope not in scopes:
                scopes.append(base_scope)

            # Entry-point files get a smaller budget; others a standard 900.
            budgets = [500 if rel.endswith('/main.py') or rel == 'main.py' else 900]
            if len(scopes) > 1:
                budgets.append(budgets[0])

            if aggressive_gapfill:
                # Last resort: widen to the whole repo at an enlarged budget.
                if '.' not in scopes:
                    scopes.append('.')
                budgets.append(max(budgets[0], 1200))
                mode = "plain_context_workflow_gapfill_rescue"

            for scope, gap_budget in zip(scopes, budgets):
                gap_payload = run_context(scope, gap_query, budget=gap_budget, intent=case.intent)
                tokens += int(gap_payload.get("tokens", 0) or 0)
                gap_files = {
                    _normalize_scoped_path(scope, rel2)
                    for rel2 in (_node_to_file(item.get("node_id", "")) for item in gap_payload.get("snippets", []))
                    if rel2
                }
                files.update(gap_files)
                missing = [m for m in expected if m not in files]
                if not missing:
                    break
            if not missing:
                break

    expected_hits = len(expected) - len(missing)
    family = _classify_query_family(query)
    return CaseResult(
        name=case.name,
        family=family,
        mode=mode,
        tokens=tokens,
        expected_hits=expected_hits,
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _evaluate_slices_case(case) -> CaseResult:
    """Evaluate *case* via the slices pipeline with escalating recovery.

    Escalation order: a cheap 'low'-profile pass; if expected files are still
    missing, one 'recall'-profile pass at larger budgets; if files remain
    missing, per-file 'recall' passes pinning each missing file.  Token
    estimates accumulate across every pass.  Returns a CaseResult labelled
    with the deepest escalation mode reached.
    """
    payload = run_context_slices(
        repo='.',
        query=case.query,
        profile='low',
        stage_a_budget=300,
        stage_b_budget=600,
        max_total=800,
        intent=case.intent,
        pin=None,
        pin_budget=200,
        include_tests=False,
    )
    mode = "slices_low"
    # Payloads may report either "token_estimate" or "tokens"; prefer the former.
    tokens = int(payload.get("token_estimate", payload.get("tokens", 0)) or 0)
    files = {f for f in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", [])) if f}
    expected = tuple(case.expected_files)
    missing = [rel for rel in expected if rel not in files]
    if missing:
        # Second attempt: recall profile with larger budgets.
        mode = "slices_recall"
        recall_payload = run_context_slices(
            repo='.',
            query=case.query,
            profile='recall',
            stage_a_budget=400,
            stage_b_budget=800,
            max_total=1200,
            intent=case.intent,
            pin=None,
            pin_budget=300,
            include_tests=False,
        )
        tokens += int(recall_payload.get("token_estimate", recall_payload.get("tokens", 0)) or 0)
        files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in recall_payload.get("snippets", [])) if f})
        missing = [rel for rel in expected if rel not in files]
        if missing:
            # Final attempt: pin each still-missing file individually.
            mode = "slices_recall_pin"
            for rel in list(missing):
                pin_payload = run_context_slices(
                    repo='.',
                    query=case.query,
                    profile='recall',
                    stage_a_budget=400,
                    stage_b_budget=800,
                    max_total=1200,
                    intent=case.intent,
                    pin=rel,
                    pin_budget=300,
                    include_tests=False,
                )
                tokens += int(pin_payload.get("token_estimate", pin_payload.get("tokens", 0)) or 0)
                files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in pin_payload.get("snippets", [])) if f})
                missing = [m for m in expected if m not in files]
                if not missing:
                    break

    expected_hits = len(expected) - len(missing)
    family = _case_family(case)
    return CaseResult(
        name=case.name,
        family=family,
        mode=mode,
        tokens=tokens,
        expected_hits=expected_hits,
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _evaluate_case_with_method(case, method: str) -> CaseResult:
    """Dispatch evaluation of *case* to the retrieval strategy named by *method*.

    Unrecognized method names fall through to the slices pipeline.
    """
    plain_variants = {
        "plain": {"allow_gapfill": False},
        "plain_gapfill": {"allow_gapfill": True, "aggressive_gapfill": False},
        "plain_rescue": {"allow_gapfill": True, "aggressive_gapfill": True},
    }
    options = plain_variants.get(method)
    if options is not None:
        return _evaluate_plain_case(case, **options)
    return _evaluate_slices_case(case)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _summarize(label: str, rows: list[CaseResult]) -> dict:
|
|
380
|
+
case_count = len(rows)
|
|
381
|
+
pass_count = sum(1 for row in rows if row.context_complete)
|
|
382
|
+
total_tokens = sum(row.tokens for row in rows)
|
|
383
|
+
hit_count = sum(row.expected_hits for row in rows)
|
|
384
|
+
hit_total = sum(row.expected_total for row in rows)
|
|
385
|
+
return {
|
|
386
|
+
"label": label,
|
|
387
|
+
"case_count": case_count,
|
|
388
|
+
"passing_cases": pass_count,
|
|
389
|
+
"full_hit_rate_pct": round((pass_count / case_count) * 100, 1) if case_count else 0.0,
|
|
390
|
+
"target_hit_rate_pct": round((hit_count / hit_total) * 100, 1) if hit_total else 0.0,
|
|
391
|
+
"total_tokens": total_tokens,
|
|
392
|
+
"tokens_per_query": round(total_tokens / case_count, 1) if case_count else 0.0,
|
|
393
|
+
"tokens_per_expected_hit": round(total_tokens / hit_count, 2) if hit_count else None,
|
|
394
|
+
"results": [asdict(row) for row in rows],
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _collect_source_files(repo_path: Path) -> list[str]:
    """Return sorted repo-relative POSIX paths for recognized source files.

    Skips anything under an ignored directory and anything whose suffix
    is not in ``_SOURCE_EXTS``.
    """

    def _is_source(candidate: Path) -> bool:
        # Directories and non-source suffixes are out immediately.
        if not candidate.is_file() or candidate.suffix.lower() not in _SOURCE_EXTS:
            return False
        rel_parts = candidate.relative_to(repo_path).parts
        return not any(part in _IGNORED_DIRS for part in rel_parts)

    return sorted(
        candidate.relative_to(repo_path).as_posix()
        for candidate in repo_path.rglob('*')
        if _is_source(candidate)
    )
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
    """Synthesize up to *needed* retrieval benchmark cases from the repo itself.

    Cases are built in five passes of increasing breadth (single files,
    same-directory pairs, cross-directory pairs, 3-file chains, then a
    nearest-neighbour fill) so the adaptation loop sees a diversified
    sample even in mixed-layer repos. Returns an empty list when the repo
    has no recognizable source files.
    """
    files = _collect_source_files(repo_path)
    if not files:
        return []

    # Group files by parent directory for the same-dir pairing pass.
    by_dir: dict[str, list[str]] = {}
    for rel in files:
        parent = str(Path(rel).parent).replace('\\', '/')
        by_dir.setdefault(parent, []).append(rel)

    rows: list[AdaptCase] = []
    seen_names: set[str] = set()
    # Dedupe on the sorted expected-file set so the same target combo
    # is never benchmarked twice under different names.
    seen_expected: set[tuple[str, ...]] = set()
    cue_cache: dict[str, list[str]] = {}

    def add_case(name: str, expected: tuple[str, ...], intent: str = 'explore') -> None:
        # Append one deduplicated case; silently a no-op once the quota
        # is met or when the expected-file set was already used.
        if len(rows) >= needed:
            return
        expected_key = tuple(sorted(expected))
        if expected_key in seen_expected:
            return
        safe_name = re.sub(r"[^a-zA-Z0-9_]+", "_", name).strip("_").lower() or "case"
        if safe_name in seen_names:
            # Disambiguate name collisions with a numeric suffix.
            idx = 2
            while f"{safe_name}_{idx}" in seen_names:
                idx += 1
            safe_name = f"{safe_name}_{idx}"
        seen_names.add(safe_name)
        seen_expected.add(expected_key)
        symbols: list[str] = []
        for rel in expected:
            # Cue extraction reads file contents; cache per path since
            # the same file can appear in many cases.
            cues = cue_cache.get(rel)
            if cues is None:
                cues = _extract_query_cues_for_file(repo_path, rel)
                cue_cache[rel] = cues
            symbols.extend(cues)
        if not symbols:
            # Fallback cue: the bare file stems (e.g. "utils" for utils.py).
            symbols = [Path(rel).stem.lower() for rel in expected]
        # Query mixes the expected paths with (at most 8) extracted cues.
        query = f"{' '.join(expected)} {' '.join(symbols[:8])}".strip()
        rows.append(AdaptCase(name=safe_name, query=query, intent=intent, baseline_files=expected, expected_files=expected))

    # Build a diversified sample so adaptation can learn in mixed-layer repos.
    # Roughly a third each for singles / same-dir pairs / cross-dir pairs.
    single_target = max(1, needed // 3)
    same_dir_target = max(1, needed // 3)
    cross_dir_target = max(1, needed - single_target - same_dir_target)

    # 1) singles
    for rel in files:
        add_case(f"single_{Path(rel).stem}", (rel,), intent='explore')
        if len(rows) >= single_target:
            break

    # 2) same-dir adjacent pairs
    same_pairs_added = 0
    for parent, group in sorted(by_dir.items(), key=lambda x: x[0]):
        if len(group) < 2:
            continue
        label = "root" if parent in {'.', ''} else parent
        group = sorted(group)
        for idx in range(len(group) - 1):
            add_case(f"pair_{label}_{idx}", (group[idx], group[idx + 1]), intent='explore')
            if len(rows) >= needed:
                return rows[:needed]
            same_pairs_added += 1
            if same_pairs_added >= same_dir_target:
                break
        if same_pairs_added >= same_dir_target:
            break

    # 3) cross-dir pairs (top-level representatives)
    # One representative file per top-level directory component.
    tops: dict[str, str] = {}
    for rel in files:
        top = Path(rel).parts[0] if Path(rel).parts else rel
        tops.setdefault(top, rel)
    top_items = sorted(tops.items(), key=lambda item: item[0])
    cross_added = 0
    for idx in range(len(top_items) - 1):
        left = top_items[idx][1]
        right = top_items[idx + 1][1]
        add_case(f"cross_{top_items[idx][0]}_{top_items[idx + 1][0]}", (left, right), intent='explore')
        if len(rows) >= needed:
            return rows[:needed]
        cross_added += 1
        if cross_added >= cross_dir_target:
            break

    # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
    if needed >= 12 and len(rows) < needed:
        chain_budget = max(1, needed // 6)
        chains_added = 0
        reps = [item[1] for item in top_items]
        for idx in range(len(reps) - 2):
            add_case(
                f"chain_{idx}",
                (reps[idx], reps[idx + 1], reps[idx + 2]),
                intent='refactor',
            )
            if len(rows) >= needed:
                return rows[:needed]
            chains_added += 1
            if chains_added >= chain_budget:
                break

    # 5) fill remainder with additional nearby pairs
    if len(rows) < needed:
        for idx in range(len(files) - 1):
            add_case(f"fill_{idx}", (files[idx], files[idx + 1]), intent='explore')
            if len(rows) >= needed:
                break

    return rows[:needed]
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list[AdaptCase], str]:
    """Pick up to *benchmark_size* repo-local adaptation cases.

    Returns the case list plus a provenance tag describing where the
    cases came from ('none_available' when generation produced nothing).
    """
    size = max(1, int(benchmark_size))
    candidates = _generated_cases_for_repo(repo_path, size)
    if not candidates:
        return [], 'none_available'
    return candidates[:size], 'generated_repo_local'
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _next_method(method: str) -> str:
    """Return the method one step stronger than *method* in _METHOD_ORDER.

    Saturates at the strongest method; unknown inputs restart at the
    cheapest method.
    """
    if method not in _METHOD_ORDER:
        return _METHOD_ORDER[0]
    pos = _METHOD_ORDER.index(method)
    return _METHOD_ORDER[min(pos + 1, len(_METHOD_ORDER) - 1)]
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def _cheaper_method(method: str) -> str | None:
    """Return the method one step cheaper than *method*, or None.

    None signals there is nothing cheaper (already the floor) or that
    *method* is not a known method.
    """
    if method not in _METHOD_ORDER:
        return None
    pos = _METHOD_ORDER.index(method)
    return _METHOD_ORDER[pos - 1] if pos > 0 else None
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) -> tuple[list[CaseResult], dict, dict[str, dict]]:
    """Evaluate every case under its family's assigned method.

    Returns the raw per-case results, the overall summary, and a
    per-family roll-up with pass rates and token costs.
    """
    results = [
        _evaluate_case_with_method(case, family_policy.get(_case_family(case), 'plain'))
        for case in cases
    ]
    summary = _summarize('policy_run', results)

    # Roll results up per family: counts, passes and token spend.
    family_metrics: dict[str, dict] = {}
    for result in results:
        bucket = family_metrics.setdefault(result.family, {'cases': 0, 'passes': 0, 'tokens': 0})
        bucket['cases'] += 1
        bucket['passes'] += 1 if result.context_complete else 0
        bucket['tokens'] += result.tokens
    for bucket in family_metrics.values():
        bucket['pass_rate'] = round(bucket['passes'] / max(1, bucket['cases']), 3)
        bucket['tokens_per_case'] = round(bucket['tokens'] / max(1, bucket['cases']), 1)

    return results, summary, family_metrics
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _select_best_summary(summaries: list[dict]) -> dict:
    """Pick the preferred summary from a set of candidates.

    Prefers the cheapest among fully-accurate (100% full-hit) summaries;
    when none reach 100%, falls back to the most accurate, breaking ties
    toward lower per-query token cost.
    """
    big = 10**9
    locked = [entry for entry in summaries if entry.get("full_hit_rate_pct", 0.0) >= 100.0]
    if locked:

        def cost_key(entry: dict) -> tuple:
            return (entry.get("tokens_per_expected_hit") or big, entry.get("tokens_per_query", big))

        return min(locked, key=cost_key)

    def accuracy_key(entry: dict) -> tuple:
        return (entry.get("target_hit_rate_pct", 0.0), -entry.get("tokens_per_query", big))

    return max(summaries, key=accuracy_key)
|
|
463
580
|
|
|
464
|
-
# Select best accuracy-locked round if available.
|
|
465
|
-
locked_rounds = [r for r in accuracy_rounds if r['summary']['full_hit_rate_pct'] >= 100.0]
|
|
466
|
-
if locked_rounds:
|
|
467
|
-
selected_accuracy_round = min(
|
|
468
|
-
locked_rounds,
|
|
469
|
-
key=lambda r: (r['summary'].get('tokens_per_expected_hit') or 10**9, r['summary'].get('tokens_per_query', 10**9)),
|
|
470
|
-
)
|
|
471
|
-
else:
|
|
472
|
-
selected_accuracy_round = max(
|
|
473
|
-
accuracy_rounds,
|
|
474
|
-
key=lambda r: (r['summary'].get('target_hit_rate_pct', 0.0), -r['summary'].get('tokens_per_query', 10**9)),
|
|
475
|
-
)
|
|
476
581
|
|
|
477
|
-
|
|
478
|
-
|
|
582
|
+
def _bootstrap_family_policy(cases: list[AdaptCase], families: list[str]) -> tuple[dict[str, str], list[dict]]:
    """Seed a per-family method policy by benchmarking every method.

    Each family's cases are evaluated under every method in
    ``_METHOD_ORDER``; the winner per `_select_best_summary` becomes that
    family's starting method. Families with no cases default to 'plain'.
    Returns the policy plus per-family diagnostics (all candidates kept
    for transparency).
    """
    policy: dict[str, str] = {}
    diagnostics: list[dict] = []
    for family in families:
        members = [case for case in cases if _case_family(case) == family]
        if not members:
            # Nothing to measure for this family; fall back to cheapest.
            policy[family] = "plain"
            continue

        candidate_summaries: list[dict] = []
        for candidate_method in _METHOD_ORDER:
            evaluated = [_evaluate_case_with_method(case, candidate_method) for case in members]
            stats = _summarize(f"bootstrap_{family}_{candidate_method}", evaluated)
            stats["method"] = candidate_method
            stats["family"] = family
            candidate_summaries.append(stats)

        winner = _select_best_summary(candidate_summaries)
        chosen = str(winner.get("method", "plain"))
        policy[family] = chosen
        diagnostics.append(
            {
                "family": family,
                "selected_method": chosen,
                "selected_summary": winner,
                "candidates": candidate_summaries,
            }
        )
    return policy, diagnostics
|
|
611
|
+
def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
    """Persist the adaptation outcome into ``.gcie/context_config.json``.

    Merges into any existing config; an unreadable or non-dict config is
    replaced rather than crashing (best-effort write-back).
    """
    cfg_path = repo_path / '.gcie' / 'context_config.json'
    cfg: dict = {}
    if cfg_path.exists():
        try:
            loaded = json.loads(cfg_path.read_text(encoding='utf-8'))
        except Exception:
            # Corrupt/unreadable existing config: start fresh.
            loaded = None
        if isinstance(loaded, dict):
            cfg = loaded

    cfg['adaptation_pipeline'] = {
        'status': pipeline_status,
        'best_label': best.get('label'),
        'full_hit_rate_pct': best.get('full_hit_rate_pct'),
        'tokens_per_query': best.get('tokens_per_query'),
        'case_source': case_source,
        'cost_analysis': cost_analysis,
        'family_policy': family_policy,
        'updated_at': datetime.now(timezone.utc).isoformat(),
    }
    cfg_path.parent.mkdir(parents=True, exist_ok=True)
    cfg_path.write_text(json.dumps(cfg, indent=2), encoding='utf-8')
|
|
635
|
+
|
|
636
|
+
def _select_best_full_hit(candidates: list[dict]) -> dict | None:
    """Return the cheapest fully-accurate candidate, or None.

    Only candidates at 100% full-hit rate qualify; among those, the one
    with the lowest tokens-per-expected-hit wins (tokens-per-query breaks
    ties). Missing cost fields sort last.
    """
    big = 10**9
    locked = [entry for entry in candidates if entry.get('full_hit_rate_pct', 0.0) >= 100.0]
    if not locked:
        return None
    return min(
        locked,
        key=lambda entry: (entry.get('tokens_per_expected_hit') or big, entry.get('tokens_per_query', big)),
    )
|
|
644
|
+
|
|
645
|
+
def run_post_init_adaptation(
    repo: str = '.',
    *,
    benchmark_size: int = 10,
    efficiency_iterations: int = 5,
    clear_profile: bool = False,
) -> dict:
    """Run the post-init adaptation pipeline and return a full report dict.

    Phases, in order: bootstrap a per-family method policy, run accuracy
    rounds that promote methods for failing families until the benchmark
    locks at 100%, then run efficiency rounds that try cheaper methods
    per family while holding the 100% accuracy gate. The selected policy
    and cost analysis are written back to ``.gcie/context_config.json``
    and the report is saved under ``.planning/``.

    Returns a ``status: no_benchmark_cases`` stub when no cases could be
    generated for the repo. Raises nothing deliberately; file I/O errors
    from the write-back/report steps propagate to the caller.
    """
    repo_path = Path(repo).resolve()

    # Ensure all relative retrieval/evaluation calls execute in the target repo.
    # NOTE(review): this changes the process-wide CWD and never restores it —
    # looks intentional (downstream helpers rely on relative paths), but any
    # caller expecting its CWD preserved should be aware.
    import os

    os.chdir(repo_path)
    run_index(repo_path.as_posix())

    if clear_profile:
        from .context_slices import clear_adaptive_profile

        clear_adaptive_profile(repo_path.as_posix())

    cases, case_source = _select_adaptation_cases(repo_path, benchmark_size)
    if not cases:
        # Without cases there is nothing to adapt against; report and bail.
        return {
            'status': 'no_benchmark_cases',
            'repo': repo_path.as_posix(),
            'case_source': case_source,
            'message': 'No repo-usable adaptation cases available.',
        }

    families = sorted({_case_family(case) for case in cases})
    family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families)

    # Accuracy rounds: promote methods per failing family until lock.
    accuracy_rounds_max = 5
    accuracy_rounds: list[dict] = []
    # Require two consecutive 100% rounds before declaring the policy stable.
    lock_streak = 0

    for rnd in range(1, accuracy_rounds_max + 1):
        rows, summary, by_family = _run_family_policy(cases, family_policy)
        round_payload = {
            'round': rnd,
            'family_policy': dict(family_policy),
            'summary': summary,
            'family_metrics': by_family,
        }
        accuracy_rounds.append(round_payload)

        if summary['full_hit_rate_pct'] >= 100.0:
            lock_streak += 1
            if lock_streak >= 2:
                break
            continue

        lock_streak = 0
        # Promote every family that is not fully passing to the next
        # (stronger, costlier) method.
        for fam, metrics in by_family.items():
            if metrics.get('pass_rate', 0.0) < 1.0:
                family_policy[fam] = _next_method(family_policy.get(fam, 'plain'))

    # Select best accuracy-locked round if available.
    locked_rounds = [r for r in accuracy_rounds if r['summary']['full_hit_rate_pct'] >= 100.0]
    if locked_rounds:
        # Among locked rounds, prefer the cheapest one.
        selected_accuracy_round = min(
            locked_rounds,
            key=lambda r: (r['summary'].get('tokens_per_expected_hit') or 10**9, r['summary'].get('tokens_per_query', 10**9)),
        )
    else:
        # No round locked: take the most accurate, cheaper breaking ties.
        selected_accuracy_round = max(
            accuracy_rounds,
            key=lambda r: (r['summary'].get('target_hit_rate_pct', 0.0), -r['summary'].get('tokens_per_query', 10**9)),
        )

    # Re-run under the chosen policy to get a fresh baseline summary.
    family_policy = dict(selected_accuracy_round['family_policy'])
    rows, current_summary, by_family = _run_family_policy(cases, family_policy)

    # Efficiency rounds: attempt family-level cheaper method under hard 100% gate.
    efficiency_trials: list[dict] = []
    for idx in range(max(0, int(efficiency_iterations))):
        improved = False
        for fam in families:
            cheaper = _cheaper_method(family_policy.get(fam, 'plain'))
            if not cheaper:
                continue
            trial_policy = dict(family_policy)
            trial_policy[fam] = cheaper
            _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy)
            trial_payload = {
                'iteration': idx + 1,
                'family': fam,
                'trial_policy': trial_policy,
                'summary': trial_summary,
            }
            efficiency_trials.append(trial_payload)

            # Accept the cheaper method only if accuracy stays at 100%
            # AND the per-query token cost strictly drops.
            if (
                trial_summary.get('full_hit_rate_pct', 0.0) >= 100.0
                and trial_summary.get('tokens_per_query', 10**9) < current_summary.get('tokens_per_query', 10**9)
            ):
                family_policy = trial_policy
                current_summary = trial_summary
                by_family = trial_by_family
                improved = True
        if not improved:
            # Fixed point reached: no family can get cheaper without
            # breaking accuracy.
            break

    # Global candidate snapshots for transparency.
    slices_rows = [_evaluate_case_with_method(case, 'slices') for case in cases]
    plain_rows = [_evaluate_case_with_method(case, 'plain') for case in cases]
    plain_gap_rows = [_evaluate_case_with_method(case, 'plain_gapfill') for case in cases]
    plain_rescue_rows = [_evaluate_case_with_method(case, 'plain_rescue') for case in cases]
    slices_summary = _summarize('slices_accuracy_stage', slices_rows)
    plain_summary = _summarize('plain_accuracy_stage', plain_rows)
    plain_gap_summary = _summarize('plain_gapfill_accuracy_stage', plain_gap_rows)
    plain_rescue_summary = _summarize('plain_rescue_accuracy_stage', plain_rescue_rows)
    candidates = [slices_summary, plain_summary, plain_gap_summary, plain_rescue_summary]

    active = {
        'label': 'family_policy_selected',
        **current_summary,
    }

    # Hard accuracy fallback: never finalize below 100% when any known candidate reaches 100%.
    all_full_hit_candidates = list(candidates)
    all_full_hit_candidates.extend(r['summary'] for r in accuracy_rounds)
    all_full_hit_candidates.append(current_summary)
    best_full_hit = _select_best_full_hit(all_full_hit_candidates)
    if active.get('full_hit_rate_pct', 0.0) < 100.0 and best_full_hit is not None:
        active = dict(best_full_hit)

    # Compare the selected configuration's token spend to the cheapest
    # global candidate and flag large (>40%) overspend as cost-risky.
    cheapest = min(candidates, key=lambda item: (item.get('tokens_per_expected_hit') or 10**9, item.get('tokens_per_query', 10**9)))
    token_delta = int(active['total_tokens'] - cheapest['total_tokens'])
    pct_delta = round((token_delta / max(1, int(cheapest['total_tokens']))) * 100, 1)

    pipeline_status = 'ok'
    if (
        active.get('full_hit_rate_pct', 0.0) >= 100.0
        and active.get('tokens_per_query', 10**9) > cheapest.get('tokens_per_query', 10**9)
        and pct_delta > 40.0
    ):
        pipeline_status = 'accuracy_locked_but_cost_risky'

    cost_analysis = {
        'cheapest_label': cheapest.get('label'),
        'selected_label': active.get('label'),
        'selected_vs_cheapest_token_delta': token_delta,
        'selected_vs_cheapest_pct_delta': pct_delta,
        'risk_threshold_pct': 40.0,
        'cost_risky': pipeline_status == 'accuracy_locked_but_cost_risky',
    }

    # Persist the winning policy into the repo's context config.
    _write_back(repo_path, active, case_source, pipeline_status, cost_analysis, family_policy)

    report = {
        'status': pipeline_status,
        'repo': repo_path.as_posix(),
        'benchmark_size': len(cases),
        'requested_benchmark_size': int(benchmark_size),
        'efficiency_iterations': int(efficiency_iterations),
        'case_source': case_source,
        'family_policy': family_policy,
        'cost_analysis': cost_analysis,
        'phases': {
            'bootstrap': bootstrap_diagnostics,
            'accuracy_rounds': accuracy_rounds,
            'selected_accuracy_round': selected_accuracy_round,
            'efficiency_trials': efficiency_trials,
        },
        'stages': {
            'accuracy_candidates': candidates,
            'selected_after_accuracy': selected_accuracy_round['summary'],
            'efficiency_trials': efficiency_trials,
        },
    }

    # Save the full report next to other planning artifacts and record
    # its location in the returned dict.
    planning_dir = repo_path / '.planning'
    planning_dir.mkdir(parents=True, exist_ok=True)
    out_path = planning_dir / 'post_init_adaptation_report.json'
    out_path.write_text(json.dumps(report, indent=2), encoding='utf-8')
    report['report_path'] = out_path.as_posix()
    return report
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
|