@pmaddire/gcie 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/commands/adaptation.py +484 -339
- package/package.json +1 -1
|
@@ -1,341 +1,486 @@
|
|
|
1
|
-
"""Post-initialization adaptation pipeline (accuracy first, then efficiency)."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
from dataclasses import asdict, dataclass
|
|
6
|
-
from datetime import datetime, timezone
|
|
7
|
-
import json
|
|
8
|
-
import re
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
|
|
11
|
-
from .context import run_context
|
|
12
|
-
from .context_slices import _classify_query_family, run_context_slices
|
|
13
|
-
from .index import run_index
|
|
14
|
-
|
|
15
|
-
try:
|
|
16
|
-
from performance.context_benchmark import BENCHMARK_CASES
|
|
17
|
-
except Exception: # pragma: no cover - fallback for limited installs
|
|
18
|
-
BENCHMARK_CASES = ()
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
@dataclass(frozen=True, slots=True)
class CaseResult:
    """Immutable per-case evaluation record.

    NOTE(review): the code that fills these fields is not visible in this
    revision of the file; the field notes below are presumed from the field
    names and should be confirmed against the evaluators.
    """

    # Case identifier.
    name: str
    # Query family label (presumably from ``_classify_query_family`` - confirm).
    family: str
    # Label of the retrieval strategy that produced this row.
    mode: str
    # Token count attributed to the case.
    tokens: int
    # Presumably the number of expected files that were retrieved - confirm.
    expected_hits: int
    # Presumably the number of expected files for the case - confirm.
    expected_total: int
    # Presumably the expected files that were not retrieved - confirm.
    missing_expected: tuple[str, ...]
    # Presumably True when nothing is missing - confirm.
    context_complete: bool
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
if
|
|
70
|
-
return
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
return
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
"
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
if
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
mode
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
"status": "complete",
|
|
257
|
-
"best_label": best.get("label"),
|
|
258
|
-
"full_hit_rate_pct": best.get("full_hit_rate_pct"),
|
|
259
|
-
"tokens_per_query": best.get("tokens_per_query"),
|
|
260
|
-
"updated_at": datetime.now(timezone.utc).isoformat(),
|
|
261
|
-
}
|
|
262
|
-
cfg_path.parent.mkdir(parents=True, exist_ok=True)
|
|
263
|
-
cfg_path.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
def run_post_init_adaptation(
    repo: str = ".",
    *,
    benchmark_size: int = 10,
    efficiency_iterations: int = 5,
    clear_profile: bool = False,
) -> dict:
    """Run accuracy-lock then efficiency adaptation protocol after setup/index.

    Args:
        repo: Repository root; resolved to an absolute path before use.
        benchmark_size: Number of ``BENCHMARK_CASES`` to evaluate, clamped to
            the range ``1..len(BENCHMARK_CASES)``.
        efficiency_iterations: Upper bound on gap-fill efficiency trials;
            negative values are treated as zero.
        clear_profile: When true, clear the adaptive profile before evaluating.

    Returns:
        A report dict. With no benchmark cases it only carries ``status``,
        ``repo`` and ``message``; otherwise it carries the stage summaries and
        ``report_path``, the JSON file the report was written to.
    """
    repo_path = Path(repo).resolve()
    run_index(repo_path.as_posix())

    if clear_profile:
        from .context_slices import clear_adaptive_profile

        clear_adaptive_profile(repo_path.as_posix())

    cases = list(BENCHMARK_CASES)
    if not cases:
        return {
            "status": "no_benchmark_cases",
            "repo": repo_path.as_posix(),
            "message": "No benchmark cases available for accuracy-locked adaptation.",
        }

    benchmark_size = max(1, min(len(cases), int(benchmark_size)))
    cases = cases[:benchmark_size]

    # Accuracy stage: evaluate the same cases with each retrieval strategy.
    slices_rows = [_evaluate_slices_case(case) for case in cases]
    plain_rows = [_evaluate_plain_case(case, allow_gapfill=False) for case in cases]
    plain_gap_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]

    slices_summary = _summarize("slices_accuracy_stage", slices_rows)
    plain_summary = _summarize("plain_accuracy_stage", plain_rows)
    plain_gap_summary = _summarize("plain_gapfill_accuracy_stage", plain_gap_rows)

    candidates = [slices_summary, plain_summary, plain_gap_summary]
    full_hit = [candidate for candidate in candidates if candidate["full_hit_rate_pct"] >= 100.0]
    if full_hit:
        # Among fully accurate strategies prefer the cheapest; a missing
        # tokens-per-hit value sorts last.
        best = min(full_hit, key=lambda item: (item["tokens_per_expected_hit"] or 10**9, item["tokens_per_query"]))
    else:
        best = max(candidates, key=lambda item: item["target_hit_rate_pct"])

    efficiency_trials: list[dict] = []
    active = best
    # Efficiency trials only apply when gap-fill won the accuracy stage. Decide
    # that once from ``best``: testing ``active["label"]`` on every iteration
    # ended the trials right after the first accepted one, because an accepted
    # trial carries its own "plain_gapfill_eff_trial_N" label.
    if best["label"] == "plain_gapfill_accuracy_stage":
        for idx in range(max(0, int(efficiency_iterations))):
            trial_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]
            trial = _summarize(f"plain_gapfill_eff_trial_{idx + 1}", trial_rows)
            efficiency_trials.append(trial)
            # Accept a trial only if it is at least as accurate and strictly cheaper.
            if trial["full_hit_rate_pct"] >= active["full_hit_rate_pct"] and trial["tokens_per_query"] < active["tokens_per_query"]:
                active = trial

    _write_back(repo_path, active)

    report = {
        "status": "ok",
        "repo": repo_path.as_posix(),
        "benchmark_size": benchmark_size,
        "efficiency_iterations": int(efficiency_iterations),
        "stages": {
            "accuracy_candidates": [slices_summary, plain_summary, plain_gap_summary],
            "selected_after_accuracy": best,
            "efficiency_trials": efficiency_trials,
            "selected_final": active,
        },
    }

    planning_dir = repo_path / ".planning"
    planning_dir.mkdir(parents=True, exist_ok=True)
    out_path = planning_dir / "post_init_adaptation_report.json"
    out_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
    # The path is added after serialisation, so it is not part of the file itself.
    report["report_path"] = out_path.as_posix()
    return report
|
|
1
|
+
"""Post-initialization adaptation pipeline (accuracy first, then efficiency)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import asdict, dataclass
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
import json
|
|
8
|
+
import re
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .context import run_context
|
|
12
|
+
from .context_slices import _classify_query_family, run_context_slices
|
|
13
|
+
from .index import run_index
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from performance.context_benchmark import BENCHMARK_CASES
|
|
17
|
+
except Exception: # pragma: no cover - fallback for limited installs
|
|
18
|
+
BENCHMARK_CASES = ()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True, slots=True)
class CaseResult:
    """Outcome of evaluating one adaptation case with one retrieval strategy."""

    # Case identifier, copied from the evaluated case's ``name``.
    name: str
    # Query family label returned by ``_classify_query_family``.
    family: str
    # Strategy label, e.g. "plain_context_workflow" or "slices_recall_pin".
    mode: str
    # Sum of the token counts reported by every retrieval call made for the case.
    tokens: int
    # Number of expected files found among the retrieved snippets.
    expected_hits: int
    # Number of expected files for the case.
    expected_total: int
    # Expected files that were not found after all retrieval attempts.
    missing_expected: tuple[str, ...]
    # True when ``missing_expected`` is empty.
    context_complete: bool
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True, slots=True)
class AdaptCase:
    """One adaptation probe: a query plus the files retrieval should surface."""

    # Unique case identifier.
    name: str
    # Query text handed to the retrieval commands.
    query: str
    # Intent value forwarded to the retrieval commands (generated cases use "explore").
    intent: str
    # Baseline file set; the case builders in this module fall back to
    # ``expected_files`` when no separate baseline is available.
    baseline_files: tuple[str, ...]
    # Files that must appear in the retrieved snippets for the case to pass.
    expected_files: tuple[str, ...]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Query tokenizer: runs of ASCII letters, digits, "_", ".", "/" and "-".
_WORD_RE = re.compile(r"[A-Za-z0-9_./-]+")
# File suffixes (compared lower-cased) that count as source files for case generation.
_SOURCE_EXTS = {".py", ".js", ".jsx", ".ts", ".tsx", ".java", ".go", ".rs", ".cs", ".cpp", ".c", ".h"}
# A file is skipped during case generation when any component of its
# repo-relative path matches one of these names.
_IGNORED_DIRS = {
    ".git",
    ".gcie",
    ".planning",
    ".venv",
    "node_modules",
    "__pycache__",
    "dist",
    "build",
    "coverage",
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _query_keywords(text: str) -> list[str]:
    """Return at most eight lower-cased tokens of ``text`` that are 4+ characters long.

    Tokens are the matches of ``_WORD_RE`` on the lower-cased text, kept in
    order of appearance; duplicates are not removed.
    """
    lowered = text.lower()
    long_tokens = [word for word in _WORD_RE.findall(lowered) if len(word) >= 4]
    return long_tokens[:8]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _node_to_file(node_id: str) -> str | None:
    """Map a graph node id to the file path embedded in it.

    ``file:<path>`` yields ``<path>``. ``function:<path>::<name>`` and
    ``class:<path>::<name>`` yield the part before the first ``::``. Any other
    id yields ``None``.
    """
    if node_id.startswith("file:"):
        return node_id[len("file:"):]
    for prefix in ("function:", "class:"):
        if node_id.startswith(prefix):
            # Everything before the first "::" is the owning file.
            owner, _, _ = node_id[len(prefix):].partition("::")
            return owner
    return None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
    """Express a retrieved snippet path relative to the repository root.

    Args:
        plan_path: Scope the retrieval ran in ("", "." or "./" mean the root).
        rel_path: Path taken from a snippet node id; may use backslashes and
            may or may not already include the scope prefix.

    Returns:
        A forward-slash path. When ``plan_path`` names a sub-directory and
        ``rel_path`` does not already start with it, the scope is prepended.
    """
    normalized = rel_path.replace("\\", "/")
    # Remove leading "./" segments and slashes only. ``str.lstrip("./")``
    # strips a *character set*, which mangled dot-prefixed names
    # (".github/x.py" became "github/x.py") so expected files never matched.
    while normalized.startswith(("./", "/")):
        normalized = normalized[2:] if normalized.startswith("./") else normalized[1:]
    if normalized == ".":
        normalized = ""
    if not plan_path or plan_path in {".", "./"}:
        return normalized
    base = Path(plan_path).as_posix().strip("/")
    if normalized == base or normalized.startswith(base + "/"):
        return normalized
    return f"{base}/{normalized}"
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _family_path(expected_files: tuple[str, ...]) -> str:
    """Return the first path component shared by every expected file, else ".".

    "." is also returned when ``expected_files`` is empty. The shared component
    may be a bare file name when the single expected file sits at the root.
    """
    heads: set[str] = set()
    for entry in expected_files:
        parts = Path(entry).parts
        if parts:
            heads.add(parts[0])
    if len(heads) != 1:
        return "."
    (only_head,) = heads
    return only_head
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _safe_scope(path: str) -> str:
    """Return ``path`` in POSIX form if it is an existing directory, else ".".

    Empty strings, "." and "./" map to ".". The existence check is made
    against the process's current working directory for relative paths.
    """
    if path and path not in {".", "./"}:
        directory = Path(path)
        if directory.exists() and directory.is_dir():
            return directory.as_posix()
    return "."
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _plan_query(case) -> tuple[str, str, int | None]:
    """Build the ``(scope, query, budget)`` triple for a plain-context retrieval.

    The query is the expected file paths followed by up to four keywords from
    the case query. The budget is 800 for a fixed set of case names, otherwise
    1000 when two or more files are expected, otherwise ``None``. The case
    named "cli_context_command" gets a hard-coded scope, query and budget.
    """
    case_name = getattr(case, "name", "")
    scope = _family_path(case.expected_files)

    if case_name == "cli_context_command":
        return (
            ".",
            "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector",
            950,
        )

    top_keywords = _query_keywords(case.query)[:4]
    query = (" ".join(case.expected_files) + " " + " ".join(top_keywords)).strip()

    tight_budget_cases = {
        "repository_scanner_filters",
        "knowledge_index_query_api",
        "execution_trace_graph",
        "parser_fallbacks",
    }
    budget: int | None
    if case_name in tight_budget_cases:
        budget = 800
    elif len(case.expected_files) >= 2:
        budget = 1000
    else:
        budget = None
    return scope, query, budget
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _evaluate_plain_case(case, *, allow_gapfill: bool = True) -> CaseResult:
    """Evaluate one case with the plain ``run_context`` workflow.

    One planned retrieval is run first. When ``allow_gapfill`` is true and
    expected files are still missing, one extra targeted retrieval is issued
    per missing file until everything is found.

    Args:
        case: Object exposing ``name``, ``query``, ``intent`` and
            ``expected_files``.
        allow_gapfill: Enable the per-file follow-up retrievals.

    Returns:
        A ``CaseResult`` whose ``tokens`` is the sum of the ``tokens`` values
        of every payload retrieved for the case.
    """
    path, query, budget = _plan_query(case)
    path = _safe_scope(path)
    payload = run_context(path, query, budget=budget, intent=case.intent)
    files = {
        _normalize_scoped_path(path, rel_path)
        for rel_path in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
        if rel_path
    }
    expected = tuple(case.expected_files)
    missing = [rel for rel in expected if rel not in files]
    tokens = int(payload.get("tokens", 0) or 0)
    mode = "plain_context_workflow"

    if allow_gapfill and missing:
        mode = "plain_context_workflow_gapfill"
        # Loop-invariant: the keywords depend only on the case query.
        gap_keywords = " ".join(_query_keywords(case.query)[:4])
        for rel in list(missing):
            # The loop walks a snapshot of ``missing``; an earlier gap query
            # may already have surfaced this file, so do not pay for another
            # retrieval (and inflate the token metric) for it.
            if rel in files:
                continue
            scope = _safe_scope(_family_path((rel,)))
            gap_query = f"{rel} {gap_keywords}".strip()
            # Entry-point files named main.py get a smaller budget.
            gap_budget = 500 if rel.endswith("/main.py") or rel == "main.py" else 900
            gap_payload = run_context(scope, gap_query, budget=gap_budget, intent=case.intent)
            tokens += int(gap_payload.get("tokens", 0) or 0)
            files.update(
                _normalize_scoped_path(scope, rel_path)
                for rel_path in (_node_to_file(item.get("node_id", "")) for item in gap_payload.get("snippets", []))
                if rel_path
            )
            missing = [m for m in expected if m not in files]
            if not missing:
                break

    expected_hits = len(expected) - len(missing)
    # The family is classified from the planned query, not the raw case query.
    family = _classify_query_family(query)
    return CaseResult(
        name=case.name,
        family=family,
        mode=mode,
        tokens=tokens,
        expected_hits=expected_hits,
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _evaluate_slices_case(case) -> CaseResult:
    """Evaluate one case with the ``run_context_slices`` workflow.

    Escalates in three steps and stops as soon as every expected file is found:
    a "low" profile pass, a "recall" profile pass, then one "recall" pass per
    still-missing file with that file pinned.

    Args:
        case: Object exposing ``name``, ``query``, ``intent`` and
            ``expected_files``.

    Returns:
        A ``CaseResult`` whose ``mode`` names the deepest step that ran and
        whose ``tokens`` sums the token figures of every pass.
    """

    def retrieve(
        profile: str,
        stage_a: int,
        stage_b: int,
        max_total: int,
        pin_budget: int,
        pin: str | None = None,
    ) -> tuple[int, set[str]]:
        # One slices retrieval: returns (tokens spent, files seen in snippets).
        payload = run_context_slices(
            repo=".",
            query=case.query,
            profile=profile,
            stage_a_budget=stage_a,
            stage_b_budget=stage_b,
            max_total=max_total,
            intent=case.intent,
            pin=pin,
            pin_budget=pin_budget,
            include_tests=False,
        )
        # Prefer "token_estimate"; fall back to "tokens" when it is absent.
        spent = int(payload.get("token_estimate", payload.get("tokens", 0)) or 0)
        found = {
            rel
            for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
            if rel
        }
        return spent, found

    expected = tuple(case.expected_files)

    mode = "slices_low"
    tokens, files = retrieve("low", 300, 600, 800, 200)
    missing = [rel for rel in expected if rel not in files]

    if missing:
        mode = "slices_recall"
        spent, found = retrieve("recall", 400, 800, 1200, 300)
        tokens += spent
        files.update(found)
        missing = [rel for rel in expected if rel not in files]

        if missing:
            mode = "slices_recall_pin"
            for rel in list(missing):
                # The loop walks a snapshot of ``missing``; skip files that an
                # earlier pinned pass already surfaced instead of spending
                # tokens on a redundant retrieval.
                if rel in files:
                    continue
                spent, found = retrieve("recall", 400, 800, 1200, 300, pin=rel)
                tokens += spent
                files.update(found)
                missing = [m for m in expected if m not in files]
                if not missing:
                    break

    expected_hits = len(expected) - len(missing)
    family = _classify_query_family(case.query)
    return CaseResult(
        name=case.name,
        family=family,
        mode=mode,
        tokens=tokens,
        expected_hits=expected_hits,
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
|
|
255
|
+
|
|
340
256
|
|
|
257
|
+
def _summarize(label: str, rows: list[CaseResult]) -> dict:
    """Aggregate per-case results into one summary dict for a strategy.

    Percentages are rounded to one decimal and are 0.0 when their denominator
    is zero; ``tokens_per_expected_hit`` is ``None`` when nothing was hit.
    ``results`` holds each row converted with ``dataclasses.asdict``.
    """

    def percent(part: int, whole: int) -> float:
        return round((part / whole) * 100, 1) if whole else 0.0

    case_count = len(rows)
    pass_count = 0
    total_tokens = 0
    hit_count = 0
    hit_total = 0
    for row in rows:
        if row.context_complete:
            pass_count += 1
        total_tokens += row.tokens
        hit_count += row.expected_hits
        hit_total += row.expected_total

    summary: dict = {"label": label}
    summary["case_count"] = case_count
    summary["passing_cases"] = pass_count
    summary["full_hit_rate_pct"] = percent(pass_count, case_count)
    summary["target_hit_rate_pct"] = percent(hit_count, hit_total)
    summary["total_tokens"] = total_tokens
    summary["tokens_per_query"] = round(total_tokens / case_count, 1) if case_count else 0.0
    summary["tokens_per_expected_hit"] = round(total_tokens / hit_count, 2) if hit_count else None
    summary["results"] = [asdict(row) for row in rows]
    return summary
|
|
341
274
|
|
|
275
|
+
|
|
276
|
+
def _collect_source_files(repo_path: Path) -> list[str]:
    """Return the sorted repo-relative POSIX paths of source files under ``repo_path``.

    A file qualifies when its lower-cased suffix is in ``_SOURCE_EXTS`` and no
    component of its relative path is in ``_IGNORED_DIRS``.
    """
    import os  # local import keeps this block self-contained

    files: list[str] = []
    for root, dirnames, filenames in os.walk(repo_path):
        # Prune ignored directories in place so the walk never descends into
        # them; enumerating all of node_modules/.git/.venv only to discard
        # every entry afterwards is needlessly slow on real repositories.
        dirnames[:] = [name for name in dirnames if name not in _IGNORED_DIRS]
        for filename in filenames:
            # The ignore rule applies to every path component, file name included.
            if filename in _IGNORED_DIRS:
                continue
            path = Path(root) / filename
            if path.suffix.lower() not in _SOURCE_EXTS:
                continue
            # Skips broken symlinks and other non-regular entries.
            if not path.is_file():
                continue
            files.append(path.relative_to(repo_path).as_posix())
    return sorted(files)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _static_cases_for_repo(repo_path: Path) -> list[AdaptCase]:
    """Convert the ``BENCHMARK_CASES`` that apply to ``repo_path`` into ``AdaptCase`` rows.

    A benchmark case is kept only when it lists expected files and all of them
    exist under ``repo_path``. Baseline files are filtered to those that exist;
    if none do, the expected files are used as the baseline.
    """

    def present(rel: str) -> bool:
        return (repo_path / rel).exists()

    selected: list[AdaptCase] = []
    for case in BENCHMARK_CASES:
        expected = tuple(case.expected_files)
        if not expected or not all(present(rel) for rel in expected):
            continue
        baseline = tuple(rel for rel in case.baseline_files if present(rel)) or expected
        selected.append(
            AdaptCase(
                name=case.name,
                query=case.query,
                intent=case.intent,
                baseline_files=baseline,
                expected_files=expected,
            )
        )
    return selected
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
    """Synthesize up to ``needed`` adaptation cases from the repo's own source files.

    Cases are produced in three passes, each stopping once its quota is met:
    single-file probes (up to half of ``needed``, at least one), pairs of
    adjacent files within the same directory, then pairs of the first file
    found under each top-level entry. Returns ``[]`` when no source files exist.
    """
    source_files = _collect_source_files(repo_path)
    if not source_files:
        return []

    cases: list[AdaptCase] = []
    used_names: set[str] = set()

    def unique_name(raw: str) -> str:
        # Slugify, then append _2, _3, ... until the name is unused.
        base = re.sub(r"[^a-zA-Z0-9_]+", "_", raw).strip("_").lower() or "case"
        candidate = base
        if candidate in used_names:
            suffix = 2
            while f"{base}_{suffix}" in used_names:
                suffix += 1
            candidate = f"{base}_{suffix}"
        used_names.add(candidate)
        return candidate

    def add_case(raw_name: str, expected: tuple[str, ...], intent: str = "explore") -> None:
        if len(cases) >= needed:
            return
        case_name = unique_name(raw_name)
        hints: list[str] = []
        for rel in expected:
            hints += [Path(rel).stem.lower(), "flow", "wiring"]
        query = f"{' '.join(expected)} {' '.join(hints[:6])}".strip()
        cases.append(
            AdaptCase(
                name=case_name,
                query=query,
                intent=intent,
                baseline_files=expected,
                expected_files=expected,
            )
        )

    # Pass 1: single-file probes.
    single_quota = max(needed // 2, 1)
    for rel in source_files:
        add_case(f"single_{Path(rel).stem}", (rel,), intent="explore")
        if len(cases) >= single_quota:
            break

    # Pass 2: neighbouring files inside the same directory.
    grouped: dict[str, list[str]] = {}
    for rel in source_files:
        folder = str(Path(rel).parent).replace("\\", "/")
        grouped.setdefault(folder, []).append(rel)

    for folder in sorted(grouped):
        members = sorted(grouped[folder])
        if len(members) < 2:
            continue
        for position, pair in enumerate(zip(members, members[1:])):
            add_case(f"pair_{folder}_{position}", pair, intent="explore")
            if len(cases) >= needed:
                return cases[:needed]

    # Pass 3: pair up the first file seen under each top-level entry.
    first_per_top: dict[str, str] = {}
    for rel in source_files:
        parts = Path(rel).parts
        first_per_top.setdefault(parts[0] if parts else rel, rel)

    representatives = list(first_per_top.values())
    for position, pair in enumerate(zip(representatives, representatives[1:])):
        add_case(f"cross_{position}", pair, intent="explore")
        if len(cases) >= needed:
            break

    return cases[:needed]
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list[AdaptCase], str]:
    """Pick adaptation cases generated from the target repo.

    Returns ``(cases, source)`` where ``source`` is "generated_repo_local" when
    at least one case was generated and "none_available" otherwise. At most
    ``max(1, int(benchmark_size))`` cases are returned.
    """
    limit = max(1, int(benchmark_size))
    generated = _generated_cases_for_repo(repo_path, limit)
    if not generated:
        return [], "none_available"
    return generated[:limit], "generated_repo_local"
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _write_back(repo_path: Path, best: dict, case_source: str) -> None:
    """Record the selected strategy in ``<repo>/.gcie/context_config.json``.

    The existing config is kept when it parses to a JSON object; an unreadable,
    invalid or non-object file is replaced by a fresh config. Only the
    "adaptation_pipeline" key is overwritten.
    """
    cfg_path = repo_path / ".gcie" / "context_config.json"

    cfg: dict = {}
    if cfg_path.exists():
        try:
            loaded = json.loads(cfg_path.read_text(encoding="utf-8"))
        except Exception:
            # Best effort: any read/parse failure falls back to an empty config.
            loaded = None
        if isinstance(loaded, dict):
            cfg = loaded

    pipeline_state = {"status": "complete"}
    pipeline_state["best_label"] = best.get("label")
    pipeline_state["full_hit_rate_pct"] = best.get("full_hit_rate_pct")
    pipeline_state["tokens_per_query"] = best.get("tokens_per_query")
    pipeline_state["case_source"] = case_source
    pipeline_state["updated_at"] = datetime.now(timezone.utc).isoformat()
    cfg["adaptation_pipeline"] = pipeline_state

    cfg_path.parent.mkdir(parents=True, exist_ok=True)
    cfg_path.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def run_post_init_adaptation(
    repo: str = ".",
    *,
    benchmark_size: int = 10,
    efficiency_iterations: int = 5,
    clear_profile: bool = False,
) -> dict:
    """Run accuracy-lock then efficiency adaptation protocol after setup/index.

    Args:
        repo: Repository root; resolved to an absolute path before use.
        benchmark_size: Requested number of repo-generated adaptation cases.
        efficiency_iterations: Upper bound on gap-fill efficiency trials;
            negative values are treated as zero.
        clear_profile: When true, clear the adaptive profile before evaluating.

    Returns:
        A report dict. With no usable cases it only carries ``status``,
        ``repo``, ``case_source`` and ``message``; otherwise it carries the
        stage summaries and ``report_path``, the JSON file the report was
        written to.
    """
    repo_path = Path(repo).resolve()
    run_index(repo_path.as_posix())

    if clear_profile:
        from .context_slices import clear_adaptive_profile

        clear_adaptive_profile(repo_path.as_posix())

    cases, case_source = _select_adaptation_cases(repo_path, benchmark_size)
    if not cases:
        return {
            "status": "no_benchmark_cases",
            "repo": repo_path.as_posix(),
            "case_source": case_source,
            "message": "No repo-usable adaptation cases available.",
        }

    # Accuracy stage: evaluate the same cases with each retrieval strategy.
    # NOTE(review): the evaluators query relative to the current working
    # directory (repo="." / relative scopes), not ``repo_path`` - confirm
    # callers always run from inside the target repository.
    slices_rows = [_evaluate_slices_case(case) for case in cases]
    plain_rows = [_evaluate_plain_case(case, allow_gapfill=False) for case in cases]
    plain_gap_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]

    slices_summary = _summarize("slices_accuracy_stage", slices_rows)
    plain_summary = _summarize("plain_accuracy_stage", plain_rows)
    plain_gap_summary = _summarize("plain_gapfill_accuracy_stage", plain_gap_rows)

    candidates = [slices_summary, plain_summary, plain_gap_summary]
    full_hit = [candidate for candidate in candidates if candidate["full_hit_rate_pct"] >= 100.0]
    if full_hit:
        # Among fully accurate strategies prefer the cheapest; a missing
        # tokens-per-hit value sorts last.
        best = min(full_hit, key=lambda item: (item["tokens_per_expected_hit"] or 10**9, item["tokens_per_query"]))
    else:
        best = max(candidates, key=lambda item: item["target_hit_rate_pct"])

    efficiency_trials: list[dict] = []
    active = best
    # Efficiency trials only apply when gap-fill won the accuracy stage. Decide
    # that once from ``best``: testing ``active["label"]`` on every iteration
    # ended the trials right after the first accepted one, because an accepted
    # trial carries its own "plain_gapfill_eff_trial_N" label.
    if best["label"] == "plain_gapfill_accuracy_stage":
        for idx in range(max(0, int(efficiency_iterations))):
            trial_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]
            trial = _summarize(f"plain_gapfill_eff_trial_{idx + 1}", trial_rows)
            efficiency_trials.append(trial)
            # Accept a trial only if it is at least as accurate and strictly cheaper.
            if trial["full_hit_rate_pct"] >= active["full_hit_rate_pct"] and trial["tokens_per_query"] < active["tokens_per_query"]:
                active = trial

    _write_back(repo_path, active, case_source)

    report = {
        "status": "ok",
        "repo": repo_path.as_posix(),
        # Number of cases actually evaluated, which may be below the request.
        "benchmark_size": len(cases),
        "requested_benchmark_size": int(benchmark_size),
        "efficiency_iterations": int(efficiency_iterations),
        "case_source": case_source,
        "stages": {
            "accuracy_candidates": [slices_summary, plain_summary, plain_gap_summary],
            "selected_after_accuracy": best,
            "efficiency_trials": efficiency_trials,
            "selected_final": active,
        },
    }

    planning_dir = repo_path / ".planning"
    planning_dir.mkdir(parents=True, exist_ok=True)
    out_path = planning_dir / "post_init_adaptation_report.json"
    out_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
    # The path is added after serialisation, so it is not part of the file itself.
    report["report_path"] = out_path.as_posix()
    return report
|