@pmaddire/gcie 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GCIE_USAGE.md +58 -1
- package/cli/app.py +18 -1
- package/cli/commands/adaptation.py +534 -199
- package/cli/commands/setup.py +53 -2
- package/package.json +1 -1
package/GCIE_USAGE.md
CHANGED
|
@@ -38,8 +38,13 @@ gcie.cmd adaptive-profile . --clear
|
|
|
38
38
|
```
|
|
39
39
|
|
|
40
40
|
Post-init adaptation pipeline:
|
|
41
|
+
- run from the target repo root (cd <repo> first); use . as scope
|
|
42
|
+
- adaptation now bootstraps per-family method defaults before accuracy rounds (plain/plain-gapfill/plain-rescue/slices)
|
|
43
|
+
- adaptation case generation is mixed by design (single-file, same-layer pairs, cross-subtree pairs, and some 3-file chains on larger runs)
|
|
41
44
|
```powershell
|
|
42
45
|
gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5 --clear-profile
|
|
46
|
+
# mixed-layer repos: use wider calibration
|
|
47
|
+
gcie.cmd adapt . --benchmark-size 25 --efficiency-iterations 5 --clear-profile
|
|
43
48
|
```
|
|
44
49
|
|
|
45
50
|
One-shot setup + adaptation:
|
|
@@ -148,7 +153,7 @@ When retrieval is weak, apply in this exact order:
|
|
|
148
153
|
1. Query upgrade: add explicit files, symbols, caller/entry anchor
|
|
149
154
|
2. Scope correction: subtree vs root
|
|
150
155
|
3. One profile/budget escalation
|
|
151
|
-
4. Targeted gap-fill for only missing must-have file(s)
|
|
156
|
+
4. Targeted gap-fill for only missing must-have file(s), preferring direct file-path scope first
|
|
152
157
|
5. Multi-hop decomposition only if still incomplete
|
|
153
158
|
|
|
154
159
|
Stop condition:
|
|
@@ -189,6 +194,29 @@ gcie.cmd index .
|
|
|
189
194
|
- Proceed to calibration only after coverage is reachable with stable behavior.
|
|
190
195
|
- If not reachable, keep safer fallback mode for affected families and continue tracking.
|
|
191
196
|
|
|
197
|
+
## Calibration Quality Gate (Cross-Repo, Required)
|
|
198
|
+
|
|
199
|
+
Before accepting adaptation results, verify calibration quality:
|
|
200
|
+
|
|
201
|
+
1. Family diversity floor:
|
|
202
|
+
- the generated benchmark set should cover at least 3 task families when the repo has multiple top-level subsystems
|
|
203
|
+
- if adaptation output is dominated by only `single_file` and `same_layer_pair`, treat it as underfit
|
|
204
|
+
|
|
205
|
+
2. Underfit recovery:
|
|
206
|
+
- rerun adaptation with wider calibration
|
|
207
|
+
```powershell
|
|
208
|
+
gcie.cmd adapt . --benchmark-size 25 --efficiency-iterations 5 --clear-profile
|
|
209
|
+
```
|
|
210
|
+
- keep `benchmark-size 10` only for small/single-layer repos or quick smoke checks
|
|
211
|
+
|
|
212
|
+
3. Accuracy-first acceptance:
|
|
213
|
+
- do not accept a profile below `100%` full-hit if a recoverable path exists
|
|
214
|
+
- run one rescue cycle (query upgrade -> scope correction -> one budget/profile rung -> targeted gap-fill)
|
|
215
|
+
- only then finalize family defaults
|
|
216
|
+
|
|
217
|
+
4. Cost lock sanity:
|
|
218
|
+
- if selected profile is much more expensive than cheapest (`>40%` token delta), keep status as cost-risk and continue family-level refinement
|
|
219
|
+
- do not freeze expensive global defaults unless they are uniquely required for `100%`
|
|
192
220
|
## Automatic Post-Trigger Adaptation (Required)
|
|
193
221
|
|
|
194
222
|
After trigger detection in a repo session:
|
|
@@ -338,8 +366,37 @@ After running adaptation:
|
|
|
338
366
|
Commands:
|
|
339
367
|
```powershell
|
|
340
368
|
gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5 --clear-profile
|
|
369
|
+
# mixed-layer repos: use wider calibration
|
|
370
|
+
gcie.cmd adapt . --benchmark-size 25 --efficiency-iterations 5 --clear-profile
|
|
341
371
|
```
|
|
342
372
|
```powershell
|
|
343
373
|
gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5
|
|
344
374
|
```
|
|
345
375
|
|
|
376
|
+
|
|
377
|
+
## Remove GCIE From A Repo
|
|
378
|
+
|
|
379
|
+
To remove GCIE-managed files from the current repo:
|
|
380
|
+
|
|
381
|
+
```powershell
|
|
382
|
+
gcie.cmd remove .
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
Options:
|
|
386
|
+
- keep `GCIE_USAGE.md`: `--keep-usage`
|
|
387
|
+
- keep `SETUP_ANY_REPO.md`: `--keep-setup-doc`
|
|
388
|
+
- also remove `.planning` artifacts: `--remove-planning`
|
|
389
|
+
|
|
390
|
+
Example:
|
|
391
|
+
|
|
392
|
+
```powershell
|
|
393
|
+
gcie.cmd remove . --remove-planning
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
|
package/cli/app.py
CHANGED
|
@@ -14,7 +14,7 @@ from .commands.context_slices import adaptive_profile_summary, clear_adaptive_pr
|
|
|
14
14
|
from .commands.debug import run_debug
|
|
15
15
|
from .commands.index import run_index
|
|
16
16
|
from .commands.query import run_query
|
|
17
|
-
from .commands.setup import run_setup
|
|
17
|
+
from .commands.setup import run_remove, run_setup
|
|
18
18
|
|
|
19
19
|
app = typer.Typer(help="GraphCode Intelligence Engine CLI")
|
|
20
20
|
|
|
@@ -177,6 +177,21 @@ def setup_cmd(
|
|
|
177
177
|
typer.echo(json.dumps(result, indent=2))
|
|
178
178
|
|
|
179
179
|
|
|
180
|
+
@app.command("remove")
|
|
181
|
+
def remove_cmd(
|
|
182
|
+
path: str = typer.Argument("."),
|
|
183
|
+
remove_planning: bool = typer.Option(False, "--remove-planning", help="Also remove .planning artifacts"),
|
|
184
|
+
keep_usage: bool = typer.Option(False, "--keep-usage", help="Keep GCIE_USAGE.md in place"),
|
|
185
|
+
keep_setup_doc: bool = typer.Option(False, "--keep-setup-doc", help="Keep SETUP_ANY_REPO.md in place"),
|
|
186
|
+
) -> None:
|
|
187
|
+
result = run_remove(
|
|
188
|
+
path,
|
|
189
|
+
remove_planning=remove_planning,
|
|
190
|
+
remove_gcie_usage=not keep_usage,
|
|
191
|
+
remove_setup_doc=not keep_setup_doc,
|
|
192
|
+
)
|
|
193
|
+
typer.echo(json.dumps(result, indent=2))
|
|
194
|
+
|
|
180
195
|
@app.command("cache-clear")
|
|
181
196
|
def cache_clear_cmd(path: str = typer.Argument(".")) -> None:
|
|
182
197
|
result = clear_cache(path)
|
|
@@ -197,3 +212,5 @@ def cache_warm_cmd(path: str = typer.Argument(".")) -> None:
|
|
|
197
212
|
|
|
198
213
|
if __name__ == "__main__":
|
|
199
214
|
app()
|
|
215
|
+
|
|
216
|
+
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Post-initialization adaptation pipeline (accuracy first, then efficiency)."""
|
|
1
|
+
"""Post-initialization adaptation pipeline (accuracy rounds first, then efficiency rounds)."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
@@ -14,7 +14,7 @@ from .index import run_index
|
|
|
14
14
|
|
|
15
15
|
try:
|
|
16
16
|
from performance.context_benchmark import BENCHMARK_CASES
|
|
17
|
-
except Exception: # pragma: no cover
|
|
17
|
+
except Exception: # pragma: no cover
|
|
18
18
|
BENCHMARK_CASES = ()
|
|
19
19
|
|
|
20
20
|
|
|
@@ -52,16 +52,60 @@ _IGNORED_DIRS = {
|
|
|
52
52
|
"build",
|
|
53
53
|
"coverage",
|
|
54
54
|
}
|
|
55
|
+
_METHOD_ORDER = ["plain", "plain_gapfill", "plain_rescue", "slices"]
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
def _query_keywords(text: str) -> list[str]:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
59
|
+
return [t for t in _WORD_RE.findall(text.lower()) if len(t) >= 4][:8]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_query_cues_for_file(repo_path: Path, rel: str) -> list[str]:
|
|
63
|
+
path = repo_path / rel
|
|
64
|
+
try:
|
|
65
|
+
text = path.read_text(encoding='utf-8', errors='ignore')
|
|
66
|
+
except Exception:
|
|
67
|
+
return [Path(rel).stem.lower()]
|
|
68
|
+
|
|
69
|
+
body = text[:12000]
|
|
70
|
+
cues: list[str] = [Path(rel).stem.lower()]
|
|
71
|
+
|
|
72
|
+
patterns = [
|
|
73
|
+
r"^\s*def\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
74
|
+
r"^\s*class\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
75
|
+
r"^\s*(?:async\s+)?function\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
76
|
+
r"^\s*const\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:async\s*)?(?:\(|function\b)",
|
|
77
|
+
r"^\s*export\s+function\s+([A-Za-z_][A-Za-z0-9_]*)",
|
|
78
|
+
]
|
|
79
|
+
for pat in patterns:
|
|
80
|
+
for name in re.findall(pat, body, flags=re.MULTILINE):
|
|
81
|
+
token = str(name).lower()
|
|
82
|
+
if len(token) >= 4:
|
|
83
|
+
cues.append(token)
|
|
84
|
+
if len(cues) >= 8:
|
|
85
|
+
break
|
|
86
|
+
if len(cues) >= 8:
|
|
87
|
+
break
|
|
64
88
|
|
|
89
|
+
for route in re.findall(r"['\"](/api/[A-Za-z0-9_/{}/-]+)['\"]", body):
|
|
90
|
+
cues.append(route.lower())
|
|
91
|
+
if len(cues) >= 10:
|
|
92
|
+
break
|
|
93
|
+
|
|
94
|
+
for key in re.findall(r"\b[A-Z][A-Z0-9_]{3,}\b", body):
|
|
95
|
+
cues.append(key.lower())
|
|
96
|
+
if len(cues) >= 12:
|
|
97
|
+
break
|
|
98
|
+
|
|
99
|
+
dedup: list[str] = []
|
|
100
|
+
seen: set[str] = set()
|
|
101
|
+
for cue in cues:
|
|
102
|
+
if cue in seen:
|
|
103
|
+
continue
|
|
104
|
+
seen.add(cue)
|
|
105
|
+
dedup.append(cue)
|
|
106
|
+
if len(dedup) >= 8:
|
|
107
|
+
break
|
|
108
|
+
return dedup
|
|
65
109
|
|
|
66
110
|
def _node_to_file(node_id: str) -> str | None:
|
|
67
111
|
if node_id.startswith("file:"):
|
|
@@ -83,17 +127,33 @@ def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
|
|
|
83
127
|
return f"{base}/{normalized}"
|
|
84
128
|
|
|
85
129
|
|
|
86
|
-
def _family_path(expected_files: tuple[str, ...]) -> str:
|
|
87
|
-
if not expected_files:
|
|
88
|
-
return "."
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
130
|
+
def _family_path(expected_files: tuple[str, ...]) -> str:
|
|
131
|
+
if not expected_files:
|
|
132
|
+
return "."
|
|
133
|
+
parent_parts: list[tuple[str, ...]] = []
|
|
134
|
+
for rel in expected_files:
|
|
135
|
+
parent = Path(rel).parent
|
|
136
|
+
if str(parent) in {"", "."}:
|
|
137
|
+
parent_parts.append(tuple())
|
|
138
|
+
else:
|
|
139
|
+
parent_parts.append(tuple(parent.parts))
|
|
140
|
+
|
|
141
|
+
common: list[str] = []
|
|
142
|
+
if parent_parts:
|
|
143
|
+
shortest = min(len(parts) for parts in parent_parts)
|
|
144
|
+
for idx in range(shortest):
|
|
145
|
+
token = parent_parts[0][idx]
|
|
146
|
+
if all(parts[idx] == token for parts in parent_parts):
|
|
147
|
+
common.append(token)
|
|
148
|
+
else:
|
|
149
|
+
break
|
|
150
|
+
if common:
|
|
151
|
+
return Path(*common).as_posix()
|
|
152
|
+
|
|
153
|
+
heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
|
|
154
|
+
return next(iter(heads)) if len(heads) == 1 else "."
|
|
94
155
|
|
|
95
156
|
def _safe_scope(path: str) -> str:
|
|
96
|
-
"""Return a valid retrieval scope for the current repo."""
|
|
97
157
|
if not path or path in {".", "./"}:
|
|
98
158
|
return "."
|
|
99
159
|
candidate = Path(path)
|
|
@@ -102,34 +162,81 @@ def _safe_scope(path: str) -> str:
|
|
|
102
162
|
return "."
|
|
103
163
|
|
|
104
164
|
|
|
105
|
-
def _plan_query(case) -> tuple[str, str, int | None]:
|
|
106
|
-
path = _family_path(case.expected_files)
|
|
107
|
-
if getattr(case, "name", "") == "cli_context_command":
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
165
|
+
def _plan_query(case) -> tuple[str, str, int | None]:
|
|
166
|
+
path = _family_path(case.expected_files)
|
|
167
|
+
if getattr(case, "name", "") == "cli_context_command":
|
|
168
|
+
return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950
|
|
169
|
+
|
|
170
|
+
repo_path = Path('.').resolve()
|
|
171
|
+
cue_terms: list[str] = []
|
|
172
|
+
for rel in case.expected_files:
|
|
173
|
+
cue_terms.extend(_extract_query_cues_for_file(repo_path, rel)[:3])
|
|
174
|
+
cue_terms.extend(_query_keywords(case.query)[:4])
|
|
175
|
+
|
|
176
|
+
dedup: list[str] = []
|
|
177
|
+
seen: set[str] = set()
|
|
178
|
+
for token in [*case.expected_files, *cue_terms]:
|
|
179
|
+
key = token.lower()
|
|
180
|
+
if key in seen:
|
|
181
|
+
continue
|
|
182
|
+
seen.add(key)
|
|
183
|
+
dedup.append(token)
|
|
184
|
+
if len(dedup) >= 14:
|
|
185
|
+
break
|
|
186
|
+
query = " ".join(dedup).strip()
|
|
187
|
+
|
|
188
|
+
expected_count = len(case.expected_files)
|
|
189
|
+
if expected_count >= 3:
|
|
190
|
+
budget = 1100
|
|
191
|
+
elif expected_count == 2:
|
|
192
|
+
budget = 950
|
|
193
|
+
else:
|
|
194
|
+
budget = 850
|
|
195
|
+
|
|
196
|
+
if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
|
|
197
|
+
budget = 800
|
|
122
198
|
return path, query, budget
|
|
123
199
|
|
|
200
|
+
def _case_family(case) -> str:
|
|
201
|
+
_, planned_query, _ = _plan_query(case)
|
|
202
|
+
return _classify_query_family(planned_query)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _build_gapfill_query(case, missing_rel: str) -> str:
|
|
206
|
+
anchors = [rel for rel in case.expected_files if rel != missing_rel][:2]
|
|
207
|
+
repo_path = Path('.').resolve()
|
|
208
|
+
|
|
209
|
+
tokens: list[str] = [missing_rel]
|
|
210
|
+
tokens.extend(anchors)
|
|
211
|
+
|
|
212
|
+
cue_files = [missing_rel]
|
|
213
|
+
cue_files.extend(anchors)
|
|
214
|
+
for rel in cue_files:
|
|
215
|
+
tokens.extend(_extract_query_cues_for_file(repo_path, rel)[:4])
|
|
216
|
+
|
|
217
|
+
tokens.extend(_query_keywords(case.query)[:4])
|
|
124
218
|
|
|
125
|
-
|
|
219
|
+
dedup: list[str] = []
|
|
220
|
+
seen: set[str] = set()
|
|
221
|
+
for tok in tokens:
|
|
222
|
+
key = tok.lower()
|
|
223
|
+
if key in seen:
|
|
224
|
+
continue
|
|
225
|
+
seen.add(key)
|
|
226
|
+
dedup.append(tok)
|
|
227
|
+
if len(dedup) >= 14:
|
|
228
|
+
break
|
|
229
|
+
|
|
230
|
+
return " ".join(dedup)
|
|
231
|
+
|
|
232
|
+
def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
|
|
126
233
|
path, query, budget = _plan_query(case)
|
|
127
234
|
path = _safe_scope(path)
|
|
128
235
|
payload = run_context(path, query, budget=budget, intent=case.intent)
|
|
129
236
|
files = {
|
|
130
|
-
_normalize_scoped_path(path,
|
|
131
|
-
for
|
|
132
|
-
if
|
|
237
|
+
_normalize_scoped_path(path, rel)
|
|
238
|
+
for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
|
|
239
|
+
if rel
|
|
133
240
|
}
|
|
134
241
|
expected = tuple(case.expected_files)
|
|
135
242
|
missing = [rel for rel in expected if rel not in files]
|
|
@@ -139,19 +246,39 @@ def _evaluate_plain_case(case, *, allow_gapfill: bool = True) -> CaseResult:
|
|
|
139
246
|
if allow_gapfill and missing:
|
|
140
247
|
mode = "plain_context_workflow_gapfill"
|
|
141
248
|
for rel in list(missing):
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
249
|
+
gap_query = _build_gapfill_query(case, rel)
|
|
250
|
+
|
|
251
|
+
# Prefer direct file-targeted recovery when possible to avoid expensive broad rescues.
|
|
252
|
+
direct_scope = rel if (Path(rel).exists() and Path(rel).is_file()) else None
|
|
253
|
+
base_scope = _safe_scope(_family_path((rel,)))
|
|
254
|
+
scopes: list[str] = []
|
|
255
|
+
if direct_scope:
|
|
256
|
+
scopes.append(direct_scope)
|
|
257
|
+
if base_scope not in scopes:
|
|
258
|
+
scopes.append(base_scope)
|
|
259
|
+
|
|
260
|
+
budgets = [500 if rel.endswith('/main.py') or rel == 'main.py' else 900]
|
|
261
|
+
if len(scopes) > 1:
|
|
262
|
+
budgets.append(budgets[0])
|
|
263
|
+
|
|
264
|
+
if aggressive_gapfill:
|
|
265
|
+
if '.' not in scopes:
|
|
266
|
+
scopes.append('.')
|
|
267
|
+
budgets.append(max(budgets[0], 1200))
|
|
268
|
+
mode = "plain_context_workflow_gapfill_rescue"
|
|
269
|
+
|
|
270
|
+
for scope, gap_budget in zip(scopes, budgets):
|
|
271
|
+
gap_payload = run_context(scope, gap_query, budget=gap_budget, intent=case.intent)
|
|
272
|
+
tokens += int(gap_payload.get("tokens", 0) or 0)
|
|
273
|
+
gap_files = {
|
|
274
|
+
_normalize_scoped_path(scope, rel2)
|
|
275
|
+
for rel2 in (_node_to_file(item.get("node_id", "")) for item in gap_payload.get("snippets", []))
|
|
276
|
+
if rel2
|
|
277
|
+
}
|
|
278
|
+
files.update(gap_files)
|
|
279
|
+
missing = [m for m in expected if m not in files]
|
|
280
|
+
if not missing:
|
|
281
|
+
break
|
|
155
282
|
if not missing:
|
|
156
283
|
break
|
|
157
284
|
|
|
@@ -171,9 +298,9 @@ def _evaluate_plain_case(case, *, allow_gapfill: bool = True) -> CaseResult:
|
|
|
171
298
|
|
|
172
299
|
def _evaluate_slices_case(case) -> CaseResult:
|
|
173
300
|
payload = run_context_slices(
|
|
174
|
-
repo=
|
|
301
|
+
repo='.',
|
|
175
302
|
query=case.query,
|
|
176
|
-
profile=
|
|
303
|
+
profile='low',
|
|
177
304
|
stage_a_budget=300,
|
|
178
305
|
stage_b_budget=600,
|
|
179
306
|
max_total=800,
|
|
@@ -184,19 +311,15 @@ def _evaluate_slices_case(case) -> CaseResult:
|
|
|
184
311
|
)
|
|
185
312
|
mode = "slices_low"
|
|
186
313
|
tokens = int(payload.get("token_estimate", payload.get("tokens", 0)) or 0)
|
|
187
|
-
files = {
|
|
188
|
-
_node_to_file(item.get("node_id", ""))
|
|
189
|
-
for item in payload.get("snippets", [])
|
|
190
|
-
}
|
|
191
|
-
files = {f for f in files if f}
|
|
314
|
+
files = {f for f in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", [])) if f}
|
|
192
315
|
expected = tuple(case.expected_files)
|
|
193
316
|
missing = [rel for rel in expected if rel not in files]
|
|
194
317
|
if missing:
|
|
195
318
|
mode = "slices_recall"
|
|
196
319
|
recall_payload = run_context_slices(
|
|
197
|
-
repo=
|
|
320
|
+
repo='.',
|
|
198
321
|
query=case.query,
|
|
199
|
-
profile=
|
|
322
|
+
profile='recall',
|
|
200
323
|
stage_a_budget=400,
|
|
201
324
|
stage_b_budget=800,
|
|
202
325
|
max_total=1200,
|
|
@@ -206,21 +329,15 @@ def _evaluate_slices_case(case) -> CaseResult:
|
|
|
206
329
|
include_tests=False,
|
|
207
330
|
)
|
|
208
331
|
tokens += int(recall_payload.get("token_estimate", recall_payload.get("tokens", 0)) or 0)
|
|
209
|
-
files.update(
|
|
210
|
-
{
|
|
211
|
-
f
|
|
212
|
-
for f in (_node_to_file(item.get("node_id", "")) for item in recall_payload.get("snippets", []))
|
|
213
|
-
if f
|
|
214
|
-
}
|
|
215
|
-
)
|
|
332
|
+
files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in recall_payload.get("snippets", [])) if f})
|
|
216
333
|
missing = [rel for rel in expected if rel not in files]
|
|
217
334
|
if missing:
|
|
218
335
|
mode = "slices_recall_pin"
|
|
219
336
|
for rel in list(missing):
|
|
220
337
|
pin_payload = run_context_slices(
|
|
221
|
-
repo=
|
|
338
|
+
repo='.',
|
|
222
339
|
query=case.query,
|
|
223
|
-
profile=
|
|
340
|
+
profile='recall',
|
|
224
341
|
stage_a_budget=400,
|
|
225
342
|
stage_b_budget=800,
|
|
226
343
|
max_total=1200,
|
|
@@ -230,18 +347,13 @@ def _evaluate_slices_case(case) -> CaseResult:
|
|
|
230
347
|
include_tests=False,
|
|
231
348
|
)
|
|
232
349
|
tokens += int(pin_payload.get("token_estimate", pin_payload.get("tokens", 0)) or 0)
|
|
233
|
-
files.update(
|
|
234
|
-
{
|
|
235
|
-
f
|
|
236
|
-
for f in (_node_to_file(item.get("node_id", "")) for item in pin_payload.get("snippets", []))
|
|
237
|
-
if f
|
|
238
|
-
}
|
|
239
|
-
)
|
|
350
|
+
files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in pin_payload.get("snippets", [])) if f})
|
|
240
351
|
missing = [m for m in expected if m not in files]
|
|
241
352
|
if not missing:
|
|
242
353
|
break
|
|
354
|
+
|
|
243
355
|
expected_hits = len(expected) - len(missing)
|
|
244
|
-
family =
|
|
356
|
+
family = _case_family(case)
|
|
245
357
|
return CaseResult(
|
|
246
358
|
name=case.name,
|
|
247
359
|
family=family,
|
|
@@ -254,6 +366,16 @@ def _evaluate_slices_case(case) -> CaseResult:
|
|
|
254
366
|
)
|
|
255
367
|
|
|
256
368
|
|
|
369
|
+
def _evaluate_case_with_method(case, method: str) -> CaseResult:
|
|
370
|
+
if method == "plain":
|
|
371
|
+
return _evaluate_plain_case(case, allow_gapfill=False)
|
|
372
|
+
if method == "plain_gapfill":
|
|
373
|
+
return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=False)
|
|
374
|
+
if method == "plain_rescue":
|
|
375
|
+
return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=True)
|
|
376
|
+
return _evaluate_slices_case(case)
|
|
377
|
+
|
|
378
|
+
|
|
257
379
|
def _summarize(label: str, rows: list[CaseResult]) -> dict:
|
|
258
380
|
case_count = len(rows)
|
|
259
381
|
pass_count = sum(1 for row in rows if row.context_complete)
|
|
@@ -275,7 +397,7 @@ def _summarize(label: str, rows: list[CaseResult]) -> dict:
|
|
|
275
397
|
|
|
276
398
|
def _collect_source_files(repo_path: Path) -> list[str]:
|
|
277
399
|
files: list[str] = []
|
|
278
|
-
for path in repo_path.rglob(
|
|
400
|
+
for path in repo_path.rglob('*'):
|
|
279
401
|
if not path.is_file():
|
|
280
402
|
continue
|
|
281
403
|
rel = path.relative_to(repo_path)
|
|
@@ -287,29 +409,6 @@ def _collect_source_files(repo_path: Path) -> list[str]:
|
|
|
287
409
|
return sorted(files)
|
|
288
410
|
|
|
289
411
|
|
|
290
|
-
def _static_cases_for_repo(repo_path: Path) -> list[AdaptCase]:
|
|
291
|
-
out: list[AdaptCase] = []
|
|
292
|
-
for case in list(BENCHMARK_CASES):
|
|
293
|
-
expected = tuple(case.expected_files)
|
|
294
|
-
if not expected:
|
|
295
|
-
continue
|
|
296
|
-
if not all((repo_path / rel).exists() for rel in expected):
|
|
297
|
-
continue
|
|
298
|
-
baseline = tuple(rel for rel in case.baseline_files if (repo_path / rel).exists())
|
|
299
|
-
if not baseline:
|
|
300
|
-
baseline = expected
|
|
301
|
-
out.append(
|
|
302
|
-
AdaptCase(
|
|
303
|
-
name=case.name,
|
|
304
|
-
query=case.query,
|
|
305
|
-
intent=case.intent,
|
|
306
|
-
baseline_files=baseline,
|
|
307
|
-
expected_files=expected,
|
|
308
|
-
)
|
|
309
|
-
)
|
|
310
|
-
return out
|
|
311
|
-
|
|
312
|
-
|
|
313
412
|
def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
|
|
314
413
|
files = _collect_source_files(repo_path)
|
|
315
414
|
if not files:
|
|
@@ -317,15 +416,20 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
|
|
|
317
416
|
|
|
318
417
|
by_dir: dict[str, list[str]] = {}
|
|
319
418
|
for rel in files:
|
|
320
|
-
parent = str(Path(rel).parent).replace(
|
|
419
|
+
parent = str(Path(rel).parent).replace('\\', '/')
|
|
321
420
|
by_dir.setdefault(parent, []).append(rel)
|
|
322
421
|
|
|
323
422
|
rows: list[AdaptCase] = []
|
|
324
423
|
seen_names: set[str] = set()
|
|
424
|
+
seen_expected: set[tuple[str, ...]] = set()
|
|
425
|
+
cue_cache: dict[str, list[str]] = {}
|
|
325
426
|
|
|
326
|
-
def add_case(name: str, expected: tuple[str, ...], intent: str =
|
|
427
|
+
def add_case(name: str, expected: tuple[str, ...], intent: str = 'explore') -> None:
|
|
327
428
|
if len(rows) >= needed:
|
|
328
429
|
return
|
|
430
|
+
expected_key = tuple(sorted(expected))
|
|
431
|
+
if expected_key in seen_expected:
|
|
432
|
+
return
|
|
329
433
|
safe_name = re.sub(r"[^a-zA-Z0-9_]+", "_", name).strip("_").lower() or "case"
|
|
330
434
|
if safe_name in seen_names:
|
|
331
435
|
idx = 2
|
|
@@ -333,93 +437,224 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
|
|
|
333
437
|
idx += 1
|
|
334
438
|
safe_name = f"{safe_name}_{idx}"
|
|
335
439
|
seen_names.add(safe_name)
|
|
336
|
-
|
|
440
|
+
seen_expected.add(expected_key)
|
|
441
|
+
symbols: list[str] = []
|
|
337
442
|
for rel in expected:
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
443
|
+
cues = cue_cache.get(rel)
|
|
444
|
+
if cues is None:
|
|
445
|
+
cues = _extract_query_cues_for_file(repo_path, rel)
|
|
446
|
+
cue_cache[rel] = cues
|
|
447
|
+
symbols.extend(cues)
|
|
448
|
+
if not symbols:
|
|
449
|
+
symbols = [Path(rel).stem.lower() for rel in expected]
|
|
450
|
+
query = f"{' '.join(expected)} {' '.join(symbols[:8])}".strip()
|
|
451
|
+
rows.append(AdaptCase(name=safe_name, query=query, intent=intent, baseline_files=expected, expected_files=expected))
|
|
452
|
+
|
|
453
|
+
# Build a diversified sample so adaptation can learn in mixed-layer repos.
|
|
454
|
+
single_target = max(1, needed // 3)
|
|
455
|
+
same_dir_target = max(1, needed // 3)
|
|
456
|
+
cross_dir_target = max(1, needed - single_target - same_dir_target)
|
|
457
|
+
|
|
458
|
+
# 1) singles
|
|
352
459
|
for rel in files:
|
|
353
|
-
add_case(f"single_{Path(rel).stem}", (rel,), intent=
|
|
354
|
-
if len(rows) >=
|
|
460
|
+
add_case(f"single_{Path(rel).stem}", (rel,), intent='explore')
|
|
461
|
+
if len(rows) >= single_target:
|
|
355
462
|
break
|
|
356
463
|
|
|
357
|
-
#
|
|
358
|
-
|
|
464
|
+
# 2) same-dir adjacent pairs
|
|
465
|
+
same_pairs_added = 0
|
|
466
|
+
for parent, group in sorted(by_dir.items(), key=lambda x: x[0]):
|
|
359
467
|
if len(group) < 2:
|
|
360
468
|
continue
|
|
469
|
+
label = "root" if parent in {'.', ''} else parent
|
|
361
470
|
group = sorted(group)
|
|
362
471
|
for idx in range(len(group) - 1):
|
|
363
|
-
add_case(f"pair_{
|
|
472
|
+
add_case(f"pair_{label}_{idx}", (group[idx], group[idx + 1]), intent='explore')
|
|
364
473
|
if len(rows) >= needed:
|
|
365
474
|
return rows[:needed]
|
|
475
|
+
same_pairs_added += 1
|
|
476
|
+
if same_pairs_added >= same_dir_target:
|
|
477
|
+
break
|
|
478
|
+
if same_pairs_added >= same_dir_target:
|
|
479
|
+
break
|
|
366
480
|
|
|
367
|
-
#
|
|
481
|
+
# 3) cross-dir pairs (top-level representatives)
|
|
368
482
|
tops: dict[str, str] = {}
|
|
369
483
|
for rel in files:
|
|
370
484
|
top = Path(rel).parts[0] if Path(rel).parts else rel
|
|
371
485
|
tops.setdefault(top, rel)
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
486
|
+
top_items = sorted(tops.items(), key=lambda item: item[0])
|
|
487
|
+
cross_added = 0
|
|
488
|
+
for idx in range(len(top_items) - 1):
|
|
489
|
+
left = top_items[idx][1]
|
|
490
|
+
right = top_items[idx + 1][1]
|
|
491
|
+
add_case(f"cross_{top_items[idx][0]}_{top_items[idx + 1][0]}", (left, right), intent='explore')
|
|
375
492
|
if len(rows) >= needed:
|
|
493
|
+
return rows[:needed]
|
|
494
|
+
cross_added += 1
|
|
495
|
+
if cross_added >= cross_dir_target:
|
|
376
496
|
break
|
|
377
497
|
|
|
498
|
+
# 4) include some 3-file chains for multi-hop calibration when dataset is larger.
|
|
499
|
+
if needed >= 12 and len(rows) < needed:
|
|
500
|
+
chain_budget = max(1, needed // 6)
|
|
501
|
+
chains_added = 0
|
|
502
|
+
reps = [item[1] for item in top_items]
|
|
503
|
+
for idx in range(len(reps) - 2):
|
|
504
|
+
add_case(
|
|
505
|
+
f"chain_{idx}",
|
|
506
|
+
(reps[idx], reps[idx + 1], reps[idx + 2]),
|
|
507
|
+
intent='refactor',
|
|
508
|
+
)
|
|
509
|
+
if len(rows) >= needed:
|
|
510
|
+
return rows[:needed]
|
|
511
|
+
chains_added += 1
|
|
512
|
+
if chains_added >= chain_budget:
|
|
513
|
+
break
|
|
514
|
+
|
|
515
|
+
# 5) fill remainder with additional nearby pairs
|
|
516
|
+
if len(rows) < needed:
|
|
517
|
+
for idx in range(len(files) - 1):
|
|
518
|
+
add_case(f"fill_{idx}", (files[idx], files[idx + 1]), intent='explore')
|
|
519
|
+
if len(rows) >= needed:
|
|
520
|
+
break
|
|
521
|
+
|
|
378
522
|
return rows[:needed]
|
|
379
523
|
|
|
380
524
|
|
|
381
525
|
def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list[AdaptCase], str]:
|
|
382
|
-
"""Select adaptation cases generated entirely from the target repo."""
|
|
383
526
|
benchmark_size = max(1, int(benchmark_size))
|
|
384
527
|
generated = _generated_cases_for_repo(repo_path, benchmark_size)
|
|
385
528
|
if generated:
|
|
386
|
-
return generated[:benchmark_size],
|
|
387
|
-
return [],
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
def
|
|
391
|
-
|
|
529
|
+
return generated[:benchmark_size], 'generated_repo_local'
|
|
530
|
+
return [], 'none_available'
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def _next_method(method: str) -> str:
|
|
534
|
+
try:
|
|
535
|
+
idx = _METHOD_ORDER.index(method)
|
|
536
|
+
except ValueError:
|
|
537
|
+
return _METHOD_ORDER[0]
|
|
538
|
+
return _METHOD_ORDER[min(idx + 1, len(_METHOD_ORDER) - 1)]
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def _cheaper_method(method: str) -> str | None:
|
|
542
|
+
try:
|
|
543
|
+
idx = _METHOD_ORDER.index(method)
|
|
544
|
+
except ValueError:
|
|
545
|
+
return None
|
|
546
|
+
if idx <= 0:
|
|
547
|
+
return None
|
|
548
|
+
return _METHOD_ORDER[idx - 1]
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) -> tuple[list[CaseResult], dict, dict[str, dict]]:
|
|
552
|
+
rows: list[CaseResult] = []
|
|
553
|
+
for case in cases:
|
|
554
|
+
family = _case_family(case)
|
|
555
|
+
method = family_policy.get(family, 'plain')
|
|
556
|
+
rows.append(_evaluate_case_with_method(case, method))
|
|
557
|
+
summary = _summarize('policy_run', rows)
|
|
558
|
+
|
|
559
|
+
by_family: dict[str, dict] = {}
|
|
560
|
+
for row in rows:
|
|
561
|
+
entry = by_family.setdefault(row.family, {'cases': 0, 'passes': 0, 'tokens': 0})
|
|
562
|
+
entry['cases'] += 1
|
|
563
|
+
entry['passes'] += 1 if row.context_complete else 0
|
|
564
|
+
entry['tokens'] += row.tokens
|
|
565
|
+
for fam, entry in by_family.items():
|
|
566
|
+
entry['pass_rate'] = round(entry['passes'] / max(1, entry['cases']), 3)
|
|
567
|
+
entry['tokens_per_case'] = round(entry['tokens'] / max(1, entry['cases']), 1)
|
|
568
|
+
|
|
569
|
+
return rows, summary, by_family
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _select_best_summary(summaries: list[dict]) -> dict:
|
|
573
|
+
full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
|
|
574
|
+
if full_hit:
|
|
575
|
+
return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
|
|
576
|
+
return max(
|
|
577
|
+
summaries,
|
|
578
|
+
key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
|
|
579
|
+
)
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _bootstrap_family_policy(cases: list[AdaptCase], families: list[str]) -> tuple[dict[str, str], list[dict]]:
|
|
583
|
+
policy: dict[str, str] = {}
|
|
584
|
+
diagnostics: list[dict] = []
|
|
585
|
+
for fam in families:
|
|
586
|
+
fam_cases = [case for case in cases if _case_family(case) == fam]
|
|
587
|
+
if not fam_cases:
|
|
588
|
+
policy[fam] = "plain"
|
|
589
|
+
continue
|
|
590
|
+
|
|
591
|
+
method_summaries: list[dict] = []
|
|
592
|
+
for method in _METHOD_ORDER:
|
|
593
|
+
rows = [_evaluate_case_with_method(case, method) for case in fam_cases]
|
|
594
|
+
summary = _summarize(f"bootstrap_{fam}_{method}", rows)
|
|
595
|
+
summary["method"] = method
|
|
596
|
+
summary["family"] = fam
|
|
597
|
+
method_summaries.append(summary)
|
|
598
|
+
|
|
599
|
+
best = _select_best_summary(method_summaries)
|
|
600
|
+
selected_method = str(best.get("method", "plain"))
|
|
601
|
+
policy[fam] = selected_method
|
|
602
|
+
diagnostics.append(
|
|
603
|
+
{
|
|
604
|
+
"family": fam,
|
|
605
|
+
"selected_method": selected_method,
|
|
606
|
+
"selected_summary": best,
|
|
607
|
+
"candidates": method_summaries,
|
|
608
|
+
}
|
|
609
|
+
)
|
|
610
|
+
return policy, diagnostics
|
|
611
|
+
def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
|
|
612
|
+
cfg_path = repo_path / '.gcie' / 'context_config.json'
|
|
392
613
|
if cfg_path.exists():
|
|
393
614
|
try:
|
|
394
|
-
cfg = json.loads(cfg_path.read_text(encoding=
|
|
615
|
+
cfg = json.loads(cfg_path.read_text(encoding='utf-8'))
|
|
395
616
|
if not isinstance(cfg, dict):
|
|
396
617
|
cfg = {}
|
|
397
618
|
except Exception:
|
|
398
619
|
cfg = {}
|
|
399
620
|
else:
|
|
400
621
|
cfg = {}
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
622
|
+
|
|
623
|
+
cfg['adaptation_pipeline'] = {
|
|
624
|
+
'status': pipeline_status,
|
|
625
|
+
'best_label': best.get('label'),
|
|
626
|
+
'full_hit_rate_pct': best.get('full_hit_rate_pct'),
|
|
627
|
+
'tokens_per_query': best.get('tokens_per_query'),
|
|
628
|
+
'case_source': case_source,
|
|
629
|
+
'cost_analysis': cost_analysis,
|
|
630
|
+
'family_policy': family_policy,
|
|
631
|
+
'updated_at': datetime.now(timezone.utc).isoformat(),
|
|
409
632
|
}
|
|
410
633
|
cfg_path.parent.mkdir(parents=True, exist_ok=True)
|
|
411
|
-
cfg_path.write_text(json.dumps(cfg, indent=2), encoding=
|
|
412
|
-
|
|
634
|
+
cfg_path.write_text(json.dumps(cfg, indent=2), encoding='utf-8')
|
|
635
|
+
|
|
636
|
+
def _select_best_full_hit(candidates: list[dict]) -> dict | None:
|
|
637
|
+
full_hit = [c for c in candidates if c.get('full_hit_rate_pct', 0.0) >= 100.0]
|
|
638
|
+
if not full_hit:
|
|
639
|
+
return None
|
|
640
|
+
return min(
|
|
641
|
+
full_hit,
|
|
642
|
+
key=lambda item: (item.get('tokens_per_expected_hit') or 10**9, item.get('tokens_per_query', 10**9)),
|
|
643
|
+
)
|
|
413
644
|
|
|
414
645
|
def run_post_init_adaptation(
|
|
415
|
-
repo: str =
|
|
646
|
+
repo: str = '.',
|
|
416
647
|
*,
|
|
417
648
|
benchmark_size: int = 10,
|
|
418
649
|
efficiency_iterations: int = 5,
|
|
419
650
|
clear_profile: bool = False,
|
|
420
651
|
) -> dict:
|
|
421
|
-
"""Run accuracy-lock then efficiency adaptation protocol after setup/index."""
|
|
422
652
|
repo_path = Path(repo).resolve()
|
|
653
|
+
|
|
654
|
+
# Ensure all relative retrieval/evaluation calls execute in the target repo.
|
|
655
|
+
import os
|
|
656
|
+
|
|
657
|
+
os.chdir(repo_path)
|
|
423
658
|
run_index(repo_path.as_posix())
|
|
424
659
|
|
|
425
660
|
if clear_profile:
|
|
@@ -430,80 +665,180 @@ def run_post_init_adaptation(
|
|
|
430
665
|
cases, case_source = _select_adaptation_cases(repo_path, benchmark_size)
|
|
431
666
|
if not cases:
|
|
432
667
|
return {
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
668
|
+
'status': 'no_benchmark_cases',
|
|
669
|
+
'repo': repo_path.as_posix(),
|
|
670
|
+
'case_source': case_source,
|
|
671
|
+
'message': 'No repo-usable adaptation cases available.',
|
|
437
672
|
}
|
|
438
673
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
674
|
+
families = sorted({_case_family(case) for case in cases})
|
|
675
|
+
family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families)
|
|
676
|
+
|
|
677
|
+
# Accuracy rounds: promote methods per failing family until lock.
|
|
678
|
+
accuracy_rounds_max = 5
|
|
679
|
+
accuracy_rounds: list[dict] = []
|
|
680
|
+
lock_streak = 0
|
|
681
|
+
|
|
682
|
+
for rnd in range(1, accuracy_rounds_max + 1):
|
|
683
|
+
rows, summary, by_family = _run_family_policy(cases, family_policy)
|
|
684
|
+
round_payload = {
|
|
685
|
+
'round': rnd,
|
|
686
|
+
'family_policy': dict(family_policy),
|
|
687
|
+
'summary': summary,
|
|
688
|
+
'family_metrics': by_family,
|
|
689
|
+
}
|
|
690
|
+
accuracy_rounds.append(round_payload)
|
|
442
691
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
692
|
+
if summary['full_hit_rate_pct'] >= 100.0:
|
|
693
|
+
lock_streak += 1
|
|
694
|
+
if lock_streak >= 2:
|
|
695
|
+
break
|
|
696
|
+
continue
|
|
446
697
|
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
698
|
+
lock_streak = 0
|
|
699
|
+
for fam, metrics in by_family.items():
|
|
700
|
+
if metrics.get('pass_rate', 0.0) < 1.0:
|
|
701
|
+
family_policy[fam] = _next_method(family_policy.get(fam, 'plain'))
|
|
702
|
+
|
|
703
|
+
# Select best accuracy-locked round if available.
|
|
704
|
+
locked_rounds = [r for r in accuracy_rounds if r['summary']['full_hit_rate_pct'] >= 100.0]
|
|
705
|
+
if locked_rounds:
|
|
706
|
+
selected_accuracy_round = min(
|
|
707
|
+
locked_rounds,
|
|
708
|
+
key=lambda r: (r['summary'].get('tokens_per_expected_hit') or 10**9, r['summary'].get('tokens_per_query', 10**9)),
|
|
709
|
+
)
|
|
451
710
|
else:
|
|
452
|
-
|
|
711
|
+
selected_accuracy_round = max(
|
|
712
|
+
accuracy_rounds,
|
|
713
|
+
key=lambda r: (r['summary'].get('target_hit_rate_pct', 0.0), -r['summary'].get('tokens_per_query', 10**9)),
|
|
714
|
+
)
|
|
715
|
+
|
|
716
|
+
family_policy = dict(selected_accuracy_round['family_policy'])
|
|
717
|
+
rows, current_summary, by_family = _run_family_policy(cases, family_policy)
|
|
453
718
|
|
|
719
|
+
# Efficiency rounds: attempt family-level cheaper method under hard 100% gate.
|
|
454
720
|
efficiency_trials: list[dict] = []
|
|
455
|
-
active = best
|
|
456
721
|
for idx in range(max(0, int(efficiency_iterations))):
|
|
457
|
-
|
|
722
|
+
improved = False
|
|
723
|
+
for fam in families:
|
|
724
|
+
cheaper = _cheaper_method(family_policy.get(fam, 'plain'))
|
|
725
|
+
if not cheaper:
|
|
726
|
+
continue
|
|
727
|
+
trial_policy = dict(family_policy)
|
|
728
|
+
trial_policy[fam] = cheaper
|
|
729
|
+
_, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy)
|
|
730
|
+
trial_payload = {
|
|
731
|
+
'iteration': idx + 1,
|
|
732
|
+
'family': fam,
|
|
733
|
+
'trial_policy': trial_policy,
|
|
734
|
+
'summary': trial_summary,
|
|
735
|
+
}
|
|
736
|
+
efficiency_trials.append(trial_payload)
|
|
737
|
+
|
|
738
|
+
if (
|
|
739
|
+
trial_summary.get('full_hit_rate_pct', 0.0) >= 100.0
|
|
740
|
+
and trial_summary.get('tokens_per_query', 10**9) < current_summary.get('tokens_per_query', 10**9)
|
|
741
|
+
):
|
|
742
|
+
family_policy = trial_policy
|
|
743
|
+
current_summary = trial_summary
|
|
744
|
+
by_family = trial_by_family
|
|
745
|
+
improved = True
|
|
746
|
+
if not improved:
|
|
458
747
|
break
|
|
459
|
-
trial_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]
|
|
460
|
-
trial = _summarize(f"plain_gapfill_eff_trial_{idx + 1}", trial_rows)
|
|
461
|
-
efficiency_trials.append(trial)
|
|
462
|
-
if trial["full_hit_rate_pct"] >= active["full_hit_rate_pct"] and trial["tokens_per_query"] < active["tokens_per_query"]:
|
|
463
|
-
active = trial
|
|
464
748
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
749
|
+
# Global candidate snapshots for transparency.
|
|
750
|
+
slices_rows = [_evaluate_case_with_method(case, 'slices') for case in cases]
|
|
751
|
+
plain_rows = [_evaluate_case_with_method(case, 'plain') for case in cases]
|
|
752
|
+
plain_gap_rows = [_evaluate_case_with_method(case, 'plain_gapfill') for case in cases]
|
|
753
|
+
plain_rescue_rows = [_evaluate_case_with_method(case, 'plain_rescue') for case in cases]
|
|
754
|
+
slices_summary = _summarize('slices_accuracy_stage', slices_rows)
|
|
755
|
+
plain_summary = _summarize('plain_accuracy_stage', plain_rows)
|
|
756
|
+
plain_gap_summary = _summarize('plain_gapfill_accuracy_stage', plain_gap_rows)
|
|
757
|
+
plain_rescue_summary = _summarize('plain_rescue_accuracy_stage', plain_rescue_rows)
|
|
758
|
+
candidates = [slices_summary, plain_summary, plain_gap_summary, plain_rescue_summary]
|
|
759
|
+
|
|
760
|
+
active = {
|
|
761
|
+
'label': 'family_policy_selected',
|
|
762
|
+
**current_summary,
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
# Hard accuracy fallback: never finalize below 100% when any known candidate reaches 100%.
|
|
766
|
+
all_full_hit_candidates = list(candidates)
|
|
767
|
+
all_full_hit_candidates.extend(r['summary'] for r in accuracy_rounds)
|
|
768
|
+
all_full_hit_candidates.append(current_summary)
|
|
769
|
+
best_full_hit = _select_best_full_hit(all_full_hit_candidates)
|
|
770
|
+
if active.get('full_hit_rate_pct', 0.0) < 100.0 and best_full_hit is not None:
|
|
771
|
+
active = dict(best_full_hit)
|
|
468
772
|
|
|
469
|
-
|
|
773
|
+
cheapest = min(candidates, key=lambda item: (item.get('tokens_per_expected_hit') or 10**9, item.get('tokens_per_query', 10**9)))
|
|
774
|
+
token_delta = int(active['total_tokens'] - cheapest['total_tokens'])
|
|
775
|
+
pct_delta = round((token_delta / max(1, int(cheapest['total_tokens']))) * 100, 1)
|
|
776
|
+
|
|
777
|
+
pipeline_status = 'ok'
|
|
470
778
|
if (
|
|
471
|
-
active.get(
|
|
472
|
-
and active.get(
|
|
779
|
+
active.get('full_hit_rate_pct', 0.0) >= 100.0
|
|
780
|
+
and active.get('tokens_per_query', 10**9) > cheapest.get('tokens_per_query', 10**9)
|
|
473
781
|
and pct_delta > 40.0
|
|
474
782
|
):
|
|
475
|
-
pipeline_status =
|
|
783
|
+
pipeline_status = 'accuracy_locked_but_cost_risky'
|
|
476
784
|
|
|
477
785
|
cost_analysis = {
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
786
|
+
'cheapest_label': cheapest.get('label'),
|
|
787
|
+
'selected_label': active.get('label'),
|
|
788
|
+
'selected_vs_cheapest_token_delta': token_delta,
|
|
789
|
+
'selected_vs_cheapest_pct_delta': pct_delta,
|
|
790
|
+
'risk_threshold_pct': 40.0,
|
|
791
|
+
'cost_risky': pipeline_status == 'accuracy_locked_but_cost_risky',
|
|
484
792
|
}
|
|
485
793
|
|
|
486
|
-
_write_back(repo_path, active, case_source, pipeline_status, cost_analysis)
|
|
794
|
+
_write_back(repo_path, active, case_source, pipeline_status, cost_analysis, family_policy)
|
|
487
795
|
|
|
488
796
|
report = {
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
797
|
+
'status': pipeline_status,
|
|
798
|
+
'repo': repo_path.as_posix(),
|
|
799
|
+
'benchmark_size': len(cases),
|
|
800
|
+
'requested_benchmark_size': int(benchmark_size),
|
|
801
|
+
'efficiency_iterations': int(efficiency_iterations),
|
|
802
|
+
'case_source': case_source,
|
|
803
|
+
'family_policy': family_policy,
|
|
804
|
+
'cost_analysis': cost_analysis,
|
|
805
|
+
'phases': {
|
|
806
|
+
'bootstrap': bootstrap_diagnostics,
|
|
807
|
+
'accuracy_rounds': accuracy_rounds,
|
|
808
|
+
'selected_accuracy_round': selected_accuracy_round,
|
|
809
|
+
'efficiency_trials': efficiency_trials,
|
|
810
|
+
},
|
|
811
|
+
'stages': {
|
|
812
|
+
'accuracy_candidates': candidates,
|
|
813
|
+
'selected_after_accuracy': selected_accuracy_round['summary'],
|
|
814
|
+
'efficiency_trials': efficiency_trials,
|
|
815
|
+
'selected_final': active,
|
|
501
816
|
},
|
|
502
817
|
}
|
|
503
818
|
|
|
504
|
-
planning_dir = repo_path /
|
|
819
|
+
planning_dir = repo_path / '.planning'
|
|
505
820
|
planning_dir.mkdir(parents=True, exist_ok=True)
|
|
506
|
-
out_path = planning_dir /
|
|
507
|
-
out_path.write_text(json.dumps(report, indent=2), encoding=
|
|
508
|
-
report[
|
|
821
|
+
out_path = planning_dir / 'post_init_adaptation_report.json'
|
|
822
|
+
out_path.write_text(json.dumps(report, indent=2), encoding='utf-8')
|
|
823
|
+
report['report_path'] = out_path.as_posix()
|
|
509
824
|
return report
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
|
package/cli/commands/setup.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Repository setup and teardown helpers for GCIE."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import shutil
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
|
|
7
8
|
from context.architecture_bootstrap import ensure_initialized
|
|
@@ -24,6 +25,26 @@ def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
|
|
|
24
25
|
return "written"
|
|
25
26
|
|
|
26
27
|
|
|
28
|
+
def _is_within(base: Path, target: Path) -> bool:
|
|
29
|
+
try:
|
|
30
|
+
target.resolve().relative_to(base.resolve())
|
|
31
|
+
return True
|
|
32
|
+
except ValueError:
|
|
33
|
+
return False
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _remove_path(root: Path, target: Path) -> str:
    """Delete *target* (file or directory) and report the outcome.

    Returns one of: ``"skipped_outside_repo"``, ``"not_found"``,
    ``"removed_dir"``, or ``"removed_file"``.
    """
    # Safety rail: refuse to delete anything outside the repository root.
    if not _is_within(root, target):
        return "skipped_outside_repo"

    if not target.exists():
        return "not_found"

    is_directory = target.is_dir()
    if is_directory:
        shutil.rmtree(target)
    else:
        target.unlink()
    return "removed_dir" if is_directory else "removed_file"
|
|
46
|
+
|
|
47
|
+
|
|
27
48
|
def run_setup(
|
|
28
49
|
path: str,
|
|
29
50
|
*,
|
|
@@ -84,4 +105,34 @@ def run_setup(
|
|
|
84
105
|
else:
|
|
85
106
|
status["adaptation"] = {"skipped": True}
|
|
86
107
|
|
|
87
|
-
return status
|
|
108
|
+
return status
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def run_remove(
    path: str,
    *,
    remove_planning: bool = False,
    remove_gcie_usage: bool = True,
    remove_setup_doc: bool = True,
) -> dict:
    """Remove GCIE-managed files from a repository."""
    repo_root = Path(path).resolve()
    # Ensure the root exists so removal bookkeeping has a directory to anchor to.
    repo_root.mkdir(parents=True, exist_ok=True)

    # (entry name, whether to remove it); .gcie is always removed.
    planned = [
        (".gcie", True),
        ("GCIE_USAGE.md", remove_gcie_usage),
        ("SETUP_ANY_REPO.md", remove_setup_doc),
        (".planning", remove_planning),
    ]
    outcomes: dict[str, str] = {
        name: _remove_path(repo_root, repo_root / name)
        for name, wanted in planned
        if wanted
    }

    return {
        "repo": repo_root.as_posix(),
        "removed": outcomes,
        "remove_planning": remove_planning,
    }
|