@pmaddire/gcie 0.1.11 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GCIE_USAGE.md +7 -2
- package/README.md +121 -191
- package/cli/app.py +46 -9
- package/cli/commands/adaptation.py +343 -218
- package/cli/commands/context.py +351 -145
- package/cli/commands/setup.py +140 -137
- package/llm_context/context_builder.py +83 -66
- package/llm_context/snippet_selector.py +157 -26
- package/package.json +1 -1
|
@@ -1,14 +1,16 @@
|
|
|
1
|
-
|
|
1
|
+
"""Post-initialization adaptation pipeline (accuracy rounds first, then efficiency rounds)."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
6
|
from dataclasses import asdict, dataclass
|
|
6
7
|
from datetime import datetime, timezone
|
|
7
8
|
import json
|
|
9
|
+
import os
|
|
8
10
|
import re
|
|
9
11
|
from pathlib import Path
|
|
10
12
|
|
|
11
|
-
from .context import run_context
|
|
13
|
+
from .context import run_context, run_context_basic
|
|
12
14
|
from .context_slices import _classify_query_family, run_context_slices
|
|
13
15
|
from .index import run_index
|
|
14
16
|
|
|
@@ -52,7 +54,20 @@ _IGNORED_DIRS = {
|
|
|
52
54
|
"build",
|
|
53
55
|
"coverage",
|
|
54
56
|
}
|
|
55
|
-
_METHOD_ORDER = ["plain", "plain_chain", "plain_gapfill", "plain_rescue", "slices"]
|
|
57
|
+
_METHOD_ORDER = ["plain_minimal", "plain", "plain_force", "plain_chain", "plain_gapfill", "plain_rescue", "slices"]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _adapt_worker_count(workers: int | None = None) -> int:
    """Resolve how many worker threads the adaptation pipeline should use.

    Precedence:
      1. an explicit ``workers`` argument;
      2. the ``GCIE_ADAPT_WORKERS`` environment variable, when it parses as an
         integer;
      3. the CPU count reported by ``os.cpu_count()`` (4 when unknown),
         capped at 8.

    The returned value is never lower than 1.
    """
    if workers is not None:
        return max(1, int(workers))

    override = os.getenv("GCIE_ADAPT_WORKERS", "").strip()
    if override:
        try:
            parsed = int(override)
        except ValueError:
            # Unparseable override: fall back to the CPU-based default below.
            parsed = None
        if parsed is not None:
            return max(1, parsed)

    detected = os.cpu_count() or 4
    return min(8, max(1, detected))
|
|
56
71
|
|
|
57
72
|
|
|
58
73
|
def _query_keywords(text: str) -> list[str]:
|
|
@@ -127,31 +142,31 @@ def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
|
|
|
127
142
|
return f"{base}/{normalized}"
|
|
128
143
|
|
|
129
144
|
|
|
130
|
-
def _family_path(expected_files: tuple[str, ...]) -> str:
|
|
131
|
-
if not expected_files:
|
|
132
|
-
return "."
|
|
133
|
-
parent_parts: list[tuple[str, ...]] = []
|
|
134
|
-
for rel in expected_files:
|
|
135
|
-
parent = Path(rel).parent
|
|
136
|
-
if str(parent) in {"", "."}:
|
|
137
|
-
parent_parts.append(tuple())
|
|
138
|
-
else:
|
|
139
|
-
parent_parts.append(tuple(parent.parts))
|
|
140
|
-
|
|
141
|
-
common: list[str] = []
|
|
142
|
-
if parent_parts:
|
|
143
|
-
shortest = min(len(parts) for parts in parent_parts)
|
|
144
|
-
for idx in range(shortest):
|
|
145
|
-
token = parent_parts[0][idx]
|
|
146
|
-
if all(parts[idx] == token for parts in parent_parts):
|
|
147
|
-
common.append(token)
|
|
148
|
-
else:
|
|
149
|
-
break
|
|
150
|
-
if common:
|
|
151
|
-
return Path(*common).as_posix()
|
|
152
|
-
|
|
153
|
-
heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
|
|
154
|
-
return next(iter(heads)) if len(heads) == 1 else "."
|
|
145
|
+
def _family_path(expected_files: tuple[str, ...]) -> str:
    """Return the deepest directory shared by every expected file.

    The result is a POSIX-style relative path. When the files share no parent
    directory, the function falls back to the single first path component
    common to all of them, and otherwise to ``"."``. An empty input also
    yields ``"."``.
    """
    if not expected_files:
        return "."

    # ``Path(...).parent.parts`` is an empty tuple for top-level files.
    directories = [tuple(Path(rel).parent.parts) for rel in expected_files]

    # Walk the directory components column by column; zip stops at the
    # shallowest entry, so the shared prefix can never overrun any path.
    shared: list[str] = []
    for column in zip(*directories):
        if len(set(column)) != 1:
            break
        shared.append(column[0])

    if shared:
        return Path(*shared).as_posix()

    first_components = {
        Path(rel).parts[0] for rel in expected_files if Path(rel).parts
    }
    if len(first_components) == 1:
        return next(iter(first_components))
    return "."
|
|
155
170
|
|
|
156
171
|
def _safe_scope(path: str) -> str:
|
|
157
172
|
if not path or path in {".", "./"}:
|
|
@@ -162,39 +177,39 @@ def _safe_scope(path: str) -> str:
|
|
|
162
177
|
return "."
|
|
163
178
|
|
|
164
179
|
|
|
165
|
-
def _plan_query(case) -> tuple[str, str, int | None]:
|
|
166
|
-
path = _family_path(case.expected_files)
|
|
167
|
-
if getattr(case, "name", "") == "cli_context_command":
|
|
168
|
-
return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950
|
|
169
|
-
|
|
170
|
-
repo_path = Path('.').resolve()
|
|
171
|
-
cue_terms: list[str] = []
|
|
172
|
-
for rel in case.expected_files:
|
|
173
|
-
cue_terms.extend(_extract_query_cues_for_file(repo_path, rel)[:3])
|
|
174
|
-
cue_terms.extend(_query_keywords(case.query)[:4])
|
|
175
|
-
|
|
176
|
-
dedup: list[str] = []
|
|
177
|
-
seen: set[str] = set()
|
|
178
|
-
for token in [*case.expected_files, *cue_terms]:
|
|
179
|
-
key = token.lower()
|
|
180
|
-
if key in seen:
|
|
181
|
-
continue
|
|
182
|
-
seen.add(key)
|
|
183
|
-
dedup.append(token)
|
|
184
|
-
if len(dedup) >= 14:
|
|
185
|
-
break
|
|
186
|
-
query = " ".join(dedup).strip()
|
|
187
|
-
|
|
188
|
-
expected_count = len(case.expected_files)
|
|
189
|
-
if expected_count >= 3:
|
|
190
|
-
budget = 1100
|
|
191
|
-
elif expected_count == 2:
|
|
192
|
-
budget = 950
|
|
193
|
-
else:
|
|
194
|
-
budget = 850
|
|
195
|
-
|
|
196
|
-
if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
|
|
197
|
-
budget = 800
|
|
180
|
+
def _plan_query(case) -> tuple[str, str, int | None]:
    """Build the ``(path, query, budget)`` plan used to evaluate ``case``.

    The path is the shared directory of the expected files. The query is made
    of the expected file paths followed by up to three cues per file and up to
    four keywords from the case query, de-duplicated case-insensitively and
    capped at 14 tokens. The budget depends on how many files are expected,
    with a fixed override for a handful of named cases.
    """
    path = _family_path(case.expected_files)
    case_name = getattr(case, "name", "")

    # Hand-tuned plan for one specific case.
    if case_name == "cli_context_command":
        return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950

    repo_root = Path('.').resolve()
    hints: list[str] = []
    for rel in case.expected_files:
        hints += _extract_query_cues_for_file(repo_root, rel)[:3]
    hints += _query_keywords(case.query)[:4]

    # Keep first occurrences only (case-insensitive), at most 14 tokens.
    unique_tokens: list[str] = []
    lowered_seen: set[str] = set()
    for token in (*case.expected_files, *hints):
        lowered = token.lower()
        if lowered in lowered_seen:
            continue
        lowered_seen.add(lowered)
        unique_tokens.append(token)
        if len(unique_tokens) >= 14:
            break
    query = " ".join(unique_tokens).strip()

    file_count = len(case.expected_files)
    if case_name in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
        budget = 800
    elif file_count >= 3:
        budget = 1100
    elif file_count == 2:
        budget = 950
    else:
        budget = 850

    return path, query, budget
|
|
199
214
|
|
|
200
215
|
def _case_family(case) -> str:
|
|
@@ -227,84 +242,84 @@ def _build_gapfill_query(case, missing_rel: str) -> str:
|
|
|
227
242
|
if len(dedup) >= 14:
|
|
228
243
|
break
|
|
229
244
|
|
|
230
|
-
return " ".join(dedup)
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
def _collect_files_from_payload(scope: str, payload: dict) -> set[str]:
|
|
234
|
-
return {
|
|
235
|
-
_normalize_scoped_path(scope, rel)
|
|
236
|
-
for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
|
|
237
|
-
if rel
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def _hop_query_for_pair(case, left: str, right: str) -> str:
|
|
242
|
-
repo_path = Path('.').resolve()
|
|
243
|
-
cues: list[str] = []
|
|
244
|
-
cues.extend(_extract_query_cues_for_file(repo_path, left)[:3])
|
|
245
|
-
cues.extend(_extract_query_cues_for_file(repo_path, right)[:3])
|
|
246
|
-
cues.extend(_query_keywords(case.query)[:4])
|
|
247
|
-
|
|
248
|
-
dedup: list[str] = []
|
|
249
|
-
seen: set[str] = set()
|
|
250
|
-
for token in [left, right, *cues]:
|
|
251
|
-
key = token.lower()
|
|
252
|
-
if key in seen:
|
|
253
|
-
continue
|
|
254
|
-
seen.add(key)
|
|
255
|
-
dedup.append(token)
|
|
256
|
-
if len(dedup) >= 12:
|
|
257
|
-
break
|
|
258
|
-
return " ".join(dedup)
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
def _evaluate_plain_chain_case(case) -> CaseResult:
|
|
262
|
-
expected = tuple(case.expected_files)
|
|
263
|
-
if len(expected) < 3:
|
|
264
|
-
return _evaluate_plain_case(case, allow_gapfill=False)
|
|
265
|
-
|
|
266
|
-
tokens = 0
|
|
267
|
-
files: set[str] = set()
|
|
268
|
-
mode = "plain_chain_workflow"
|
|
269
|
-
|
|
270
|
-
# Decompose N-file chains into adjacent hops to reduce broad root overfetch.
|
|
271
|
-
for idx in range(len(expected) - 1):
|
|
272
|
-
left = expected[idx]
|
|
273
|
-
right = expected[idx + 1]
|
|
274
|
-
scope = _safe_scope(_family_path((left, right)))
|
|
275
|
-
query = _hop_query_for_pair(case, left, right)
|
|
276
|
-
hop_payload = run_context(scope, query, budget=950, intent=case.intent)
|
|
277
|
-
tokens += int(hop_payload.get("tokens", 0) or 0)
|
|
278
|
-
files.update(_collect_files_from_payload(scope, hop_payload))
|
|
279
|
-
|
|
280
|
-
missing = [rel for rel in expected if rel not in files]
|
|
281
|
-
if missing:
|
|
282
|
-
mode = "plain_chain_workflow_gapfill"
|
|
283
|
-
for rel in list(missing):
|
|
284
|
-
# Chain gapfill stays narrow: direct file scope only (no broad fallback).
|
|
285
|
-
scope = rel if (Path(rel).exists() and Path(rel).is_file()) else _safe_scope(_family_path((rel,)))
|
|
286
|
-
budget = 500 if rel.endswith('/main.py') or rel == 'main.py' else 700
|
|
287
|
-
gap_payload = run_context(scope, _build_gapfill_query(case, rel), budget=budget, intent=case.intent)
|
|
288
|
-
tokens += int(gap_payload.get("tokens", 0) or 0)
|
|
289
|
-
files.update(_collect_files_from_payload(scope, gap_payload))
|
|
290
|
-
missing = [m for m in expected if m not in files]
|
|
291
|
-
if not missing:
|
|
292
|
-
break
|
|
293
|
-
|
|
294
|
-
expected_hits = len(expected) - len(missing)
|
|
295
|
-
family = _classify_query_family(case.query)
|
|
296
|
-
return CaseResult(
|
|
297
|
-
name=case.name,
|
|
298
|
-
family=family,
|
|
299
|
-
mode=mode,
|
|
300
|
-
tokens=tokens,
|
|
301
|
-
expected_hits=expected_hits,
|
|
302
|
-
expected_total=len(expected),
|
|
303
|
-
missing_expected=tuple(missing),
|
|
304
|
-
context_complete=not missing,
|
|
305
|
-
)
|
|
306
|
-
|
|
307
|
-
|
|
245
|
+
return " ".join(dedup)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _collect_files_from_payload(scope: str, payload: dict) -> set[str]:
    """Return the scope-normalized file paths referenced by a payload's snippets.

    Each entry of ``payload["snippets"]`` is mapped from its ``node_id`` to a
    file; snippets that map to no file are ignored.
    """
    collected: set[str] = set()
    for snippet in payload.get("snippets", []):
        rel = _node_to_file(snippet.get("node_id", ""))
        if rel:
            collected.add(_normalize_scoped_path(scope, rel))
    return collected
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _hop_query_for_pair(case, left: str, right: str) -> str:
    """Compose the retrieval query for one adjacent pair of chain files.

    The query starts with both file paths, followed by up to three cues from
    each file and up to four keywords from the case query. Tokens are
    de-duplicated case-insensitively and the query is capped at 12 tokens.
    """
    repo_root = Path('.').resolve()
    hints = [
        *_extract_query_cues_for_file(repo_root, left)[:3],
        *_extract_query_cues_for_file(repo_root, right)[:3],
        *_query_keywords(case.query)[:4],
    ]

    unique_tokens: list[str] = []
    lowered_seen: set[str] = set()
    for token in (left, right, *hints):
        lowered = token.lower()
        if lowered in lowered_seen:
            continue
        lowered_seen.add(lowered)
        unique_tokens.append(token)
        if len(unique_tokens) >= 12:
            break
    return " ".join(unique_tokens)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _evaluate_plain_chain_case(case) -> CaseResult:
    """Evaluate a multi-file case by retrieving it as a chain of adjacent hops.

    Cases with fewer than three expected files are delegated to
    ``_evaluate_plain_case`` without gap-fill. Otherwise each adjacent pair of
    expected files is queried in its shared scope (budget 950), and any file
    still missing afterwards gets one narrow gap-fill query.

    Returns a ``CaseResult`` whose ``tokens`` is the sum over every retrieval
    call made and whose ``mode`` records whether gap-fill was needed.
    """
    expected = tuple(case.expected_files)
    if len(expected) < 3:
        return _evaluate_plain_case(case, allow_gapfill=False)

    tokens = 0
    files: set[str] = set()
    mode = "plain_chain_workflow"

    # Decompose N-file chains into adjacent hops to reduce broad root overfetch.
    for left, right in zip(expected, expected[1:]):
        scope = _safe_scope(_family_path((left, right)))
        query = _hop_query_for_pair(case, left, right)
        hop_payload = run_context(scope, query, budget=950, intent=case.intent)
        tokens += int(hop_payload.get("tokens", 0) or 0)
        files.update(_collect_files_from_payload(scope, hop_payload))

    missing = [rel for rel in expected if rel not in files]
    if missing:
        mode = "plain_chain_workflow_gapfill"
        for rel in list(missing):
            # An earlier gap-fill may already have surfaced this file; querying
            # it again would only add tokens without improving the hit count.
            if rel in files:
                continue
            # Chain gapfill stays narrow: direct file scope only (no broad fallback).
            scope = rel if Path(rel).is_file() else _safe_scope(_family_path((rel,)))
            budget = 500 if rel.endswith('/main.py') or rel == 'main.py' else 700
            gap_payload = run_context(scope, _build_gapfill_query(case, rel), budget=budget, intent=case.intent)
            tokens += int(gap_payload.get("tokens", 0) or 0)
            files.update(_collect_files_from_payload(scope, gap_payload))
            missing = [m for m in expected if m not in files]
            if not missing:
                break

    expected_hits = len(expected) - len(missing)
    family = _classify_query_family(case.query)
    return CaseResult(
        name=case.name,
        family=family,
        mode=mode,
        tokens=tokens,
        expected_hits=expected_hits,
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
|
|
321
|
+
|
|
322
|
+
|
|
308
323
|
def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
|
|
309
324
|
path, query, budget = _plan_query(case)
|
|
310
325
|
path = _safe_scope(path)
|
|
@@ -370,7 +385,57 @@ def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill
|
|
|
370
385
|
missing_expected=tuple(missing),
|
|
371
386
|
context_complete=not missing,
|
|
372
387
|
)
|
|
373
|
-
|
|
388
|
+
|
|
389
|
+
def _evaluate_plain_minimal_case(case) -> CaseResult:
    """Evaluate ``case`` with a single ``run_context_basic`` retrieval.

    The planned path, query and budget come from ``_plan_query``. The result
    reports which expected files appeared among the returned snippets and the
    token count of the payload.
    """
    planned_path, query, budget = _plan_query(case)
    scope = _safe_scope(planned_path)
    payload = run_context_basic(scope, query, budget=budget, intent=case.intent)

    retrieved = _collect_files_from_payload(scope, payload)
    expected = tuple(case.expected_files)
    missing = [rel for rel in expected if rel not in retrieved]

    return CaseResult(
        name=case.name,
        family=_classify_query_family(query),
        mode="plain_context_workflow_minimal",
        tokens=int(payload.get("tokens", 0) or 0),
        expected_hits=len(expected) - len(missing),
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _evaluate_plain_force_case(case) -> CaseResult:
    """Evaluate ``case`` with one ``run_context`` call in strict-accuracy mode.

    The planned path, query and budget come from ``_plan_query``; the
    retrieval is issued with ``strict_accuracy=True``. The result reports
    which expected files appeared among the returned snippets and the token
    count of the payload.
    """
    planned_path, query, budget = _plan_query(case)
    scope = _safe_scope(planned_path)
    payload = run_context(scope, query, budget=budget, intent=case.intent, strict_accuracy=True)

    retrieved = _collect_files_from_payload(scope, payload)
    expected = tuple(case.expected_files)
    missing = [rel for rel in expected if rel not in retrieved]

    return CaseResult(
        name=case.name,
        family=_classify_query_family(query),
        mode="plain_context_workflow_force",
        tokens=int(payload.get("tokens", 0) or 0),
        expected_hits=len(expected) - len(missing),
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
|
|
374
439
|
|
|
375
440
|
def _evaluate_slices_case(case) -> CaseResult:
|
|
376
441
|
payload = run_context_slices(
|
|
@@ -443,14 +508,18 @@ def _evaluate_slices_case(case) -> CaseResult:
|
|
|
443
508
|
|
|
444
509
|
|
|
445
510
|
def _evaluate_case_with_method(case, method: str) -> CaseResult:
    """Evaluate ``case`` with the retrieval strategy named by ``method``.

    ``plain``, ``plain_gapfill`` and ``plain_rescue`` are variants of
    ``_evaluate_plain_case`` that differ only in their gap-fill flags.
    ``plain_minimal``, ``plain_force`` and ``plain_chain`` have dedicated
    evaluators. Any other method name is evaluated with the slices workflow.
    """
    plain_variants = {
        "plain": {"allow_gapfill": False},
        "plain_gapfill": {"allow_gapfill": True, "aggressive_gapfill": False},
        "plain_rescue": {"allow_gapfill": True, "aggressive_gapfill": True},
    }
    if method in plain_variants:
        return _evaluate_plain_case(case, **plain_variants[method])

    dedicated = {
        "plain_minimal": _evaluate_plain_minimal_case,
        "plain_force": _evaluate_plain_force_case,
        "plain_chain": _evaluate_plain_chain_case,
    }
    # Unrecognized method names fall back to the slices workflow.
    evaluator = dedicated.get(method, _evaluate_slices_case)
    return evaluator(case)
|
|
455
524
|
|
|
456
525
|
|
|
@@ -531,6 +600,9 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
|
|
|
531
600
|
# Build a diversified sample so adaptation can learn in mixed-layer repos.
|
|
532
601
|
single_target = max(1, needed // 3)
|
|
533
602
|
same_dir_target = max(1, needed // 3)
|
|
603
|
+
local_target = max(1, needed // 2)
|
|
604
|
+
if single_target + same_dir_target < local_target:
|
|
605
|
+
same_dir_target = local_target - single_target
|
|
534
606
|
cross_dir_target = max(1, needed - single_target - same_dir_target)
|
|
535
607
|
|
|
536
608
|
# 1) singles
|
|
@@ -573,23 +645,23 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
|
|
|
573
645
|
if cross_added >= cross_dir_target:
|
|
574
646
|
break
|
|
575
647
|
|
|
576
|
-
# 4) include some 3-file chains for multi-hop calibration when dataset is larger.
|
|
577
|
-
if needed >= 12 and len(rows) < needed:
|
|
578
|
-
chain_budget = max(1, needed
|
|
579
|
-
chains_added = 0
|
|
580
|
-
reps = [item[1] for item in top_items]
|
|
581
|
-
for idx in range(len(reps) - 2):
|
|
582
|
-
add_case(
|
|
583
|
-
f"chain_{idx}",
|
|
584
|
-
(reps[idx], reps[idx + 1], reps[idx + 2]),
|
|
585
|
-
intent='refactor',
|
|
586
|
-
)
|
|
587
|
-
if len(rows) >= needed:
|
|
588
|
-
return rows[:needed]
|
|
589
|
-
chains_added += 1
|
|
590
|
-
if chains_added >= chain_budget:
|
|
591
|
-
break
|
|
592
|
-
|
|
648
|
+
# 4) include some 3-file chains for multi-hop calibration when dataset is larger.
|
|
649
|
+
if needed >= 12 and len(rows) < needed:
|
|
650
|
+
chain_budget = max(1, int(round(needed * 0.12)))
|
|
651
|
+
chains_added = 0
|
|
652
|
+
reps = [item[1] for item in top_items]
|
|
653
|
+
for idx in range(len(reps) - 2):
|
|
654
|
+
add_case(
|
|
655
|
+
f"chain_{idx}",
|
|
656
|
+
(reps[idx], reps[idx + 1], reps[idx + 2]),
|
|
657
|
+
intent='refactor',
|
|
658
|
+
)
|
|
659
|
+
if len(rows) >= needed:
|
|
660
|
+
return rows[:needed]
|
|
661
|
+
chains_added += 1
|
|
662
|
+
if chains_added >= chain_budget:
|
|
663
|
+
break
|
|
664
|
+
|
|
593
665
|
# 5) fill remainder with additional nearby pairs
|
|
594
666
|
if len(rows) < needed:
|
|
595
667
|
for idx in range(len(files) - 1):
|
|
@@ -626,12 +698,52 @@ def _cheaper_method(method: str) -> str | None:
|
|
|
626
698
|
return _METHOD_ORDER[idx - 1]
|
|
627
699
|
|
|
628
700
|
|
|
629
|
-
def
|
|
630
|
-
|
|
631
|
-
|
|
701
|
+
def _evaluate_cases_with_method(cases: list[AdaptCase], method: str, workers: int) -> list[CaseResult]:
    """Evaluate every case with ``method``, optionally on a thread pool.

    With ``workers <= 1`` or at most one case the evaluation runs
    sequentially. Otherwise cases are submitted to a ``ThreadPoolExecutor``
    of at most ``workers`` threads, and each result is stored at the index of
    its case so the output order matches the input order. ``None`` results
    are dropped from the returned list.
    """
    if not cases:
        return []

    total = len(cases)
    if workers <= 1 or total <= 1:
        return [_evaluate_case_with_method(case, method) for case in cases]

    results: list[CaseResult | None] = [None] * total
    pool_size = max(1, min(workers, total))
    with ThreadPoolExecutor(max_workers=pool_size) as executor:
        positions = {}
        for position, case in enumerate(cases):
            positions[executor.submit(_evaluate_case_with_method, case, method)] = position
        # Futures finish in arbitrary order; the recorded position restores it.
        for finished in as_completed(positions):
            results[positions[finished]] = finished.result()

    return [row for row in results if row is not None]
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _run_family_policy(
|
|
721
|
+
cases: list[AdaptCase],
|
|
722
|
+
family_policy: dict[str, str],
|
|
723
|
+
*,
|
|
724
|
+
workers: int,
|
|
725
|
+
) -> tuple[list[CaseResult], dict, dict[str, dict]]:
|
|
726
|
+
if not cases:
|
|
727
|
+
summary = _summarize('policy_run', [])
|
|
728
|
+
return [], summary, {}
|
|
729
|
+
|
|
730
|
+
grouped: dict[str, list[tuple[int, AdaptCase]]] = {}
|
|
731
|
+
for idx, case in enumerate(cases):
|
|
632
732
|
family = _case_family(case)
|
|
633
733
|
method = family_policy.get(family, 'plain')
|
|
634
|
-
|
|
734
|
+
key = f'{family}|{method}'
|
|
735
|
+
grouped.setdefault(key, []).append((idx, case))
|
|
736
|
+
|
|
737
|
+
ordered: list[CaseResult | None] = [None] * len(cases)
|
|
738
|
+
for key in sorted(grouped):
|
|
739
|
+
pairs = grouped[key]
|
|
740
|
+
_, method = key.split('|', 1)
|
|
741
|
+
group_cases = [case for _, case in pairs]
|
|
742
|
+
group_rows = _evaluate_cases_with_method(group_cases, method, workers)
|
|
743
|
+
for (orig_idx, _), row in zip(pairs, group_rows):
|
|
744
|
+
ordered[orig_idx] = row
|
|
745
|
+
|
|
746
|
+
rows = [row for row in ordered if row is not None]
|
|
635
747
|
summary = _summarize('policy_run', rows)
|
|
636
748
|
|
|
637
749
|
by_family: dict[str, dict] = {}
|
|
@@ -647,45 +759,51 @@ def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) ->
|
|
|
647
759
|
return rows, summary, by_family
|
|
648
760
|
|
|
649
761
|
|
|
650
|
-
def _select_best_summary(summaries: list[dict]) -> dict:
|
|
651
|
-
full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
|
|
652
|
-
if full_hit:
|
|
653
|
-
return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
|
|
654
|
-
return max(
|
|
655
|
-
summaries,
|
|
656
|
-
key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
|
|
657
|
-
)
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
def _bootstrap_family_policy(
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
762
|
+
def _select_best_summary(summaries: list[dict]) -> dict:
    """Pick the preferred summary from a list of method summaries.

    Selection rules:
      * If any summary has ``full_hit_rate_pct`` of at least 100, return the
        one among those with the lowest ``tokens_per_expected_hit``, ties
        broken by the lowest ``tokens_per_query``.
      * Otherwise return the summary with the highest ``target_hit_rate_pct``,
        ties broken by the lowest ``tokens_per_query``.

    Metrics that are absent or ``None`` are treated as missing: hit rates
    count as 0.0 and token costs as a very large sentinel. An empty input
    returns an empty dict instead of raising.
    """
    if not summaries:
        return {}

    no_data = 10**9

    def metric(summary: dict, key: str, default):
        # A key stored with a ``None`` value is treated the same as a missing key.
        value = summary.get(key)
        return default if value is None else value

    full_hit = [s for s in summaries if metric(s, "full_hit_rate_pct", 0.0) >= 100.0]
    if full_hit:
        return min(
            full_hit,
            key=lambda s: (
                # Falsy costs (None or 0) sort last here.
                s.get("tokens_per_expected_hit") or no_data,
                metric(s, "tokens_per_query", no_data),
            ),
        )
    return max(
        summaries,
        key=lambda s: (
            metric(s, "target_hit_rate_pct", 0.0),
            -metric(s, "tokens_per_query", no_data),
        ),
    )
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def _bootstrap_family_policy(
    cases: list[AdaptCase],
    families: list[str],
    *,
    workers: int,
) -> tuple[dict[str, str], list[dict]]:
    """Choose an initial retrieval method for each query family.

    For every family, each method in ``_METHOD_ORDER`` is evaluated on that
    family's cases and the winner is picked by ``_select_best_summary``.
    Families without any cases default to ``"plain"`` and produce no
    diagnostics entry.

    Returns the ``family -> method`` policy together with one diagnostics
    dict per evaluated family (selected method, its summary, all candidates).
    """
    policy: dict[str, str] = {}
    diagnostics: list[dict] = []

    for family in families:
        members = [case for case in cases if _case_family(case) == family]
        if not members:
            policy[family] = "plain"
            continue

        candidates: list[dict] = []
        for method in _METHOD_ORDER:
            results = _evaluate_cases_with_method(members, method, workers)
            summary = _summarize(f"bootstrap_{family}_{method}", results)
            summary["method"] = method
            summary["family"] = family
            candidates.append(summary)

        winner = _select_best_summary(candidates)
        chosen = str(winner.get("method", "plain"))
        policy[family] = chosen
        diagnostics.append(
            {
                "family": family,
                "selected_method": chosen,
                "selected_summary": winner,
                "candidates": candidates,
            }
        )

    return policy, diagnostics
|
|
806
|
+
|
|
689
807
|
def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
|
|
690
808
|
cfg_path = repo_path / '.gcie' / 'context_config.json'
|
|
691
809
|
if cfg_path.exists():
|
|
@@ -726,12 +844,11 @@ def run_post_init_adaptation(
|
|
|
726
844
|
benchmark_size: int = 10,
|
|
727
845
|
efficiency_iterations: int = 5,
|
|
728
846
|
clear_profile: bool = False,
|
|
847
|
+
adapt_workers: int | None = None,
|
|
729
848
|
) -> dict:
|
|
730
849
|
repo_path = Path(repo).resolve()
|
|
731
850
|
|
|
732
851
|
# Ensure all relative retrieval/evaluation calls execute in the target repo.
|
|
733
|
-
import os
|
|
734
|
-
|
|
735
852
|
os.chdir(repo_path)
|
|
736
853
|
run_index(repo_path.as_posix())
|
|
737
854
|
|
|
@@ -749,8 +866,9 @@ def run_post_init_adaptation(
|
|
|
749
866
|
'message': 'No repo-usable adaptation cases available.',
|
|
750
867
|
}
|
|
751
868
|
|
|
752
|
-
|
|
753
|
-
|
|
869
|
+
workers = _adapt_worker_count(adapt_workers)
|
|
870
|
+
families = sorted({_case_family(case) for case in cases})
|
|
871
|
+
family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families, workers=workers)
|
|
754
872
|
|
|
755
873
|
# Accuracy rounds: promote methods per failing family until lock.
|
|
756
874
|
accuracy_rounds_max = 5
|
|
@@ -758,7 +876,7 @@ def run_post_init_adaptation(
|
|
|
758
876
|
lock_streak = 0
|
|
759
877
|
|
|
760
878
|
for rnd in range(1, accuracy_rounds_max + 1):
|
|
761
|
-
rows, summary, by_family = _run_family_policy(cases, family_policy)
|
|
879
|
+
rows, summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
|
|
762
880
|
round_payload = {
|
|
763
881
|
'round': rnd,
|
|
764
882
|
'family_policy': dict(family_policy),
|
|
@@ -792,7 +910,7 @@ def run_post_init_adaptation(
|
|
|
792
910
|
)
|
|
793
911
|
|
|
794
912
|
family_policy = dict(selected_accuracy_round['family_policy'])
|
|
795
|
-
rows, current_summary, by_family = _run_family_policy(cases, family_policy)
|
|
913
|
+
rows, current_summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
|
|
796
914
|
|
|
797
915
|
# Efficiency rounds: attempt family-level cheaper method under hard 100% gate.
|
|
798
916
|
efficiency_trials: list[dict] = []
|
|
@@ -804,7 +922,7 @@ def run_post_init_adaptation(
|
|
|
804
922
|
continue
|
|
805
923
|
trial_policy = dict(family_policy)
|
|
806
924
|
trial_policy[fam] = cheaper
|
|
807
|
-
_, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy)
|
|
925
|
+
_, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy, workers=workers)
|
|
808
926
|
trial_payload = {
|
|
809
927
|
'iteration': idx + 1,
|
|
810
928
|
'family': fam,
|
|
@@ -825,15 +943,19 @@ def run_post_init_adaptation(
|
|
|
825
943
|
break
|
|
826
944
|
|
|
827
945
|
# Global candidate snapshots for transparency.
|
|
828
|
-
slices_rows =
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
946
|
+
slices_rows = _evaluate_cases_with_method(cases, 'slices', workers)
|
|
947
|
+
plain_min_rows = _evaluate_cases_with_method(cases, 'plain_minimal', workers)
|
|
948
|
+
plain_rows = _evaluate_cases_with_method(cases, 'plain', workers)
|
|
949
|
+
plain_force_rows = _evaluate_cases_with_method(cases, 'plain_force', workers)
|
|
950
|
+
plain_gap_rows = _evaluate_cases_with_method(cases, 'plain_gapfill', workers)
|
|
951
|
+
plain_rescue_rows = _evaluate_cases_with_method(cases, 'plain_rescue', workers)
|
|
832
952
|
slices_summary = _summarize('slices_accuracy_stage', slices_rows)
|
|
953
|
+
plain_min_summary = _summarize('plain_minimal_accuracy_stage', plain_min_rows)
|
|
833
954
|
plain_summary = _summarize('plain_accuracy_stage', plain_rows)
|
|
955
|
+
plain_force_summary = _summarize('plain_force_accuracy_stage', plain_force_rows)
|
|
834
956
|
plain_gap_summary = _summarize('plain_gapfill_accuracy_stage', plain_gap_rows)
|
|
835
957
|
plain_rescue_summary = _summarize('plain_rescue_accuracy_stage', plain_rescue_rows)
|
|
836
|
-
candidates = [slices_summary, plain_summary, plain_gap_summary, plain_rescue_summary]
|
|
958
|
+
candidates = [slices_summary, plain_min_summary, plain_summary, plain_force_summary, plain_gap_summary, plain_rescue_summary]
|
|
837
959
|
|
|
838
960
|
active = {
|
|
839
961
|
'label': 'family_policy_selected',
|
|
@@ -877,11 +999,12 @@ def run_post_init_adaptation(
|
|
|
877
999
|
'benchmark_size': len(cases),
|
|
878
1000
|
'requested_benchmark_size': int(benchmark_size),
|
|
879
1001
|
'efficiency_iterations': int(efficiency_iterations),
|
|
1002
|
+
'adapt_workers': workers,
|
|
880
1003
|
'case_source': case_source,
|
|
881
1004
|
'family_policy': family_policy,
|
|
882
1005
|
'cost_analysis': cost_analysis,
|
|
883
|
-
'phases': {
|
|
884
|
-
'bootstrap': bootstrap_diagnostics,
|
|
1006
|
+
'phases': {
|
|
1007
|
+
'bootstrap': bootstrap_diagnostics,
|
|
885
1008
|
'accuracy_rounds': accuracy_rounds,
|
|
886
1009
|
'selected_accuracy_round': selected_accuracy_round,
|
|
887
1010
|
'efficiency_trials': efficiency_trials,
|
|
@@ -920,6 +1043,8 @@ def run_post_init_adaptation(
|
|
|
920
1043
|
|
|
921
1044
|
|
|
922
1045
|
|
|
1046
|
+
|
|
1047
|
+
|
|
923
1048
|
|
|
924
1049
|
|
|
925
1050
|
|