@pmaddire/gcie 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/commands/adaptation.py +144 -10
- package/package.json +1 -1
|
@@ -30,7 +30,28 @@ class CaseResult:
|
|
|
30
30
|
context_complete: bool
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
@dataclass(frozen=True, slots=True)
|
|
34
|
+
class AdaptCase:
|
|
35
|
+
name: str
|
|
36
|
+
query: str
|
|
37
|
+
intent: str
|
|
38
|
+
baseline_files: tuple[str, ...]
|
|
39
|
+
expected_files: tuple[str, ...]
|
|
40
|
+
|
|
41
|
+
|
|
33
42
|
# Matches runs of word-like characters, including path punctuation (".", "/", "-").
_WORD_RE = re.compile(r"[A-Za-z0-9_./-]+")

# File suffixes (lower-case, with leading dot) treated as source code.
_SOURCE_EXTS = {
    ".py",
    ".js", ".jsx", ".ts", ".tsx",
    ".java", ".go", ".rs", ".cs",
    ".c", ".cpp", ".h",
}

# Path components that exclude a file from source collection.
_IGNORED_DIRS = {
    # VCS / tool state
    ".git", ".gcie", ".planning",
    # environments and dependency trees
    ".venv", "node_modules", "__pycache__",
    # build and report output
    "dist", "build", "coverage",
}
|
|
34
55
|
|
|
35
56
|
|
|
36
57
|
def _query_keywords(text: str) -> list[str]:
|
|
@@ -252,7 +273,121 @@ def _summarize(label: str, rows: list[CaseResult]) -> dict:
|
|
|
252
273
|
}
|
|
253
274
|
|
|
254
275
|
|
|
255
|
-
def
|
|
276
|
+
def _collect_source_files(repo_path: Path) -> list[str]:
    """Return sorted, repo-relative POSIX paths of source files in *repo_path*.

    A file is included when its lower-cased suffix is in ``_SOURCE_EXTS`` and
    none of its parent directories (relative to *repo_path*) is named in
    ``_IGNORED_DIRS``.

    Args:
        repo_path: Root directory to scan.

    Returns:
        Alphabetically sorted list of paths relative to *repo_path*, using
        forward slashes. Empty when nothing matches.
    """
    # Local import: the module's import block is outside the visible hunk.
    import os

    files: list[str] = []
    for dirpath, dirnames, filenames in os.walk(repo_path):
        # Prune ignored directories in place so the walk never descends into
        # them (e.g. node_modules, .git) instead of listing their whole
        # contents and filtering afterwards.
        dirnames[:] = [name for name in dirnames if name not in _IGNORED_DIRS]
        for filename in filenames:
            path = Path(dirpath, filename)
            # Cheap suffix test first; only stat files that could qualify.
            if path.suffix.lower() not in _SOURCE_EXTS:
                continue
            # Excludes broken symlinks and other non-regular entries.
            if not path.is_file():
                continue
            files.append(path.relative_to(repo_path).as_posix())
    return sorted(files)
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _static_cases_for_repo(repo_path: Path) -> list[AdaptCase]:
    """Build adaptation cases from ``BENCHMARK_CASES`` that fit *repo_path*.

    A benchmark case is kept only when it lists at least one expected file and
    every expected file exists under *repo_path*. Its baseline files are
    narrowed to those that exist; when none do, the expected files are used as
    the baseline instead.

    Args:
        repo_path: Root of the repository the cases are checked against.

    Returns:
        The usable cases, in ``BENCHMARK_CASES`` order.
    """

    def present(rel: str) -> bool:
        # True when the repo-relative path exists on disk.
        return (repo_path / rel).exists()

    selected: list[AdaptCase] = []
    for bench in BENCHMARK_CASES:
        wanted = tuple(bench.expected_files)
        if not wanted or not all(present(rel) for rel in wanted):
            continue
        existing_baseline = tuple(rel for rel in bench.baseline_files if present(rel))
        selected.append(
            AdaptCase(
                name=bench.name,
                query=bench.query,
                intent=bench.intent,
                # Fall back to the expected set when no baseline file exists.
                baseline_files=existing_baseline or wanted,
                expected_files=wanted,
            )
        )
    return selected
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
    """Synthesize up to *needed* adaptation cases from the repo's source files.

    Cases are produced in stages, stopping as soon as *needed* is reached:

    1. single-file probes for the first ``max(needed // 2, 1)`` source files,
    2. pairs of adjacent files (sorted order) within the same directory,
    3. pairs of the first file found under adjacent top-level entries,
    4. additional single-file probes for files not used in stage 1.

    Every case uses the same tuple for ``baseline_files`` and
    ``expected_files``. A file set is never emitted twice.

    Args:
        repo_path: Root of the repository to scan.
        needed: Maximum number of cases to return.

    Returns:
        At most *needed* cases; empty when *needed* is not positive or the
        repository contains no source files.
    """
    # Nothing to generate: avoid scanning the repository at all.
    if needed <= 0:
        return []

    files = _collect_source_files(repo_path)
    if not files:
        return []

    # Group files by parent directory ("." for repo-root files).
    by_dir: dict[str, list[str]] = {}
    for rel in files:
        by_dir.setdefault(Path(rel).parent.as_posix(), []).append(rel)

    rows: list[AdaptCase] = []
    seen_names: set[str] = set()
    seen_expected: set[tuple[str, ...]] = set()

    def add_case(name: str, expected: tuple[str, ...], intent: str = "explore") -> None:
        # Append one case unless the quota is full or the file set is a repeat.
        if len(rows) >= needed or expected in seen_expected:
            return
        seen_expected.add(expected)

        # Normalise to a lower-case identifier and make it unique with a
        # numeric suffix (_2, _3, ...) when the name is already taken.
        base = re.sub(r"[^a-zA-Z0-9_]+", "_", name).strip("_").lower() or "case"
        safe_name = base
        suffix = 2
        while safe_name in seen_names:
            safe_name = f"{base}_{suffix}"
            suffix += 1
        seen_names.add(safe_name)

        # Query = the file paths followed by up to six hint tokens
        # (each file's stem plus the fixed words "flow" and "wiring").
        symbols: list[str] = []
        for rel in expected:
            symbols.extend([Path(rel).stem.lower(), "flow", "wiring"])
        query = f"{' '.join(expected)} {' '.join(symbols[:6])}".strip()

        rows.append(
            AdaptCase(
                name=safe_name,
                query=query,
                intent=intent,
                baseline_files=expected,
                expected_files=expected,
            )
        )

    # Stage 1: single-file probes, capped at half the quota (at least one).
    single_quota = max(needed // 2, 1)
    for rel in files[:single_quota]:
        add_case(f"single_{Path(rel).stem}", (rel,), intent="explore")
    if len(rows) >= needed:
        return rows[:needed]

    # Stage 2: same-directory pairs of neighbouring files.
    for parent, group in sorted(by_dir.items(), key=lambda item: item[0]):
        if len(group) < 2:
            continue
        ordered = sorted(group)
        for idx, pair in enumerate(zip(ordered, ordered[1:])):
            add_case(f"pair_{parent}_{idx}", pair, intent="explore")
            if len(rows) >= needed:
                return rows[:needed]

    # Stage 3: cross-directory pairs built from the first file of each
    # top-level entry. A repo-root file is its own top-level entry, so two
    # neighbouring root files can reproduce a stage-2 pair; add_case skips
    # such repeats via seen_expected.
    tops: dict[str, str] = {}
    for rel in files:
        parts = Path(rel).parts
        tops.setdefault(parts[0] if parts else rel, rel)
    top_files = list(tops.values())
    for idx, pair in enumerate(zip(top_files, top_files[1:])):
        add_case(f"cross_{idx}", pair, intent="explore")
        if len(rows) >= needed:
            return rows[:needed]

    # Stage 4: the repo did not offer enough pairs, so use the source files
    # that stage 1 left out rather than returning fewer cases than requested.
    for rel in files[single_quota:]:
        if len(rows) >= needed:
            break
        add_case(f"single_{Path(rel).stem}", (rel,), intent="explore")

    return rows[:needed]
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list[AdaptCase], str]:
    """Pick the adaptation cases for *repo_path*, generated from the repo itself.

    Args:
        repo_path: Root of the target repository.
        benchmark_size: Requested number of cases; coerced to ``int`` and
            raised to at least 1.

    Returns:
        ``(cases, source)`` where *source* is ``"generated_repo_local"`` when
        cases were generated, or ``"none_available"`` with an empty list.
    """
    limit = max(1, int(benchmark_size))
    cases = _generated_cases_for_repo(repo_path, limit)
    if not cases:
        return [], "none_available"
    return cases[:limit], "generated_repo_local"
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _write_back(repo_path: Path, best: dict, case_source: str) -> None:
|
|
256
391
|
cfg_path = repo_path / ".gcie" / "context_config.json"
|
|
257
392
|
if cfg_path.exists():
|
|
258
393
|
try:
|
|
@@ -268,6 +403,7 @@ def _write_back(repo_path: Path, best: dict) -> None:
|
|
|
268
403
|
"best_label": best.get("label"),
|
|
269
404
|
"full_hit_rate_pct": best.get("full_hit_rate_pct"),
|
|
270
405
|
"tokens_per_query": best.get("tokens_per_query"),
|
|
406
|
+
"case_source": case_source,
|
|
271
407
|
"updated_at": datetime.now(timezone.utc).isoformat(),
|
|
272
408
|
}
|
|
273
409
|
cfg_path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -290,17 +426,15 @@ def run_post_init_adaptation(
|
|
|
290
426
|
|
|
291
427
|
clear_adaptive_profile(repo_path.as_posix())
|
|
292
428
|
|
|
293
|
-
cases =
|
|
429
|
+
cases, case_source = _select_adaptation_cases(repo_path, benchmark_size)
|
|
294
430
|
if not cases:
|
|
295
431
|
return {
|
|
296
432
|
"status": "no_benchmark_cases",
|
|
297
433
|
"repo": repo_path.as_posix(),
|
|
298
|
-
"
|
|
434
|
+
"case_source": case_source,
|
|
435
|
+
"message": "No repo-usable adaptation cases available.",
|
|
299
436
|
}
|
|
300
437
|
|
|
301
|
-
benchmark_size = max(1, min(len(cases), int(benchmark_size)))
|
|
302
|
-
cases = cases[:benchmark_size]
|
|
303
|
-
|
|
304
438
|
slices_rows = [_evaluate_slices_case(case) for case in cases]
|
|
305
439
|
plain_rows = [_evaluate_plain_case(case, allow_gapfill=False) for case in cases]
|
|
306
440
|
plain_gap_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]
|
|
@@ -327,13 +461,15 @@ def run_post_init_adaptation(
|
|
|
327
461
|
if trial["full_hit_rate_pct"] >= active["full_hit_rate_pct"] and trial["tokens_per_query"] < active["tokens_per_query"]:
|
|
328
462
|
active = trial
|
|
329
463
|
|
|
330
|
-
_write_back(repo_path, active)
|
|
464
|
+
_write_back(repo_path, active, case_source)
|
|
331
465
|
|
|
332
466
|
report = {
|
|
333
467
|
"status": "ok",
|
|
334
468
|
"repo": repo_path.as_posix(),
|
|
335
|
-
"benchmark_size":
|
|
469
|
+
"benchmark_size": len(cases),
|
|
470
|
+
"requested_benchmark_size": int(benchmark_size),
|
|
336
471
|
"efficiency_iterations": int(efficiency_iterations),
|
|
472
|
+
"case_source": case_source,
|
|
337
473
|
"stages": {
|
|
338
474
|
"accuracy_candidates": [slices_summary, plain_summary, plain_gap_summary],
|
|
339
475
|
"selected_after_accuracy": best,
|
|
@@ -348,5 +484,3 @@ def run_post_init_adaptation(
|
|
|
348
484
|
out_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
|
|
349
485
|
report["report_path"] = out_path.as_posix()
|
|
350
486
|
return report
|
|
351
|
-
|
|
352
|
-
|