@pmaddire/gcie 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/GCIE_USAGE.md CHANGED
@@ -288,3 +288,58 @@ query -> scope -> profile/budget escalation -> targeted gap-fill -> rg fallback.
288
288
  1. This file is intentionally generalized and adaptive for any repo.
289
289
  2. Keep repo-specific tuning in learned overrides and `.gcie` state, not in global defaults.
290
290
  3. If in doubt, choose the higher-accuracy path first, then optimize tokens after lock.
291
+
292
+ ## Cross-Repo Adaptation Rules (Required)
293
+
294
+ Use these rules to keep adaptation portable across repositories.
295
+
296
+ 1. Adaptation case source must be repo-local:
297
+ - Prefer generated cases from actual files in the target repo.
298
+ - Do not rely on hardcoded expected files from another codebase family.
299
+ - If the report's `case_source` is not repo-local, treat the run as invalid.
300
+
301
+ 2. Accuracy lock is required, but selection must be cost-aware:
302
+ - First gate: `100%` must-have full-hit.
303
+ - If multiple candidates pass `100%`, choose the lowest `tokens_per_expected_hit`.
304
+ - Do not keep `slices` as the active default when a `plain` candidate also has `100%` and is cheaper.
305
+
306
+ 3. Near-miss rescue before expensive lock-in:
307
+ - If a cheaper candidate is below lock by one file/family (for example `90%`), run a short rescue cycle before accepting an expensive `100%` candidate:
308
+ 1) targeted gap-fill for missing must-have file(s)
309
+ 2) scope correction (subtree if clustered)
310
+ 3) one budget rung increase
311
+ - Re-evaluate after rescue; prefer the cheaper candidate if it reaches `100%`.
312
+
313
+ 4. Cost sanity guardrail:
314
+ - If the selected active candidate is `>40%` more expensive than the cheapest candidate, mark the status as `accuracy_locked_but_cost_risky` and continue family-level refinement.
315
+ - Keep accuracy lock, but do not finalize global defaults until cost risk is reduced.
316
+
317
+ 5. Family-scoped finalization:
318
+ - Finalize routing per family, not as one global winner.
319
+ - Example: keep `slices` only for families where it is uniquely required for `100%`; use `plain` on families where it is cheaper at equal hit rate.
320
+
321
+ 6. Required report checks each run:
322
+ - `case_source`
323
+ - `full_hit_rate_pct`
324
+ - `tokens_per_query`
325
+ - `tokens_per_expected_hit`
326
+ - token delta between selected candidate and cheapest candidate
327
+
328
+ ## Portable Validation Checklist (Any New Repo)
329
+
330
+ After running adaptation:
331
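+ 1. Confirm `status: ok` (a status of `accuracy_locked_but_cost_risky` means the cost sanity guardrail triggered; continue refinement before finalizing defaults).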
332
+ 2. Confirm `case_source: generated_repo_local`.
333
+ 3. Confirm `full_hit_rate_pct: 100` for the selected final profile.
334
+ 4. Compare selected profile vs cheapest candidate:
335
+ - if the selected profile is more than `40%` more expensive than the cheapest candidate (the cost sanity guardrail threshold), run one rescue iteration.
336
+ 5. Run a validation with 50 unique queries before trusting defaults broadly.
337
+
338
+ Commands:
339
+ ```powershell
340
+ gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5 --clear-profile
341
+ ```
342
+ ```powershell
343
+ gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5
344
+ ```
345
+
@@ -387,7 +387,7 @@ def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list
387
387
  return [], "none_available"
388
388
 
389
389
 
390
- def _write_back(repo_path: Path, best: dict, case_source: str) -> None:
390
+ def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict) -> None:
391
391
  cfg_path = repo_path / ".gcie" / "context_config.json"
392
392
  if cfg_path.exists():
393
393
  try:
@@ -399,11 +399,12 @@ def _write_back(repo_path: Path, best: dict, case_source: str) -> None:
399
399
  else:
400
400
  cfg = {}
401
401
  cfg["adaptation_pipeline"] = {
402
- "status": "complete",
402
+ "status": pipeline_status,
403
403
  "best_label": best.get("label"),
404
404
  "full_hit_rate_pct": best.get("full_hit_rate_pct"),
405
405
  "tokens_per_query": best.get("tokens_per_query"),
406
406
  "case_source": case_source,
407
+ "cost_analysis": cost_analysis,
407
408
  "updated_at": datetime.now(timezone.utc).isoformat(),
408
409
  }
409
410
  cfg_path.parent.mkdir(parents=True, exist_ok=True)
@@ -461,15 +462,37 @@ def run_post_init_adaptation(
461
462
  if trial["full_hit_rate_pct"] >= active["full_hit_rate_pct"] and trial["tokens_per_query"] < active["tokens_per_query"]:
462
463
  active = trial
463
464
 
464
- _write_back(repo_path, active, case_source)
465
+ cheapest = min(candidates, key=lambda item: (item["tokens_per_expected_hit"] or 10**9, item["tokens_per_query"]))
466
+ token_delta = int(active["total_tokens"] - cheapest["total_tokens"])
467
+ pct_delta = round((token_delta / max(1, int(cheapest["total_tokens"]))) * 100, 1)
468
+
469
+ pipeline_status = "ok"
470
+ if (
471
+ active.get("full_hit_rate_pct", 0.0) >= 100.0
472
+ and active.get("label") != cheapest.get("label")
473
+ and pct_delta > 40.0
474
+ ):
475
+ pipeline_status = "accuracy_locked_but_cost_risky"
476
+
477
+ cost_analysis = {
478
+ "cheapest_label": cheapest.get("label"),
479
+ "selected_label": active.get("label"),
480
+ "selected_vs_cheapest_token_delta": token_delta,
481
+ "selected_vs_cheapest_pct_delta": pct_delta,
482
+ "risk_threshold_pct": 40.0,
483
+ "cost_risky": pipeline_status == "accuracy_locked_but_cost_risky",
484
+ }
485
+
486
+ _write_back(repo_path, active, case_source, pipeline_status, cost_analysis)
465
487
 
466
488
  report = {
467
- "status": "ok",
489
+ "status": pipeline_status,
468
490
  "repo": repo_path.as_posix(),
469
491
  "benchmark_size": len(cases),
470
492
  "requested_benchmark_size": int(benchmark_size),
471
493
  "efficiency_iterations": int(efficiency_iterations),
472
494
  "case_source": case_source,
495
+ "cost_analysis": cost_analysis,
473
496
  "stages": {
474
497
  "accuracy_candidates": [slices_summary, plain_summary, plain_gap_summary],
475
498
  "selected_after_accuracy": best,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pmaddire/gcie",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "description": "GraphCode Intelligence Engine one-command setup and context CLI",
5
5
  "bin": {
6
6
  "gcie": "bin/gcie.js",