@pmaddire/gcie 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,7 +30,28 @@ class CaseResult:
30
30
  context_complete: bool
31
31
 
32
32
 
33
@dataclass(frozen=True, slots=True)
class AdaptCase:
    """A single repo-local adaptation benchmark case.

    Immutable record pairing a retrieval query with the files the
    context engine is expected to surface for that query.
    """

    # Unique case identifier (de-duplicated by the case generator).
    name: str
    # Free-text retrieval query evaluated against the repo.
    query: str
    # Query intent label (generated cases use "explore").
    intent: str
    # Files seeding the baseline context for this query.
    baseline_files: tuple[str, ...]
    # Files that must be retrieved for the case to count as a full hit.
    expected_files: tuple[str, ...]
40
+
41
+
33
42
  _WORD_RE = re.compile(r"[A-Za-z0-9_./-]+")
43
# File extensions treated as source code when scanning a repository.
_SOURCE_EXTS = {".py", ".js", ".jsx", ".ts", ".tsx", ".java", ".go", ".rs", ".cs", ".cpp", ".c", ".h"}
# Directory names skipped entirely during repository scans: VCS metadata,
# tool state (.gcie/.planning), virtualenvs, dependency caches, and build
# or coverage output.
_IGNORED_DIRS = {
    ".git",
    ".gcie",
    ".planning",
    ".venv",
    "node_modules",
    "__pycache__",
    "dist",
    "build",
    "coverage",
}
34
55
 
35
56
 
36
57
  def _query_keywords(text: str) -> list[str]:
@@ -252,7 +273,121 @@ def _summarize(label: str, rows: list[CaseResult]) -> dict:
252
273
  }
253
274
 
254
275
 
255
- def _write_back(repo_path: Path, best: dict) -> None:
276
def _collect_source_files(repo_path: Path) -> list[str]:
    """Return sorted POSIX-style relative paths of source files under *repo_path*.

    Entries inside ignored directories, non-files, and files without a
    recognized source extension are excluded.
    """

    def _is_source(candidate: Path) -> bool:
        # Regular files only, and only with a recognized source extension.
        if not candidate.is_file() or candidate.suffix.lower() not in _SOURCE_EXTS:
            return False
        # Reject anything nested under an ignored directory name.
        relative = candidate.relative_to(repo_path)
        return not any(part in _IGNORED_DIRS for part in relative.parts)

    return sorted(
        entry.relative_to(repo_path).as_posix()
        for entry in repo_path.rglob("*")
        if _is_source(entry)
    )
288
+
289
+
290
def _static_cases_for_repo(repo_path: Path) -> list[AdaptCase]:
    """Filter the static BENCHMARK_CASES down to ones usable in *repo_path*.

    A case is kept only when every expected file exists in the repo.
    Baseline files missing from the repo are dropped; when none survive,
    the expected files stand in as the baseline.
    """
    usable: list[AdaptCase] = []
    for source_case in list(BENCHMARK_CASES):
        expected = tuple(source_case.expected_files)
        # Cases with no expectations, or with any expected file absent
        # from this repo, cannot be scored here.
        if not expected or any(not (repo_path / rel).exists() for rel in expected):
            continue
        surviving = tuple(
            rel for rel in source_case.baseline_files if (repo_path / rel).exists()
        )
        usable.append(
            AdaptCase(
                name=source_case.name,
                query=source_case.query,
                intent=source_case.intent,
                baseline_files=surviving or expected,
                expected_files=expected,
            )
        )
    return usable
311
+
312
+
313
def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
    """Synthesize up to *needed* adaptation cases directly from repo files.

    Cases are generated in three tiers — single-file probes (capped at
    roughly half of *needed*), same-directory file pairs, then
    cross-directory pairs — stopping as soon as *needed* cases exist.
    Returns an empty list when the repo has no recognized source files.
    """
    files = _collect_source_files(repo_path)
    if not files:
        return []

    # Group relative paths by their parent directory (POSIX separators).
    by_dir: dict[str, list[str]] = {}
    for rel in files:
        parent = str(Path(rel).parent).replace("\\", "/")
        by_dir.setdefault(parent, []).append(rel)

    rows: list[AdaptCase] = []
    seen_names: set[str] = set()

    def add_case(name: str, expected: tuple[str, ...], intent: str = "explore") -> None:
        # Append one AdaptCase built from *expected*, unless the quota is met.
        if len(rows) >= needed:
            return
        # Sanitize the name, then suffix with _2, _3, ... to keep it unique.
        safe_name = re.sub(r"[^a-zA-Z0-9_]+", "_", name).strip("_").lower() or "case"
        if safe_name in seen_names:
            idx = 2
            while f"{safe_name}_{idx}" in seen_names:
                idx += 1
            safe_name = f"{safe_name}_{idx}"
        seen_names.add(safe_name)
        # Build a query from the expected paths plus a few stem-derived tokens
        # (capped at 6 symbols).
        symbols = []
        for rel in expected:
            stem = Path(rel).stem.lower()
            symbols.extend([stem, "flow", "wiring"])
        query = f"{' '.join(expected)} {' '.join(symbols[:6])}".strip()
        rows.append(
            AdaptCase(
                name=safe_name,
                query=query,
                intent=intent,
                baseline_files=expected,
                expected_files=expected,
            )
        )

    # Single-file probes.
    for rel in files:
        add_case(f"single_{Path(rel).stem}", (rel,), intent="explore")
        # Reserve at least half of the quota for pair-based cases.
        if len(rows) >= max(needed // 2, 1):
            break

    # Same-directory pairs.
    for parent, group in sorted(by_dir.items(), key=lambda item: item[0]):
        if len(group) < 2:
            continue
        group = sorted(group)
        for idx in range(len(group) - 1):
            add_case(f"pair_{parent}_{idx}", (group[idx], group[idx + 1]), intent="explore")
            if len(rows) >= needed:
                return rows[:needed]

    # Cross-directory pairs if still needed.
    # One representative file per top-level path component.
    tops: dict[str, str] = {}
    for rel in files:
        top = Path(rel).parts[0] if Path(rel).parts else rel
        tops.setdefault(top, rel)
    top_files = list(tops.values())
    for idx in range(len(top_files) - 1):
        add_case(f"cross_{idx}", (top_files[idx], top_files[idx + 1]), intent="explore")
        if len(rows) >= needed:
            break

    return rows[:needed]
379
+
380
+
381
def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list[AdaptCase], str]:
    """Select adaptation cases generated entirely from the target repo.

    Returns the chosen cases and a label describing their origin:
    "generated_repo_local" when repo-derived cases exist, otherwise
    "none_available" alongside an empty list.
    """
    size = max(1, int(benchmark_size))
    generated = _generated_cases_for_repo(repo_path, size)
    if not generated:
        return [], "none_available"
    return generated[:size], "generated_repo_local"
388
+
389
+
390
+ def _write_back(repo_path: Path, best: dict, case_source: str) -> None:
256
391
  cfg_path = repo_path / ".gcie" / "context_config.json"
257
392
  if cfg_path.exists():
258
393
  try:
@@ -268,6 +403,7 @@ def _write_back(repo_path: Path, best: dict) -> None:
268
403
  "best_label": best.get("label"),
269
404
  "full_hit_rate_pct": best.get("full_hit_rate_pct"),
270
405
  "tokens_per_query": best.get("tokens_per_query"),
406
+ "case_source": case_source,
271
407
  "updated_at": datetime.now(timezone.utc).isoformat(),
272
408
  }
273
409
  cfg_path.parent.mkdir(parents=True, exist_ok=True)
@@ -290,17 +426,15 @@ def run_post_init_adaptation(
290
426
 
291
427
  clear_adaptive_profile(repo_path.as_posix())
292
428
 
293
- cases = list(BENCHMARK_CASES)
429
+ cases, case_source = _select_adaptation_cases(repo_path, benchmark_size)
294
430
  if not cases:
295
431
  return {
296
432
  "status": "no_benchmark_cases",
297
433
  "repo": repo_path.as_posix(),
298
- "message": "No benchmark cases available for accuracy-locked adaptation.",
434
+ "case_source": case_source,
435
+ "message": "No repo-usable adaptation cases available.",
299
436
  }
300
437
 
301
- benchmark_size = max(1, min(len(cases), int(benchmark_size)))
302
- cases = cases[:benchmark_size]
303
-
304
438
  slices_rows = [_evaluate_slices_case(case) for case in cases]
305
439
  plain_rows = [_evaluate_plain_case(case, allow_gapfill=False) for case in cases]
306
440
  plain_gap_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]
@@ -327,13 +461,15 @@ def run_post_init_adaptation(
327
461
  if trial["full_hit_rate_pct"] >= active["full_hit_rate_pct"] and trial["tokens_per_query"] < active["tokens_per_query"]:
328
462
  active = trial
329
463
 
330
- _write_back(repo_path, active)
464
+ _write_back(repo_path, active, case_source)
331
465
 
332
466
  report = {
333
467
  "status": "ok",
334
468
  "repo": repo_path.as_posix(),
335
- "benchmark_size": benchmark_size,
469
+ "benchmark_size": len(cases),
470
+ "requested_benchmark_size": int(benchmark_size),
336
471
  "efficiency_iterations": int(efficiency_iterations),
472
+ "case_source": case_source,
337
473
  "stages": {
338
474
  "accuracy_candidates": [slices_summary, plain_summary, plain_gap_summary],
339
475
  "selected_after_accuracy": best,
@@ -348,5 +484,3 @@ def run_post_init_adaptation(
348
484
  out_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
349
485
  report["report_path"] = out_path.as_posix()
350
486
  return report
351
-
352
-
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pmaddire/gcie",
3
- "version": "0.1.6",
3
+ "version": "0.1.7",
4
4
  "description": "GraphCode Intelligence Engine one-command setup and context CLI",
5
5
  "bin": {
6
6
  "gcie": "bin/gcie.js",