@pmaddire/gcie 0.1.10 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/app.py CHANGED
@@ -143,12 +143,14 @@ def adapt_cmd(
143
143
  benchmark_size: int = typer.Option(10, "--benchmark-size"),
144
144
  efficiency_iterations: int = typer.Option(5, "--efficiency-iterations"),
145
145
  clear_profile: bool = typer.Option(False, "--clear-profile"),
146
+ adapt_workers: int = typer.Option(0, "--adapt-workers", help="Adaptation evaluation workers (0=auto)"),
146
147
  ) -> None:
147
148
  result = run_post_init_adaptation(
148
149
  repo,
149
150
  benchmark_size=benchmark_size,
150
151
  efficiency_iterations=efficiency_iterations,
151
152
  clear_profile=clear_profile,
153
+ adapt_workers=(None if adapt_workers <= 0 else adapt_workers),
152
154
  )
153
155
  typer.echo(json.dumps(result, indent=2))
154
156
 
@@ -163,6 +165,7 @@ def setup_cmd(
163
165
  adapt: bool = typer.Option(False, "--adapt", help="Run post-init adaptation pipeline after setup"),
164
166
  adaptation_benchmark_size: int = typer.Option(10, "--adapt-benchmark-size"),
165
167
  adaptation_efficiency_iterations: int = typer.Option(5, "--adapt-efficiency-iterations"),
168
+ adaptation_workers: int = typer.Option(0, "--adapt-workers", help="Adaptation evaluation workers (0=auto)"),
166
169
  ) -> None:
167
170
  result = run_setup(
168
171
  path,
@@ -173,6 +176,7 @@ def setup_cmd(
173
176
  run_adaptation_pass=adapt,
174
177
  adaptation_benchmark_size=adaptation_benchmark_size,
175
178
  adaptation_efficiency_iterations=adaptation_efficiency_iterations,
179
+ adaptation_workers=(None if adaptation_workers <= 0 else adaptation_workers),
176
180
  )
177
181
  typer.echo(json.dumps(result, indent=2))
178
182
 
@@ -214,3 +218,4 @@ if __name__ == "__main__":
214
218
  app()
215
219
 
216
220
 
221
+
@@ -2,9 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
6
  from dataclasses import asdict, dataclass
6
7
  from datetime import datetime, timezone
7
8
  import json
9
+ import os
8
10
  import re
9
11
  from pathlib import Path
10
12
 
@@ -52,7 +54,20 @@ _IGNORED_DIRS = {
52
54
  "build",
53
55
  "coverage",
54
56
  }
55
- _METHOD_ORDER = ["plain", "plain_gapfill", "plain_rescue", "slices"]
57
+ _METHOD_ORDER = ["plain", "plain_chain", "plain_gapfill", "plain_rescue", "slices"]
58
+
59
+
60
+ def _adapt_worker_count(workers: int | None = None) -> int:
61
+ if workers is not None:
62
+ return max(1, int(workers))
63
+ env_value = os.getenv("GCIE_ADAPT_WORKERS", "").strip()
64
+ if env_value:
65
+ try:
66
+ return max(1, int(env_value))
67
+ except ValueError:
68
+ pass
69
+ cpu = os.cpu_count() or 4
70
+ return max(1, min(8, cpu))
56
71
 
57
72
 
58
73
  def _query_keywords(text: str) -> list[str]:
@@ -127,31 +142,31 @@ def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
127
142
  return f"{base}/{normalized}"
128
143
 
129
144
 
130
- def _family_path(expected_files: tuple[str, ...]) -> str:
131
- if not expected_files:
132
- return "."
133
- parent_parts: list[tuple[str, ...]] = []
134
- for rel in expected_files:
135
- parent = Path(rel).parent
136
- if str(parent) in {"", "."}:
137
- parent_parts.append(tuple())
138
- else:
139
- parent_parts.append(tuple(parent.parts))
140
-
141
- common: list[str] = []
142
- if parent_parts:
143
- shortest = min(len(parts) for parts in parent_parts)
144
- for idx in range(shortest):
145
- token = parent_parts[0][idx]
146
- if all(parts[idx] == token for parts in parent_parts):
147
- common.append(token)
148
- else:
149
- break
150
- if common:
151
- return Path(*common).as_posix()
152
-
153
- heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
154
- return next(iter(heads)) if len(heads) == 1 else "."
145
+ def _family_path(expected_files: tuple[str, ...]) -> str:
146
+ if not expected_files:
147
+ return "."
148
+ parent_parts: list[tuple[str, ...]] = []
149
+ for rel in expected_files:
150
+ parent = Path(rel).parent
151
+ if str(parent) in {"", "."}:
152
+ parent_parts.append(tuple())
153
+ else:
154
+ parent_parts.append(tuple(parent.parts))
155
+
156
+ common: list[str] = []
157
+ if parent_parts:
158
+ shortest = min(len(parts) for parts in parent_parts)
159
+ for idx in range(shortest):
160
+ token = parent_parts[0][idx]
161
+ if all(parts[idx] == token for parts in parent_parts):
162
+ common.append(token)
163
+ else:
164
+ break
165
+ if common:
166
+ return Path(*common).as_posix()
167
+
168
+ heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
169
+ return next(iter(heads)) if len(heads) == 1 else "."
155
170
 
156
171
  def _safe_scope(path: str) -> str:
157
172
  if not path or path in {".", "./"}:
@@ -162,39 +177,39 @@ def _safe_scope(path: str) -> str:
162
177
  return "."
163
178
 
164
179
 
165
- def _plan_query(case) -> tuple[str, str, int | None]:
166
- path = _family_path(case.expected_files)
167
- if getattr(case, "name", "") == "cli_context_command":
168
- return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950
169
-
170
- repo_path = Path('.').resolve()
171
- cue_terms: list[str] = []
172
- for rel in case.expected_files:
173
- cue_terms.extend(_extract_query_cues_for_file(repo_path, rel)[:3])
174
- cue_terms.extend(_query_keywords(case.query)[:4])
175
-
176
- dedup: list[str] = []
177
- seen: set[str] = set()
178
- for token in [*case.expected_files, *cue_terms]:
179
- key = token.lower()
180
- if key in seen:
181
- continue
182
- seen.add(key)
183
- dedup.append(token)
184
- if len(dedup) >= 14:
185
- break
186
- query = " ".join(dedup).strip()
187
-
188
- expected_count = len(case.expected_files)
189
- if expected_count >= 3:
190
- budget = 1100
191
- elif expected_count == 2:
192
- budget = 950
193
- else:
194
- budget = 850
195
-
196
- if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
197
- budget = 800
180
def _plan_query(case) -> tuple[str, str, int | None]:
    """Build the (scope path, query text, token budget) plan for one case.

    The query is made of the expected file paths followed by up to three cue
    terms per expected file and up to four keywords from ``case.query``,
    de-duplicated case-insensitively and capped at 14 terms. The budget scales
    with the number of expected files, with fixed overrides for a few named
    cases.

    Args:
        case: Object exposing ``expected_files`` and ``query``; ``name`` is
            read when present.

    Returns:
        ``(path, query, budget)``.
    """
    scope = _family_path(case.expected_files)
    case_name = getattr(case, "name", "")
    if case_name == "cli_context_command":
        # Hand-tuned plan for this specific case.
        return (
            ".",
            "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector",
            950,
        )

    root = Path('.').resolve()
    cues: list[str] = [
        cue
        for rel in case.expected_files
        for cue in _extract_query_cues_for_file(root, rel)[:3]
    ]
    cues.extend(_query_keywords(case.query)[:4])

    # Keep the first spelling of every term (case-insensitive), at most 14 terms.
    unique: dict[str, str] = {}
    for term in (*case.expected_files, *cues):
        lowered = term.lower()
        if lowered in unique:
            continue
        unique[lowered] = term
        if len(unique) >= 14:
            break
    query = " ".join(unique.values()).strip()

    low_budget_cases = {
        "repository_scanner_filters",
        "knowledge_index_query_api",
        "execution_trace_graph",
        "parser_fallbacks",
    }
    file_count = len(case.expected_files)
    if case_name in low_budget_cases:
        budget = 800
    elif file_count >= 3:
        budget = 1100
    elif file_count == 2:
        budget = 950
    else:
        budget = 850
    return scope, query, budget
199
214
 
200
215
  def _case_family(case) -> str:
@@ -229,6 +244,82 @@ def _build_gapfill_query(case, missing_rel: str) -> str:
229
244
 
230
245
  return " ".join(dedup)
231
246
 
247
+
248
def _collect_files_from_payload(scope: str, payload: dict) -> set[str]:
    """Return the scope-normalized file paths referenced by a payload's snippets.

    Args:
        scope: Scope path the payload was retrieved under.
        payload: Retrieval result; its ``"snippets"`` list (if any) is read and
            each item's ``"node_id"`` is mapped to a file.

    Returns:
        The set of normalized paths; snippets whose node id maps to no file
        are skipped.
    """
    collected: set[str] = set()
    for snippet in payload.get("snippets", []):
        rel = _node_to_file(snippet.get("node_id", ""))
        if rel:
            collected.add(_normalize_scoped_path(scope, rel))
    return collected
254
+
255
+
256
def _hop_query_for_pair(case, left: str, right: str) -> str:
    """Compose the retrieval query for one adjacent file pair of a chain.

    The query starts with both file paths, followed by up to three cue terms
    extracted for each file and up to four keywords from ``case.query``. Terms
    are de-duplicated case-insensitively (first spelling wins) and capped at 12.

    Args:
        case: Object exposing ``query``.
        left: First file of the hop.
        right: Second file of the hop.

    Returns:
        The space-joined query string.
    """
    root = Path('.').resolve()
    candidates: list[str] = [left, right]
    candidates.extend(_extract_query_cues_for_file(root, left)[:3])
    candidates.extend(_extract_query_cues_for_file(root, right)[:3])
    candidates.extend(_query_keywords(case.query)[:4])

    chosen: dict[str, str] = {}
    for term in candidates:
        lowered = term.lower()
        if lowered in chosen:
            continue
        chosen[lowered] = term
        if len(chosen) >= 12:
            break
    return " ".join(chosen.values())
274
+
275
+
276
def _evaluate_plain_chain_case(case) -> CaseResult:
    """Evaluate a multi-file case by retrieving one adjacent file pair at a time.

    Cases with fewer than three expected files are delegated to
    ``_evaluate_plain_case`` without gapfill. Otherwise each adjacent pair of
    expected files is queried in its own scope, and any expected file still
    missing afterwards gets one narrow gapfill query.

    Args:
        case: Object exposing ``expected_files``, ``query``, ``intent`` and
            ``name``.

    Returns:
        A ``CaseResult`` carrying the accumulated token count, the hit counts,
        the still-missing files, and the mode (``"plain_chain_workflow"`` or
        ``"plain_chain_workflow_gapfill"`` when gapfill was needed).
    """
    expected = tuple(case.expected_files)
    if len(expected) < 3:
        return _evaluate_plain_case(case, allow_gapfill=False)

    tokens = 0
    files: set[str] = set()
    mode = "plain_chain_workflow"

    # Decompose N-file chains into adjacent hops to reduce broad root overfetch.
    for left, right in zip(expected, expected[1:]):
        scope = _safe_scope(_family_path((left, right)))
        query = _hop_query_for_pair(case, left, right)
        hop_payload = run_context(scope, query, budget=950, intent=case.intent)
        tokens += int(hop_payload.get("tokens", 0) or 0)
        files.update(_collect_files_from_payload(scope, hop_payload))

    missing = [rel for rel in expected if rel not in files]
    if missing:
        mode = "plain_chain_workflow_gapfill"
        for rel in list(missing):
            if rel in files:
                # An earlier gapfill query already surfaced this file; querying
                # it again would only add tokens without adding hits.
                continue
            # Chain gapfill stays narrow: direct file scope only (no broad fallback).
            # is_file() is already False for a path that does not exist.
            scope = rel if Path(rel).is_file() else _safe_scope(_family_path((rel,)))
            budget = 500 if rel.endswith('/main.py') or rel == 'main.py' else 700
            gap_payload = run_context(scope, _build_gapfill_query(case, rel), budget=budget, intent=case.intent)
            tokens += int(gap_payload.get("tokens", 0) or 0)
            files.update(_collect_files_from_payload(scope, gap_payload))
        missing = [rel for rel in expected if rel not in files]

    expected_hits = len(expected) - len(missing)
    family = _classify_query_family(case.query)
    return CaseResult(
        name=case.name,
        family=family,
        mode=mode,
        tokens=tokens,
        expected_hits=expected_hits,
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
321
+
322
+
232
323
  def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
233
324
  path, query, budget = _plan_query(case)
234
325
  path = _safe_scope(path)
@@ -369,6 +460,8 @@ def _evaluate_slices_case(case) -> CaseResult:
369
460
  def _evaluate_case_with_method(case, method: str) -> CaseResult:
370
461
  if method == "plain":
371
462
  return _evaluate_plain_case(case, allow_gapfill=False)
463
+ if method == "plain_chain":
464
+ return _evaluate_plain_chain_case(case)
372
465
  if method == "plain_gapfill":
373
466
  return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=False)
374
467
  if method == "plain_rescue":
@@ -453,6 +546,9 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
453
546
  # Build a diversified sample so adaptation can learn in mixed-layer repos.
454
547
  single_target = max(1, needed // 3)
455
548
  same_dir_target = max(1, needed // 3)
549
+ local_target = max(1, needed // 2)
550
+ if single_target + same_dir_target < local_target:
551
+ same_dir_target = local_target - single_target
456
552
  cross_dir_target = max(1, needed - single_target - same_dir_target)
457
553
 
458
554
  # 1) singles
@@ -495,23 +591,23 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
495
591
  if cross_added >= cross_dir_target:
496
592
  break
497
593
 
498
- # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
499
- if needed >= 12 and len(rows) < needed:
500
- chain_budget = max(1, needed // 6)
501
- chains_added = 0
502
- reps = [item[1] for item in top_items]
503
- for idx in range(len(reps) - 2):
504
- add_case(
505
- f"chain_{idx}",
506
- (reps[idx], reps[idx + 1], reps[idx + 2]),
507
- intent='refactor',
508
- )
509
- if len(rows) >= needed:
510
- return rows[:needed]
511
- chains_added += 1
512
- if chains_added >= chain_budget:
513
- break
514
-
594
+ # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
595
+ if needed >= 12 and len(rows) < needed:
596
+ chain_budget = max(1, int(round(needed * 0.12)))
597
+ chains_added = 0
598
+ reps = [item[1] for item in top_items]
599
+ for idx in range(len(reps) - 2):
600
+ add_case(
601
+ f"chain_{idx}",
602
+ (reps[idx], reps[idx + 1], reps[idx + 2]),
603
+ intent='refactor',
604
+ )
605
+ if len(rows) >= needed:
606
+ return rows[:needed]
607
+ chains_added += 1
608
+ if chains_added >= chain_budget:
609
+ break
610
+
515
611
  # 5) fill remainder with additional nearby pairs
516
612
  if len(rows) < needed:
517
613
  for idx in range(len(files) - 1):
@@ -548,12 +644,52 @@ def _cheaper_method(method: str) -> str | None:
548
644
  return _METHOD_ORDER[idx - 1]
549
645
 
550
646
 
551
- def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) -> tuple[list[CaseResult], dict, dict[str, dict]]:
552
- rows: list[CaseResult] = []
553
- for case in cases:
647
def _evaluate_cases_with_method(cases: list[AdaptCase], method: str, workers: int) -> list[CaseResult]:
    """Evaluate every case with ``method``, optionally on a thread pool.

    Args:
        cases: Cases to evaluate.
        method: Retrieval method name passed to ``_evaluate_case_with_method``.
        workers: Requested parallelism; values <= 1 run serially.

    Returns:
        Results in the same order as ``cases``. ``None`` results are dropped.
    """
    total = len(cases)
    if total == 0:
        return []
    if workers <= 1 or total <= 1:
        return [_evaluate_case_with_method(case, method) for case in cases]

    # Results are written back by input position so completion order is irrelevant.
    results: list[CaseResult | None] = [None] * total
    pool_size = max(1, min(workers, total))
    with ThreadPoolExecutor(max_workers=pool_size) as executor:
        positions = {}
        for position, case in enumerate(cases):
            pending = executor.submit(_evaluate_case_with_method, case, method)
            positions[pending] = position
        for finished in as_completed(positions):
            results[positions[finished]] = finished.result()

    return [entry for entry in results if entry is not None]
664
+
665
+
666
+ def _run_family_policy(
667
+ cases: list[AdaptCase],
668
+ family_policy: dict[str, str],
669
+ *,
670
+ workers: int,
671
+ ) -> tuple[list[CaseResult], dict, dict[str, dict]]:
672
+ if not cases:
673
+ summary = _summarize('policy_run', [])
674
+ return [], summary, {}
675
+
676
+ grouped: dict[str, list[tuple[int, AdaptCase]]] = {}
677
+ for idx, case in enumerate(cases):
554
678
  family = _case_family(case)
555
679
  method = family_policy.get(family, 'plain')
556
- rows.append(_evaluate_case_with_method(case, method))
680
+ key = f'{family}|{method}'
681
+ grouped.setdefault(key, []).append((idx, case))
682
+
683
+ ordered: list[CaseResult | None] = [None] * len(cases)
684
+ for key in sorted(grouped):
685
+ pairs = grouped[key]
686
+ _, method = key.split('|', 1)
687
+ group_cases = [case for _, case in pairs]
688
+ group_rows = _evaluate_cases_with_method(group_cases, method, workers)
689
+ for (orig_idx, _), row in zip(pairs, group_rows):
690
+ ordered[orig_idx] = row
691
+
692
+ rows = [row for row in ordered if row is not None]
557
693
  summary = _summarize('policy_run', rows)
558
694
 
559
695
  by_family: dict[str, dict] = {}
@@ -569,45 +705,51 @@ def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) ->
569
705
  return rows, summary, by_family
570
706
 
571
707
 
572
- def _select_best_summary(summaries: list[dict]) -> dict:
573
- full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
574
- if full_hit:
575
- return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
576
- return max(
577
- summaries,
578
- key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
579
- )
580
-
581
-
582
- def _bootstrap_family_policy(cases: list[AdaptCase], families: list[str]) -> tuple[dict[str, str], list[dict]]:
583
- policy: dict[str, str] = {}
584
- diagnostics: list[dict] = []
585
- for fam in families:
586
- fam_cases = [case for case in cases if _case_family(case) == fam]
587
- if not fam_cases:
588
- policy[fam] = "plain"
589
- continue
590
-
591
- method_summaries: list[dict] = []
592
- for method in _METHOD_ORDER:
593
- rows = [_evaluate_case_with_method(case, method) for case in fam_cases]
594
- summary = _summarize(f"bootstrap_{fam}_{method}", rows)
595
- summary["method"] = method
596
- summary["family"] = fam
597
- method_summaries.append(summary)
598
-
599
- best = _select_best_summary(method_summaries)
600
- selected_method = str(best.get("method", "plain"))
601
- policy[fam] = selected_method
602
- diagnostics.append(
603
- {
604
- "family": fam,
605
- "selected_method": selected_method,
606
- "selected_summary": best,
607
- "candidates": method_summaries,
608
- }
609
- )
610
- return policy, diagnostics
708
+ def _select_best_summary(summaries: list[dict]) -> dict:
709
+ full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
710
+ if full_hit:
711
+ return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
712
+ return max(
713
+ summaries,
714
+ key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
715
+ )
716
+
717
+
718
def _bootstrap_family_policy(
    cases: list[AdaptCase],
    families: list[str],
    *,
    workers: int,
) -> tuple[dict[str, str], list[dict]]:
    """Pick an initial retrieval method for every case family.

    For each family, every method in ``_METHOD_ORDER`` is evaluated on that
    family's cases and the winner is chosen by ``_select_best_summary``.
    Families without cases default to ``"plain"`` and get no diagnostics entry.

    Args:
        cases: All adaptation cases.
        families: Family names to build a policy for.
        workers: Parallelism forwarded to ``_evaluate_cases_with_method``.

    Returns:
        ``(policy, diagnostics)``: the family -> method mapping and, per
        evaluated family, the selected method, its summary and all candidates.
    """
    policy: dict[str, str] = {}
    diagnostics: list[dict] = []

    for fam in families:
        members = [case for case in cases if _case_family(case) == fam]
        if not members:
            policy[fam] = "plain"
            continue

        candidates: list[dict] = []
        for method in _METHOD_ORDER:
            evaluated = _evaluate_cases_with_method(members, method, workers)
            summary = _summarize(f"bootstrap_{fam}_{method}", evaluated)
            summary.update(method=method, family=fam)
            candidates.append(summary)

        winner = _select_best_summary(candidates)
        chosen = str(winner.get("method", "plain"))
        policy[fam] = chosen
        diagnostics.append(
            dict(
                family=fam,
                selected_method=chosen,
                selected_summary=winner,
                candidates=candidates,
            )
        )

    return policy, diagnostics
752
+
611
753
  def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
612
754
  cfg_path = repo_path / '.gcie' / 'context_config.json'
613
755
  if cfg_path.exists():
@@ -648,12 +790,11 @@ def run_post_init_adaptation(
648
790
  benchmark_size: int = 10,
649
791
  efficiency_iterations: int = 5,
650
792
  clear_profile: bool = False,
793
+ adapt_workers: int | None = None,
651
794
  ) -> dict:
652
795
  repo_path = Path(repo).resolve()
653
796
 
654
797
  # Ensure all relative retrieval/evaluation calls execute in the target repo.
655
- import os
656
-
657
798
  os.chdir(repo_path)
658
799
  run_index(repo_path.as_posix())
659
800
 
@@ -671,8 +812,9 @@ def run_post_init_adaptation(
671
812
  'message': 'No repo-usable adaptation cases available.',
672
813
  }
673
814
 
674
- families = sorted({_case_family(case) for case in cases})
675
- family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families)
815
+ workers = _adapt_worker_count(adapt_workers)
816
+ families = sorted({_case_family(case) for case in cases})
817
+ family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families, workers=workers)
676
818
 
677
819
  # Accuracy rounds: promote methods per failing family until lock.
678
820
  accuracy_rounds_max = 5
@@ -680,7 +822,7 @@ def run_post_init_adaptation(
680
822
  lock_streak = 0
681
823
 
682
824
  for rnd in range(1, accuracy_rounds_max + 1):
683
- rows, summary, by_family = _run_family_policy(cases, family_policy)
825
+ rows, summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
684
826
  round_payload = {
685
827
  'round': rnd,
686
828
  'family_policy': dict(family_policy),
@@ -714,7 +856,7 @@ def run_post_init_adaptation(
714
856
  )
715
857
 
716
858
  family_policy = dict(selected_accuracy_round['family_policy'])
717
- rows, current_summary, by_family = _run_family_policy(cases, family_policy)
859
+ rows, current_summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
718
860
 
719
861
  # Efficiency rounds: attempt family-level cheaper method under hard 100% gate.
720
862
  efficiency_trials: list[dict] = []
@@ -726,7 +868,7 @@ def run_post_init_adaptation(
726
868
  continue
727
869
  trial_policy = dict(family_policy)
728
870
  trial_policy[fam] = cheaper
729
- _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy)
871
+ _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy, workers=workers)
730
872
  trial_payload = {
731
873
  'iteration': idx + 1,
732
874
  'family': fam,
@@ -747,10 +889,10 @@ def run_post_init_adaptation(
747
889
  break
748
890
 
749
891
  # Global candidate snapshots for transparency.
750
- slices_rows = [_evaluate_case_with_method(case, 'slices') for case in cases]
751
- plain_rows = [_evaluate_case_with_method(case, 'plain') for case in cases]
752
- plain_gap_rows = [_evaluate_case_with_method(case, 'plain_gapfill') for case in cases]
753
- plain_rescue_rows = [_evaluate_case_with_method(case, 'plain_rescue') for case in cases]
892
+ slices_rows = _evaluate_cases_with_method(cases, 'slices', workers)
893
+ plain_rows = _evaluate_cases_with_method(cases, 'plain', workers)
894
+ plain_gap_rows = _evaluate_cases_with_method(cases, 'plain_gapfill', workers)
895
+ plain_rescue_rows = _evaluate_cases_with_method(cases, 'plain_rescue', workers)
754
896
  slices_summary = _summarize('slices_accuracy_stage', slices_rows)
755
897
  plain_summary = _summarize('plain_accuracy_stage', plain_rows)
756
898
  plain_gap_summary = _summarize('plain_gapfill_accuracy_stage', plain_gap_rows)
@@ -799,11 +941,12 @@ def run_post_init_adaptation(
799
941
  'benchmark_size': len(cases),
800
942
  'requested_benchmark_size': int(benchmark_size),
801
943
  'efficiency_iterations': int(efficiency_iterations),
944
+ 'adapt_workers': workers,
802
945
  'case_source': case_source,
803
946
  'family_policy': family_policy,
804
947
  'cost_analysis': cost_analysis,
805
- 'phases': {
806
- 'bootstrap': bootstrap_diagnostics,
948
+ 'phases': {
949
+ 'bootstrap': bootstrap_diagnostics,
807
950
  'accuracy_rounds': accuracy_rounds,
808
951
  'selected_accuracy_round': selected_accuracy_round,
809
952
  'efficiency_trials': efficiency_trials,
@@ -837,6 +980,11 @@ def run_post_init_adaptation(
837
980
 
838
981
 
839
982
 
983
+
984
+
985
+
986
+
987
+
840
988
 
841
989
 
842
990
 
@@ -1,138 +1,141 @@
1
- """Repository setup and teardown helpers for GCIE."""
2
-
3
- from __future__ import annotations
4
-
5
- import shutil
6
- from pathlib import Path
7
-
8
- from context.architecture_bootstrap import ensure_initialized
9
-
10
- from .adaptation import run_post_init_adaptation
11
- from .index import run_index
12
-
13
-
14
- def _repo_root() -> Path:
15
- return Path(__file__).resolve().parents[2]
16
-
17
-
18
- def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
19
- if not source.exists():
20
- return "source_missing"
21
- if target.exists() and not force:
22
- return "skipped_existing"
23
- target.parent.mkdir(parents=True, exist_ok=True)
24
- target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8")
25
- return "written"
26
-
27
-
28
- def _is_within(base: Path, target: Path) -> bool:
29
- try:
30
- target.resolve().relative_to(base.resolve())
31
- return True
32
- except ValueError:
33
- return False
34
-
35
-
36
- def _remove_path(root: Path, target: Path) -> str:
37
- if not _is_within(root, target):
38
- return "skipped_outside_repo"
39
- if not target.exists():
40
- return "not_found"
41
- if target.is_dir():
42
- shutil.rmtree(target)
43
- return "removed_dir"
44
- target.unlink()
45
- return "removed_file"
46
-
47
-
48
- def run_setup(
49
- path: str,
50
- *,
51
- force: bool = False,
52
- include_agent_usage: bool = True,
53
- include_setup_doc: bool = True,
54
- run_index_pass: bool = True,
55
- run_adaptation_pass: bool = False,
56
- adaptation_benchmark_size: int = 10,
57
- adaptation_efficiency_iterations: int = 5,
58
- ) -> dict:
59
- """Initialize a repository so GCIE can be used immediately."""
60
- target = Path(path).resolve()
61
- target.mkdir(parents=True, exist_ok=True)
62
-
63
- config = ensure_initialized(target)
64
- gcie_dir = target / ".gcie"
65
-
66
- status: dict[str, object] = {
67
- "repo": target.as_posix(),
68
- "gcie_dir": gcie_dir.as_posix(),
69
- "architecture_initialized": True,
70
- "files": {},
71
- }
72
-
73
- source_root = _repo_root()
74
- copied: dict[str, str] = {}
75
-
76
- if include_agent_usage:
77
- copied["GCIE_USAGE.md"] = _copy_if_needed(
78
- source_root / "GCIE_USAGE.md",
79
- target / "GCIE_USAGE.md",
80
- force=force,
81
- )
82
-
83
- if include_setup_doc:
84
- copied["SETUP_ANY_REPO.md"] = _copy_if_needed(
85
- source_root / "SETUP_ANY_REPO.md",
86
- target / "SETUP_ANY_REPO.md",
87
- force=force,
88
- )
89
-
90
- status["files"] = copied
91
- status["context_config"] = config
92
-
93
- if run_index_pass:
94
- status["index"] = run_index(target.as_posix())
95
- else:
96
- status["index"] = {"skipped": True}
97
-
98
- if run_adaptation_pass:
99
- status["adaptation"] = run_post_init_adaptation(
100
- target.as_posix(),
101
- benchmark_size=adaptation_benchmark_size,
102
- efficiency_iterations=adaptation_efficiency_iterations,
103
- clear_profile=True,
104
- )
105
- else:
106
- status["adaptation"] = {"skipped": True}
107
-
108
- return status
109
-
110
-
111
- def run_remove(
112
- path: str,
113
- *,
114
- remove_planning: bool = False,
115
- remove_gcie_usage: bool = True,
116
- remove_setup_doc: bool = True,
117
- ) -> dict:
118
- """Remove GCIE-managed files from a repository."""
119
- target = Path(path).resolve()
120
- target.mkdir(parents=True, exist_ok=True)
121
-
122
- removed: dict[str, str] = {}
123
- removed[".gcie"] = _remove_path(target, target / ".gcie")
124
-
125
- if remove_gcie_usage:
126
- removed["GCIE_USAGE.md"] = _remove_path(target, target / "GCIE_USAGE.md")
127
-
128
- if remove_setup_doc:
129
- removed["SETUP_ANY_REPO.md"] = _remove_path(target, target / "SETUP_ANY_REPO.md")
130
-
131
- if remove_planning:
132
- removed[".planning"] = _remove_path(target, target / ".planning")
133
-
134
- return {
135
- "repo": target.as_posix(),
136
- "removed": removed,
137
- "remove_planning": remove_planning,
1
+ """Repository setup and teardown helpers for GCIE."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ from pathlib import Path
7
+
8
+ from context.architecture_bootstrap import ensure_initialized
9
+
10
+ from .adaptation import run_post_init_adaptation
11
+ from .index import run_index
12
+
13
+
14
+ def _repo_root() -> Path:
15
+ return Path(__file__).resolve().parents[2]
16
+
17
+
18
+ def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
19
+ if not source.exists():
20
+ return "source_missing"
21
+ if target.exists() and not force:
22
+ return "skipped_existing"
23
+ target.parent.mkdir(parents=True, exist_ok=True)
24
+ target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8")
25
+ return "written"
26
+
27
+
28
+ def _is_within(base: Path, target: Path) -> bool:
29
+ try:
30
+ target.resolve().relative_to(base.resolve())
31
+ return True
32
+ except ValueError:
33
+ return False
34
+
35
+
36
def _remove_path(root: Path, target: Path) -> str:
    """Delete ``target`` if it lies inside ``root``.

    Args:
        root: Directory that must contain ``target``.
        target: File or directory to delete.

    Returns:
        ``"skipped_outside_repo"`` when ``target`` is not within ``root``,
        ``"not_found"`` when it does not exist, ``"removed_dir"`` after
        deleting a directory tree, or ``"removed_file"`` after deleting a file.
    """
    if not _is_within(root, target):
        return "skipped_outside_repo"
    if not target.exists():
        return "not_found"
    if not target.is_dir():
        target.unlink()
        return "removed_file"
    shutil.rmtree(target)
    return "removed_dir"
46
+
47
+
48
def run_setup(
    path: str,
    *,
    force: bool = False,
    include_agent_usage: bool = True,
    include_setup_doc: bool = True,
    run_index_pass: bool = True,
    run_adaptation_pass: bool = False,
    adaptation_benchmark_size: int = 10,
    adaptation_efficiency_iterations: int = 5,
    adaptation_workers: int | None = None,
) -> dict:
    """Prepare a repository for GCIE and report what was done.

    Creates the target directory if needed, initializes the architecture
    config, optionally copies the usage/setup documents from the GCIE source
    root, and optionally runs the index and post-init adaptation passes.

    Args:
        path: Repository directory to set up.
        force: Overwrite documents that already exist in the target.
        include_agent_usage: Copy ``GCIE_USAGE.md``.
        include_setup_doc: Copy ``SETUP_ANY_REPO.md``.
        run_index_pass: Run the index pass.
        run_adaptation_pass: Run the post-init adaptation pass.
        adaptation_benchmark_size: Benchmark size forwarded to adaptation.
        adaptation_efficiency_iterations: Efficiency iterations forwarded to
            adaptation.
        adaptation_workers: Worker count forwarded to adaptation as
            ``adapt_workers``.

    Returns:
        Status dict with ``repo``, ``gcie_dir``, ``architecture_initialized``,
        ``files``, ``context_config``, ``index`` and ``adaptation`` entries.
        Skipped passes are reported as ``{"skipped": True}``.
    """
    target = Path(path).resolve()
    target.mkdir(parents=True, exist_ok=True)

    config = ensure_initialized(target)

    status: dict[str, object] = {
        "repo": target.as_posix(),
        "gcie_dir": (target / ".gcie").as_posix(),
        "architecture_initialized": True,
        "files": {},
    }

    source_root = _repo_root()
    requested_docs = (
        ("GCIE_USAGE.md", include_agent_usage),
        ("SETUP_ANY_REPO.md", include_setup_doc),
    )
    copied: dict[str, str] = {
        doc_name: _copy_if_needed(source_root / doc_name, target / doc_name, force=force)
        for doc_name, wanted in requested_docs
        if wanted
    }

    status["files"] = copied
    status["context_config"] = config
    status["index"] = run_index(target.as_posix()) if run_index_pass else {"skipped": True}

    if not run_adaptation_pass:
        status["adaptation"] = {"skipped": True}
    else:
        status["adaptation"] = run_post_init_adaptation(
            target.as_posix(),
            benchmark_size=adaptation_benchmark_size,
            efficiency_iterations=adaptation_efficiency_iterations,
            clear_profile=True,
            adapt_workers=adaptation_workers,
        )

    return status
111
+
112
+
113
def run_remove(
    path: str,
    *,
    remove_planning: bool = False,
    remove_gcie_usage: bool = True,
    remove_setup_doc: bool = True,
) -> dict:
    """Delete GCIE-managed files from a repository and report the outcome.

    ``.gcie`` is always targeted; the usage document, setup document and
    ``.planning`` are targeted according to the flags.

    Args:
        path: Repository directory to clean.
        remove_planning: Also remove ``.planning``.
        remove_gcie_usage: Remove ``GCIE_USAGE.md``.
        remove_setup_doc: Remove ``SETUP_ANY_REPO.md``.

    Returns:
        Dict with ``repo``, the per-path ``removed`` statuses and the
        ``remove_planning`` flag.
    """
    target = Path(path).resolve()
    # NOTE(review): this creates the directory when it does not exist yet,
    # even though the function only removes things; confirm this is intended.
    target.mkdir(parents=True, exist_ok=True)

    selections = (
        (".gcie", True),
        ("GCIE_USAGE.md", remove_gcie_usage),
        ("SETUP_ANY_REPO.md", remove_setup_doc),
        (".planning", remove_planning),
    )
    removed: dict[str, str] = {
        entry: _remove_path(target, target / entry)
        for entry, selected in selections
        if selected
    }

    return {
        "repo": target.as_posix(),
        "removed": removed,
        "remove_planning": remove_planning,
    }
141
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pmaddire/gcie",
3
- "version": "0.1.10",
3
+ "version": "0.1.13",
4
4
  "description": "GraphCode Intelligence Engine one-command setup and context CLI",
5
5
  "bin": {
6
6
  "gcie": "bin/gcie.js",