@pmaddire/gcie 0.1.11 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/app.py CHANGED
@@ -143,12 +143,14 @@ def adapt_cmd(
143
143
  benchmark_size: int = typer.Option(10, "--benchmark-size"),
144
144
  efficiency_iterations: int = typer.Option(5, "--efficiency-iterations"),
145
145
  clear_profile: bool = typer.Option(False, "--clear-profile"),
146
+ adapt_workers: int = typer.Option(0, "--adapt-workers", help="Adaptation evaluation workers (0=auto)"),
146
147
  ) -> None:
147
148
  result = run_post_init_adaptation(
148
149
  repo,
149
150
  benchmark_size=benchmark_size,
150
151
  efficiency_iterations=efficiency_iterations,
151
152
  clear_profile=clear_profile,
153
+ adapt_workers=(None if adapt_workers <= 0 else adapt_workers),
152
154
  )
153
155
  typer.echo(json.dumps(result, indent=2))
154
156
 
@@ -163,6 +165,7 @@ def setup_cmd(
163
165
  adapt: bool = typer.Option(False, "--adapt", help="Run post-init adaptation pipeline after setup"),
164
166
  adaptation_benchmark_size: int = typer.Option(10, "--adapt-benchmark-size"),
165
167
  adaptation_efficiency_iterations: int = typer.Option(5, "--adapt-efficiency-iterations"),
168
+ adaptation_workers: int = typer.Option(0, "--adapt-workers", help="Adaptation evaluation workers (0=auto)"),
166
169
  ) -> None:
167
170
  result = run_setup(
168
171
  path,
@@ -173,6 +176,7 @@ def setup_cmd(
173
176
  run_adaptation_pass=adapt,
174
177
  adaptation_benchmark_size=adaptation_benchmark_size,
175
178
  adaptation_efficiency_iterations=adaptation_efficiency_iterations,
179
+ adaptation_workers=(None if adaptation_workers <= 0 else adaptation_workers),
176
180
  )
177
181
  typer.echo(json.dumps(result, indent=2))
178
182
 
@@ -214,3 +218,4 @@ if __name__ == "__main__":
214
218
  app()
215
219
 
216
220
 
221
+
@@ -2,9 +2,11 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
6
  from dataclasses import asdict, dataclass
6
7
  from datetime import datetime, timezone
7
8
  import json
9
+ import os
8
10
  import re
9
11
  from pathlib import Path
10
12
 
@@ -55,6 +57,19 @@ _IGNORED_DIRS = {
55
57
  _METHOD_ORDER = ["plain", "plain_chain", "plain_gapfill", "plain_rescue", "slices"]
56
58
 
57
59
 
60
+ def _adapt_worker_count(workers: int | None = None) -> int:
61
+ if workers is not None:
62
+ return max(1, int(workers))
63
+ env_value = os.getenv("GCIE_ADAPT_WORKERS", "").strip()
64
+ if env_value:
65
+ try:
66
+ return max(1, int(env_value))
67
+ except ValueError:
68
+ pass
69
+ cpu = os.cpu_count() or 4
70
+ return max(1, min(8, cpu))
71
+
72
+
58
73
  def _query_keywords(text: str) -> list[str]:
59
74
  return [t for t in _WORD_RE.findall(text.lower()) if len(t) >= 4][:8]
60
75
 
@@ -127,31 +142,31 @@ def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
127
142
  return f"{base}/{normalized}"
128
143
 
129
144
 
130
- def _family_path(expected_files: tuple[str, ...]) -> str:
131
- if not expected_files:
132
- return "."
133
- parent_parts: list[tuple[str, ...]] = []
134
- for rel in expected_files:
135
- parent = Path(rel).parent
136
- if str(parent) in {"", "."}:
137
- parent_parts.append(tuple())
138
- else:
139
- parent_parts.append(tuple(parent.parts))
140
-
141
- common: list[str] = []
142
- if parent_parts:
143
- shortest = min(len(parts) for parts in parent_parts)
144
- for idx in range(shortest):
145
- token = parent_parts[0][idx]
146
- if all(parts[idx] == token for parts in parent_parts):
147
- common.append(token)
148
- else:
149
- break
150
- if common:
151
- return Path(*common).as_posix()
152
-
153
- heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
154
- return next(iter(heads)) if len(heads) == 1 else "."
145
+ def _family_path(expected_files: tuple[str, ...]) -> str:
146
+ if not expected_files:
147
+ return "."
148
+ parent_parts: list[tuple[str, ...]] = []
149
+ for rel in expected_files:
150
+ parent = Path(rel).parent
151
+ if str(parent) in {"", "."}:
152
+ parent_parts.append(tuple())
153
+ else:
154
+ parent_parts.append(tuple(parent.parts))
155
+
156
+ common: list[str] = []
157
+ if parent_parts:
158
+ shortest = min(len(parts) for parts in parent_parts)
159
+ for idx in range(shortest):
160
+ token = parent_parts[0][idx]
161
+ if all(parts[idx] == token for parts in parent_parts):
162
+ common.append(token)
163
+ else:
164
+ break
165
+ if common:
166
+ return Path(*common).as_posix()
167
+
168
+ heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
169
+ return next(iter(heads)) if len(heads) == 1 else "."
155
170
 
156
171
  def _safe_scope(path: str) -> str:
157
172
  if not path or path in {".", "./"}:
@@ -162,39 +177,39 @@ def _safe_scope(path: str) -> str:
162
177
  return "."
163
178
 
164
179
 
165
- def _plan_query(case) -> tuple[str, str, int | None]:
166
- path = _family_path(case.expected_files)
167
- if getattr(case, "name", "") == "cli_context_command":
168
- return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950
169
-
170
- repo_path = Path('.').resolve()
171
- cue_terms: list[str] = []
172
- for rel in case.expected_files:
173
- cue_terms.extend(_extract_query_cues_for_file(repo_path, rel)[:3])
174
- cue_terms.extend(_query_keywords(case.query)[:4])
175
-
176
- dedup: list[str] = []
177
- seen: set[str] = set()
178
- for token in [*case.expected_files, *cue_terms]:
179
- key = token.lower()
180
- if key in seen:
181
- continue
182
- seen.add(key)
183
- dedup.append(token)
184
- if len(dedup) >= 14:
185
- break
186
- query = " ".join(dedup).strip()
187
-
188
- expected_count = len(case.expected_files)
189
- if expected_count >= 3:
190
- budget = 1100
191
- elif expected_count == 2:
192
- budget = 950
193
- else:
194
- budget = 850
195
-
196
- if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
197
- budget = 800
180
def _plan_query(case) -> tuple[str, str, int | None]:
    """Derive the retrieval plan ``(scope path, query text, budget)`` for a case.

    The scope is the family path of the case's expected files.  The query
    is made of the expected file paths followed by up to three cues per
    file and up to four keywords from ``case.query``, de-duplicated
    case-insensitively (first occurrence kept) and capped at 14 tokens.
    The budget depends on how many files are expected, with fixed
    overrides for a handful of named cases.
    """
    path = _family_path(case.expected_files)
    case_name = getattr(case, "name", "")
    if case_name == "cli_context_command":
        # Fixed plan for this one named case.
        return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950

    # Cues are extracted relative to the current working directory.
    repo_path = Path('.').resolve()
    cue_terms: list[str] = []
    for rel in case.expected_files:
        cue_terms += _extract_query_cues_for_file(repo_path, rel)[:3]
    cue_terms += _query_keywords(case.query)[:4]

    unique_tokens: list[str] = []
    lowered_seen: set[str] = set()
    for token in (*case.expected_files, *cue_terms):
        folded = token.lower()
        if folded not in lowered_seen:
            lowered_seen.add(folded)
            unique_tokens.append(token)
            if len(unique_tokens) >= 14:
                break
    query = " ".join(unique_tokens).strip()

    file_count = len(case.expected_files)
    budget = 1100 if file_count >= 3 else (950 if file_count == 2 else 850)

    # Named cases that always run with the smaller fixed budget.
    if case_name in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
        budget = 800
    return path, query, budget
199
214
 
200
215
  def _case_family(case) -> str:
@@ -227,84 +242,84 @@ def _build_gapfill_query(case, missing_rel: str) -> str:
227
242
  if len(dedup) >= 14:
228
243
  break
229
244
 
230
- return " ".join(dedup)
231
-
232
-
233
- def _collect_files_from_payload(scope: str, payload: dict) -> set[str]:
234
- return {
235
- _normalize_scoped_path(scope, rel)
236
- for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
237
- if rel
238
- }
239
-
240
-
241
- def _hop_query_for_pair(case, left: str, right: str) -> str:
242
- repo_path = Path('.').resolve()
243
- cues: list[str] = []
244
- cues.extend(_extract_query_cues_for_file(repo_path, left)[:3])
245
- cues.extend(_extract_query_cues_for_file(repo_path, right)[:3])
246
- cues.extend(_query_keywords(case.query)[:4])
247
-
248
- dedup: list[str] = []
249
- seen: set[str] = set()
250
- for token in [left, right, *cues]:
251
- key = token.lower()
252
- if key in seen:
253
- continue
254
- seen.add(key)
255
- dedup.append(token)
256
- if len(dedup) >= 12:
257
- break
258
- return " ".join(dedup)
259
-
260
-
261
- def _evaluate_plain_chain_case(case) -> CaseResult:
262
- expected = tuple(case.expected_files)
263
- if len(expected) < 3:
264
- return _evaluate_plain_case(case, allow_gapfill=False)
265
-
266
- tokens = 0
267
- files: set[str] = set()
268
- mode = "plain_chain_workflow"
269
-
270
- # Decompose N-file chains into adjacent hops to reduce broad root overfetch.
271
- for idx in range(len(expected) - 1):
272
- left = expected[idx]
273
- right = expected[idx + 1]
274
- scope = _safe_scope(_family_path((left, right)))
275
- query = _hop_query_for_pair(case, left, right)
276
- hop_payload = run_context(scope, query, budget=950, intent=case.intent)
277
- tokens += int(hop_payload.get("tokens", 0) or 0)
278
- files.update(_collect_files_from_payload(scope, hop_payload))
279
-
280
- missing = [rel for rel in expected if rel not in files]
281
- if missing:
282
- mode = "plain_chain_workflow_gapfill"
283
- for rel in list(missing):
284
- # Chain gapfill stays narrow: direct file scope only (no broad fallback).
285
- scope = rel if (Path(rel).exists() and Path(rel).is_file()) else _safe_scope(_family_path((rel,)))
286
- budget = 500 if rel.endswith('/main.py') or rel == 'main.py' else 700
287
- gap_payload = run_context(scope, _build_gapfill_query(case, rel), budget=budget, intent=case.intent)
288
- tokens += int(gap_payload.get("tokens", 0) or 0)
289
- files.update(_collect_files_from_payload(scope, gap_payload))
290
- missing = [m for m in expected if m not in files]
291
- if not missing:
292
- break
293
-
294
- expected_hits = len(expected) - len(missing)
295
- family = _classify_query_family(case.query)
296
- return CaseResult(
297
- name=case.name,
298
- family=family,
299
- mode=mode,
300
- tokens=tokens,
301
- expected_hits=expected_hits,
302
- expected_total=len(expected),
303
- missing_expected=tuple(missing),
304
- context_complete=not missing,
305
- )
306
-
307
-
245
+ return " ".join(dedup)
246
+
247
+
248
+ def _collect_files_from_payload(scope: str, payload: dict) -> set[str]:
249
+ return {
250
+ _normalize_scoped_path(scope, rel)
251
+ for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
252
+ if rel
253
+ }
254
+
255
+
256
def _hop_query_for_pair(case, left: str, right: str) -> str:
    """Build the query text for one hop between two files.

    The two paths come first, followed by up to three cues extracted for
    each file and up to four keywords from ``case.query``.  Tokens are
    de-duplicated case-insensitively (first occurrence kept) and the query
    is capped at 12 tokens.
    """
    # Cues are extracted relative to the current working directory.
    repo_path = Path('.').resolve()
    candidates = [
        left,
        right,
        *_extract_query_cues_for_file(repo_path, left)[:3],
        *_extract_query_cues_for_file(repo_path, right)[:3],
        *_query_keywords(case.query)[:4],
    ]

    kept: list[str] = []
    lowered_seen: set[str] = set()
    for token in candidates:
        folded = token.lower()
        if folded not in lowered_seen:
            lowered_seen.add(folded)
            kept.append(token)
            if len(kept) >= 12:
                break
    return " ".join(kept)
274
+
275
+
276
def _evaluate_plain_chain_case(case) -> CaseResult:
    """Evaluate a case by querying each adjacent pair of its expected files.

    Cases with fewer than three expected files are delegated to
    ``_evaluate_plain_case`` with gap-fill disabled.  Otherwise every
    adjacent pair ``(expected[i], expected[i + 1])`` gets its own context
    query, and any expected file still missing afterwards gets one narrow
    gap-fill query.  Token counts of all queries issued are summed into the
    result.

    Returns:
        A ``CaseResult`` whose ``mode`` is ``"plain_chain_workflow"`` or,
        when gap-fill was needed, ``"plain_chain_workflow_gapfill"``.
    """
    expected = tuple(case.expected_files)
    if len(expected) < 3:
        return _evaluate_plain_case(case, allow_gapfill=False)

    tokens = 0
    files: set[str] = set()
    mode = "plain_chain_workflow"

    # Decompose N-file chains into adjacent hops to reduce broad root overfetch.
    for left, right in zip(expected, expected[1:]):
        scope = _safe_scope(_family_path((left, right)))
        query = _hop_query_for_pair(case, left, right)
        hop_payload = run_context(scope, query, budget=950, intent=case.intent)
        tokens += int(hop_payload.get("tokens", 0) or 0)
        files.update(_collect_files_from_payload(scope, hop_payload))

    missing = [rel for rel in expected if rel not in files]
    if missing:
        mode = "plain_chain_workflow_gapfill"
        for rel in list(missing):
            # An earlier gap-fill query may already have surfaced this file;
            # querying again would only add tokens to the measured cost.
            if rel in files:
                continue
            # Chain gapfill stays narrow: direct file scope only (no broad fallback).
            scope = rel if Path(rel).is_file() else _safe_scope(_family_path((rel,)))
            budget = 500 if rel.endswith('/main.py') or rel == 'main.py' else 700
            gap_payload = run_context(scope, _build_gapfill_query(case, rel), budget=budget, intent=case.intent)
            tokens += int(gap_payload.get("tokens", 0) or 0)
            files.update(_collect_files_from_payload(scope, gap_payload))
            missing = [m for m in expected if m not in files]
            if not missing:
                break

    expected_hits = len(expected) - len(missing)
    family = _classify_query_family(case.query)
    return CaseResult(
        name=case.name,
        family=family,
        mode=mode,
        tokens=tokens,
        expected_hits=expected_hits,
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
321
+
322
+
308
323
  def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
309
324
  path, query, budget = _plan_query(case)
310
325
  path = _safe_scope(path)
@@ -442,11 +457,11 @@ def _evaluate_slices_case(case) -> CaseResult:
442
457
  )
443
458
 
444
459
 
445
- def _evaluate_case_with_method(case, method: str) -> CaseResult:
446
- if method == "plain":
447
- return _evaluate_plain_case(case, allow_gapfill=False)
448
- if method == "plain_chain":
449
- return _evaluate_plain_chain_case(case)
460
+ def _evaluate_case_with_method(case, method: str) -> CaseResult:
461
+ if method == "plain":
462
+ return _evaluate_plain_case(case, allow_gapfill=False)
463
+ if method == "plain_chain":
464
+ return _evaluate_plain_chain_case(case)
450
465
  if method == "plain_gapfill":
451
466
  return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=False)
452
467
  if method == "plain_rescue":
@@ -531,6 +546,9 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
531
546
  # Build a diversified sample so adaptation can learn in mixed-layer repos.
532
547
  single_target = max(1, needed // 3)
533
548
  same_dir_target = max(1, needed // 3)
549
+ local_target = max(1, needed // 2)
550
+ if single_target + same_dir_target < local_target:
551
+ same_dir_target = local_target - single_target
534
552
  cross_dir_target = max(1, needed - single_target - same_dir_target)
535
553
 
536
554
  # 1) singles
@@ -573,23 +591,23 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
573
591
  if cross_added >= cross_dir_target:
574
592
  break
575
593
 
576
- # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
577
- if needed >= 12 and len(rows) < needed:
578
- chain_budget = max(1, needed // 6)
579
- chains_added = 0
580
- reps = [item[1] for item in top_items]
581
- for idx in range(len(reps) - 2):
582
- add_case(
583
- f"chain_{idx}",
584
- (reps[idx], reps[idx + 1], reps[idx + 2]),
585
- intent='refactor',
586
- )
587
- if len(rows) >= needed:
588
- return rows[:needed]
589
- chains_added += 1
590
- if chains_added >= chain_budget:
591
- break
592
-
594
+ # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
595
+ if needed >= 12 and len(rows) < needed:
596
+ chain_budget = max(1, int(round(needed * 0.12)))
597
+ chains_added = 0
598
+ reps = [item[1] for item in top_items]
599
+ for idx in range(len(reps) - 2):
600
+ add_case(
601
+ f"chain_{idx}",
602
+ (reps[idx], reps[idx + 1], reps[idx + 2]),
603
+ intent='refactor',
604
+ )
605
+ if len(rows) >= needed:
606
+ return rows[:needed]
607
+ chains_added += 1
608
+ if chains_added >= chain_budget:
609
+ break
610
+
593
611
  # 5) fill remainder with additional nearby pairs
594
612
  if len(rows) < needed:
595
613
  for idx in range(len(files) - 1):
@@ -626,12 +644,52 @@ def _cheaper_method(method: str) -> str | None:
626
644
  return _METHOD_ORDER[idx - 1]
627
645
 
628
646
 
629
- def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) -> tuple[list[CaseResult], dict, dict[str, dict]]:
630
- rows: list[CaseResult] = []
631
- for case in cases:
647
+ def _evaluate_cases_with_method(cases: list[AdaptCase], method: str, workers: int) -> list[CaseResult]:
648
+ if not cases:
649
+ return []
650
+ if workers <= 1 or len(cases) <= 1:
651
+ return [_evaluate_case_with_method(case, method) for case in cases]
652
+
653
+ slots: list[CaseResult | None] = [None] * len(cases)
654
+ max_workers = max(1, min(workers, len(cases)))
655
+ with ThreadPoolExecutor(max_workers=max_workers) as pool:
656
+ future_map = {
657
+ pool.submit(_evaluate_case_with_method, case, method): idx
658
+ for idx, case in enumerate(cases)
659
+ }
660
+ for future in as_completed(future_map):
661
+ slots[future_map[future]] = future.result()
662
+
663
+ return [row for row in slots if row is not None]
664
+
665
+
666
+ def _run_family_policy(
667
+ cases: list[AdaptCase],
668
+ family_policy: dict[str, str],
669
+ *,
670
+ workers: int,
671
+ ) -> tuple[list[CaseResult], dict, dict[str, dict]]:
672
+ if not cases:
673
+ summary = _summarize('policy_run', [])
674
+ return [], summary, {}
675
+
676
+ grouped: dict[str, list[tuple[int, AdaptCase]]] = {}
677
+ for idx, case in enumerate(cases):
632
678
  family = _case_family(case)
633
679
  method = family_policy.get(family, 'plain')
634
- rows.append(_evaluate_case_with_method(case, method))
680
+ key = f'{family}|{method}'
681
+ grouped.setdefault(key, []).append((idx, case))
682
+
683
+ ordered: list[CaseResult | None] = [None] * len(cases)
684
+ for key in sorted(grouped):
685
+ pairs = grouped[key]
686
+ _, method = key.split('|', 1)
687
+ group_cases = [case for _, case in pairs]
688
+ group_rows = _evaluate_cases_with_method(group_cases, method, workers)
689
+ for (orig_idx, _), row in zip(pairs, group_rows):
690
+ ordered[orig_idx] = row
691
+
692
+ rows = [row for row in ordered if row is not None]
635
693
  summary = _summarize('policy_run', rows)
636
694
 
637
695
  by_family: dict[str, dict] = {}
@@ -647,45 +705,51 @@ def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) ->
647
705
  return rows, summary, by_family
648
706
 
649
707
 
650
- def _select_best_summary(summaries: list[dict]) -> dict:
651
- full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
652
- if full_hit:
653
- return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
654
- return max(
655
- summaries,
656
- key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
657
- )
658
-
659
-
660
- def _bootstrap_family_policy(cases: list[AdaptCase], families: list[str]) -> tuple[dict[str, str], list[dict]]:
661
- policy: dict[str, str] = {}
662
- diagnostics: list[dict] = []
663
- for fam in families:
664
- fam_cases = [case for case in cases if _case_family(case) == fam]
665
- if not fam_cases:
666
- policy[fam] = "plain"
667
- continue
668
-
669
- method_summaries: list[dict] = []
670
- for method in _METHOD_ORDER:
671
- rows = [_evaluate_case_with_method(case, method) for case in fam_cases]
672
- summary = _summarize(f"bootstrap_{fam}_{method}", rows)
673
- summary["method"] = method
674
- summary["family"] = fam
675
- method_summaries.append(summary)
676
-
677
- best = _select_best_summary(method_summaries)
678
- selected_method = str(best.get("method", "plain"))
679
- policy[fam] = selected_method
680
- diagnostics.append(
681
- {
682
- "family": fam,
683
- "selected_method": selected_method,
684
- "selected_summary": best,
685
- "candidates": method_summaries,
686
- }
687
- )
688
- return policy, diagnostics
708
+ def _select_best_summary(summaries: list[dict]) -> dict:
709
+ full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
710
+ if full_hit:
711
+ return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
712
+ return max(
713
+ summaries,
714
+ key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
715
+ )
716
+
717
+
718
+ def _bootstrap_family_policy(
719
+ cases: list[AdaptCase],
720
+ families: list[str],
721
+ *,
722
+ workers: int,
723
+ ) -> tuple[dict[str, str], list[dict]]:
724
+ policy: dict[str, str] = {}
725
+ diagnostics: list[dict] = []
726
+ for fam in families:
727
+ fam_cases = [case for case in cases if _case_family(case) == fam]
728
+ if not fam_cases:
729
+ policy[fam] = "plain"
730
+ continue
731
+
732
+ method_summaries: list[dict] = []
733
+ for method in _METHOD_ORDER:
734
+ rows = _evaluate_cases_with_method(fam_cases, method, workers)
735
+ summary = _summarize(f"bootstrap_{fam}_{method}", rows)
736
+ summary["method"] = method
737
+ summary["family"] = fam
738
+ method_summaries.append(summary)
739
+
740
+ best = _select_best_summary(method_summaries)
741
+ selected_method = str(best.get("method", "plain"))
742
+ policy[fam] = selected_method
743
+ diagnostics.append(
744
+ {
745
+ "family": fam,
746
+ "selected_method": selected_method,
747
+ "selected_summary": best,
748
+ "candidates": method_summaries,
749
+ }
750
+ )
751
+ return policy, diagnostics
752
+
689
753
  def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
690
754
  cfg_path = repo_path / '.gcie' / 'context_config.json'
691
755
  if cfg_path.exists():
@@ -726,12 +790,11 @@ def run_post_init_adaptation(
726
790
  benchmark_size: int = 10,
727
791
  efficiency_iterations: int = 5,
728
792
  clear_profile: bool = False,
793
+ adapt_workers: int | None = None,
729
794
  ) -> dict:
730
795
  repo_path = Path(repo).resolve()
731
796
 
732
797
  # Ensure all relative retrieval/evaluation calls execute in the target repo.
733
- import os
734
-
735
798
  os.chdir(repo_path)
736
799
  run_index(repo_path.as_posix())
737
800
 
@@ -749,8 +812,9 @@ def run_post_init_adaptation(
749
812
  'message': 'No repo-usable adaptation cases available.',
750
813
  }
751
814
 
752
- families = sorted({_case_family(case) for case in cases})
753
- family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families)
815
+ workers = _adapt_worker_count(adapt_workers)
816
+ families = sorted({_case_family(case) for case in cases})
817
+ family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families, workers=workers)
754
818
 
755
819
  # Accuracy rounds: promote methods per failing family until lock.
756
820
  accuracy_rounds_max = 5
@@ -758,7 +822,7 @@ def run_post_init_adaptation(
758
822
  lock_streak = 0
759
823
 
760
824
  for rnd in range(1, accuracy_rounds_max + 1):
761
- rows, summary, by_family = _run_family_policy(cases, family_policy)
825
+ rows, summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
762
826
  round_payload = {
763
827
  'round': rnd,
764
828
  'family_policy': dict(family_policy),
@@ -792,7 +856,7 @@ def run_post_init_adaptation(
792
856
  )
793
857
 
794
858
  family_policy = dict(selected_accuracy_round['family_policy'])
795
- rows, current_summary, by_family = _run_family_policy(cases, family_policy)
859
+ rows, current_summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
796
860
 
797
861
  # Efficiency rounds: attempt family-level cheaper method under hard 100% gate.
798
862
  efficiency_trials: list[dict] = []
@@ -804,7 +868,7 @@ def run_post_init_adaptation(
804
868
  continue
805
869
  trial_policy = dict(family_policy)
806
870
  trial_policy[fam] = cheaper
807
- _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy)
871
+ _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy, workers=workers)
808
872
  trial_payload = {
809
873
  'iteration': idx + 1,
810
874
  'family': fam,
@@ -825,10 +889,10 @@ def run_post_init_adaptation(
825
889
  break
826
890
 
827
891
  # Global candidate snapshots for transparency.
828
- slices_rows = [_evaluate_case_with_method(case, 'slices') for case in cases]
829
- plain_rows = [_evaluate_case_with_method(case, 'plain') for case in cases]
830
- plain_gap_rows = [_evaluate_case_with_method(case, 'plain_gapfill') for case in cases]
831
- plain_rescue_rows = [_evaluate_case_with_method(case, 'plain_rescue') for case in cases]
892
+ slices_rows = _evaluate_cases_with_method(cases, 'slices', workers)
893
+ plain_rows = _evaluate_cases_with_method(cases, 'plain', workers)
894
+ plain_gap_rows = _evaluate_cases_with_method(cases, 'plain_gapfill', workers)
895
+ plain_rescue_rows = _evaluate_cases_with_method(cases, 'plain_rescue', workers)
832
896
  slices_summary = _summarize('slices_accuracy_stage', slices_rows)
833
897
  plain_summary = _summarize('plain_accuracy_stage', plain_rows)
834
898
  plain_gap_summary = _summarize('plain_gapfill_accuracy_stage', plain_gap_rows)
@@ -877,11 +941,12 @@ def run_post_init_adaptation(
877
941
  'benchmark_size': len(cases),
878
942
  'requested_benchmark_size': int(benchmark_size),
879
943
  'efficiency_iterations': int(efficiency_iterations),
944
+ 'adapt_workers': workers,
880
945
  'case_source': case_source,
881
946
  'family_policy': family_policy,
882
947
  'cost_analysis': cost_analysis,
883
- 'phases': {
884
- 'bootstrap': bootstrap_diagnostics,
948
+ 'phases': {
949
+ 'bootstrap': bootstrap_diagnostics,
885
950
  'accuracy_rounds': accuracy_rounds,
886
951
  'selected_accuracy_round': selected_accuracy_round,
887
952
  'efficiency_trials': efficiency_trials,
@@ -920,6 +985,8 @@ def run_post_init_adaptation(
920
985
 
921
986
 
922
987
 
988
+
989
+
923
990
 
924
991
 
925
992
 
@@ -1,138 +1,141 @@
1
- """Repository setup and teardown helpers for GCIE."""
2
-
3
- from __future__ import annotations
4
-
5
- import shutil
6
- from pathlib import Path
7
-
8
- from context.architecture_bootstrap import ensure_initialized
9
-
10
- from .adaptation import run_post_init_adaptation
11
- from .index import run_index
12
-
13
-
14
- def _repo_root() -> Path:
15
- return Path(__file__).resolve().parents[2]
16
-
17
-
18
- def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
19
- if not source.exists():
20
- return "source_missing"
21
- if target.exists() and not force:
22
- return "skipped_existing"
23
- target.parent.mkdir(parents=True, exist_ok=True)
24
- target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8")
25
- return "written"
26
-
27
-
28
- def _is_within(base: Path, target: Path) -> bool:
29
- try:
30
- target.resolve().relative_to(base.resolve())
31
- return True
32
- except ValueError:
33
- return False
34
-
35
-
36
- def _remove_path(root: Path, target: Path) -> str:
37
- if not _is_within(root, target):
38
- return "skipped_outside_repo"
39
- if not target.exists():
40
- return "not_found"
41
- if target.is_dir():
42
- shutil.rmtree(target)
43
- return "removed_dir"
44
- target.unlink()
45
- return "removed_file"
46
-
47
-
48
- def run_setup(
49
- path: str,
50
- *,
51
- force: bool = False,
52
- include_agent_usage: bool = True,
53
- include_setup_doc: bool = True,
54
- run_index_pass: bool = True,
55
- run_adaptation_pass: bool = False,
56
- adaptation_benchmark_size: int = 10,
57
- adaptation_efficiency_iterations: int = 5,
58
- ) -> dict:
59
- """Initialize a repository so GCIE can be used immediately."""
60
- target = Path(path).resolve()
61
- target.mkdir(parents=True, exist_ok=True)
62
-
63
- config = ensure_initialized(target)
64
- gcie_dir = target / ".gcie"
65
-
66
- status: dict[str, object] = {
67
- "repo": target.as_posix(),
68
- "gcie_dir": gcie_dir.as_posix(),
69
- "architecture_initialized": True,
70
- "files": {},
71
- }
72
-
73
- source_root = _repo_root()
74
- copied: dict[str, str] = {}
75
-
76
- if include_agent_usage:
77
- copied["GCIE_USAGE.md"] = _copy_if_needed(
78
- source_root / "GCIE_USAGE.md",
79
- target / "GCIE_USAGE.md",
80
- force=force,
81
- )
82
-
83
- if include_setup_doc:
84
- copied["SETUP_ANY_REPO.md"] = _copy_if_needed(
85
- source_root / "SETUP_ANY_REPO.md",
86
- target / "SETUP_ANY_REPO.md",
87
- force=force,
88
- )
89
-
90
- status["files"] = copied
91
- status["context_config"] = config
92
-
93
- if run_index_pass:
94
- status["index"] = run_index(target.as_posix())
95
- else:
96
- status["index"] = {"skipped": True}
97
-
98
- if run_adaptation_pass:
99
- status["adaptation"] = run_post_init_adaptation(
100
- target.as_posix(),
101
- benchmark_size=adaptation_benchmark_size,
102
- efficiency_iterations=adaptation_efficiency_iterations,
103
- clear_profile=True,
104
- )
105
- else:
106
- status["adaptation"] = {"skipped": True}
107
-
108
- return status
109
-
110
-
111
- def run_remove(
112
- path: str,
113
- *,
114
- remove_planning: bool = False,
115
- remove_gcie_usage: bool = True,
116
- remove_setup_doc: bool = True,
117
- ) -> dict:
118
- """Remove GCIE-managed files from a repository."""
119
- target = Path(path).resolve()
120
- target.mkdir(parents=True, exist_ok=True)
121
-
122
- removed: dict[str, str] = {}
123
- removed[".gcie"] = _remove_path(target, target / ".gcie")
124
-
125
- if remove_gcie_usage:
126
- removed["GCIE_USAGE.md"] = _remove_path(target, target / "GCIE_USAGE.md")
127
-
128
- if remove_setup_doc:
129
- removed["SETUP_ANY_REPO.md"] = _remove_path(target, target / "SETUP_ANY_REPO.md")
130
-
131
- if remove_planning:
132
- removed[".planning"] = _remove_path(target, target / ".planning")
133
-
134
- return {
135
- "repo": target.as_posix(),
136
- "removed": removed,
137
- "remove_planning": remove_planning,
1
+ """Repository setup and teardown helpers for GCIE."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ from pathlib import Path
7
+
8
+ from context.architecture_bootstrap import ensure_initialized
9
+
10
+ from .adaptation import run_post_init_adaptation
11
+ from .index import run_index
12
+
13
+
14
+ def _repo_root() -> Path:
15
+ return Path(__file__).resolve().parents[2]
16
+
17
+
18
+ def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
19
+ if not source.exists():
20
+ return "source_missing"
21
+ if target.exists() and not force:
22
+ return "skipped_existing"
23
+ target.parent.mkdir(parents=True, exist_ok=True)
24
+ target.write_text(source.read_text(encoding="utf-8"), encoding="utf-8")
25
+ return "written"
26
+
27
+
28
+ def _is_within(base: Path, target: Path) -> bool:
29
+ try:
30
+ target.resolve().relative_to(base.resolve())
31
+ return True
32
+ except ValueError:
33
+ return False
34
+
35
+
36
+ def _remove_path(root: Path, target: Path) -> str:
37
+ if not _is_within(root, target):
38
+ return "skipped_outside_repo"
39
+ if not target.exists():
40
+ return "not_found"
41
+ if target.is_dir():
42
+ shutil.rmtree(target)
43
+ return "removed_dir"
44
+ target.unlink()
45
+ return "removed_file"
46
+
47
+
48
def run_setup(
    path: str,
    *,
    force: bool = False,
    include_agent_usage: bool = True,
    include_setup_doc: bool = True,
    run_index_pass: bool = True,
    run_adaptation_pass: bool = False,
    adaptation_benchmark_size: int = 10,
    adaptation_efficiency_iterations: int = 5,
    adaptation_workers: int | None = None,
) -> dict:
    """Initialize a repository so GCIE can be used immediately.

    Creates the target directory if needed, initializes the architecture
    configuration, optionally copies the usage and setup documents from
    this package's root, and optionally runs the index and adaptation
    passes.

    Args:
        path: Repository directory to set up.
        force: Overwrite documents that already exist in the repository.
        include_agent_usage: Copy ``GCIE_USAGE.md``.
        include_setup_doc: Copy ``SETUP_ANY_REPO.md``.
        run_index_pass: Run the index pass.
        run_adaptation_pass: Run the post-init adaptation pipeline.
        adaptation_benchmark_size: Forwarded to adaptation as
            ``benchmark_size``.
        adaptation_efficiency_iterations: Forwarded to adaptation as
            ``efficiency_iterations``.
        adaptation_workers: Forwarded to adaptation as ``adapt_workers``.

    Returns:
        A status dict with the keys ``repo``, ``gcie_dir``,
        ``architecture_initialized``, ``files``, ``context_config``,
        ``index`` and ``adaptation``.  A pass that was not requested is
        reported as ``{"skipped": True}``.
    """
    target = Path(path).resolve()
    target.mkdir(parents=True, exist_ok=True)

    config = ensure_initialized(target)
    source_root = _repo_root()

    # Documents to copy, in the order their results appear in the status.
    wanted_docs: list[str] = []
    if include_agent_usage:
        wanted_docs.append("GCIE_USAGE.md")
    if include_setup_doc:
        wanted_docs.append("SETUP_ANY_REPO.md")

    copied: dict[str, str] = {}
    for doc_name in wanted_docs:
        copied[doc_name] = _copy_if_needed(
            source_root / doc_name,
            target / doc_name,
            force=force,
        )

    status: dict[str, object] = {
        "repo": target.as_posix(),
        "gcie_dir": (target / ".gcie").as_posix(),
        "architecture_initialized": True,
        "files": copied,
        "context_config": config,
    }

    status["index"] = run_index(target.as_posix()) if run_index_pass else {"skipped": True}

    if not run_adaptation_pass:
        status["adaptation"] = {"skipped": True}
        return status

    status["adaptation"] = run_post_init_adaptation(
        target.as_posix(),
        benchmark_size=adaptation_benchmark_size,
        efficiency_iterations=adaptation_efficiency_iterations,
        clear_profile=True,
        adapt_workers=adaptation_workers,
    )
    return status
111
+
112
+
113
def run_remove(
    path: str,
    *,
    remove_planning: bool = False,
    remove_gcie_usage: bool = True,
    remove_setup_doc: bool = True,
) -> dict:
    """Remove GCIE-managed files from a repository.

    The ``.gcie`` directory is always targeted; the usage document, the
    setup document and the ``.planning`` directory are targeted only when
    their flag is set.  A repository directory that does not exist is not
    created: each entry is then simply reported as ``"not_found"``.

    Args:
        path: Repository directory to clean.
        remove_planning: Also remove ``.planning``.
        remove_gcie_usage: Also remove ``GCIE_USAGE.md``.
        remove_setup_doc: Also remove ``SETUP_ANY_REPO.md``.

    Returns:
        A dict with ``repo`` (the resolved path), ``removed`` (entry name
        mapped to the status returned by ``_remove_path``) and
        ``remove_planning``.
    """
    target = Path(path).resolve()

    removed: dict[str, str] = {}
    removed[".gcie"] = _remove_path(target, target / ".gcie")

    if remove_gcie_usage:
        removed["GCIE_USAGE.md"] = _remove_path(target, target / "GCIE_USAGE.md")

    if remove_setup_doc:
        removed["SETUP_ANY_REPO.md"] = _remove_path(target, target / "SETUP_ANY_REPO.md")

    if remove_planning:
        removed[".planning"] = _remove_path(target, target / ".planning")

    return {
        "repo": target.as_posix(),
        "removed": removed,
        "remove_planning": remove_planning,
    }
141
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pmaddire/gcie",
3
- "version": "0.1.11",
3
+ "version": "0.1.13",
4
4
  "description": "GraphCode Intelligence Engine one-command setup and context CLI",
5
5
  "bin": {
6
6
  "gcie": "bin/gcie.js",