@pmaddire/gcie 0.1.11 → 0.1.15

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,14 +1,16 @@
1
- """Post-initialization adaptation pipeline (accuracy rounds first, then efficiency rounds)."""
1
+ """Post-initialization adaptation pipeline (accuracy rounds first, then efficiency rounds)."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
6
  from dataclasses import asdict, dataclass
6
7
  from datetime import datetime, timezone
7
8
  import json
9
+ import os
8
10
  import re
9
11
  from pathlib import Path
10
12
 
11
- from .context import run_context
13
+ from .context import run_context, run_context_basic
12
14
  from .context_slices import _classify_query_family, run_context_slices
13
15
  from .index import run_index
14
16
 
@@ -52,7 +54,20 @@ _IGNORED_DIRS = {
52
54
  "build",
53
55
  "coverage",
54
56
  }
55
- _METHOD_ORDER = ["plain", "plain_chain", "plain_gapfill", "plain_rescue", "slices"]
57
+ _METHOD_ORDER = ["plain_minimal", "plain", "plain_force", "plain_chain", "plain_gapfill", "plain_rescue", "slices"]
58
+
59
+
60
+ def _adapt_worker_count(workers: int | None = None) -> int:
61
+ if workers is not None:
62
+ return max(1, int(workers))
63
+ env_value = os.getenv("GCIE_ADAPT_WORKERS", "").strip()
64
+ if env_value:
65
+ try:
66
+ return max(1, int(env_value))
67
+ except ValueError:
68
+ pass
69
+ cpu = os.cpu_count() or 4
70
+ return max(1, min(8, cpu))
56
71
 
57
72
 
58
73
  def _query_keywords(text: str) -> list[str]:
@@ -127,31 +142,31 @@ def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
127
142
  return f"{base}/{normalized}"
128
143
 
129
144
 
130
- def _family_path(expected_files: tuple[str, ...]) -> str:
131
- if not expected_files:
132
- return "."
133
- parent_parts: list[tuple[str, ...]] = []
134
- for rel in expected_files:
135
- parent = Path(rel).parent
136
- if str(parent) in {"", "."}:
137
- parent_parts.append(tuple())
138
- else:
139
- parent_parts.append(tuple(parent.parts))
140
-
141
- common: list[str] = []
142
- if parent_parts:
143
- shortest = min(len(parts) for parts in parent_parts)
144
- for idx in range(shortest):
145
- token = parent_parts[0][idx]
146
- if all(parts[idx] == token for parts in parent_parts):
147
- common.append(token)
148
- else:
149
- break
150
- if common:
151
- return Path(*common).as_posix()
152
-
153
- heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
154
- return next(iter(heads)) if len(heads) == 1 else "."
145
def _family_path(expected_files: tuple[str, ...]) -> str:
    """Return the deepest directory shared by every path in ``expected_files``.

    The result is a POSIX-style relative path. When the files share no parent
    directory, the first path component is returned if it is the same for all
    files (for a single root-level file this is the file name itself);
    otherwise ``"."`` is returned. An empty input also yields ``"."``.
    """
    if not expected_files:
        return "."

    # One tuple of directory components per file; root-level files get ().
    parent_dirs = [Path(rel).parent for rel in expected_files]
    split_parents = [
        () if str(parent) in {"", "."} else tuple(parent.parts)
        for parent in parent_dirs
    ]

    # Walk the components column by column until the files diverge.
    shared: list[str] = []
    for column in zip(*split_parents):
        if len(set(column)) != 1:
            break
        shared.append(column[0])
    if shared:
        return Path(*shared).as_posix()

    leading = {Path(rel).parts[0] for rel in expected_files if Path(rel).parts}
    if len(leading) == 1:
        return next(iter(leading))
    return "."
155
170
 
156
171
  def _safe_scope(path: str) -> str:
157
172
  if not path or path in {".", "./"}:
@@ -162,39 +177,39 @@ def _safe_scope(path: str) -> str:
162
177
  return "."
163
178
 
164
179
 
165
- def _plan_query(case) -> tuple[str, str, int | None]:
166
- path = _family_path(case.expected_files)
167
- if getattr(case, "name", "") == "cli_context_command":
168
- return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950
169
-
170
- repo_path = Path('.').resolve()
171
- cue_terms: list[str] = []
172
- for rel in case.expected_files:
173
- cue_terms.extend(_extract_query_cues_for_file(repo_path, rel)[:3])
174
- cue_terms.extend(_query_keywords(case.query)[:4])
175
-
176
- dedup: list[str] = []
177
- seen: set[str] = set()
178
- for token in [*case.expected_files, *cue_terms]:
179
- key = token.lower()
180
- if key in seen:
181
- continue
182
- seen.add(key)
183
- dedup.append(token)
184
- if len(dedup) >= 14:
185
- break
186
- query = " ".join(dedup).strip()
187
-
188
- expected_count = len(case.expected_files)
189
- if expected_count >= 3:
190
- budget = 1100
191
- elif expected_count == 2:
192
- budget = 950
193
- else:
194
- budget = 850
195
-
196
- if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
197
- budget = 800
180
def _plan_query(case) -> tuple[str, str, int | None]:
    """Build the retrieval plan ``(scope path, query text, token budget)`` for a case.

    The scope is the shared directory of the case's expected files. The query
    is made of the expected file paths, up to three cue terms per expected
    file and up to four keywords from the case query, de-duplicated
    case-insensitively and capped at 14 terms. The budget grows with the
    number of expected files, with fixed values for a few named cases.
    """
    path = _family_path(case.expected_files)
    case_name = getattr(case, "name", "")
    if case_name == "cli_context_command":
        # Hand-tuned plan for this one benchmark case.
        return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950

    repo_root = Path('.').resolve()
    cues: list[str] = []
    for rel in case.expected_files:
        cues += _extract_query_cues_for_file(repo_root, rel)[:3]
    cues += _query_keywords(case.query)[:4]

    # Insertion-ordered dict keyed by the lowercase form keeps the first
    # spelling of each term and preserves the original ordering.
    picked: dict[str, str] = {}
    for term in (*case.expected_files, *cues):
        picked.setdefault(term.lower(), term)
        if len(picked) >= 14:
            break
    query = " ".join(picked.values()).strip()

    file_count = len(case.expected_files)
    budget = 1100 if file_count >= 3 else (950 if file_count == 2 else 850)

    if case_name in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
        budget = 800
    return path, query, budget
199
214
 
200
215
  def _case_family(case) -> str:
@@ -227,84 +242,84 @@ def _build_gapfill_query(case, missing_rel: str) -> str:
227
242
  if len(dedup) >= 14:
228
243
  break
229
244
 
230
- return " ".join(dedup)
231
-
232
-
233
- def _collect_files_from_payload(scope: str, payload: dict) -> set[str]:
234
- return {
235
- _normalize_scoped_path(scope, rel)
236
- for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
237
- if rel
238
- }
239
-
240
-
241
- def _hop_query_for_pair(case, left: str, right: str) -> str:
242
- repo_path = Path('.').resolve()
243
- cues: list[str] = []
244
- cues.extend(_extract_query_cues_for_file(repo_path, left)[:3])
245
- cues.extend(_extract_query_cues_for_file(repo_path, right)[:3])
246
- cues.extend(_query_keywords(case.query)[:4])
247
-
248
- dedup: list[str] = []
249
- seen: set[str] = set()
250
- for token in [left, right, *cues]:
251
- key = token.lower()
252
- if key in seen:
253
- continue
254
- seen.add(key)
255
- dedup.append(token)
256
- if len(dedup) >= 12:
257
- break
258
- return " ".join(dedup)
259
-
260
-
261
- def _evaluate_plain_chain_case(case) -> CaseResult:
262
- expected = tuple(case.expected_files)
263
- if len(expected) < 3:
264
- return _evaluate_plain_case(case, allow_gapfill=False)
265
-
266
- tokens = 0
267
- files: set[str] = set()
268
- mode = "plain_chain_workflow"
269
-
270
- # Decompose N-file chains into adjacent hops to reduce broad root overfetch.
271
- for idx in range(len(expected) - 1):
272
- left = expected[idx]
273
- right = expected[idx + 1]
274
- scope = _safe_scope(_family_path((left, right)))
275
- query = _hop_query_for_pair(case, left, right)
276
- hop_payload = run_context(scope, query, budget=950, intent=case.intent)
277
- tokens += int(hop_payload.get("tokens", 0) or 0)
278
- files.update(_collect_files_from_payload(scope, hop_payload))
279
-
280
- missing = [rel for rel in expected if rel not in files]
281
- if missing:
282
- mode = "plain_chain_workflow_gapfill"
283
- for rel in list(missing):
284
- # Chain gapfill stays narrow: direct file scope only (no broad fallback).
285
- scope = rel if (Path(rel).exists() and Path(rel).is_file()) else _safe_scope(_family_path((rel,)))
286
- budget = 500 if rel.endswith('/main.py') or rel == 'main.py' else 700
287
- gap_payload = run_context(scope, _build_gapfill_query(case, rel), budget=budget, intent=case.intent)
288
- tokens += int(gap_payload.get("tokens", 0) or 0)
289
- files.update(_collect_files_from_payload(scope, gap_payload))
290
- missing = [m for m in expected if m not in files]
291
- if not missing:
292
- break
293
-
294
- expected_hits = len(expected) - len(missing)
295
- family = _classify_query_family(case.query)
296
- return CaseResult(
297
- name=case.name,
298
- family=family,
299
- mode=mode,
300
- tokens=tokens,
301
- expected_hits=expected_hits,
302
- expected_total=len(expected),
303
- missing_expected=tuple(missing),
304
- context_complete=not missing,
305
- )
306
-
307
-
245
+ return " ".join(dedup)
246
+
247
+
248
def _collect_files_from_payload(scope: str, payload: dict) -> set[str]:
    """Return the scope-normalized file paths referenced by ``payload``'s snippets.

    Each entry of ``payload["snippets"]`` contributes the file derived from its
    ``node_id`` via ``_node_to_file``; entries that yield no file are skipped.
    """
    collected: set[str] = set()
    for snippet in payload.get("snippets", []):
        rel = _node_to_file(snippet.get("node_id", ""))
        if rel:
            collected.add(_normalize_scoped_path(scope, rel))
    return collected
254
+
255
+
256
def _hop_query_for_pair(case, left: str, right: str) -> str:
    """Build the query text for one hop between two adjacent expected files.

    The query starts with both file paths, followed by up to three cue terms
    per file and up to four keywords from the case query. Terms are
    de-duplicated case-insensitively (first spelling wins) and capped at 12.
    """
    repo_root = Path('.').resolve()
    extras: list[str] = [
        *_extract_query_cues_for_file(repo_root, left)[:3],
        *_extract_query_cues_for_file(repo_root, right)[:3],
        *_query_keywords(case.query)[:4],
    ]

    picked: dict[str, str] = {}
    for term in (left, right, *extras):
        picked.setdefault(term.lower(), term)
        if len(picked) >= 12:
            break
    return " ".join(picked.values())
274
+
275
+
276
def _evaluate_plain_chain_case(case) -> CaseResult:
    """Evaluate a case by retrieving context hop-by-hop along its expected files.

    Cases with fewer than three expected files are delegated to
    ``_evaluate_plain_case`` without gapfill. Otherwise each adjacent pair of
    expected files is queried in its shared scope, and any file still missing
    afterwards gets one narrow gapfill query. Token counts from every query
    are summed into the result.
    """
    expected = tuple(case.expected_files)
    if len(expected) < 3:
        return _evaluate_plain_case(case, allow_gapfill=False)

    spent = 0
    found: set[str] = set()

    # Decompose N-file chains into adjacent hops to reduce broad root overfetch.
    for left, right in zip(expected, expected[1:]):
        hop_scope = _safe_scope(_family_path((left, right)))
        hop_query = _hop_query_for_pair(case, left, right)
        hop_payload = run_context(hop_scope, hop_query, budget=950, intent=case.intent)
        spent += int(hop_payload.get("tokens", 0) or 0)
        found.update(_collect_files_from_payload(hop_scope, hop_payload))

    missing = [rel for rel in expected if rel not in found]
    mode = "plain_chain_workflow_gapfill" if missing else "plain_chain_workflow"

    # Iterate over a snapshot: `missing` is recomputed after every gapfill.
    for rel in tuple(missing):
        # Chain gapfill stays narrow: direct file scope only (no broad fallback).
        target = Path(rel)
        if target.exists() and target.is_file():
            gap_scope = rel
        else:
            gap_scope = _safe_scope(_family_path((rel,)))
        gap_budget = 500 if (rel.endswith('/main.py') or rel == 'main.py') else 700
        gap_payload = run_context(gap_scope, _build_gapfill_query(case, rel), budget=gap_budget, intent=case.intent)
        spent += int(gap_payload.get("tokens", 0) or 0)
        found.update(_collect_files_from_payload(gap_scope, gap_payload))
        missing = [rel_left for rel_left in expected if rel_left not in found]
        if not missing:
            break

    return CaseResult(
        name=case.name,
        family=_classify_query_family(case.query),
        mode=mode,
        tokens=spent,
        expected_hits=len(expected) - len(missing),
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )
321
+
322
+
308
323
  def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
309
324
  path, query, budget = _plan_query(case)
310
325
  path = _safe_scope(path)
@@ -370,7 +385,57 @@ def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill
370
385
  missing_expected=tuple(missing),
371
386
  context_complete=not missing,
372
387
  )
373
-
388
+
389
def _single_pass_case_result(case, path: str, query: str, payload: dict, mode: str) -> CaseResult:
    """Score one retrieval payload against the case's expected files.

    Shared by the single-query evaluators below, which differ only in how the
    payload is retrieved and in the ``mode`` label they report.
    """
    files = _collect_files_from_payload(path, payload)
    expected = tuple(case.expected_files)
    missing = [rel for rel in expected if rel not in files]
    return CaseResult(
        name=case.name,
        # NOTE(review): the family is classified from the planned query here,
        # while _evaluate_plain_chain_case classifies case.query - confirm
        # which one is intended.
        family=_classify_query_family(query),
        mode=mode,
        tokens=int(payload.get("tokens", 0) or 0),
        expected_hits=len(expected) - len(missing),
        expected_total=len(expected),
        missing_expected=tuple(missing),
        context_complete=not missing,
    )


def _evaluate_plain_minimal_case(case) -> CaseResult:
    """Evaluate a case with a single ``run_context_basic`` query.

    The scope, query and budget come from ``_plan_query``; no gapfill or
    follow-up queries are issued.
    """
    path, query, budget = _plan_query(case)
    path = _safe_scope(path)
    payload = run_context_basic(path, query, budget=budget, intent=case.intent)
    return _single_pass_case_result(case, path, query, payload, "plain_context_workflow_minimal")


def _evaluate_plain_force_case(case) -> CaseResult:
    """Evaluate a case with a single ``run_context`` query in strict-accuracy mode.

    Same plan as the minimal evaluator, but retrieval goes through
    ``run_context`` with ``strict_accuracy=True``.
    """
    path, query, budget = _plan_query(case)
    path = _safe_scope(path)
    payload = run_context(path, query, budget=budget, intent=case.intent, strict_accuracy=True)
    return _single_pass_case_result(case, path, query, payload, "plain_context_workflow_force")
374
439
 
375
440
  def _evaluate_slices_case(case) -> CaseResult:
376
441
  payload = run_context_slices(
@@ -443,14 +508,18 @@ def _evaluate_slices_case(case) -> CaseResult:
443
508
 
444
509
 
445
510
def _evaluate_case_with_method(case, method: str) -> CaseResult:
    """Evaluate ``case`` with the retrieval strategy named by ``method``.

    Any method name not listed in the table below falls back to the slices
    evaluator.
    """
    # Thunks keep evaluation lazy: only the selected strategy runs.
    strategies = {
        "plain_minimal": lambda: _evaluate_plain_minimal_case(case),
        "plain": lambda: _evaluate_plain_case(case, allow_gapfill=False),
        "plain_force": lambda: _evaluate_plain_force_case(case),
        "plain_chain": lambda: _evaluate_plain_chain_case(case),
        "plain_gapfill": lambda: _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=False),
        "plain_rescue": lambda: _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=True),
    }
    run = strategies.get(method, lambda: _evaluate_slices_case(case))
    return run()
455
524
 
456
525
 
@@ -531,6 +600,9 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
531
600
  # Build a diversified sample so adaptation can learn in mixed-layer repos.
532
601
  single_target = max(1, needed // 3)
533
602
  same_dir_target = max(1, needed // 3)
603
+ local_target = max(1, needed // 2)
604
+ if single_target + same_dir_target < local_target:
605
+ same_dir_target = local_target - single_target
534
606
  cross_dir_target = max(1, needed - single_target - same_dir_target)
535
607
 
536
608
  # 1) singles
@@ -573,23 +645,23 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
573
645
  if cross_added >= cross_dir_target:
574
646
  break
575
647
 
576
- # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
577
- if needed >= 12 and len(rows) < needed:
578
- chain_budget = max(1, needed // 6)
579
- chains_added = 0
580
- reps = [item[1] for item in top_items]
581
- for idx in range(len(reps) - 2):
582
- add_case(
583
- f"chain_{idx}",
584
- (reps[idx], reps[idx + 1], reps[idx + 2]),
585
- intent='refactor',
586
- )
587
- if len(rows) >= needed:
588
- return rows[:needed]
589
- chains_added += 1
590
- if chains_added >= chain_budget:
591
- break
592
-
648
+ # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
649
+ if needed >= 12 and len(rows) < needed:
650
+ chain_budget = max(1, int(round(needed * 0.12)))
651
+ chains_added = 0
652
+ reps = [item[1] for item in top_items]
653
+ for idx in range(len(reps) - 2):
654
+ add_case(
655
+ f"chain_{idx}",
656
+ (reps[idx], reps[idx + 1], reps[idx + 2]),
657
+ intent='refactor',
658
+ )
659
+ if len(rows) >= needed:
660
+ return rows[:needed]
661
+ chains_added += 1
662
+ if chains_added >= chain_budget:
663
+ break
664
+
593
665
  # 5) fill remainder with additional nearby pairs
594
666
  if len(rows) < needed:
595
667
  for idx in range(len(files) - 1):
@@ -626,12 +698,52 @@ def _cheaper_method(method: str) -> str | None:
626
698
  return _METHOD_ORDER[idx - 1]
627
699
 
628
700
 
629
- def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) -> tuple[list[CaseResult], dict, dict[str, dict]]:
630
- rows: list[CaseResult] = []
631
- for case in cases:
701
def _evaluate_cases_with_method(cases: list[AdaptCase], method: str, workers: int) -> list[CaseResult]:
    """Evaluate every case with ``method``, optionally on a thread pool.

    Args:
        cases: Cases to evaluate.
        method: Strategy name forwarded to ``_evaluate_case_with_method``.
        workers: Maximum number of threads; values of 1 or less (or a single
            case) run sequentially in the calling thread.

    Returns:
        One result per case, in the same order as ``cases``. Callers pair the
        results with their inputs positionally, so order and length are
        always preserved.

    Raises:
        Whatever ``_evaluate_case_with_method`` raises for a case.
    """
    if not cases:
        return []
    if workers <= 1 or len(cases) <= 1:
        return [_evaluate_case_with_method(case, method) for case in cases]

    pool_size = min(workers, len(cases))
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        # Executor.map yields results in submission order, so no index
        # bookkeeping is needed to restore the input ordering.
        return list(pool.map(_evaluate_case_with_method, cases, [method] * len(cases)))
718
+
719
+
720
+ def _run_family_policy(
721
+ cases: list[AdaptCase],
722
+ family_policy: dict[str, str],
723
+ *,
724
+ workers: int,
725
+ ) -> tuple[list[CaseResult], dict, dict[str, dict]]:
726
+ if not cases:
727
+ summary = _summarize('policy_run', [])
728
+ return [], summary, {}
729
+
730
+ grouped: dict[str, list[tuple[int, AdaptCase]]] = {}
731
+ for idx, case in enumerate(cases):
632
732
  family = _case_family(case)
633
733
  method = family_policy.get(family, 'plain')
634
- rows.append(_evaluate_case_with_method(case, method))
734
+ key = f'{family}|{method}'
735
+ grouped.setdefault(key, []).append((idx, case))
736
+
737
+ ordered: list[CaseResult | None] = [None] * len(cases)
738
+ for key in sorted(grouped):
739
+ pairs = grouped[key]
740
+ _, method = key.split('|', 1)
741
+ group_cases = [case for _, case in pairs]
742
+ group_rows = _evaluate_cases_with_method(group_cases, method, workers)
743
+ for (orig_idx, _), row in zip(pairs, group_rows):
744
+ ordered[orig_idx] = row
745
+
746
+ rows = [row for row in ordered if row is not None]
635
747
  summary = _summarize('policy_run', rows)
636
748
 
637
749
  by_family: dict[str, dict] = {}
@@ -647,45 +759,51 @@ def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) ->
647
759
  return rows, summary, by_family
648
760
 
649
761
 
650
- def _select_best_summary(summaries: list[dict]) -> dict:
651
- full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
652
- if full_hit:
653
- return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
654
- return max(
655
- summaries,
656
- key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
657
- )
658
-
659
-
660
- def _bootstrap_family_policy(cases: list[AdaptCase], families: list[str]) -> tuple[dict[str, str], list[dict]]:
661
- policy: dict[str, str] = {}
662
- diagnostics: list[dict] = []
663
- for fam in families:
664
- fam_cases = [case for case in cases if _case_family(case) == fam]
665
- if not fam_cases:
666
- policy[fam] = "plain"
667
- continue
668
-
669
- method_summaries: list[dict] = []
670
- for method in _METHOD_ORDER:
671
- rows = [_evaluate_case_with_method(case, method) for case in fam_cases]
672
- summary = _summarize(f"bootstrap_{fam}_{method}", rows)
673
- summary["method"] = method
674
- summary["family"] = fam
675
- method_summaries.append(summary)
676
-
677
- best = _select_best_summary(method_summaries)
678
- selected_method = str(best.get("method", "plain"))
679
- policy[fam] = selected_method
680
- diagnostics.append(
681
- {
682
- "family": fam,
683
- "selected_method": selected_method,
684
- "selected_summary": best,
685
- "candidates": method_summaries,
686
- }
687
- )
688
- return policy, diagnostics
762
def _select_best_summary(summaries: list[dict]) -> dict:
    """Pick the preferred summary from a list of candidate summaries.

    Summaries whose ``full_hit_rate_pct`` is at least 100 are preferred; among
    them the one with the lowest ``tokens_per_expected_hit`` wins, ties broken
    by lower ``tokens_per_query``. If none reaches 100, the summary with the
    highest ``target_hit_rate_pct`` wins, ties broken by lower
    ``tokens_per_query``. Missing (or falsy per-hit) costs count as 10**9.
    """
    unknown_cost = 10**9

    def efficiency_rank(summary: dict) -> tuple:
        per_hit = summary.get("tokens_per_expected_hit") or unknown_cost
        return per_hit, summary.get("tokens_per_query", unknown_cost)

    def accuracy_rank(summary: dict) -> tuple:
        hit_rate = summary.get("target_hit_rate_pct", 0.0)
        return hit_rate, -summary.get("tokens_per_query", unknown_cost)

    perfect = [entry for entry in summaries if entry.get("full_hit_rate_pct", 0.0) >= 100.0]
    if not perfect:
        return max(summaries, key=accuracy_rank)
    return min(perfect, key=efficiency_rank)
770
+
771
+
772
def _bootstrap_family_policy(
    cases: list[AdaptCase],
    families: list[str],
    *,
    workers: int,
) -> tuple[dict[str, str], list[dict]]:
    """Choose an initial retrieval method for each query family.

    For every family, each method in ``_METHOD_ORDER`` is evaluated on that
    family's cases and the winner is picked by ``_select_best_summary``.
    Families without any case default to ``"plain"`` and produce no
    diagnostics entry.

    Returns:
        ``(policy, diagnostics)``: the family -> method mapping, and one
        diagnostics dict per evaluated family holding the selected method,
        its summary and all candidate summaries.
    """
    chosen: dict[str, str] = {}
    report: list[dict] = []

    for family_name in families:
        members = [case for case in cases if _case_family(case) == family_name]
        if members:
            candidates: list[dict] = []
            for method in _METHOD_ORDER:
                outcomes = _evaluate_cases_with_method(members, method, workers)
                candidate = _summarize(f"bootstrap_{family_name}_{method}", outcomes)
                candidate["method"] = method
                candidate["family"] = family_name
                candidates.append(candidate)

            winner = _select_best_summary(candidates)
            winner_method = str(winner.get("method", "plain"))
            chosen[family_name] = winner_method
            report.append(
                {
                    "family": family_name,
                    "selected_method": winner_method,
                    "selected_summary": winner,
                    "candidates": candidates,
                }
            )
        else:
            chosen[family_name] = "plain"

    return chosen, report
806
+
689
807
  def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
690
808
  cfg_path = repo_path / '.gcie' / 'context_config.json'
691
809
  if cfg_path.exists():
@@ -726,12 +844,11 @@ def run_post_init_adaptation(
726
844
  benchmark_size: int = 10,
727
845
  efficiency_iterations: int = 5,
728
846
  clear_profile: bool = False,
847
+ adapt_workers: int | None = None,
729
848
  ) -> dict:
730
849
  repo_path = Path(repo).resolve()
731
850
 
732
851
  # Ensure all relative retrieval/evaluation calls execute in the target repo.
733
- import os
734
-
735
852
  os.chdir(repo_path)
736
853
  run_index(repo_path.as_posix())
737
854
 
@@ -749,8 +866,9 @@ def run_post_init_adaptation(
749
866
  'message': 'No repo-usable adaptation cases available.',
750
867
  }
751
868
 
752
- families = sorted({_case_family(case) for case in cases})
753
- family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families)
869
+ workers = _adapt_worker_count(adapt_workers)
870
+ families = sorted({_case_family(case) for case in cases})
871
+ family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families, workers=workers)
754
872
 
755
873
  # Accuracy rounds: promote methods per failing family until lock.
756
874
  accuracy_rounds_max = 5
@@ -758,7 +876,7 @@ def run_post_init_adaptation(
758
876
  lock_streak = 0
759
877
 
760
878
  for rnd in range(1, accuracy_rounds_max + 1):
761
- rows, summary, by_family = _run_family_policy(cases, family_policy)
879
+ rows, summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
762
880
  round_payload = {
763
881
  'round': rnd,
764
882
  'family_policy': dict(family_policy),
@@ -792,7 +910,7 @@ def run_post_init_adaptation(
792
910
  )
793
911
 
794
912
  family_policy = dict(selected_accuracy_round['family_policy'])
795
- rows, current_summary, by_family = _run_family_policy(cases, family_policy)
913
+ rows, current_summary, by_family = _run_family_policy(cases, family_policy, workers=workers)
796
914
 
797
915
  # Efficiency rounds: attempt family-level cheaper method under hard 100% gate.
798
916
  efficiency_trials: list[dict] = []
@@ -804,7 +922,7 @@ def run_post_init_adaptation(
804
922
  continue
805
923
  trial_policy = dict(family_policy)
806
924
  trial_policy[fam] = cheaper
807
- _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy)
925
+ _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy, workers=workers)
808
926
  trial_payload = {
809
927
  'iteration': idx + 1,
810
928
  'family': fam,
@@ -825,15 +943,19 @@ def run_post_init_adaptation(
825
943
  break
826
944
 
827
945
  # Global candidate snapshots for transparency.
828
- slices_rows = [_evaluate_case_with_method(case, 'slices') for case in cases]
829
- plain_rows = [_evaluate_case_with_method(case, 'plain') for case in cases]
830
- plain_gap_rows = [_evaluate_case_with_method(case, 'plain_gapfill') for case in cases]
831
- plain_rescue_rows = [_evaluate_case_with_method(case, 'plain_rescue') for case in cases]
946
+ slices_rows = _evaluate_cases_with_method(cases, 'slices', workers)
947
+ plain_min_rows = _evaluate_cases_with_method(cases, 'plain_minimal', workers)
948
+ plain_rows = _evaluate_cases_with_method(cases, 'plain', workers)
949
+ plain_force_rows = _evaluate_cases_with_method(cases, 'plain_force', workers)
950
+ plain_gap_rows = _evaluate_cases_with_method(cases, 'plain_gapfill', workers)
951
+ plain_rescue_rows = _evaluate_cases_with_method(cases, 'plain_rescue', workers)
832
952
  slices_summary = _summarize('slices_accuracy_stage', slices_rows)
953
+ plain_min_summary = _summarize('plain_minimal_accuracy_stage', plain_min_rows)
833
954
  plain_summary = _summarize('plain_accuracy_stage', plain_rows)
955
+ plain_force_summary = _summarize('plain_force_accuracy_stage', plain_force_rows)
834
956
  plain_gap_summary = _summarize('plain_gapfill_accuracy_stage', plain_gap_rows)
835
957
  plain_rescue_summary = _summarize('plain_rescue_accuracy_stage', plain_rescue_rows)
836
- candidates = [slices_summary, plain_summary, plain_gap_summary, plain_rescue_summary]
958
+ candidates = [slices_summary, plain_min_summary, plain_summary, plain_force_summary, plain_gap_summary, plain_rescue_summary]
837
959
 
838
960
  active = {
839
961
  'label': 'family_policy_selected',
@@ -877,11 +999,12 @@ def run_post_init_adaptation(
877
999
  'benchmark_size': len(cases),
878
1000
  'requested_benchmark_size': int(benchmark_size),
879
1001
  'efficiency_iterations': int(efficiency_iterations),
1002
+ 'adapt_workers': workers,
880
1003
  'case_source': case_source,
881
1004
  'family_policy': family_policy,
882
1005
  'cost_analysis': cost_analysis,
883
- 'phases': {
884
- 'bootstrap': bootstrap_diagnostics,
1006
+ 'phases': {
1007
+ 'bootstrap': bootstrap_diagnostics,
885
1008
  'accuracy_rounds': accuracy_rounds,
886
1009
  'selected_accuracy_round': selected_accuracy_round,
887
1010
  'efficiency_trials': efficiency_trials,
@@ -920,6 +1043,8 @@ def run_post_init_adaptation(
920
1043
 
921
1044
 
922
1045
 
1046
+
1047
+
923
1048
 
924
1049
 
925
1050