@pmaddire/gcie 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/GCIE_USAGE.md CHANGED
@@ -38,8 +38,13 @@ gcie.cmd adaptive-profile . --clear
38
38
  ```
39
39
 
40
40
  Post-init adaptation pipeline:
41
+ - run from the target repo root (cd <repo> first); use . as scope
42
+ - adaptation now bootstraps per-family method defaults before accuracy rounds (plain/plain-gapfill/plain-rescue/slices)
43
+ - adaptation case generation is mixed by design (single-file, same-layer pairs, cross-subtree pairs, and some 3-file chains on larger runs)
41
44
  ```powershell
42
45
  gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5 --clear-profile
46
+ # mixed-layer repos: use wider calibration
47
+ gcie.cmd adapt . --benchmark-size 25 --efficiency-iterations 5 --clear-profile
43
48
  ```
44
49
 
45
50
  One-shot setup + adaptation:
@@ -148,7 +153,7 @@ When retrieval is weak, apply in this exact order:
148
153
  1. Query upgrade: add explicit files, symbols, caller/entry anchor
149
154
  2. Scope correction: subtree vs root
150
155
  3. One profile/budget escalation
151
- 4. Targeted gap-fill for only missing must-have file(s)
156
+ 4. Targeted gap-fill for only the missing must-have file(s), preferring a direct file-path scope first
152
157
  5. Multi-hop decomposition only if still incomplete
153
158
 
154
159
  Stop condition:
@@ -189,6 +194,29 @@ gcie.cmd index .
189
194
  - Proceed to calibration only after coverage is reachable with stable behavior.
190
195
  - If not reachable, keep safer fallback mode for affected families and continue tracking.
191
196
 
197
+ ## Calibration Quality Gate (Cross-Repo, Required)
198
+
199
+ Before accepting adaptation results, verify calibration quality:
200
+
201
+ 1. Family diversity floor:
202
+ - the generated benchmark set should cover at least 3 task families when the repo has multiple top-level subsystems
203
+ - if adaptation output is dominated by only `single_file` and `same_layer_pair`, treat it as underfit
204
+
205
+ 2. Underfit recovery:
206
+ - rerun adaptation with wider calibration
207
+ ```powershell
208
+ gcie.cmd adapt . --benchmark-size 25 --efficiency-iterations 5 --clear-profile
209
+ ```
210
+ - keep `benchmark-size 10` only for small/single-layer repos or quick smoke checks
211
+
212
+ 3. Accuracy-first acceptance:
213
+ - do not accept a profile below `100%` full-hit if a recoverable path exists
214
+ - run one rescue cycle (query upgrade -> scope correction -> one budget/profile rung -> targeted gap-fill)
215
+ - only then finalize family defaults
216
+
217
+ 4. Cost lock sanity:
218
+ - if the selected profile is much more expensive than the cheapest (`>40%` token delta), keep status as cost-risk and continue family-level refinement
219
+ - do not freeze expensive global defaults unless they are uniquely required for `100%`
192
220
  ## Automatic Post-Trigger Adaptation (Required)
193
221
 
194
222
  After trigger detection in a repo session:
@@ -338,8 +366,37 @@ After running adaptation:
338
366
  Commands:
339
367
  ```powershell
340
368
  gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5 --clear-profile
369
+ # mixed-layer repos: use wider calibration
370
+ gcie.cmd adapt . --benchmark-size 25 --efficiency-iterations 5 --clear-profile
341
371
  ```
342
372
  ```powershell
343
373
  gcie.cmd adapt . --benchmark-size 10 --efficiency-iterations 5
344
374
  ```
345
375
 
376
+
377
+ ## Remove GCIE From A Repo
378
+
379
+ To remove GCIE-managed files from the current repo:
380
+
381
+ ```powershell
382
+ gcie.cmd remove .
383
+ ```
384
+
385
+ Options:
386
+ - keep `GCIE_USAGE.md`: `--keep-usage`
387
+ - keep `SETUP_ANY_REPO.md`: `--keep-setup-doc`
388
+ - also remove `.planning` artifacts: `--remove-planning`
389
+
390
+ Example:
391
+
392
+ ```powershell
393
+ gcie.cmd remove . --remove-planning
394
+ ```
395
+
396
+
397
+
398
+
399
+
400
+
401
+
402
+
package/cli/app.py CHANGED
@@ -14,7 +14,7 @@ from .commands.context_slices import adaptive_profile_summary, clear_adaptive_pr
14
14
  from .commands.debug import run_debug
15
15
  from .commands.index import run_index
16
16
  from .commands.query import run_query
17
- from .commands.setup import run_setup
17
+ from .commands.setup import run_remove, run_setup
18
18
 
19
19
  app = typer.Typer(help="GraphCode Intelligence Engine CLI")
20
20
 
@@ -177,6 +177,21 @@ def setup_cmd(
177
177
  typer.echo(json.dumps(result, indent=2))
178
178
 
179
179
 
180
+ @app.command("remove")
181
+ def remove_cmd(
182
+ path: str = typer.Argument("."),
183
+ remove_planning: bool = typer.Option(False, "--remove-planning", help="Also remove .planning artifacts"),
184
+ keep_usage: bool = typer.Option(False, "--keep-usage", help="Keep GCIE_USAGE.md in place"),
185
+ keep_setup_doc: bool = typer.Option(False, "--keep-setup-doc", help="Keep SETUP_ANY_REPO.md in place"),
186
+ ) -> None:
187
+ result = run_remove(
188
+ path,
189
+ remove_planning=remove_planning,
190
+ remove_gcie_usage=not keep_usage,
191
+ remove_setup_doc=not keep_setup_doc,
192
+ )
193
+ typer.echo(json.dumps(result, indent=2))
194
+
180
195
  @app.command("cache-clear")
181
196
  def cache_clear_cmd(path: str = typer.Argument(".")) -> None:
182
197
  result = clear_cache(path)
@@ -197,3 +212,5 @@ def cache_warm_cmd(path: str = typer.Argument(".")) -> None:
197
212
 
198
213
  if __name__ == "__main__":
199
214
  app()
215
+
216
+
@@ -1,4 +1,4 @@
1
- """Post-initialization adaptation pipeline (accuracy first, then efficiency)."""
1
+ """Post-initialization adaptation pipeline (accuracy rounds first, then efficiency rounds)."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -14,7 +14,7 @@ from .index import run_index
14
14
 
15
15
  try:
16
16
  from performance.context_benchmark import BENCHMARK_CASES
17
- except Exception: # pragma: no cover - fallback for limited installs
17
+ except Exception: # pragma: no cover
18
18
  BENCHMARK_CASES = ()
19
19
 
20
20
 
@@ -52,16 +52,60 @@ _IGNORED_DIRS = {
52
52
  "build",
53
53
  "coverage",
54
54
  }
55
+ _METHOD_ORDER = ["plain", "plain_gapfill", "plain_rescue", "slices"]
55
56
 
56
57
 
57
58
  def _query_keywords(text: str) -> list[str]:
58
- terms: list[str] = []
59
- for token in _WORD_RE.findall(text.lower()):
60
- if len(token) < 4:
61
- continue
62
- terms.append(token)
63
- return terms[:8]
59
+ return [t for t in _WORD_RE.findall(text.lower()) if len(t) >= 4][:8]
60
+
61
+
62
+ def _extract_query_cues_for_file(repo_path: Path, rel: str) -> list[str]:
63
+ path = repo_path / rel
64
+ try:
65
+ text = path.read_text(encoding='utf-8', errors='ignore')
66
+ except Exception:
67
+ return [Path(rel).stem.lower()]
68
+
69
+ body = text[:12000]
70
+ cues: list[str] = [Path(rel).stem.lower()]
71
+
72
+ patterns = [
73
+ r"^\s*def\s+([A-Za-z_][A-Za-z0-9_]*)",
74
+ r"^\s*class\s+([A-Za-z_][A-Za-z0-9_]*)",
75
+ r"^\s*(?:async\s+)?function\s+([A-Za-z_][A-Za-z0-9_]*)",
76
+ r"^\s*const\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:async\s*)?(?:\(|function\b)",
77
+ r"^\s*export\s+function\s+([A-Za-z_][A-Za-z0-9_]*)",
78
+ ]
79
+ for pat in patterns:
80
+ for name in re.findall(pat, body, flags=re.MULTILINE):
81
+ token = str(name).lower()
82
+ if len(token) >= 4:
83
+ cues.append(token)
84
+ if len(cues) >= 8:
85
+ break
86
+ if len(cues) >= 8:
87
+ break
64
88
 
89
+ for route in re.findall(r"['\"](/api/[A-Za-z0-9_/{}/-]+)['\"]", body):
90
+ cues.append(route.lower())
91
+ if len(cues) >= 10:
92
+ break
93
+
94
+ for key in re.findall(r"\b[A-Z][A-Z0-9_]{3,}\b", body):
95
+ cues.append(key.lower())
96
+ if len(cues) >= 12:
97
+ break
98
+
99
+ dedup: list[str] = []
100
+ seen: set[str] = set()
101
+ for cue in cues:
102
+ if cue in seen:
103
+ continue
104
+ seen.add(cue)
105
+ dedup.append(cue)
106
+ if len(dedup) >= 8:
107
+ break
108
+ return dedup
65
109
 
66
110
  def _node_to_file(node_id: str) -> str | None:
67
111
  if node_id.startswith("file:"):
@@ -83,17 +127,33 @@ def _normalize_scoped_path(plan_path: str, rel_path: str) -> str:
83
127
  return f"{base}/{normalized}"
84
128
 
85
129
 
86
- def _family_path(expected_files: tuple[str, ...]) -> str:
87
- if not expected_files:
88
- return "."
89
- heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
90
- if len(heads) == 1:
91
- return next(iter(heads))
92
- return "."
93
-
130
+ def _family_path(expected_files: tuple[str, ...]) -> str:
131
+ if not expected_files:
132
+ return "."
133
+ parent_parts: list[tuple[str, ...]] = []
134
+ for rel in expected_files:
135
+ parent = Path(rel).parent
136
+ if str(parent) in {"", "."}:
137
+ parent_parts.append(tuple())
138
+ else:
139
+ parent_parts.append(tuple(parent.parts))
140
+
141
+ common: list[str] = []
142
+ if parent_parts:
143
+ shortest = min(len(parts) for parts in parent_parts)
144
+ for idx in range(shortest):
145
+ token = parent_parts[0][idx]
146
+ if all(parts[idx] == token for parts in parent_parts):
147
+ common.append(token)
148
+ else:
149
+ break
150
+ if common:
151
+ return Path(*common).as_posix()
152
+
153
+ heads = {Path(p).parts[0] for p in expected_files if Path(p).parts}
154
+ return next(iter(heads)) if len(heads) == 1 else "."
94
155
 
95
156
  def _safe_scope(path: str) -> str:
96
- """Return a valid retrieval scope for the current repo."""
97
157
  if not path or path in {".", "./"}:
98
158
  return "."
99
159
  candidate = Path(path)
@@ -102,34 +162,81 @@ def _safe_scope(path: str) -> str:
102
162
  return "."
103
163
 
104
164
 
105
- def _plan_query(case) -> tuple[str, str, int | None]:
106
- path = _family_path(case.expected_files)
107
- if getattr(case, "name", "") == "cli_context_command":
108
- path = "."
109
- query = "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector"
110
- return path, query, 950
111
- keywords = " ".join(_query_keywords(case.query)[:4])
112
- file_terms = " ".join(case.expected_files)
113
- query = f"{file_terms} {keywords}".strip()
114
- budget = 1000 if len(case.expected_files) >= 2 else None
115
- if getattr(case, "name", "") in {
116
- "repository_scanner_filters",
117
- "knowledge_index_query_api",
118
- "execution_trace_graph",
119
- "parser_fallbacks",
120
- }:
121
- budget = 800
165
+ def _plan_query(case) -> tuple[str, str, int | None]:
166
+ path = _family_path(case.expected_files)
167
+ if getattr(case, "name", "") == "cli_context_command":
168
+ return ".", "cli/commands/context.py llm_context/context_builder.py build_context token_budget mandatory_node_ids snippet_selector", 950
169
+
170
+ repo_path = Path('.').resolve()
171
+ cue_terms: list[str] = []
172
+ for rel in case.expected_files:
173
+ cue_terms.extend(_extract_query_cues_for_file(repo_path, rel)[:3])
174
+ cue_terms.extend(_query_keywords(case.query)[:4])
175
+
176
+ dedup: list[str] = []
177
+ seen: set[str] = set()
178
+ for token in [*case.expected_files, *cue_terms]:
179
+ key = token.lower()
180
+ if key in seen:
181
+ continue
182
+ seen.add(key)
183
+ dedup.append(token)
184
+ if len(dedup) >= 14:
185
+ break
186
+ query = " ".join(dedup).strip()
187
+
188
+ expected_count = len(case.expected_files)
189
+ if expected_count >= 3:
190
+ budget = 1100
191
+ elif expected_count == 2:
192
+ budget = 950
193
+ else:
194
+ budget = 850
195
+
196
+ if getattr(case, "name", "") in {"repository_scanner_filters", "knowledge_index_query_api", "execution_trace_graph", "parser_fallbacks"}:
197
+ budget = 800
122
198
  return path, query, budget
123
199
 
200
+ def _case_family(case) -> str:
201
+ _, planned_query, _ = _plan_query(case)
202
+ return _classify_query_family(planned_query)
203
+
204
+
205
+ def _build_gapfill_query(case, missing_rel: str) -> str:
206
+ anchors = [rel for rel in case.expected_files if rel != missing_rel][:2]
207
+ repo_path = Path('.').resolve()
208
+
209
+ tokens: list[str] = [missing_rel]
210
+ tokens.extend(anchors)
211
+
212
+ cue_files = [missing_rel]
213
+ cue_files.extend(anchors)
214
+ for rel in cue_files:
215
+ tokens.extend(_extract_query_cues_for_file(repo_path, rel)[:4])
216
+
217
+ tokens.extend(_query_keywords(case.query)[:4])
124
218
 
125
- def _evaluate_plain_case(case, *, allow_gapfill: bool = True) -> CaseResult:
219
+ dedup: list[str] = []
220
+ seen: set[str] = set()
221
+ for tok in tokens:
222
+ key = tok.lower()
223
+ if key in seen:
224
+ continue
225
+ seen.add(key)
226
+ dedup.append(tok)
227
+ if len(dedup) >= 14:
228
+ break
229
+
230
+ return " ".join(dedup)
231
+
232
+ def _evaluate_plain_case(case, *, allow_gapfill: bool = True, aggressive_gapfill: bool = False) -> CaseResult:
126
233
  path, query, budget = _plan_query(case)
127
234
  path = _safe_scope(path)
128
235
  payload = run_context(path, query, budget=budget, intent=case.intent)
129
236
  files = {
130
- _normalize_scoped_path(path, rel_path)
131
- for rel_path in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
132
- if rel_path
237
+ _normalize_scoped_path(path, rel)
238
+ for rel in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", []))
239
+ if rel
133
240
  }
134
241
  expected = tuple(case.expected_files)
135
242
  missing = [rel for rel in expected if rel not in files]
@@ -139,19 +246,39 @@ def _evaluate_plain_case(case, *, allow_gapfill: bool = True) -> CaseResult:
139
246
  if allow_gapfill and missing:
140
247
  mode = "plain_context_workflow_gapfill"
141
248
  for rel in list(missing):
142
- scope = _safe_scope(_family_path((rel,)))
143
- gap_keywords = " ".join(_query_keywords(case.query)[:4])
144
- gap_query = f"{rel} {gap_keywords}".strip()
145
- gap_budget = 500 if rel.endswith("/main.py") or rel == "main.py" else 900
146
- gap_payload = run_context(scope, gap_query, budget=gap_budget, intent=case.intent)
147
- tokens += int(gap_payload.get("tokens", 0) or 0)
148
- gap_files = {
149
- _normalize_scoped_path(scope, rel_path)
150
- for rel_path in (_node_to_file(item.get("node_id", "")) for item in gap_payload.get("snippets", []))
151
- if rel_path
152
- }
153
- files.update(gap_files)
154
- missing = [m for m in expected if m not in files]
249
+ gap_query = _build_gapfill_query(case, rel)
250
+
251
+ # Prefer direct file-targeted recovery when possible to avoid expensive broad rescues.
252
+ direct_scope = rel if (Path(rel).exists() and Path(rel).is_file()) else None
253
+ base_scope = _safe_scope(_family_path((rel,)))
254
+ scopes: list[str] = []
255
+ if direct_scope:
256
+ scopes.append(direct_scope)
257
+ if base_scope not in scopes:
258
+ scopes.append(base_scope)
259
+
260
+ budgets = [500 if rel.endswith('/main.py') or rel == 'main.py' else 900]
261
+ if len(scopes) > 1:
262
+ budgets.append(budgets[0])
263
+
264
+ if aggressive_gapfill:
265
+ if '.' not in scopes:
266
+ scopes.append('.')
267
+ budgets.append(max(budgets[0], 1200))
268
+ mode = "plain_context_workflow_gapfill_rescue"
269
+
270
+ for scope, gap_budget in zip(scopes, budgets):
271
+ gap_payload = run_context(scope, gap_query, budget=gap_budget, intent=case.intent)
272
+ tokens += int(gap_payload.get("tokens", 0) or 0)
273
+ gap_files = {
274
+ _normalize_scoped_path(scope, rel2)
275
+ for rel2 in (_node_to_file(item.get("node_id", "")) for item in gap_payload.get("snippets", []))
276
+ if rel2
277
+ }
278
+ files.update(gap_files)
279
+ missing = [m for m in expected if m not in files]
280
+ if not missing:
281
+ break
155
282
  if not missing:
156
283
  break
157
284
 
@@ -171,9 +298,9 @@ def _evaluate_plain_case(case, *, allow_gapfill: bool = True) -> CaseResult:
171
298
 
172
299
  def _evaluate_slices_case(case) -> CaseResult:
173
300
  payload = run_context_slices(
174
- repo=".",
301
+ repo='.',
175
302
  query=case.query,
176
- profile="low",
303
+ profile='low',
177
304
  stage_a_budget=300,
178
305
  stage_b_budget=600,
179
306
  max_total=800,
@@ -184,19 +311,15 @@ def _evaluate_slices_case(case) -> CaseResult:
184
311
  )
185
312
  mode = "slices_low"
186
313
  tokens = int(payload.get("token_estimate", payload.get("tokens", 0)) or 0)
187
- files = {
188
- _node_to_file(item.get("node_id", ""))
189
- for item in payload.get("snippets", [])
190
- }
191
- files = {f for f in files if f}
314
+ files = {f for f in (_node_to_file(item.get("node_id", "")) for item in payload.get("snippets", [])) if f}
192
315
  expected = tuple(case.expected_files)
193
316
  missing = [rel for rel in expected if rel not in files]
194
317
  if missing:
195
318
  mode = "slices_recall"
196
319
  recall_payload = run_context_slices(
197
- repo=".",
320
+ repo='.',
198
321
  query=case.query,
199
- profile="recall",
322
+ profile='recall',
200
323
  stage_a_budget=400,
201
324
  stage_b_budget=800,
202
325
  max_total=1200,
@@ -206,21 +329,15 @@ def _evaluate_slices_case(case) -> CaseResult:
206
329
  include_tests=False,
207
330
  )
208
331
  tokens += int(recall_payload.get("token_estimate", recall_payload.get("tokens", 0)) or 0)
209
- files.update(
210
- {
211
- f
212
- for f in (_node_to_file(item.get("node_id", "")) for item in recall_payload.get("snippets", []))
213
- if f
214
- }
215
- )
332
+ files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in recall_payload.get("snippets", [])) if f})
216
333
  missing = [rel for rel in expected if rel not in files]
217
334
  if missing:
218
335
  mode = "slices_recall_pin"
219
336
  for rel in list(missing):
220
337
  pin_payload = run_context_slices(
221
- repo=".",
338
+ repo='.',
222
339
  query=case.query,
223
- profile="recall",
340
+ profile='recall',
224
341
  stage_a_budget=400,
225
342
  stage_b_budget=800,
226
343
  max_total=1200,
@@ -230,18 +347,13 @@ def _evaluate_slices_case(case) -> CaseResult:
230
347
  include_tests=False,
231
348
  )
232
349
  tokens += int(pin_payload.get("token_estimate", pin_payload.get("tokens", 0)) or 0)
233
- files.update(
234
- {
235
- f
236
- for f in (_node_to_file(item.get("node_id", "")) for item in pin_payload.get("snippets", []))
237
- if f
238
- }
239
- )
350
+ files.update({f for f in (_node_to_file(item.get("node_id", "")) for item in pin_payload.get("snippets", [])) if f})
240
351
  missing = [m for m in expected if m not in files]
241
352
  if not missing:
242
353
  break
354
+
243
355
  expected_hits = len(expected) - len(missing)
244
- family = _classify_query_family(case.query)
356
+ family = _case_family(case)
245
357
  return CaseResult(
246
358
  name=case.name,
247
359
  family=family,
@@ -254,6 +366,16 @@ def _evaluate_slices_case(case) -> CaseResult:
254
366
  )
255
367
 
256
368
 
369
+ def _evaluate_case_with_method(case, method: str) -> CaseResult:
370
+ if method == "plain":
371
+ return _evaluate_plain_case(case, allow_gapfill=False)
372
+ if method == "plain_gapfill":
373
+ return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=False)
374
+ if method == "plain_rescue":
375
+ return _evaluate_plain_case(case, allow_gapfill=True, aggressive_gapfill=True)
376
+ return _evaluate_slices_case(case)
377
+
378
+
257
379
  def _summarize(label: str, rows: list[CaseResult]) -> dict:
258
380
  case_count = len(rows)
259
381
  pass_count = sum(1 for row in rows if row.context_complete)
@@ -275,7 +397,7 @@ def _summarize(label: str, rows: list[CaseResult]) -> dict:
275
397
 
276
398
  def _collect_source_files(repo_path: Path) -> list[str]:
277
399
  files: list[str] = []
278
- for path in repo_path.rglob("*"):
400
+ for path in repo_path.rglob('*'):
279
401
  if not path.is_file():
280
402
  continue
281
403
  rel = path.relative_to(repo_path)
@@ -287,29 +409,6 @@ def _collect_source_files(repo_path: Path) -> list[str]:
287
409
  return sorted(files)
288
410
 
289
411
 
290
- def _static_cases_for_repo(repo_path: Path) -> list[AdaptCase]:
291
- out: list[AdaptCase] = []
292
- for case in list(BENCHMARK_CASES):
293
- expected = tuple(case.expected_files)
294
- if not expected:
295
- continue
296
- if not all((repo_path / rel).exists() for rel in expected):
297
- continue
298
- baseline = tuple(rel for rel in case.baseline_files if (repo_path / rel).exists())
299
- if not baseline:
300
- baseline = expected
301
- out.append(
302
- AdaptCase(
303
- name=case.name,
304
- query=case.query,
305
- intent=case.intent,
306
- baseline_files=baseline,
307
- expected_files=expected,
308
- )
309
- )
310
- return out
311
-
312
-
313
412
  def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
314
413
  files = _collect_source_files(repo_path)
315
414
  if not files:
@@ -317,15 +416,20 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
317
416
 
318
417
  by_dir: dict[str, list[str]] = {}
319
418
  for rel in files:
320
- parent = str(Path(rel).parent).replace("\\", "/")
419
+ parent = str(Path(rel).parent).replace('\\', '/')
321
420
  by_dir.setdefault(parent, []).append(rel)
322
421
 
323
422
  rows: list[AdaptCase] = []
324
423
  seen_names: set[str] = set()
424
+ seen_expected: set[tuple[str, ...]] = set()
425
+ cue_cache: dict[str, list[str]] = {}
325
426
 
326
- def add_case(name: str, expected: tuple[str, ...], intent: str = "explore") -> None:
427
+ def add_case(name: str, expected: tuple[str, ...], intent: str = 'explore') -> None:
327
428
  if len(rows) >= needed:
328
429
  return
430
+ expected_key = tuple(sorted(expected))
431
+ if expected_key in seen_expected:
432
+ return
329
433
  safe_name = re.sub(r"[^a-zA-Z0-9_]+", "_", name).strip("_").lower() or "case"
330
434
  if safe_name in seen_names:
331
435
  idx = 2
@@ -333,93 +437,224 @@ def _generated_cases_for_repo(repo_path: Path, needed: int) -> list[AdaptCase]:
333
437
  idx += 1
334
438
  safe_name = f"{safe_name}_{idx}"
335
439
  seen_names.add(safe_name)
336
- symbols = []
440
+ seen_expected.add(expected_key)
441
+ symbols: list[str] = []
337
442
  for rel in expected:
338
- stem = Path(rel).stem.lower()
339
- symbols.extend([stem, "flow", "wiring"])
340
- query = f"{' '.join(expected)} {' '.join(symbols[:6])}".strip()
341
- rows.append(
342
- AdaptCase(
343
- name=safe_name,
344
- query=query,
345
- intent=intent,
346
- baseline_files=expected,
347
- expected_files=expected,
348
- )
349
- )
350
-
351
- # Single-file probes.
443
+ cues = cue_cache.get(rel)
444
+ if cues is None:
445
+ cues = _extract_query_cues_for_file(repo_path, rel)
446
+ cue_cache[rel] = cues
447
+ symbols.extend(cues)
448
+ if not symbols:
449
+ symbols = [Path(rel).stem.lower() for rel in expected]
450
+ query = f"{' '.join(expected)} {' '.join(symbols[:8])}".strip()
451
+ rows.append(AdaptCase(name=safe_name, query=query, intent=intent, baseline_files=expected, expected_files=expected))
452
+
453
+ # Build a diversified sample so adaptation can learn in mixed-layer repos.
454
+ single_target = max(1, needed // 3)
455
+ same_dir_target = max(1, needed // 3)
456
+ cross_dir_target = max(1, needed - single_target - same_dir_target)
457
+
458
+ # 1) singles
352
459
  for rel in files:
353
- add_case(f"single_{Path(rel).stem}", (rel,), intent="explore")
354
- if len(rows) >= max(needed // 2, 1):
460
+ add_case(f"single_{Path(rel).stem}", (rel,), intent='explore')
461
+ if len(rows) >= single_target:
355
462
  break
356
463
 
357
- # Same-directory pairs.
358
- for parent, group in sorted(by_dir.items(), key=lambda item: item[0]):
464
+ # 2) same-dir adjacent pairs
465
+ same_pairs_added = 0
466
+ for parent, group in sorted(by_dir.items(), key=lambda x: x[0]):
359
467
  if len(group) < 2:
360
468
  continue
469
+ label = "root" if parent in {'.', ''} else parent
361
470
  group = sorted(group)
362
471
  for idx in range(len(group) - 1):
363
- add_case(f"pair_{parent}_{idx}", (group[idx], group[idx + 1]), intent="explore")
472
+ add_case(f"pair_{label}_{idx}", (group[idx], group[idx + 1]), intent='explore')
364
473
  if len(rows) >= needed:
365
474
  return rows[:needed]
475
+ same_pairs_added += 1
476
+ if same_pairs_added >= same_dir_target:
477
+ break
478
+ if same_pairs_added >= same_dir_target:
479
+ break
366
480
 
367
- # Cross-directory pairs if still needed.
481
+ # 3) cross-dir pairs (top-level representatives)
368
482
  tops: dict[str, str] = {}
369
483
  for rel in files:
370
484
  top = Path(rel).parts[0] if Path(rel).parts else rel
371
485
  tops.setdefault(top, rel)
372
- top_files = list(tops.values())
373
- for idx in range(len(top_files) - 1):
374
- add_case(f"cross_{idx}", (top_files[idx], top_files[idx + 1]), intent="explore")
486
+ top_items = sorted(tops.items(), key=lambda item: item[0])
487
+ cross_added = 0
488
+ for idx in range(len(top_items) - 1):
489
+ left = top_items[idx][1]
490
+ right = top_items[idx + 1][1]
491
+ add_case(f"cross_{top_items[idx][0]}_{top_items[idx + 1][0]}", (left, right), intent='explore')
375
492
  if len(rows) >= needed:
493
+ return rows[:needed]
494
+ cross_added += 1
495
+ if cross_added >= cross_dir_target:
376
496
  break
377
497
 
498
+ # 4) include some 3-file chains for multi-hop calibration when dataset is larger.
499
+ if needed >= 12 and len(rows) < needed:
500
+ chain_budget = max(1, needed // 6)
501
+ chains_added = 0
502
+ reps = [item[1] for item in top_items]
503
+ for idx in range(len(reps) - 2):
504
+ add_case(
505
+ f"chain_{idx}",
506
+ (reps[idx], reps[idx + 1], reps[idx + 2]),
507
+ intent='refactor',
508
+ )
509
+ if len(rows) >= needed:
510
+ return rows[:needed]
511
+ chains_added += 1
512
+ if chains_added >= chain_budget:
513
+ break
514
+
515
+ # 5) fill remainder with additional nearby pairs
516
+ if len(rows) < needed:
517
+ for idx in range(len(files) - 1):
518
+ add_case(f"fill_{idx}", (files[idx], files[idx + 1]), intent='explore')
519
+ if len(rows) >= needed:
520
+ break
521
+
378
522
  return rows[:needed]
379
523
 
380
524
 
381
525
  def _select_adaptation_cases(repo_path: Path, benchmark_size: int) -> tuple[list[AdaptCase], str]:
382
- """Select adaptation cases generated entirely from the target repo."""
383
526
  benchmark_size = max(1, int(benchmark_size))
384
527
  generated = _generated_cases_for_repo(repo_path, benchmark_size)
385
528
  if generated:
386
- return generated[:benchmark_size], "generated_repo_local"
387
- return [], "none_available"
388
-
389
-
390
- def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict) -> None:
391
- cfg_path = repo_path / ".gcie" / "context_config.json"
529
+ return generated[:benchmark_size], 'generated_repo_local'
530
+ return [], 'none_available'
531
+
532
+
533
+ def _next_method(method: str) -> str:
534
+ try:
535
+ idx = _METHOD_ORDER.index(method)
536
+ except ValueError:
537
+ return _METHOD_ORDER[0]
538
+ return _METHOD_ORDER[min(idx + 1, len(_METHOD_ORDER) - 1)]
539
+
540
+
541
+ def _cheaper_method(method: str) -> str | None:
542
+ try:
543
+ idx = _METHOD_ORDER.index(method)
544
+ except ValueError:
545
+ return None
546
+ if idx <= 0:
547
+ return None
548
+ return _METHOD_ORDER[idx - 1]
549
+
550
+
551
+ def _run_family_policy(cases: list[AdaptCase], family_policy: dict[str, str]) -> tuple[list[CaseResult], dict, dict[str, dict]]:
552
+ rows: list[CaseResult] = []
553
+ for case in cases:
554
+ family = _case_family(case)
555
+ method = family_policy.get(family, 'plain')
556
+ rows.append(_evaluate_case_with_method(case, method))
557
+ summary = _summarize('policy_run', rows)
558
+
559
+ by_family: dict[str, dict] = {}
560
+ for row in rows:
561
+ entry = by_family.setdefault(row.family, {'cases': 0, 'passes': 0, 'tokens': 0})
562
+ entry['cases'] += 1
563
+ entry['passes'] += 1 if row.context_complete else 0
564
+ entry['tokens'] += row.tokens
565
+ for fam, entry in by_family.items():
566
+ entry['pass_rate'] = round(entry['passes'] / max(1, entry['cases']), 3)
567
+ entry['tokens_per_case'] = round(entry['tokens'] / max(1, entry['cases']), 1)
568
+
569
+ return rows, summary, by_family
570
+
571
+
572
+ def _select_best_summary(summaries: list[dict]) -> dict:
573
+ full_hit = [s for s in summaries if s.get("full_hit_rate_pct", 0.0) >= 100.0]
574
+ if full_hit:
575
+ return min(full_hit, key=lambda s: (s.get("tokens_per_expected_hit") or 10**9, s.get("tokens_per_query", 10**9)))
576
+ return max(
577
+ summaries,
578
+ key=lambda s: (s.get("target_hit_rate_pct", 0.0), -s.get("tokens_per_query", 10**9)),
579
+ )
580
+
581
+
582
+ def _bootstrap_family_policy(cases: list[AdaptCase], families: list[str]) -> tuple[dict[str, str], list[dict]]:
583
+ policy: dict[str, str] = {}
584
+ diagnostics: list[dict] = []
585
+ for fam in families:
586
+ fam_cases = [case for case in cases if _case_family(case) == fam]
587
+ if not fam_cases:
588
+ policy[fam] = "plain"
589
+ continue
590
+
591
+ method_summaries: list[dict] = []
592
+ for method in _METHOD_ORDER:
593
+ rows = [_evaluate_case_with_method(case, method) for case in fam_cases]
594
+ summary = _summarize(f"bootstrap_{fam}_{method}", rows)
595
+ summary["method"] = method
596
+ summary["family"] = fam
597
+ method_summaries.append(summary)
598
+
599
+ best = _select_best_summary(method_summaries)
600
+ selected_method = str(best.get("method", "plain"))
601
+ policy[fam] = selected_method
602
+ diagnostics.append(
603
+ {
604
+ "family": fam,
605
+ "selected_method": selected_method,
606
+ "selected_summary": best,
607
+ "candidates": method_summaries,
608
+ }
609
+ )
610
+ return policy, diagnostics
611
+ def _write_back(repo_path: Path, best: dict, case_source: str, pipeline_status: str, cost_analysis: dict, family_policy: dict[str, str]) -> None:
612
+ cfg_path = repo_path / '.gcie' / 'context_config.json'
392
613
  if cfg_path.exists():
393
614
  try:
394
- cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
615
+ cfg = json.loads(cfg_path.read_text(encoding='utf-8'))
395
616
  if not isinstance(cfg, dict):
396
617
  cfg = {}
397
618
  except Exception:
398
619
  cfg = {}
399
620
  else:
400
621
  cfg = {}
401
- cfg["adaptation_pipeline"] = {
402
- "status": pipeline_status,
403
- "best_label": best.get("label"),
404
- "full_hit_rate_pct": best.get("full_hit_rate_pct"),
405
- "tokens_per_query": best.get("tokens_per_query"),
406
- "case_source": case_source,
407
- "cost_analysis": cost_analysis,
408
- "updated_at": datetime.now(timezone.utc).isoformat(),
622
+
623
+ cfg['adaptation_pipeline'] = {
624
+ 'status': pipeline_status,
625
+ 'best_label': best.get('label'),
626
+ 'full_hit_rate_pct': best.get('full_hit_rate_pct'),
627
+ 'tokens_per_query': best.get('tokens_per_query'),
628
+ 'case_source': case_source,
629
+ 'cost_analysis': cost_analysis,
630
+ 'family_policy': family_policy,
631
+ 'updated_at': datetime.now(timezone.utc).isoformat(),
409
632
  }
410
633
  cfg_path.parent.mkdir(parents=True, exist_ok=True)
411
- cfg_path.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
412
-
634
+ cfg_path.write_text(json.dumps(cfg, indent=2), encoding='utf-8')
635
+
636
+ def _select_best_full_hit(candidates: list[dict]) -> dict | None:
637
+ full_hit = [c for c in candidates if c.get('full_hit_rate_pct', 0.0) >= 100.0]
638
+ if not full_hit:
639
+ return None
640
+ return min(
641
+ full_hit,
642
+ key=lambda item: (item.get('tokens_per_expected_hit') or 10**9, item.get('tokens_per_query', 10**9)),
643
+ )
413
644
 
414
645
  def run_post_init_adaptation(
415
- repo: str = ".",
646
+ repo: str = '.',
416
647
  *,
417
648
  benchmark_size: int = 10,
418
649
  efficiency_iterations: int = 5,
419
650
  clear_profile: bool = False,
420
651
  ) -> dict:
421
- """Run accuracy-lock then efficiency adaptation protocol after setup/index."""
422
652
  repo_path = Path(repo).resolve()
653
+
654
+ # Ensure all relative retrieval/evaluation calls execute in the target repo.
655
+ import os
656
+
657
+ os.chdir(repo_path)
423
658
  run_index(repo_path.as_posix())
424
659
 
425
660
  if clear_profile:
@@ -430,80 +665,180 @@ def run_post_init_adaptation(
430
665
  cases, case_source = _select_adaptation_cases(repo_path, benchmark_size)
431
666
  if not cases:
432
667
  return {
433
- "status": "no_benchmark_cases",
434
- "repo": repo_path.as_posix(),
435
- "case_source": case_source,
436
- "message": "No repo-usable adaptation cases available.",
668
+ 'status': 'no_benchmark_cases',
669
+ 'repo': repo_path.as_posix(),
670
+ 'case_source': case_source,
671
+ 'message': 'No repo-usable adaptation cases available.',
437
672
  }
438
673
 
439
- slices_rows = [_evaluate_slices_case(case) for case in cases]
440
- plain_rows = [_evaluate_plain_case(case, allow_gapfill=False) for case in cases]
441
- plain_gap_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]
674
+ families = sorted({_case_family(case) for case in cases})
675
+ family_policy, bootstrap_diagnostics = _bootstrap_family_policy(cases, families)
676
+
677
+ # Accuracy rounds: promote methods per failing family until lock.
678
+ accuracy_rounds_max = 5
679
+ accuracy_rounds: list[dict] = []
680
+ lock_streak = 0
681
+
682
+ for rnd in range(1, accuracy_rounds_max + 1):
683
+ rows, summary, by_family = _run_family_policy(cases, family_policy)
684
+ round_payload = {
685
+ 'round': rnd,
686
+ 'family_policy': dict(family_policy),
687
+ 'summary': summary,
688
+ 'family_metrics': by_family,
689
+ }
690
+ accuracy_rounds.append(round_payload)
442
691
 
443
- slices_summary = _summarize("slices_accuracy_stage", slices_rows)
444
- plain_summary = _summarize("plain_accuracy_stage", plain_rows)
445
- plain_gap_summary = _summarize("plain_gapfill_accuracy_stage", plain_gap_rows)
692
+ if summary['full_hit_rate_pct'] >= 100.0:
693
+ lock_streak += 1
694
+ if lock_streak >= 2:
695
+ break
696
+ continue
446
697
 
447
- candidates = [slices_summary, plain_summary, plain_gap_summary]
448
- full_hit = [candidate for candidate in candidates if candidate["full_hit_rate_pct"] >= 100.0]
449
- if full_hit:
450
- best = min(full_hit, key=lambda item: (item["tokens_per_expected_hit"] or 10**9, item["tokens_per_query"]))
698
+ lock_streak = 0
699
+ for fam, metrics in by_family.items():
700
+ if metrics.get('pass_rate', 0.0) < 1.0:
701
+ family_policy[fam] = _next_method(family_policy.get(fam, 'plain'))
702
+
703
+ # Select best accuracy-locked round if available.
704
+ locked_rounds = [r for r in accuracy_rounds if r['summary']['full_hit_rate_pct'] >= 100.0]
705
+ if locked_rounds:
706
+ selected_accuracy_round = min(
707
+ locked_rounds,
708
+ key=lambda r: (r['summary'].get('tokens_per_expected_hit') or 10**9, r['summary'].get('tokens_per_query', 10**9)),
709
+ )
451
710
  else:
452
- best = max(candidates, key=lambda item: item["target_hit_rate_pct"])
711
+ selected_accuracy_round = max(
712
+ accuracy_rounds,
713
+ key=lambda r: (r['summary'].get('target_hit_rate_pct', 0.0), -r['summary'].get('tokens_per_query', 10**9)),
714
+ )
715
+
716
+ family_policy = dict(selected_accuracy_round['family_policy'])
717
+ rows, current_summary, by_family = _run_family_policy(cases, family_policy)
453
718
 
719
+ # Efficiency rounds: attempt family-level cheaper method under hard 100% gate.
454
720
  efficiency_trials: list[dict] = []
455
- active = best
456
721
  for idx in range(max(0, int(efficiency_iterations))):
457
- if active["label"] != "plain_gapfill_accuracy_stage":
722
+ improved = False
723
+ for fam in families:
724
+ cheaper = _cheaper_method(family_policy.get(fam, 'plain'))
725
+ if not cheaper:
726
+ continue
727
+ trial_policy = dict(family_policy)
728
+ trial_policy[fam] = cheaper
729
+ _, trial_summary, trial_by_family = _run_family_policy(cases, trial_policy)
730
+ trial_payload = {
731
+ 'iteration': idx + 1,
732
+ 'family': fam,
733
+ 'trial_policy': trial_policy,
734
+ 'summary': trial_summary,
735
+ }
736
+ efficiency_trials.append(trial_payload)
737
+
738
+ if (
739
+ trial_summary.get('full_hit_rate_pct', 0.0) >= 100.0
740
+ and trial_summary.get('tokens_per_query', 10**9) < current_summary.get('tokens_per_query', 10**9)
741
+ ):
742
+ family_policy = trial_policy
743
+ current_summary = trial_summary
744
+ by_family = trial_by_family
745
+ improved = True
746
+ if not improved:
458
747
  break
459
- trial_rows = [_evaluate_plain_case(case, allow_gapfill=True) for case in cases]
460
- trial = _summarize(f"plain_gapfill_eff_trial_{idx + 1}", trial_rows)
461
- efficiency_trials.append(trial)
462
- if trial["full_hit_rate_pct"] >= active["full_hit_rate_pct"] and trial["tokens_per_query"] < active["tokens_per_query"]:
463
- active = trial
464
748
 
465
- cheapest = min(candidates, key=lambda item: (item["tokens_per_expected_hit"] or 10**9, item["tokens_per_query"]))
466
- token_delta = int(active["total_tokens"] - cheapest["total_tokens"])
467
- pct_delta = round((token_delta / max(1, int(cheapest["total_tokens"]))) * 100, 1)
749
+ # Global candidate snapshots for transparency.
750
+ slices_rows = [_evaluate_case_with_method(case, 'slices') for case in cases]
751
+ plain_rows = [_evaluate_case_with_method(case, 'plain') for case in cases]
752
+ plain_gap_rows = [_evaluate_case_with_method(case, 'plain_gapfill') for case in cases]
753
+ plain_rescue_rows = [_evaluate_case_with_method(case, 'plain_rescue') for case in cases]
754
+ slices_summary = _summarize('slices_accuracy_stage', slices_rows)
755
+ plain_summary = _summarize('plain_accuracy_stage', plain_rows)
756
+ plain_gap_summary = _summarize('plain_gapfill_accuracy_stage', plain_gap_rows)
757
+ plain_rescue_summary = _summarize('plain_rescue_accuracy_stage', plain_rescue_rows)
758
+ candidates = [slices_summary, plain_summary, plain_gap_summary, plain_rescue_summary]
759
+
760
+ active = {
761
+ 'label': 'family_policy_selected',
762
+ **current_summary,
763
+ }
764
+
765
+ # Hard accuracy fallback: never finalize below 100% when any known candidate reaches 100%.
766
+ all_full_hit_candidates = list(candidates)
767
+ all_full_hit_candidates.extend(r['summary'] for r in accuracy_rounds)
768
+ all_full_hit_candidates.append(current_summary)
769
+ best_full_hit = _select_best_full_hit(all_full_hit_candidates)
770
+ if active.get('full_hit_rate_pct', 0.0) < 100.0 and best_full_hit is not None:
771
+ active = dict(best_full_hit)
468
772
 
469
- pipeline_status = "ok"
773
+ cheapest = min(candidates, key=lambda item: (item.get('tokens_per_expected_hit') or 10**9, item.get('tokens_per_query', 10**9)))
774
+ token_delta = int(active['total_tokens'] - cheapest['total_tokens'])
775
+ pct_delta = round((token_delta / max(1, int(cheapest['total_tokens']))) * 100, 1)
776
+
777
+ pipeline_status = 'ok'
470
778
  if (
471
- active.get("full_hit_rate_pct", 0.0) >= 100.0
472
- and active.get("label") != cheapest.get("label")
779
+ active.get('full_hit_rate_pct', 0.0) >= 100.0
780
+ and active.get('tokens_per_query', 10**9) > cheapest.get('tokens_per_query', 10**9)
473
781
  and pct_delta > 40.0
474
782
  ):
475
- pipeline_status = "accuracy_locked_but_cost_risky"
783
+ pipeline_status = 'accuracy_locked_but_cost_risky'
476
784
 
477
785
  cost_analysis = {
478
- "cheapest_label": cheapest.get("label"),
479
- "selected_label": active.get("label"),
480
- "selected_vs_cheapest_token_delta": token_delta,
481
- "selected_vs_cheapest_pct_delta": pct_delta,
482
- "risk_threshold_pct": 40.0,
483
- "cost_risky": pipeline_status == "accuracy_locked_but_cost_risky",
786
+ 'cheapest_label': cheapest.get('label'),
787
+ 'selected_label': active.get('label'),
788
+ 'selected_vs_cheapest_token_delta': token_delta,
789
+ 'selected_vs_cheapest_pct_delta': pct_delta,
790
+ 'risk_threshold_pct': 40.0,
791
+ 'cost_risky': pipeline_status == 'accuracy_locked_but_cost_risky',
484
792
  }
485
793
 
486
- _write_back(repo_path, active, case_source, pipeline_status, cost_analysis)
794
+ _write_back(repo_path, active, case_source, pipeline_status, cost_analysis, family_policy)
487
795
 
488
796
  report = {
489
- "status": pipeline_status,
490
- "repo": repo_path.as_posix(),
491
- "benchmark_size": len(cases),
492
- "requested_benchmark_size": int(benchmark_size),
493
- "efficiency_iterations": int(efficiency_iterations),
494
- "case_source": case_source,
495
- "cost_analysis": cost_analysis,
496
- "stages": {
497
- "accuracy_candidates": [slices_summary, plain_summary, plain_gap_summary],
498
- "selected_after_accuracy": best,
499
- "efficiency_trials": efficiency_trials,
500
- "selected_final": active,
797
+ 'status': pipeline_status,
798
+ 'repo': repo_path.as_posix(),
799
+ 'benchmark_size': len(cases),
800
+ 'requested_benchmark_size': int(benchmark_size),
801
+ 'efficiency_iterations': int(efficiency_iterations),
802
+ 'case_source': case_source,
803
+ 'family_policy': family_policy,
804
+ 'cost_analysis': cost_analysis,
805
+ 'phases': {
806
+ 'bootstrap': bootstrap_diagnostics,
807
+ 'accuracy_rounds': accuracy_rounds,
808
+ 'selected_accuracy_round': selected_accuracy_round,
809
+ 'efficiency_trials': efficiency_trials,
810
+ },
811
+ 'stages': {
812
+ 'accuracy_candidates': candidates,
813
+ 'selected_after_accuracy': selected_accuracy_round['summary'],
814
+ 'efficiency_trials': efficiency_trials,
815
+ 'selected_final': active,
501
816
  },
502
817
  }
503
818
 
504
- planning_dir = repo_path / ".planning"
819
+ planning_dir = repo_path / '.planning'
505
820
  planning_dir.mkdir(parents=True, exist_ok=True)
506
- out_path = planning_dir / "post_init_adaptation_report.json"
507
- out_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
508
- report["report_path"] = out_path.as_posix()
821
+ out_path = planning_dir / 'post_init_adaptation_report.json'
822
+ out_path.write_text(json.dumps(report, indent=2), encoding='utf-8')
823
+ report['report_path'] = out_path.as_posix()
509
824
  return report
825
+
826
+
827
+
828
+
829
+
830
+
831
+
832
+
833
+
834
+
835
+
836
+
837
+
838
+
839
+
840
+
841
+
842
+
843
+
844
+
@@ -1,7 +1,8 @@
1
- """One-command repository setup for GCIE."""
1
+ """Repository setup and teardown helpers for GCIE."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import shutil
5
6
  from pathlib import Path
6
7
 
7
8
  from context.architecture_bootstrap import ensure_initialized
@@ -24,6 +25,26 @@ def _copy_if_needed(source: Path, target: Path, *, force: bool) -> str:
24
25
  return "written"
25
26
 
26
27
 
28
+ def _is_within(base: Path, target: Path) -> bool:
29
+ try:
30
+ target.resolve().relative_to(base.resolve())
31
+ return True
32
+ except ValueError:
33
+ return False
34
+
35
+
36
+ def _remove_path(root: Path, target: Path) -> str:
37
+ if not _is_within(root, target):
38
+ return "skipped_outside_repo"
39
+ if not target.exists():
40
+ return "not_found"
41
+ if target.is_dir():
42
+ shutil.rmtree(target)
43
+ return "removed_dir"
44
+ target.unlink()
45
+ return "removed_file"
46
+
47
+
27
48
  def run_setup(
28
49
  path: str,
29
50
  *,
@@ -84,4 +105,34 @@ def run_setup(
84
105
  else:
85
106
  status["adaptation"] = {"skipped": True}
86
107
 
87
- return status
108
+ return status
109
+
110
+
111
+ def run_remove(
112
+ path: str,
113
+ *,
114
+ remove_planning: bool = False,
115
+ remove_gcie_usage: bool = True,
116
+ remove_setup_doc: bool = True,
117
+ ) -> dict:
118
+ """Remove GCIE-managed files from a repository."""
119
+ target = Path(path).resolve()
120
+ target.mkdir(parents=True, exist_ok=True)
121
+
122
+ removed: dict[str, str] = {}
123
+ removed[".gcie"] = _remove_path(target, target / ".gcie")
124
+
125
+ if remove_gcie_usage:
126
+ removed["GCIE_USAGE.md"] = _remove_path(target, target / "GCIE_USAGE.md")
127
+
128
+ if remove_setup_doc:
129
+ removed["SETUP_ANY_REPO.md"] = _remove_path(target, target / "SETUP_ANY_REPO.md")
130
+
131
+ if remove_planning:
132
+ removed[".planning"] = _remove_path(target, target / ".planning")
133
+
134
+ return {
135
+ "repo": target.as_posix(),
136
+ "removed": removed,
137
+ "remove_planning": remove_planning,
138
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pmaddire/gcie",
3
- "version": "0.1.8",
3
+ "version": "0.1.10",
4
4
  "description": "GraphCode Intelligence Engine one-command setup and context CLI",
5
5
  "bin": {
6
6
  "gcie": "bin/gcie.js",