@pmaddire/gcie 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- """One-command repository setup for GCIE."""
1
+ """One-command repository setup for GCIE."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
 
7
7
  from context.architecture_bootstrap import ensure_initialized
8
8
 
9
+ from .adaptation import run_post_init_adaptation
9
10
  from .index import run_index
10
11
 
11
12
 
@@ -30,6 +31,9 @@ def run_setup(
30
31
  include_agent_usage: bool = True,
31
32
  include_setup_doc: bool = True,
32
33
  run_index_pass: bool = True,
34
+ run_adaptation_pass: bool = False,
35
+ adaptation_benchmark_size: int = 10,
36
+ adaptation_efficiency_iterations: int = 5,
33
37
  ) -> dict:
34
38
  """Initialize a repository so GCIE can be used immediately."""
35
39
  target = Path(path).resolve()
@@ -49,9 +53,9 @@ def run_setup(
49
53
  copied: dict[str, str] = {}
50
54
 
51
55
  if include_agent_usage:
52
- copied["AGENT_USAGE.md"] = _copy_if_needed(
53
- source_root / "AGENT_USAGE.md",
54
- target / "AGENT_USAGE.md",
56
+ copied["GCIE_USAGE.md"] = _copy_if_needed(
57
+ source_root / "GCIE_USAGE.md",
58
+ target / "GCIE_USAGE.md",
55
59
  force=force,
56
60
  )
57
61
 
@@ -70,4 +74,14 @@ def run_setup(
70
74
  else:
71
75
  status["index"] = {"skipped": True}
72
76
 
77
+ if run_adaptation_pass:
78
+ status["adaptation"] = run_post_init_adaptation(
79
+ target.as_posix(),
80
+ benchmark_size=adaptation_benchmark_size,
81
+ efficiency_iterations=adaptation_efficiency_iterations,
82
+ clear_profile=True,
83
+ )
84
+ else:
85
+ status["adaptation"] = {"skipped": True}
86
+
73
87
  return status
@@ -51,6 +51,7 @@ _ARCH_KEYWORDS = {
51
51
  }
52
52
 
53
53
 
54
+
54
55
  @dataclass
55
56
  class ArchitectureSliceResult:
56
57
  query: str
@@ -100,7 +101,7 @@ def _snippet_from_lines(lines: list[str], max_lines: int) -> str:
100
101
  def _collect_snippets(repo_path: Path, files: list[str], max_lines: int = 120) -> tuple[list[dict], list[str]]:
101
102
  snippets: list[dict] = []
102
103
  missing: list[str] = []
103
- for rel_path in files:
104
+ for idx, rel_path in enumerate(files):
104
105
  file_path = repo_path / rel_path
105
106
  if not file_path.exists():
106
107
  missing.append(rel_path)
@@ -54,4 +54,4 @@ def select_snippets(
54
54
  seen_contents.add(item.content)
55
55
  used_tokens += t
56
56
 
57
- return tuple(selected)
57
+ return tuple(selected)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pmaddire/gcie",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "GraphCode Intelligence Engine one-command setup and context CLI",
5
5
  "bin": {
6
6
  "gcie": "bin/gcie.js",
@@ -126,7 +126,14 @@ def _semantic_node_scores(
126
126
  return {}, ()
127
127
 
128
128
  retriever = SemanticRetriever([text for _, text in entries])
129
- semantic_top_k = min(max(top_k * 4, 24), len(entries))
129
+ # Keep semantic fan-out intentionally bounded to reduce noisy candidates.
130
+ if len(query_terms) >= 10:
131
+ fanout = 3.0
132
+ elif len(query_terms) >= 6:
133
+ fanout = 2.5
134
+ else:
135
+ fanout = 2.0
136
+ semantic_top_k = min(max(int(round(top_k * fanout)), max(12, top_k + 4)), len(entries))
130
137
  hits = retriever.retrieve(query, top_k=semantic_top_k)
131
138
 
132
139
  aggregates: dict[str, _SemanticAggregate] = {}
@@ -247,3 +254,4 @@ def hybrid_retrieve(
247
254
  out.append(HybridCandidate(node_id=item.node_id, score=item.score, rationale=rationale))
248
255
 
249
256
  return tuple(out)
257
+
package/AGENT_USAGE.md DELETED
@@ -1,231 +0,0 @@
1
- # GCIE Agent Usage (Portable Default)
2
-
3
- This file is designed to be dropped into any repository and used immediately.
4
-
5
- ## Goal
6
-
7
- Retrieve the smallest useful context while preserving edit safety.
8
-
9
- Priority order:
10
- 1. accuracy (must-have coverage)
11
- 2. full-hit reliability
12
- 3. token efficiency
13
-
14
- ## Quick Start (Any Repo)
15
-
16
- 1. Identify must-have context categories for the task:
17
- - implementation file(s)
18
- - wiring/orchestration file(s)
19
- - validation surface when risk is non-trivial
20
- - this may be a test, spec, schema, contract, migration, config, or CLI surface depending on the repo
21
-
22
- 2. Run one primary retrieval with a file-first, symbol-heavy query:
23
- ```powershell
24
- gcie.cmd context <path> "<file-first symbol-heavy query>" --intent <edit|debug|refactor|explore> --budget <shape budget>
25
- ```
26
-
27
- 3. Check must-have coverage.
28
-
29
- 4. If one must-have file is missing, run targeted gap-fill for only that file.
30
-
31
- 5. Stop immediately when must-have coverage is complete.
32
-
33
- ## Retrieval Modes (Adaptive Router)
34
-
35
- Use three modes and choose by task family:
36
-
37
- 1. `plain-context-first` (default for most tasks)
38
- 2. `slicer-first` (for hard routed architecture or multi-hop families)
39
- 3. `direct-file-check` (verification and fast gap closure)
40
-
41
- Plain-context command:
42
- ```powershell
43
- gcie.cmd context <path> "<query>" --intent <edit|debug|refactor|explore> --budget <shape budget>
44
- ```
45
-
46
- Slicer-first command:
47
- ```powershell
48
- gcie.cmd context-slices <path> "<query>" --intent <edit|debug|refactor|explore>
49
- ```
50
-
51
- Direct-file-check command:
52
- ```powershell
53
- rg -n "<symbol1|symbol2|symbol3>" <likely files or subtree>
54
- ```
55
-
56
- Mode-switch rule:
57
- - start with `plain-context-first` unless setup calibration proved another mode is better for that family
58
- - use `slicer-first` only for families where routing/architecture slices repeatedly outperform plain context
59
- - use `direct-file-check` whenever must-have coverage is uncertain or one file remains missing
60
- - do not keep retrying the same mode indefinitely; switch after one weak result
61
-
62
- Portable starter policy:
63
- - default all families to `plain-context-first`
64
- - after the first 10-20 tasks, promote individual families to `slicer-first` only if benchmarked better
65
- - keep a family on plain-context if slicer is more expensive with no accuracy gain
66
-
67
- ## Architecture Tracking (Portable, In-Repo)
68
-
69
- To make slicer mode adapt as the repo changes, keep architecture tracking inside the repo where GCIE runs.
70
-
71
- Track these files under `.gcie/`:
72
- - `.gcie/architecture.md`
73
- - `.gcie/architecture_index.json`
74
- - `.gcie/context_config.json`
75
-
76
- How to keep it adaptive:
77
- 1. Bootstrap from user docs once (read-only):
78
- - `ARCHITECTURE.md`, `README.md`, `PROJECT.md`, `docs/architecture.md`, `docs/system_design.md`
79
- 2. Use `.gcie/architecture.md` as the GCIE-owned working architecture map.
80
- 3. Refresh `.gcie/architecture.md` and `.gcie/architecture_index.json` when structural changes happen:
81
- - new subsystem
82
- - major module split/merge
83
- - interface/boundary change
84
- - dependency-direction change
85
- - active work-area shift
86
- 4. Do not overwrite user-owned docs unless explicitly asked.
87
-
88
- Architecture confidence rule:
89
- - if architecture slice confidence is low or required mappings are stale/missing, fall back to plain `context` automatically
90
- - record fallback reason in `.gcie/context_config.json` when bypassing slicer mode
91
-
92
- ## Portable Defaults (Task-Shape Based)
93
-
94
- Use these as a starting point in new repos.
95
-
96
- Primary pass budgets:
97
- - `auto`: simple same-layer or strong single-file lookup
98
- - `900`: same-family two-file lookup, frontend-local component lookup
99
- - `1100`: backend/config pair, same-layer backend pair
100
- - `1150`: cross-layer UI/API flow
101
- - `1300-1400`: explicit multi-hop chain (3+ linked files)
102
-
103
- Gap-fill budgets:
104
- - missing general implementation/wiring file: `900`
105
- - missing small orchestration or entry file: `500`
106
-
107
- Scope rule:
108
- - use the smallest path scope that still contains the expected files
109
- - use repo root (`.`) only for true cross-layer or backend orchestration recovery
110
- - if explicit targets cluster in one subtree, broad repo-root retrieval is often worse than subtree retrieval
111
-
112
- ## Query Construction (Portable)
113
-
114
- Use this pattern:
115
-
116
- `<file-a> <file-b> <function/component> <state-or-arg> <route/flag> <config-key>`
117
-
118
- Guidelines:
119
- - include explicit file paths when known
120
- - include 2 to 6 distinctive symbols
121
- - include a caller or entry anchor when the target is indirect
122
- - avoid vague summaries and long laundry-list queries
123
-
124
- ## Adaptive Loop (When Retrieval Is Weak)
125
-
126
- Treat retrieval as weak if any of the following are true:
127
- - missing implementation or wiring category
128
- - generic entry/support files dominate
129
- - only tiny snippets from the target file appear, with no useful implementation body
130
- - expected cross-layer endpoint is missing
131
-
132
- Adapt in this order, one change at a time:
133
-
134
- 1. Query upgrade:
135
- - add explicit file paths
136
- - add missing symbols such as functions, props, routes, flags, or keys
137
- - add caller or entry anchor
138
-
139
- 2. Scope correction:
140
- - noisy root results: move to subtree scope
141
- - missing cross-layer or backend anchor: use a targeted root query for that file
142
-
143
- 3. Budget bump:
144
- - raise one rung only, roughly `+100` to `+250`
145
-
146
- 4. Targeted gap-fill:
147
- - fetch only the missing must-have file(s)
148
-
149
- 5. Decompose chain, only if needed:
150
- - for 4+ hops, split into adjacent 2-3 file hops
151
-
152
- ## Safe Efficiency Mode
153
-
154
- Use only after stable coverage is achieved.
155
-
156
- Rules:
157
- - do not lower primary budgets for known hard shapes
158
- - for a single missing file, try `800` before `900` only if the first pass already found same-family context
159
- - if `800` misses, immediately retry the stable default
160
- - if any miss persists, revert that task family to stable settings
161
-
162
- Note:
163
- - `800` is an experimental efficiency step-down, not a portable default truth
164
- - keep it only if it preserves full must-have coverage in the current repo
165
-
166
- ## Verification Rule
167
-
168
- Always verify with a quick local symbol check before editing:
169
-
170
- ```powershell
171
- rg -n "symbol1|symbol2|symbol3" <likely files>
172
- ```
173
-
174
- GCIE is a context compressor, not the final truth gate.
175
-
176
- If one required file is still missing after retrieval, do a direct-file-check first, then run one targeted GCIE call only for that file.
177
-
178
- ## Portable Stop Rule
179
-
180
- Stop retrieval when all must-have categories are covered:
181
- - implementation
182
- - wiring/orchestration
183
- - validation surface, when risk justifies it
184
-
185
- Do not continue increasing budgets after sufficiency is reached.
186
-
187
- ## First 5 Tasks Calibration (Minimal)
188
-
189
- For a new repo, track these fields for the first 5 tasks:
190
- - task shape
191
- - primary budget
192
- - gap-fill used (Y/N)
193
- - must-have full-hit (Y/N)
194
- - total tokens
195
-
196
- If a miss pattern repeats 2+ times in one task family:
197
- - add one local override for that family only
198
- - keep all other families on portable defaults
199
-
200
- Update necessity rule:
201
- - explicit workflow updates are optional, not required for baseline operation
202
- - if results are stable, keep using portable defaults without changes
203
- - add or update a local override only when the same miss pattern repeats 2-3 times
204
-
205
- ## Optional Appendix: Repo-Specific Overrides (Example)
206
-
207
- These are examples from one mixed-layer repo and are not universal defaults.
208
-
209
- 1. `cross_layer_ui_api` override:
210
- ```powershell
211
- gcie.cmd context frontend "src/App.jsx src/main.jsx <symbols>" --intent edit --budget 900
212
- gcie.cmd context . "app.py start_convert selected_theme selectedTheme no_ai" --intent edit --budget 900
213
- ```
214
-
215
- 2. Stage 3/4 planner-builder pair override (`Plan_slides.py` + `Build_pptx.py`):
216
- ```powershell
217
- gcie.cmd context . "Plan_slides.py content_slides section_divider figure_slides table_slide" --intent <intent> --budget 900
218
- gcie.cmd context . "Build_pptx.py build_pptx render_eq_png apply_theme THEME_CHOICES" --intent <intent> --budget 900
219
- ```
220
-
221
- 3. Stage 1/2 with `main.py` override:
222
- ```powershell
223
- gcie.cmd context . "Analyze_pdf_structure.py Extract_pdf_content.py extract_pages split_into_sections extract_images enrich_with_ai" --intent explore --budget 1100
224
- gcie.cmd context . "main.py Stage 1 Stage 2 extract_pages enrich_with_ai" --intent explore --budget 500
225
- ```
226
-
227
- 4. Guardrail example:
228
- - keep the stable workflow for families that regress under split retrieval
229
- - example: `llm_client.py + Analyze_pdf_structure.py + Extract_pdf_content.py` in one benchmarked repo
230
-
231
- If this appendix does not match your repo, ignore it and use only the portable sections above.