@pmaddire/gcie 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +6 -2
- package/GCIE_USAGE.md +290 -0
- package/README.md +31 -9
- package/SETUP_ANY_REPO.md +2 -2
- package/bench_questions.py +69 -0
- package/cli/app.py +198 -162
- package/cli/commands/adaptation.py +341 -0
- package/cli/commands/context.py +682 -34
- package/cli/commands/context_slices.py +1322 -601
- package/cli/commands/setup.py +18 -4
- package/context/architecture_slicer.py +2 -1
- package/llm_context/snippet_selector.py +1 -1
- package/package.json +1 -1
- package/retrieval/hybrid_retriever.py +9 -1
- package/AGENT_USAGE.md +0 -231
package/cli/commands/setup.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""One-command repository setup for GCIE."""
|
|
1
|
+
"""One-command repository setup for GCIE."""
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
@@ -6,6 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
|
|
7
7
|
from context.architecture_bootstrap import ensure_initialized
|
|
8
8
|
|
|
9
|
+
from .adaptation import run_post_init_adaptation
|
|
9
10
|
from .index import run_index
|
|
10
11
|
|
|
11
12
|
|
|
@@ -30,6 +31,9 @@ def run_setup(
|
|
|
30
31
|
include_agent_usage: bool = True,
|
|
31
32
|
include_setup_doc: bool = True,
|
|
32
33
|
run_index_pass: bool = True,
|
|
34
|
+
run_adaptation_pass: bool = False,
|
|
35
|
+
adaptation_benchmark_size: int = 10,
|
|
36
|
+
adaptation_efficiency_iterations: int = 5,
|
|
33
37
|
) -> dict:
|
|
34
38
|
"""Initialize a repository so GCIE can be used immediately."""
|
|
35
39
|
target = Path(path).resolve()
|
|
@@ -49,9 +53,9 @@ def run_setup(
|
|
|
49
53
|
copied: dict[str, str] = {}
|
|
50
54
|
|
|
51
55
|
if include_agent_usage:
|
|
52
|
-
copied["AGENT_USAGE.md"] = _copy_if_needed(
|
|
53
|
-
source_root / "AGENT_USAGE.md",
|
|
54
|
-
target / "AGENT_USAGE.md",
|
|
56
|
+
copied["GCIE_USAGE.md"] = _copy_if_needed(
|
|
57
|
+
source_root / "GCIE_USAGE.md",
|
|
58
|
+
target / "GCIE_USAGE.md",
|
|
55
59
|
force=force,
|
|
56
60
|
)
|
|
57
61
|
|
|
@@ -70,4 +74,14 @@ def run_setup(
|
|
|
70
74
|
else:
|
|
71
75
|
status["index"] = {"skipped": True}
|
|
72
76
|
|
|
77
|
+
if run_adaptation_pass:
|
|
78
|
+
status["adaptation"] = run_post_init_adaptation(
|
|
79
|
+
target.as_posix(),
|
|
80
|
+
benchmark_size=adaptation_benchmark_size,
|
|
81
|
+
efficiency_iterations=adaptation_efficiency_iterations,
|
|
82
|
+
clear_profile=True,
|
|
83
|
+
)
|
|
84
|
+
else:
|
|
85
|
+
status["adaptation"] = {"skipped": True}
|
|
86
|
+
|
|
73
87
|
return status
|
|
@@ -51,6 +51,7 @@ _ARCH_KEYWORDS = {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
|
|
54
|
+
|
|
54
55
|
@dataclass
|
|
55
56
|
class ArchitectureSliceResult:
|
|
56
57
|
query: str
|
|
@@ -100,7 +101,7 @@ def _snippet_from_lines(lines: list[str], max_lines: int) -> str:
|
|
|
100
101
|
def _collect_snippets(repo_path: Path, files: list[str], max_lines: int = 120) -> tuple[list[dict], list[str]]:
|
|
101
102
|
snippets: list[dict] = []
|
|
102
103
|
missing: list[str] = []
|
|
103
|
-
for rel_path in files:
|
|
104
|
+
for idx, rel_path in enumerate(files):
|
|
104
105
|
file_path = repo_path / rel_path
|
|
105
106
|
if not file_path.exists():
|
|
106
107
|
missing.append(rel_path)
|
package/package.json
CHANGED
|
@@ -126,7 +126,14 @@ def _semantic_node_scores(
|
|
|
126
126
|
return {}, ()
|
|
127
127
|
|
|
128
128
|
retriever = SemanticRetriever([text for _, text in entries])
|
|
129
|
-
|
|
129
|
+
# Keep semantic fan-out intentionally bounded to reduce noisy candidates.
|
|
130
|
+
if len(query_terms) >= 10:
|
|
131
|
+
fanout = 3.0
|
|
132
|
+
elif len(query_terms) >= 6:
|
|
133
|
+
fanout = 2.5
|
|
134
|
+
else:
|
|
135
|
+
fanout = 2.0
|
|
136
|
+
semantic_top_k = min(max(int(round(top_k * fanout)), max(12, top_k + 4)), len(entries))
|
|
130
137
|
hits = retriever.retrieve(query, top_k=semantic_top_k)
|
|
131
138
|
|
|
132
139
|
aggregates: dict[str, _SemanticAggregate] = {}
|
|
@@ -247,3 +254,4 @@ def hybrid_retrieve(
|
|
|
247
254
|
out.append(HybridCandidate(node_id=item.node_id, score=item.score, rationale=rationale))
|
|
248
255
|
|
|
249
256
|
return tuple(out)
|
|
257
|
+
|
package/AGENT_USAGE.md
DELETED
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
# GCIE Agent Usage (Portable Default)
|
|
2
|
-
|
|
3
|
-
This file is designed to be dropped into any repository and used immediately.
|
|
4
|
-
|
|
5
|
-
## Goal
|
|
6
|
-
|
|
7
|
-
Retrieve the smallest useful context while preserving edit safety.
|
|
8
|
-
|
|
9
|
-
Priority order:
|
|
10
|
-
1. accuracy (must-have coverage)
|
|
11
|
-
2. full-hit reliability
|
|
12
|
-
3. token efficiency
|
|
13
|
-
|
|
14
|
-
## Quick Start (Any Repo)
|
|
15
|
-
|
|
16
|
-
1. Identify must-have context categories for the task:
|
|
17
|
-
- implementation file(s)
|
|
18
|
-
- wiring/orchestration file(s)
|
|
19
|
-
- validation surface when risk is non-trivial
|
|
20
|
-
- this may be a test, spec, schema, contract, migration, config, or CLI surface depending on the repo
|
|
21
|
-
|
|
22
|
-
2. Run one primary retrieval with a file-first, symbol-heavy query:
|
|
23
|
-
```powershell
|
|
24
|
-
gcie.cmd context <path> "<file-first symbol-heavy query>" --intent <edit|debug|refactor|explore> --budget <shape budget>
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
3. Check must-have coverage.
|
|
28
|
-
|
|
29
|
-
4. If one must-have file is missing, run targeted gap-fill for only that file.
|
|
30
|
-
|
|
31
|
-
5. Stop immediately when must-have coverage is complete.
|
|
32
|
-
|
|
33
|
-
## Retrieval Modes (Adaptive Router)
|
|
34
|
-
|
|
35
|
-
Use three modes and choose by task family:
|
|
36
|
-
|
|
37
|
-
1. `plain-context-first` (default for most tasks)
|
|
38
|
-
2. `slicer-first` (for hard routed architecture or multi-hop families)
|
|
39
|
-
3. `direct-file-check` (verification and fast gap closure)
|
|
40
|
-
|
|
41
|
-
Plain-context command:
|
|
42
|
-
```powershell
|
|
43
|
-
gcie.cmd context <path> "<query>" --intent <edit|debug|refactor|explore> --budget <shape budget>
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
Slicer-first command:
|
|
47
|
-
```powershell
|
|
48
|
-
gcie.cmd context-slices <path> "<query>" --intent <edit|debug|refactor|explore>
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
Direct-file-check command:
|
|
52
|
-
```powershell
|
|
53
|
-
rg -n "<symbol1|symbol2|symbol3>" <likely files or subtree>
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
Mode-switch rule:
|
|
57
|
-
- start with `plain-context-first` unless setup calibration proved another mode is better for that family
|
|
58
|
-
- use `slicer-first` only for families where routing/architecture slices repeatedly outperform plain context
|
|
59
|
-
- use `direct-file-check` whenever must-have coverage is uncertain or one file remains missing
|
|
60
|
-
- do not keep retrying the same mode indefinitely; switch after one weak result
|
|
61
|
-
|
|
62
|
-
Portable starter policy:
|
|
63
|
-
- default all families to `plain-context-first`
|
|
64
|
-
- after first 10-20 tasks, promote individual families to `slicer-first` only if benchmarked better
|
|
65
|
-
- keep a family on plain-context if slicer is more expensive with no accuracy gain
|
|
66
|
-
|
|
67
|
-
## Architecture Tracking (Portable, In-Repo)
|
|
68
|
-
|
|
69
|
-
To make slicer mode adapt as the repo changes, keep architecture tracking inside the repo where GCIE runs.
|
|
70
|
-
|
|
71
|
-
Track these files under `.gcie/`:
|
|
72
|
-
- `.gcie/architecture.md`
|
|
73
|
-
- `.gcie/architecture_index.json`
|
|
74
|
-
- `.gcie/context_config.json`
|
|
75
|
-
|
|
76
|
-
How to keep it adaptive:
|
|
77
|
-
1. Bootstrap from user docs once (read-only):
|
|
78
|
-
- `ARCHITECTURE.md`, `README.md`, `PROJECT.md`, `docs/architecture.md`, `docs/system_design.md`
|
|
79
|
-
2. Use `.gcie/architecture.md` as the GCIE-owned working architecture map.
|
|
80
|
-
3. Refresh `.gcie/architecture.md` and `.gcie/architecture_index.json` when structural changes happen:
|
|
81
|
-
- new subsystem
|
|
82
|
-
- major module split/merge
|
|
83
|
-
- interface/boundary change
|
|
84
|
-
- dependency-direction change
|
|
85
|
-
- active work-area shift
|
|
86
|
-
4. Do not overwrite user-owned docs unless explicitly asked.
|
|
87
|
-
|
|
88
|
-
Architecture confidence rule:
|
|
89
|
-
- if architecture slice confidence is low or required mappings are stale/missing, fall back to plain `context` automatically
|
|
90
|
-
- record fallback reason in `.gcie/context_config.json` when bypassing slicer mode
|
|
91
|
-
|
|
92
|
-
## Portable Defaults (Task-Shape Based)
|
|
93
|
-
|
|
94
|
-
Use these as a starting point in new repos.
|
|
95
|
-
|
|
96
|
-
Primary pass budgets:
|
|
97
|
-
- `auto`: simple same-layer or strong single-file lookup
|
|
98
|
-
- `900`: same-family two-file lookup, frontend-local component lookup
|
|
99
|
-
- `1100`: backend/config pair, same-layer backend pair
|
|
100
|
-
- `1150`: cross-layer UI/API flow
|
|
101
|
-
- `1300-1400`: explicit multi-hop chain (3+ linked files)
|
|
102
|
-
|
|
103
|
-
Gap-fill budgets:
|
|
104
|
-
- missing general implementation/wiring file: `900`
|
|
105
|
-
- missing small orchestration or entry file: `500`
|
|
106
|
-
|
|
107
|
-
Scope rule:
|
|
108
|
-
- use the smallest path scope that still contains the expected files
|
|
109
|
-
- use repo root (`.`) only for true cross-layer or backend orchestration recovery
|
|
110
|
-
- if explicit targets cluster in one subtree, broad repo-root retrieval is often worse than subtree retrieval
|
|
111
|
-
|
|
112
|
-
## Query Construction (Portable)
|
|
113
|
-
|
|
114
|
-
Use this pattern:
|
|
115
|
-
|
|
116
|
-
`<file-a> <file-b> <function/component> <state-or-arg> <route/flag> <config-key>`
|
|
117
|
-
|
|
118
|
-
Guidelines:
|
|
119
|
-
- include explicit file paths when known
|
|
120
|
-
- include 2 to 6 distinctive symbols
|
|
121
|
-
- include a caller or entry anchor when the target is indirect
|
|
122
|
-
- avoid vague summaries and long laundry-list queries
|
|
123
|
-
|
|
124
|
-
## Adaptive Loop (When Retrieval Is Weak)
|
|
125
|
-
|
|
126
|
-
Treat retrieval as weak if any are true:
|
|
127
|
-
- missing implementation or wiring category
|
|
128
|
-
- generic entry/support files dominate
|
|
129
|
-
- only tiny snippets from the target file appear, with no useful implementation body
|
|
130
|
-
- expected cross-layer endpoint is missing
|
|
131
|
-
|
|
132
|
-
Adapt in this order, one change at a time:
|
|
133
|
-
|
|
134
|
-
1. Query upgrade:
|
|
135
|
-
- add explicit file paths
|
|
136
|
-
- add missing symbols such as functions, props, routes, flags, or keys
|
|
137
|
-
- add caller or entry anchor
|
|
138
|
-
|
|
139
|
-
2. Scope correction:
|
|
140
|
-
- noisy root results: move to subtree scope
|
|
141
|
-
- missing cross-layer or backend anchor: use a targeted root query for that file
|
|
142
|
-
|
|
143
|
-
3. Budget bump:
|
|
144
|
-
- raise one rung only, roughly `+100` to `+250`
|
|
145
|
-
|
|
146
|
-
4. Targeted gap-fill:
|
|
147
|
-
- fetch only the missing must-have file(s)
|
|
148
|
-
|
|
149
|
-
5. Decompose chain, only if needed:
|
|
150
|
-
- for 4+ hops, split into adjacent 2-3 file hops
|
|
151
|
-
|
|
152
|
-
## Safe Efficiency Mode
|
|
153
|
-
|
|
154
|
-
Use only after stable coverage is achieved.
|
|
155
|
-
|
|
156
|
-
Rules:
|
|
157
|
-
- do not lower primary budgets for known hard shapes
|
|
158
|
-
- for a single missing file, try `800` before `900` only if the first pass already found same-family context
|
|
159
|
-
- if `800` misses, immediately retry the stable default
|
|
160
|
-
- if any miss persists, revert that task family to stable settings
|
|
161
|
-
|
|
162
|
-
Note:
|
|
163
|
-
- `800` is an experimental efficiency step-down, not a portable default truth
|
|
164
|
-
- keep it only if it preserves full must-have coverage in the current repo
|
|
165
|
-
|
|
166
|
-
## Verification Rule
|
|
167
|
-
|
|
168
|
-
Always verify with a quick local symbol check before editing:
|
|
169
|
-
|
|
170
|
-
```powershell
|
|
171
|
-
rg -n "symbol1|symbol2|symbol3" <likely files>
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
GCIE is a context compressor, not the final truth gate.
|
|
175
|
-
|
|
176
|
-
If one required file is still missing after retrieval, do direct-file-check first, then run one targeted GCIE call only for that file.
|
|
177
|
-
|
|
178
|
-
## Portable Stop Rule
|
|
179
|
-
|
|
180
|
-
Stop retrieval when all must-have categories are covered:
|
|
181
|
-
- implementation
|
|
182
|
-
- wiring/orchestration
|
|
183
|
-
- validation surface, when risk justifies it
|
|
184
|
-
|
|
185
|
-
Do not continue increasing budgets after sufficiency is reached.
|
|
186
|
-
|
|
187
|
-
## First 5 Tasks Calibration (Minimal)
|
|
188
|
-
|
|
189
|
-
For a new repo, track these fields for the first 5 tasks:
|
|
190
|
-
- task shape
|
|
191
|
-
- primary budget
|
|
192
|
-
- gap-fill used (Y/N)
|
|
193
|
-
- must-have full-hit (Y/N)
|
|
194
|
-
- total tokens
|
|
195
|
-
|
|
196
|
-
If a miss pattern repeats 2+ times in one task family:
|
|
197
|
-
- add one local override for that family only
|
|
198
|
-
- keep all other families on portable defaults
|
|
199
|
-
|
|
200
|
-
Update necessity rule:
|
|
201
|
-
- explicit workflow updates are optional, not required for baseline operation
|
|
202
|
-
- if results are stable, keep using portable defaults without changes
|
|
203
|
-
- add or update a local override only when the same miss pattern repeats 2-3 times
|
|
204
|
-
|
|
205
|
-
## Optional Appendix: Repo-Specific Overrides (Example)
|
|
206
|
-
|
|
207
|
-
These are examples from one mixed-layer repo and are not universal defaults.
|
|
208
|
-
|
|
209
|
-
1. `cross_layer_ui_api` override:
|
|
210
|
-
```powershell
|
|
211
|
-
gcie.cmd context frontend "src/App.jsx src/main.jsx <symbols>" --intent edit --budget 900
|
|
212
|
-
gcie.cmd context . "app.py start_convert selected_theme selectedTheme no_ai" --intent edit --budget 900
|
|
213
|
-
```
|
|
214
|
-
|
|
215
|
-
2. Stage 3/4 planner-builder pair override (`Plan_slides.py` + `Build_pptx.py`):
|
|
216
|
-
```powershell
|
|
217
|
-
gcie.cmd context . "Plan_slides.py content_slides section_divider figure_slides table_slide" --intent <intent> --budget 900
|
|
218
|
-
gcie.cmd context . "Build_pptx.py build_pptx render_eq_png apply_theme THEME_CHOICES" --intent <intent> --budget 900
|
|
219
|
-
```
|
|
220
|
-
|
|
221
|
-
3. Stage 1/2 with `main.py` override:
|
|
222
|
-
```powershell
|
|
223
|
-
gcie.cmd context . "Analyze_pdf_structure.py Extract_pdf_content.py extract_pages split_into_sections extract_images enrich_with_ai" --intent explore --budget 1100
|
|
224
|
-
gcie.cmd context . "main.py Stage 1 Stage 2 extract_pages enrich_with_ai" --intent explore --budget 500
|
|
225
|
-
```
|
|
226
|
-
|
|
227
|
-
4. Guardrail example:
|
|
228
|
-
- keep the stable workflow for families that regress under split retrieval
|
|
229
|
-
- example: `llm_client.py + Analyze_pdf_structure.py + Extract_pdf_content.py` in one benchmarked repo
|
|
230
|
-
|
|
231
|
-
If this appendix does not match your repo, ignore it and use only the portable sections above.
|