@pmaddire/gcie 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +256 -0
- package/AGENT_USAGE.md +231 -0
- package/ARCHITECTURE.md +151 -0
- package/CLAUDE.md +69 -0
- package/DEBUGGING_PLAYBOOK.md +160 -0
- package/KNOWLEDGE_INDEX.md +154 -0
- package/POTENTIAL_UPDATES +130 -0
- package/PROJECT.md +141 -0
- package/README.md +371 -0
- package/REPO_DIGITAL_TWIN.md +98 -0
- package/ROADMAP.md +301 -0
- package/SETUP_ANY_REPO.md +85 -0
- package/bin/gcie-init.js +20 -0
- package/bin/gcie.js +45 -0
- package/cli/__init__.py +1 -0
- package/cli/app.py +163 -0
- package/cli/commands/__init__.py +1 -0
- package/cli/commands/cache.py +35 -0
- package/cli/commands/context.py +2426 -0
- package/cli/commands/context_slices.py +617 -0
- package/cli/commands/debug.py +24 -0
- package/cli/commands/index.py +17 -0
- package/cli/commands/query.py +20 -0
- package/cli/commands/setup.py +73 -0
- package/config/__init__.py +1 -0
- package/config/scanner_config.py +82 -0
- package/context/__init__.py +1 -0
- package/context/architecture_bootstrap.py +170 -0
- package/context/architecture_index.py +185 -0
- package/context/architecture_parser.py +170 -0
- package/context/architecture_slicer.py +308 -0
- package/context/context_router.py +70 -0
- package/context/fallback_evaluator.py +21 -0
- package/coverage_integration/__init__.py +1 -0
- package/coverage_integration/coverage_loader.py +55 -0
- package/debugging/__init__.py +12 -0
- package/debugging/bug_localizer.py +81 -0
- package/debugging/execution_path_analyzer.py +42 -0
- package/embeddings/__init__.py +6 -0
- package/embeddings/encoder.py +45 -0
- package/embeddings/faiss_index.py +72 -0
- package/git_integration/__init__.py +1 -0
- package/git_integration/git_miner.py +78 -0
- package/graphs/__init__.py +17 -0
- package/graphs/call_graph.py +70 -0
- package/graphs/code_graph.py +81 -0
- package/graphs/execution_graph.py +35 -0
- package/graphs/git_graph.py +43 -0
- package/graphs/graph_store.py +25 -0
- package/graphs/node_factory.py +21 -0
- package/graphs/test_graph.py +65 -0
- package/graphs/validators.py +28 -0
- package/graphs/variable_graph.py +51 -0
- package/knowledge_index/__init__.py +1 -0
- package/knowledge_index/index_builder.py +60 -0
- package/knowledge_index/models.py +35 -0
- package/knowledge_index/query_api.py +38 -0
- package/knowledge_index/store.py +23 -0
- package/llm_context/__init__.py +6 -0
- package/llm_context/context_builder.py +67 -0
- package/llm_context/snippet_selector.py +57 -0
- package/package.json +14 -0
- package/parser/__init__.py +18 -0
- package/parser/ast_parser.py +216 -0
- package/parser/call_resolver.py +52 -0
- package/parser/models.py +75 -0
- package/parser/tree_sitter_adapter.py +56 -0
- package/parser/variable_extractor.py +31 -0
- package/retrieval/__init__.py +17 -0
- package/retrieval/cache.py +22 -0
- package/retrieval/hybrid_retriever.py +249 -0
- package/retrieval/query_parser.py +38 -0
- package/retrieval/ranking.py +43 -0
- package/retrieval/semantic_retriever.py +39 -0
- package/retrieval/symbolic_retriever.py +80 -0
- package/scanner/__init__.py +5 -0
- package/scanner/file_filters.py +37 -0
- package/scanner/models.py +44 -0
- package/scanner/repository_scanner.py +55 -0
- package/scripts/bootstrap_from_github.ps1 +41 -0
- package/tracing/__init__.py +1 -0
- package/tracing/runtime_tracer.py +60 -0
package/ROADMAP.md
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# ROADMAP.md
|
|
2
|
+
|
|
3
|
+
## Roadmap: GraphCode Intelligence Engine (GCIE)
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This roadmap follows the GSD workflow: define phased outcomes, execute atomic tasks, verify phase outputs, then proceed sequentially.
|
|
8
|
+
No implementation begins until the phase plan is approved and phase execution starts.
|
|
9
|
+
|
|
10
|
+
## Phases
|
|
11
|
+
|
|
12
|
+
1. Repository Scanning
|
|
13
|
+
2. AST Parsing Engine
|
|
14
|
+
3. Code Structure Graph
|
|
15
|
+
4. Call Graph
|
|
16
|
+
5. Variable Dependency Graph
|
|
17
|
+
6. Execution Trace Graph
|
|
18
|
+
7. Git History Graph
|
|
19
|
+
8. Test Coverage Graph
|
|
20
|
+
9. Knowledge Index
|
|
21
|
+
10. Symbolic Retrieval System
|
|
22
|
+
11. Semantic Retrieval System
|
|
23
|
+
12. Hybrid Retrieval Pipeline
|
|
24
|
+
13. Bug Localization System
|
|
25
|
+
14. LLM Context Builder
|
|
26
|
+
15. CLI Interface
|
|
27
|
+
16. Performance Optimization
|
|
28
|
+
17. Testing and Documentation
|
|
29
|
+
|
|
30
|
+
## Phase Plans (Atomic Tasks)
|
|
31
|
+
|
|
32
|
+
### Phase 1: Repository Scanning
|
|
33
|
+
Goal: Discover indexable repository artifacts and produce normalized file manifests.
|
|
34
|
+
|
|
35
|
+
Task 1.1
|
|
36
|
+
Target files: `scanner/repository_scanner.py`, `scanner/file_filters.py`, `scanner/models.py`
|
|
37
|
+
Implementation instructions: build recursive scanner, extension filters, ignore rules, and manifest dataclasses.
|
|
38
|
+
Verification steps: run scanner on sample repo; confirm deterministic manifest output.
|
|
39
|
+
Completion criteria: scanner returns stable list of source/test/config files with metadata.
|
|
40
|
+
|
|
41
|
+
Task 1.2
|
|
42
|
+
Target files: `config/scanner_config.py`, `tests/scanner/test_repository_scanner.py`
|
|
43
|
+
Implementation instructions: add include/exclude config model and tests for hidden dirs, large files, and unsupported extensions.
|
|
44
|
+
Verification steps: execute scanner tests and inspect edge-case fixtures.
|
|
45
|
+
Completion criteria: scanner behavior is configurable and validated by passing tests.
|
|
46
|
+
|
|
47
|
+
### Phase 2: AST Parsing Engine
|
|
48
|
+
Goal: Parse Python code and emit normalized symbol-level IR.
|
|
49
|
+
|
|
50
|
+
Task 2.1
|
|
51
|
+
Target files: `parser/ast_parser.py`, `parser/models.py`
|
|
52
|
+
Implementation instructions: implement AST walker extracting modules, classes, functions, parameters, docstrings, assignments, imports, calls.
|
|
53
|
+
Verification steps: parse fixture files and assert extracted symbols/line ranges.
|
|
54
|
+
Completion criteria: parser emits complete symbol IR for valid Python files.
|
|
55
|
+
|
|
56
|
+
Task 2.2
|
|
57
|
+
Target files: `parser/tree_sitter_adapter.py`, `tests/parser/test_parser_fallbacks.py`
|
|
58
|
+
Implementation instructions: define Tree-sitter adapter interface and fallback policy when AST parsing is unavailable or partial.
|
|
59
|
+
Verification steps: simulate fallback scenarios using malformed fixtures.
|
|
60
|
+
Completion criteria: parser pipeline provides predictable fallback behavior with tests.
|
|
61
|
+
|
|
62
|
+
### Phase 3: Code Structure Graph
|
|
63
|
+
Goal: Build structural relationships between files, classes, and functions.
|
|
64
|
+
|
|
65
|
+
Task 3.1
|
|
66
|
+
Target files: `graphs/code_graph.py`, `graphs/node_factory.py`
|
|
67
|
+
Implementation instructions: map IR entities to graph nodes and create `DEFINES`, `CONTAINS`, and `IMPORTS` edges.
|
|
68
|
+
Verification steps: graph snapshot tests on fixture projects.
|
|
69
|
+
Completion criteria: structure graph is connected and queryable by symbol/file.
|
|
70
|
+
|
|
71
|
+
Task 3.2
|
|
72
|
+
Target files: `tests/graphs/test_code_graph.py`, `graphs/validators.py`
|
|
73
|
+
Implementation instructions: add integrity checks for node uniqueness, edge consistency, and missing references.
|
|
74
|
+
Verification steps: run graph validation tests against positive and negative fixtures.
|
|
75
|
+
Completion criteria: invalid graph states are detected and reported.
|
|
76
|
+
|
|
77
|
+
### Phase 4: Call Graph
|
|
78
|
+
Goal: Build function-to-function call relationships.
|
|
79
|
+
|
|
80
|
+
Task 4.1
|
|
81
|
+
Target files: `graphs/call_graph.py`, `parser/call_resolver.py`
|
|
82
|
+
Implementation instructions: resolve local and module-qualified calls; add `CALLS` edges with source location metadata.
|
|
83
|
+
Verification steps: test direct calls, nested calls, and method calls.
|
|
84
|
+
Completion criteria: call graph captures expected caller-callee chains.
|
|
85
|
+
|
|
86
|
+
Task 4.2
|
|
87
|
+
Target files: `tests/graphs/test_call_graph.py`
|
|
88
|
+
Implementation instructions: create fixtures for recursion, alias imports, and unresolved external calls.
|
|
89
|
+
Verification steps: execute tests and verify unresolved calls are labeled, not dropped.
|
|
90
|
+
Completion criteria: call graph behavior is deterministic for known edge cases.
|
|
91
|
+
|
|
92
|
+
### Phase 5: Variable Dependency Graph
|
|
93
|
+
Goal: Model read/write/modify relationships for variables.
|
|
94
|
+
|
|
95
|
+
Task 5.1
|
|
96
|
+
Target files: `graphs/variable_graph.py`, `parser/variable_extractor.py`
|
|
97
|
+
Implementation instructions: detect variable definitions, reads, writes, and updates; create `READS`, `WRITES`, `MODIFIES` edges.
|
|
98
|
+
Verification steps: run tests on assignment and mutation patterns.
|
|
99
|
+
Completion criteria: variable graph correctly identifies modifier functions for target variables.
|
|
100
|
+
|
|
101
|
+
Task 5.2
|
|
102
|
+
Target files: `tests/graphs/test_variable_graph.py`
|
|
103
|
+
Implementation instructions: add fixtures for local/global scope, closures, attributes, and tuple unpacking.
|
|
104
|
+
Verification steps: assert correct scope attribution and edge generation.
|
|
105
|
+
Completion criteria: variable dependency extraction is validated for common Python patterns.
|
|
106
|
+
|
|
107
|
+
### Phase 6: Execution Trace Graph
|
|
108
|
+
Goal: Capture runtime execution paths and map traces to symbols.
|
|
109
|
+
|
|
110
|
+
Task 6.1
|
|
111
|
+
Target files: `graphs/execution_graph.py`, `tracing/runtime_tracer.py`
|
|
112
|
+
Implementation instructions: implement `sys.settrace` event capture and transform call/return events into execution graph edges.
|
|
113
|
+
Verification steps: run tracer on deterministic sample program and compare trace order.
|
|
114
|
+
Completion criteria: execution trace graph reproduces runtime path with timestamped edges.
|
|
115
|
+
|
|
116
|
+
Task 6.2
|
|
117
|
+
Target files: `tests/tracing/test_runtime_tracer.py`, `tests/graphs/test_execution_graph.py`
|
|
118
|
+
Implementation instructions: add tests for recursion, exceptions, and multi-function flows.
|
|
119
|
+
Verification steps: validate path continuity and symbol mapping in traces.
|
|
120
|
+
Completion criteria: trace graph is reliable for debugging path reconstruction.
|
|
121
|
+
|
|
122
|
+
### Phase 7: Git History Graph
|
|
123
|
+
Goal: Relate commits to files and symbols for recency-aware ranking.
|
|
124
|
+
|
|
125
|
+
Task 7.1
|
|
126
|
+
Target files: `graphs/git_graph.py`, `git_integration/git_miner.py`
|
|
127
|
+
Implementation instructions: ingest commit history via GitPython and map changed files/symbol spans to `CHANGED_IN` edges.
|
|
128
|
+
Verification steps: run against fixture repo with known commit history.
|
|
129
|
+
Completion criteria: graph exposes commit recency and symbol touch history.
|
|
130
|
+
|
|
131
|
+
Task 7.2
|
|
132
|
+
Target files: `tests/git/test_git_miner.py`, `tests/graphs/test_git_graph.py`
|
|
133
|
+
Implementation instructions: validate rename handling, author/date extraction, and empty history behavior.
|
|
134
|
+
Verification steps: run tests using temporary repositories.
|
|
135
|
+
Completion criteria: git graph ingestion is resilient and test-covered.
|
|
136
|
+
|
|
137
|
+
### Phase 8: Test Coverage Graph
|
|
138
|
+
Goal: Map tests to covered symbols and files.
|
|
139
|
+
|
|
140
|
+
Task 8.1
|
|
141
|
+
Target files: `graphs/test_graph.py`, `coverage_integration/coverage_loader.py`
|
|
142
|
+
Implementation instructions: import Coverage.py results and link tests to functions/files using `COVERED_BY` edges.
|
|
143
|
+
Verification steps: run sample tests with coverage and compare expected mappings.
|
|
144
|
+
Completion criteria: coverage graph quantifies coverage for retrieval weighting.
|
|
145
|
+
|
|
146
|
+
Task 8.2
|
|
147
|
+
Target files: `tests/coverage/test_coverage_loader.py`, `tests/graphs/test_test_graph.py`
|
|
148
|
+
Implementation instructions: test partial coverage, missing reports, and branch coverage metadata.
|
|
149
|
+
Verification steps: execute loader and graph tests.
|
|
150
|
+
Completion criteria: coverage integration handles both complete and sparse reports.
|
|
151
|
+
|
|
152
|
+
### Phase 9: Knowledge Index
|
|
153
|
+
Goal: Provide fast metadata lookup for symbols and dependencies.
|
|
154
|
+
|
|
155
|
+
Task 9.1
|
|
156
|
+
Target files: `knowledge_index/models.py`, `knowledge_index/index_builder.py`, `knowledge_index/store.py`
|
|
157
|
+
Implementation instructions: implement in-memory entries for files, classes, functions, variables, imports, dependencies.
|
|
158
|
+
Verification steps: build index from fixture IR and assert entry completeness.
|
|
159
|
+
Completion criteria: index supports required entry formats and lookups.
|
|
160
|
+
|
|
161
|
+
Task 9.2
|
|
162
|
+
Target files: `knowledge_index/query_api.py`, `tests/knowledge_index/test_query_api.py`
|
|
163
|
+
Implementation instructions: implement queries: variable modifiers, callers, imports, inheritance.
|
|
164
|
+
Verification steps: run query API tests against known fixtures.
|
|
165
|
+
Completion criteria: index answers core structural queries without LLM calls.
|
|
166
|
+
|
|
167
|
+
### Phase 10: Symbolic Retrieval System
|
|
168
|
+
Goal: Retrieve execution-relevant candidates via graph traversal.
|
|
169
|
+
|
|
170
|
+
Task 10.1
|
|
171
|
+
Target files: `retrieval/symbolic_retriever.py`, `retrieval/query_parser.py`
|
|
172
|
+
Implementation instructions: extract symbols/intents and perform seeded traversal across structure/call/variable/trace graphs.
|
|
173
|
+
Verification steps: evaluate retrieval on debugging query fixtures.
|
|
174
|
+
Completion criteria: symbolic retriever returns focused candidate subgraphs.
|
|
175
|
+
|
|
176
|
+
Task 10.2
|
|
177
|
+
Target files: `tests/retrieval/test_symbolic_retriever.py`
|
|
178
|
+
Implementation instructions: add tests for ambiguous symbols, missing symbols, and multi-hop traversal limits.
|
|
179
|
+
Verification steps: run tests and inspect ranked symbolic results.
|
|
180
|
+
Completion criteria: symbolic retrieval precision is acceptable on benchmark fixtures.
|
|
181
|
+
|
|
182
|
+
### Phase 11: Semantic Retrieval System
|
|
183
|
+
Goal: Rank code candidates by semantic relevance.
|
|
184
|
+
|
|
185
|
+
Task 11.1
|
|
186
|
+
Target files: `embeddings/encoder.py`, `embeddings/faiss_index.py`, `retrieval/semantic_retriever.py`
|
|
187
|
+
Implementation instructions: generate embeddings, maintain FAISS index, and return similarity-ranked snippets.
|
|
188
|
+
Verification steps: run embedding/index smoke tests on fixture corpus.
|
|
189
|
+
Completion criteria: semantic retriever returns deterministic top-k results.
|
|
190
|
+
|
|
191
|
+
Task 11.2
|
|
192
|
+
Target files: `tests/retrieval/test_semantic_retriever.py`, `tests/embeddings/test_faiss_index.py`
|
|
193
|
+
Implementation instructions: validate indexing, updates, persistence hooks, and retrieval quality thresholds.
|
|
194
|
+
Verification steps: execute test suite and compare expected rankings.
|
|
195
|
+
Completion criteria: semantic retrieval module is stable and test-covered.
|
|
196
|
+
|
|
197
|
+
### Phase 12: Hybrid Retrieval Pipeline
|
|
198
|
+
Goal: Fuse symbolic and semantic retrieval with risk weighting.
|
|
199
|
+
|
|
200
|
+
Task 12.1
|
|
201
|
+
Target files: `retrieval/hybrid_retriever.py`, `retrieval/ranking.py`
|
|
202
|
+
Implementation instructions: combine symbolic distance, semantic score, git recency, and coverage risk into final rank.
|
|
203
|
+
Verification steps: run controlled ranking scenarios with synthetic weights.
|
|
204
|
+
Completion criteria: hybrid ranking is explainable and configurable.
|
|
205
|
+
|
|
206
|
+
Task 12.2
|
|
207
|
+
Target files: `tests/retrieval/test_hybrid_retriever.py`
|
|
208
|
+
Implementation instructions: assert ranking order for debugging scenarios and regression fixtures.
|
|
209
|
+
Verification steps: run tests and verify rationale metadata in output.
|
|
210
|
+
Completion criteria: hybrid pipeline consistently improves relevance over symbolic-only baseline.
|
|
211
|
+
|
|
212
|
+
### Phase 13: Bug Localization System
|
|
213
|
+
Goal: Produce structured root-cause candidates for debugging queries.
|
|
214
|
+
|
|
215
|
+
Task 13.1
|
|
216
|
+
Target files: `debugging/bug_localizer.py`, `debugging/execution_path_analyzer.py`
|
|
217
|
+
Implementation instructions: implement workflow from symbol extraction to modifier detection and upstream/downstream path tracing.
|
|
218
|
+
Verification steps: run end-to-end bug-localization fixtures (including `diff`-style queries).
|
|
219
|
+
Completion criteria: output includes relevant functions, call chain, and variable modifications.
|
|
220
|
+
|
|
221
|
+
Task 13.2
|
|
222
|
+
Target files: `tests/debugging/test_bug_localizer.py`
|
|
223
|
+
Implementation instructions: test ranking heuristics for recent commits, low coverage, and frequent execution paths.
|
|
224
|
+
Verification steps: execute tests and review explanation payload.
|
|
225
|
+
Completion criteria: bug-localization quality meets defined fixture expectations.
|
|
226
|
+
|
|
227
|
+
### Phase 14: LLM Context Builder
|
|
228
|
+
Goal: Build minimal, ordered context payloads for LLM prompts.
|
|
229
|
+
|
|
230
|
+
Task 14.1
|
|
231
|
+
Target files: `llm_context/context_builder.py`, `llm_context/snippet_selector.py`
|
|
232
|
+
Implementation instructions: select minimal snippets from ranked candidates; deduplicate and preserve execution order.
|
|
233
|
+
Verification steps: compare output token estimates with full-file baseline.
|
|
234
|
+
Completion criteria: context builder emits compact payloads with traceable provenance.
|
|
235
|
+
|
|
236
|
+
Task 14.2
|
|
237
|
+
Target files: `tests/llm_context/test_context_builder.py`
|
|
238
|
+
Implementation instructions: add tests for token-budget clipping, snippet overlap, and mandatory-symbol retention.
|
|
239
|
+
Verification steps: run tests and ensure deterministic output for fixed inputs.
|
|
240
|
+
Completion criteria: context output is reproducible and bounded by budget.
|
|
241
|
+
|
|
242
|
+
### Phase 15: CLI Interface
|
|
243
|
+
Goal: Expose GCIE capabilities through Typer commands.
|
|
244
|
+
|
|
245
|
+
Task 15.1
|
|
246
|
+
Target files: `cli/app.py`, `cli/commands/index.py`, `cli/commands/query.py`, `cli/commands/debug.py`
|
|
247
|
+
Implementation instructions: implement commands for indexing, graph build, retrieval query, and debug report.
|
|
248
|
+
Verification steps: run CLI help and command smoke tests in fixture workspace.
|
|
249
|
+
Completion criteria: users can execute end-to-end flow from CLI without direct Python API use.
|
|
250
|
+
|
|
251
|
+
Task 15.2
|
|
252
|
+
Target files: `tests/cli/test_cli_commands.py`
|
|
253
|
+
Implementation instructions: add CLI integration tests with fixture repositories and expected output schemas.
|
|
254
|
+
Verification steps: execute CLI tests and validate exit codes plus structured output.
|
|
255
|
+
Completion criteria: CLI is reliable for automation and local usage.
|
|
256
|
+
|
|
257
|
+
### Phase 16: Performance Optimization
|
|
258
|
+
Goal: Reduce indexing/query latency and memory cost while preserving relevance.
|
|
259
|
+
|
|
260
|
+
Task 16.1
|
|
261
|
+
Target files: `performance/profiler.py`, `retrieval/cache.py`, `graphs/graph_store.py`
|
|
262
|
+
Implementation instructions: add profiling instrumentation, caching, and incremental graph/index refresh paths.
|
|
263
|
+
Verification steps: run benchmark scenarios before/after optimization.
|
|
264
|
+
Completion criteria: measurable gains in indexing/query runtime and memory footprint.
|
|
265
|
+
|
|
266
|
+
Task 16.2
|
|
267
|
+
Target files: `tests/performance/test_benchmarks.py`, `docs/performance.md`
|
|
268
|
+
Implementation instructions: define benchmark suite and acceptance thresholds for latency/token reduction.
|
|
269
|
+
Verification steps: execute benchmark tests and capture baseline report.
|
|
270
|
+
Completion criteria: performance targets are versioned and regression-tested.
|
|
271
|
+
|
|
272
|
+
### Phase 17: Testing and Documentation
|
|
273
|
+
Goal: Finalize quality gates and project documentation.
|
|
274
|
+
|
|
275
|
+
Task 17.1
|
|
276
|
+
Target files: `tests/integration/test_end_to_end.py`, `tests/regression/test_query_regressions.py`
|
|
277
|
+
Implementation instructions: create end-to-end tests from indexing to minimal context output for canonical debugging queries.
|
|
278
|
+
Verification steps: run full test suite with coverage enabled.
|
|
279
|
+
Completion criteria: integration and regression suites pass with acceptable coverage.
|
|
280
|
+
|
|
281
|
+
Task 17.2
|
|
282
|
+
Target files: `README.md`, `docs/architecture.md`, `docs/retrieval.md`, `docs/debugging.md`
|
|
283
|
+
Implementation instructions: document architecture, phase outcomes, CLI usage, and troubleshooting workflow.
|
|
284
|
+
Verification steps: follow docs to execute first-time setup and sample query run.
|
|
285
|
+
Completion criteria: documentation supports onboarding and reproducible operation.
|
|
286
|
+
|
|
287
|
+
## Sequential Execution Rules
|
|
288
|
+
|
|
289
|
+
1. Phases execute in numeric order (1 through 17).
|
|
290
|
+
2. Do not start a phase until previous phase completion criteria are satisfied.
|
|
291
|
+
3. If a phase fails verification, fix within the same phase before proceeding.
|
|
292
|
+
4. Record verification evidence after each phase.
|
|
293
|
+
5. Re-scope only through roadmap update, not ad hoc implementation.
|
|
294
|
+
|
|
295
|
+
## Milestone Outcome Targets
|
|
296
|
+
|
|
297
|
+
1. Repository indexing and graph construction are operational.
|
|
298
|
+
2. Symbolic + semantic hybrid retrieval is operational.
|
|
299
|
+
3. Debugging flow returns minimal execution-relevant context.
|
|
300
|
+
4. CLI and documentation enable practical day-to-day use.
|
|
301
|
+
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# GCIE Setup In Any Repo
|
|
2
|
+
|
|
3
|
+
Use this file to onboard GCIE quickly in a new project.
|
|
4
|
+
|
|
5
|
+
## 0) One-Command Bootstrap
|
|
6
|
+
|
|
7
|
+
If GCIE is already installed locally:
|
|
8
|
+
|
|
9
|
+
```powershell
|
|
10
|
+
gcie.cmd setup .
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
NPX one-liner (after npm publish):
|
|
14
|
+
|
|
15
|
+
```powershell
|
|
16
|
+
npx gcie@latest
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
GitHub one-liner bootstrap (no prior setup required):
|
|
20
|
+
|
|
21
|
+
```powershell
|
|
22
|
+
powershell -ExecutionPolicy Bypass -Command "iwr https://raw.githubusercontent.com/pmaddire/GBCRSS/main/scripts/bootstrap_from_github.ps1 | iex"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
This initializes `.gcie` architecture tracking, writes portable workflow docs, and runs an initial index.
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
## 1) Install And Verify
|
|
29
|
+
|
|
30
|
+
```powershell
|
|
31
|
+
gcie.cmd --help
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
If this fails, use your local install method first (`npm link`, `npx`, or Python module invocation).
|
|
35
|
+
|
|
36
|
+
## 2) Index Once
|
|
37
|
+
|
|
38
|
+
```powershell
|
|
39
|
+
gcie.cmd index .
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Re-run indexing after major structural changes.
|
|
43
|
+
|
|
44
|
+
## 3) Add Agent Workflow File
|
|
45
|
+
|
|
46
|
+
Copy `AGENT_USAGE.md` from GCIE into the target repo root.
|
|
47
|
+
|
|
48
|
+
## 4) Start With Portable Defaults
|
|
49
|
+
|
|
50
|
+
```powershell
|
|
51
|
+
gcie.cmd context . "<task>" --intent <edit|debug|refactor|explore> --budget auto
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Then apply the adaptive loop in `AGENT_USAGE.md` if must-have coverage is incomplete.
|
|
55
|
+
|
|
56
|
+
## 5) Use Adaptive Mode Routing
|
|
57
|
+
|
|
58
|
+
- default: `plain-context-first`
|
|
59
|
+
- selective: `slicer-first` only for families where it benchmarks better
|
|
60
|
+
- always available: `direct-file-check` (`rg`) for verification/gap closure
|
|
61
|
+
|
|
62
|
+
## 6) Must-Have Gate Before Edits
|
|
63
|
+
|
|
64
|
+
Treat context as sufficient only when you have:
|
|
65
|
+
- implementation file(s)
|
|
66
|
+
- wiring/orchestration file(s)
|
|
67
|
+
- validation surface when risk is non-trivial
|
|
68
|
+
|
|
69
|
+
If missing, run targeted gap-fill for only the missing file.
|
|
70
|
+
|
|
71
|
+
## 7) Agent Prompt (Drop-In)
|
|
72
|
+
|
|
73
|
+
```text
|
|
74
|
+
Use GCIE as the primary context compressor.
|
|
75
|
+
Start with plain-context-first using file-first, symbol-heavy queries.
|
|
76
|
+
If must-have coverage is incomplete, adapt in this order:
|
|
77
|
+
1) improve query symbols/file anchors
|
|
78
|
+
2) adjust scope (subtree vs root)
|
|
79
|
+
3) raise budget one rung
|
|
80
|
+
4) targeted gap-fill for missing file(s)
|
|
81
|
+
5) decompose multi-hop chain only if still incomplete
|
|
82
|
+
Use slicer-first only for task families where it is benchmarked better.
|
|
83
|
+
Always verify with direct-file-check before edits when coverage is uncertain.
|
|
84
|
+
Stop retrieval as soon as must-have coverage is complete.
|
|
85
|
+
```
|
package/bin/gcie-init.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
|
|
4
|
+
const { spawnSync } = require("child_process");
|
|
5
|
+
const { resolve } = require("path");
|
|
6
|
+
|
|
7
|
+
function runGcie(args) {
  // Launch the sibling gcie.js entrypoint with the current Node binary,
  // inheriting stdio so the child process owns the terminal.
  const scriptDir = resolve(__dirname);
  const gcieBin = resolve(scriptDir, "gcie.js");
  const result = spawnSync(process.execPath, [gcieBin, ...args], { stdio: "inherit" });
  // Surface launch failures as a non-zero exit instead of silently
  // reporting success: the previous `result.status || 0` collapsed a null
  // status (spawn error or signal kill) into exit code 0.
  if (result.error) return 1;
  return result.status === null ? 1 : result.status;
}
|
|
13
|
+
|
|
14
|
+
function main() {
  // Forward any user-supplied flags after the implicit "setup ." command,
  // then exit with whatever code the delegated gcie run produced.
  const extraArgs = process.argv.slice(2);
  process.exit(runGcie(["setup", ".", ...extraArgs]));
}

main();
|
package/bin/gcie.js
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
|
|
4
|
+
const { spawnSync } = require("child_process");
|
|
5
|
+
const { existsSync } = require("fs");
|
|
6
|
+
const { join, resolve, delimiter } = require("path");
|
|
7
|
+
|
|
8
|
+
function resolvePython(gcieRoot) {
|
|
9
|
+
const winVenv = join(gcieRoot, ".venv", "Scripts", "python.exe");
|
|
10
|
+
const nixVenv = join(gcieRoot, ".venv", "bin", "python");
|
|
11
|
+
|
|
12
|
+
if (existsSync(winVenv)) return winVenv;
|
|
13
|
+
if (existsSync(nixVenv)) return nixVenv;
|
|
14
|
+
|
|
15
|
+
return null;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
function tryCommand(cmd, args, env) {
|
|
19
|
+
const result = spawnSync(cmd, args, { stdio: "inherit", env });
|
|
20
|
+
return result.status === 0;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
function main() {
  // Default to "setup ." when invoked with no arguments so a bare
  // `npx gcie` bootstraps the current directory.
  const args = process.argv.slice(2);
  const cliArgs = args.length === 0 ? ["setup", "."] : args;

  const scriptDir = resolve(__dirname);
  // GCIE_ROOT overrides the package root (parent of bin/) for dev setups.
  const gcieRoot = process.env.GCIE_ROOT ? resolve(process.env.GCIE_ROOT) : resolve(scriptDir, "..");

  // Prepend the GCIE root so `python -m cli.app` can resolve the package.
  const env = { ...process.env };
  env.PYTHONPATH = env.PYTHONPATH ? `${gcieRoot}${delimiter}${env.PYTHONPATH}` : gcieRoot;

  // Run one interpreter candidate. Returns its exit code, or null when the
  // interpreter itself could not be started (e.g. ENOENT), in which case
  // the caller should try the next candidate.
  const runWith = (cmd, extraArgs) => {
    const result = spawnSync(cmd, [...extraArgs, "-m", "cli.app", ...cliArgs], { stdio: "inherit", env });
    if (result.error) return null;
    return result.status === null ? 1 : result.status;
  };

  const venvPython = resolvePython(gcieRoot);
  if (venvPython) {
    // The venv interpreter exists on disk; propagate its exit code even on
    // failure instead of masking it as success (the old `status || 0`
    // turned spawn errors and signal kills into exit code 0).
    const status = runWith(venvPython, []);
    process.exit(status === null ? 1 : status);
  }

  // Fall back to system interpreters. Propagate the CLI's real exit code:
  // previously any non-zero exit was mistaken for a missing interpreter,
  // causing a retry with the next interpreter and a misleading error.
  for (const [cmd, extraArgs] of [["python", []], ["py", ["-3"]]]) {
    const status = runWith(cmd, extraArgs);
    if (status !== null) process.exit(status);
  }

  console.error("No Python interpreter found. Create a .venv in the GCIE repo or install Python 3.11+.");
  process.exit(1);
}

main();
|
package/cli/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI package."""
|
package/cli/app.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Typer entrypoint for GCIE CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
|
|
10
|
+
from .commands.cache import cache_status, clear_cache, warm_cache
|
|
11
|
+
from .commands.context import run_context
|
|
12
|
+
from .commands.context_slices import run_context_slices
|
|
13
|
+
from .commands.debug import run_debug
|
|
14
|
+
from .commands.index import run_index
|
|
15
|
+
from .commands.query import run_query
|
|
16
|
+
from .commands.setup import run_setup
|
|
17
|
+
|
|
18
|
+
app = typer.Typer(help="GraphCode Intelligence Engine CLI")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _query_tokens(query: str) -> tuple[str, ...]:
|
|
22
|
+
return tuple(re.findall(r"[a-zA-Z_./{}-][a-zA-Z0-9_./{}-]*", query.lower()))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _auto_context_budget(query: str, intent: str | None) -> int | None:
    """Pick a token budget for the ``context`` command from query heuristics.

    Returns an explicit integer budget when the query looks like a
    multi-file or cross-layer task, or ``None`` to let the downstream
    context builder use its own default. The ``if`` cascade below is
    ordered most-specific-first: the first matching rule wins.
    """
    tokens = _query_tokens(query)
    lowered = query.lower()
    # Treat a missing intent as exploratory (the least budget-hungry mode).
    effective_intent = intent or "explore"

    # Tokens that look like paths/files ("." , "/", or "{" template refs).
    file_terms = [token for token in tokens if "." in token or "/" in token or "{" in token]
    # Subset of file_terms with a recognized source-file extension.
    explicit_files = [token for token in file_terms if token.endswith((".py", ".jsx", ".js", ".tsx", ".ts", ".html"))]
    # Tokens that look like symbols or paths (broader than file_terms).
    symbol_terms = [token for token in tokens if any(ch in token for ch in ("_", "/", ".", "{", "}"))]

    # Layer detection: frontend by directory prefix, backend by .py files
    # or conventional backend directory prefixes.
    has_frontend = any(token.startswith(("frontend/", "frontend\\")) for token in file_terms)
    has_backend = any(
        token.endswith(".py") or token.startswith(("backend/", "server/", "api/"))
        for token in file_terms
    )
    cross_layer = has_frontend and has_backend

    # Keyword signals. "orchestr" is a deliberate stem matching both
    # "orchestrate" and "orchestration".
    stage_pipeline = any(term in lowered for term in ("stage", "pipeline", "planner", "plan", "build", "orchestr"))
    backend_config = any(term in lowered for term in ("backend", "config", "openai", "api_key", "llm", "no_ai", "backend_info"))
    ai_chain = any(term in lowered for term in ("openai", "llm", "model", "agent")) and has_backend
    # Two or more .py files mentioned, with no frontend involvement.
    same_layer_backend_pair = len([token for token in explicit_files if token.endswith(".py")]) >= 2 and not has_frontend
    has_api = "/api/" in lowered or any("/api/" in token for token in file_terms)

    # Rule cascade: budgets grow with apparent task breadth. Order matters —
    # e.g. the cross-layer rule must precede the generic multi-file rules.
    if effective_intent in {"edit", "debug", "refactor"} and cross_layer and len(symbol_terms) >= 4:
        return 1200 if has_api else 1150
    if stage_pipeline and len(explicit_files) >= 2:
        return 1400
    if same_layer_backend_pair and (backend_config or ai_chain):
        return 1100
    if len(explicit_files) >= 3 and effective_intent in {"edit", "debug", "refactor"}:
        return 1200
    if effective_intent in {"edit", "debug"} and len(explicit_files) >= 2:
        return 1000
    if effective_intent == "refactor" and len(explicit_files) >= 2:
        return 1000
    # No heuristic matched: defer to the caller's default budget.
    return None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@app.command("index")
|
|
63
|
+
def index_cmd(path: str = typer.Argument(".")) -> None:
|
|
64
|
+
result = run_index(path)
|
|
65
|
+
typer.echo(json.dumps(result, indent=2))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@app.command("query")
|
|
69
|
+
def query_cmd(path: str, query: str, max_hops: int = 2) -> None:
|
|
70
|
+
result = run_query(path, query, max_hops=max_hops)
|
|
71
|
+
typer.echo(json.dumps(result, indent=2))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@app.command("debug")
def debug_cmd(path: str, query: str) -> None:
    """Run the debugging pipeline for *query* against *path* and print JSON."""
    typer.echo(json.dumps(run_debug(path, query), indent=2))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@app.command("context")
def context_cmd(
    path: str,
    query: str,
    budget: str = typer.Option("auto", "--budget"),
    intent: str | None = typer.Option(None, "--intent"),
) -> None:
    """Build a context slice for *query* in *path* and print it as JSON.

    ``--budget`` accepts either the literal string ``auto`` (the budget is
    then derived from the query via ``_auto_context_budget``) or an integer
    token budget.
    """
    if budget == "auto":
        budget_val = _auto_context_budget(query, intent)
    else:
        try:
            budget_val = int(budget)
        except ValueError as exc:
            # Fix: a non-numeric --budget previously crashed with a raw
            # ValueError traceback; report it as a CLI usage error instead.
            raise typer.BadParameter(
                f"--budget must be 'auto' or an integer, got {budget!r}"
            ) from exc

    result = run_context(path, query, budget=budget_val, intent=intent)
    typer.echo(json.dumps(result, indent=2))
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@app.command("context-slices")
def context_slices_cmd(
    repo: str,
    query: str,
    profile: str | None = typer.Option("recall", "--profile"),
    stage_a_budget: int = typer.Option(400, "--stage-a"),
    stage_b_budget: int = typer.Option(800, "--stage-b"),
    max_total: int = typer.Option(1200, "--max-total"),
    intent: str | None = typer.Option(None, "--intent"),
    pin: str | None = typer.Option(None, "--pin"),
    pin_budget: int = typer.Option(300, "--pin-budget"),
    include_tests: bool = typer.Option(False, "--include-tests"),
) -> None:
    """Produce staged context slices for *query* in *repo* and print JSON."""
    # Collect the tuning knobs once so the call site stays readable.
    slice_options = {
        "stage_a_budget": stage_a_budget,
        "stage_b_budget": stage_b_budget,
        "max_total": max_total,
        "intent": intent,
        "pin": pin,
        "pin_budget": pin_budget,
        "include_tests": include_tests,
        "profile": profile,
    }
    outcome = run_context_slices(repo, query, **slice_options)
    typer.echo(json.dumps(outcome, indent=2))
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@app.command("setup")
def setup_cmd(
    path: str = typer.Argument("."),
    force: bool = typer.Option(False, "--force", help="Overwrite existing setup files"),
    no_agent_usage: bool = typer.Option(False, "--no-agent-usage", help="Do not copy AGENT_USAGE.md"),
    no_setup_doc: bool = typer.Option(False, "--no-setup-doc", help="Do not copy SETUP_ANY_REPO.md"),
    no_index: bool = typer.Option(False, "--no-index", help="Skip initial indexing pass"),
) -> None:
    """Bootstrap GCIE setup files into *path* and print the report as JSON."""
    # The CLI exposes negative flags (--no-x) while run_setup takes positive
    # booleans, so each flag is inverted here.
    outcome = run_setup(
        path,
        force=force,
        include_agent_usage=not no_agent_usage,
        include_setup_doc=not no_setup_doc,
        run_index_pass=not no_index,
    )
    typer.echo(json.dumps(outcome, indent=2))
|
|
142
|
+
|
|
143
|
+
@app.command("cache-clear")
def cache_clear_cmd(path: str = typer.Argument(".")) -> None:
    """Remove the cached context data under *path* and print the outcome."""
    typer.echo(json.dumps(clear_cache(path), indent=2))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@app.command("cache-status")
def cache_status_cmd(path: str = typer.Argument(".")) -> None:
    """Report whether a context cache exists under *path*, as JSON."""
    typer.echo(json.dumps(cache_status(path), indent=2))
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@app.command("cache-warm")
def cache_warm_cmd(path: str = typer.Argument(".")) -> None:
    """Pre-populate the context cache for *path* and print the outcome."""
    typer.echo(json.dumps(warm_cache(path), indent=2))
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# Allow direct execution (e.g. `python -m cli.app`) in addition to the
# installed console-script entry point.
if __name__ == "__main__":
    app()
|
|
163
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI commands package."""
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""CLI command: cache."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import shutil
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .context import _collect_repo_modules
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def clear_cache(path: str) -> dict[str, str]:
    """Delete the on-disk context cache under *path*, if one exists.

    Returns a status dict with ``status`` set to ``"cleared"`` when the
    cache directory was removed, or ``"missing"`` when there was nothing
    to remove, plus the cache directory ``path`` in both cases.
    """
    cache_dir = Path(path) / ".gcie" / "cache"
    if not cache_dir.exists():
        return {"status": "missing", "path": str(cache_dir)}
    shutil.rmtree(cache_dir)
    return {"status": "cleared", "path": str(cache_dir)}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def cache_status(path: str) -> dict[str, str]:
    """Report whether the serialized context cache exists under *path*.

    Returns ``{"status": "ready"|"missing", "path": <cache file path>}``.
    """
    cache_file = Path(path) / ".gcie" / "cache" / "context_cache.json"
    state = "ready" if cache_file.exists() else "missing"
    return {"status": state, "path": str(cache_file)}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def warm_cache(path: str) -> dict[str, str]:
    """Populate the context cache for *path* and report whether it exists.

    Collecting the repo modules is what writes the cache file as a side
    effect; afterwards the cache file's presence determines the status.
    """
    repo = Path(path)
    _collect_repo_modules(repo)
    cache_file = repo / ".gcie" / "cache" / "context_cache.json"
    state = "warmed" if cache_file.exists() else "missing"
    return {"status": state, "path": str(cache_file)}
|