aidex-graphra 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +463 -0
  3. package/dist/chunker.d.ts +3 -0
  4. package/dist/chunker.d.ts.map +1 -0
  5. package/dist/chunker.js +116 -0
  6. package/dist/chunker.js.map +1 -0
  7. package/dist/cli.d.ts +3 -0
  8. package/dist/cli.d.ts.map +1 -0
  9. package/dist/cli.js +821 -0
  10. package/dist/cli.js.map +1 -0
  11. package/dist/graph.d.ts +9 -0
  12. package/dist/graph.d.ts.map +1 -0
  13. package/dist/graph.js +97 -0
  14. package/dist/graph.js.map +1 -0
  15. package/dist/init.d.ts +27 -0
  16. package/dist/init.d.ts.map +1 -0
  17. package/dist/init.js +306 -0
  18. package/dist/init.js.map +1 -0
  19. package/dist/mcp.d.ts +13 -0
  20. package/dist/mcp.d.ts.map +1 -0
  21. package/dist/mcp.js +19 -0
  22. package/dist/mcp.js.map +1 -0
  23. package/dist/mcpServer.d.ts +14 -0
  24. package/dist/mcpServer.d.ts.map +1 -0
  25. package/dist/mcpServer.js +373 -0
  26. package/dist/mcpServer.js.map +1 -0
  27. package/dist/neuralEmbedder.d.ts +21 -0
  28. package/dist/neuralEmbedder.d.ts.map +1 -0
  29. package/dist/neuralEmbedder.js +98 -0
  30. package/dist/neuralEmbedder.js.map +1 -0
  31. package/dist/scanner.d.ts +3 -0
  32. package/dist/scanner.d.ts.map +1 -0
  33. package/dist/scanner.js +43 -0
  34. package/dist/scanner.js.map +1 -0
  35. package/dist/search.d.ts +37 -0
  36. package/dist/search.d.ts.map +1 -0
  37. package/dist/search.js +252 -0
  38. package/dist/search.js.map +1 -0
  39. package/dist/signatureExtractor.d.ts +25 -0
  40. package/dist/signatureExtractor.d.ts.map +1 -0
  41. package/dist/signatureExtractor.js +173 -0
  42. package/dist/signatureExtractor.js.map +1 -0
  43. package/dist/storage.d.ts +59 -0
  44. package/dist/storage.d.ts.map +1 -0
  45. package/dist/storage.js +322 -0
  46. package/dist/storage.js.map +1 -0
  47. package/dist/tokenBudget.d.ts +52 -0
  48. package/dist/tokenBudget.d.ts.map +1 -0
  49. package/dist/tokenBudget.js +175 -0
  50. package/dist/tokenBudget.js.map +1 -0
  51. package/dist/types.d.ts +62 -0
  52. package/dist/types.d.ts.map +1 -0
  53. package/dist/types.js +6 -0
  54. package/dist/types.js.map +1 -0
  55. package/dist/utils/hash.d.ts +6 -0
  56. package/dist/utils/hash.d.ts.map +1 -0
  57. package/dist/utils/hash.js +45 -0
  58. package/dist/utils/hash.js.map +1 -0
  59. package/package.json +69 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Graphra
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,463 @@
1
+ <p align="center">
2
+ <h1 align="center">⚡ Graphra</h1>
3
+ <p align="center"><strong>The universal code context engine for AI tools.</strong></p>
4
+ <p align="center">Zero-config · Local-first · Works with every AI tool</p>
5
+ </p>
6
+
7
+ <p align="center">
8
+ <img src="https://img.shields.io/badge/version-1.0.0-blue" alt="Version" />
9
+ <img src="https://img.shields.io/badge/languages-JS%2FTS-yellow" alt="Languages" />
10
+ <img src="https://img.shields.io/badge/license-MIT-green" alt="License" />
11
+ <img src="https://img.shields.io/badge/API%20keys-none%20required-brightgreen" alt="No API Keys" />
12
+ </p>
13
+
14
+ ---
15
+
16
+ AI coding tools re-read your entire codebase on every prompt. **graphra fixes that.** It builds a structural index of your code, then gives any AI tool — Copilot, Cursor, Claude, ChatGPT, or local LLMs — precisely the context it needs.
17
+
18
+ ```bash
19
+ npm install -g aidex-graphra # Install once
20
+ cd your-project
21
+ graphra init # Auto-detect language, framework, structure
22
+ graphra generate # Index codebase (incremental, <1s on re-runs)
23
+ graphra setup # Generate configs for Claude/Cursor/VS Code
24
+ # Done. Restart your AI tool — context flows automatically.
25
+ ```
26
+
27
+ `graphra setup` generates:
28
+ - `.vscode/mcp.json` — VS Code discovers Graphra's MCP tools
29
+ - `.cursor/mcp.json` — Cursor discovers Graphra's MCP tools
30
+ - `.github/copilot-instructions.md` — Copilot reads codebase overview on **every** message
31
+ - Claude Desktop config (printed to console)
32
+
33
+ ## Why Graphra?
34
+
35
+ | Problem | How graphra solves it |
36
+ |---------|----------------------|
37
+ | AI tools don't understand your codebase | Builds a structural index with dependency graph |
38
+ | Sending full files wastes tokens | Sends only relevant **code signatures**, not full files |
39
+ | Keyword search misses semantic matches | **Hybrid search**: BM25 + neural embeddings + PageRank + git recency |
40
+ | Every tool needs different setup | **One index, every tool**: Copilot, Cursor, Claude, ChatGPT, local LLMs |
41
+ | Cloud-based tools send your code to servers | **100% local** — no API keys, no cloud, no data leaves your machine |
42
+ | Re-indexing is slow | **Incremental** — only re-processes changed files (0.4s when nothing changed) |
43
+
44
+ ## Features
45
+
46
+ ### 🔍 Hybrid Search Engine
47
+ Combines 4 ranking signals for the most relevant results:
48
+
49
+ | Signal | What it does |
50
+ |--------|-------------|
51
+ | **BM25 full-text** | Exact keyword matching with term frequency weighting |
52
+ | **Neural embeddings** | Semantic understanding — "auth" matches "login" matches "sign in" |
53
+ | **PageRank** | Files imported by many others rank higher (structural importance) |
54
+ | **Git recency** | Recently modified code ranks higher (temporal relevance) |
55
+
56
+ ### 📝 Aider-Style Code Signatures
57
+ Instead of generating lossy text summaries, graphra extracts **actual code signatures**:
58
+
59
+ ```
60
+ # What other tools show:
61
+ "This function handles user authentication with session management"
62
+
63
+ # What graphra shows:
64
+ user/user.service.js: async login(email, inputPassword, session, sessionDetails, keepSessionActive = false)
65
+ ```
66
+
67
+ The AI sees real code — parameter names, types, return types — not a summary's interpretation.
68
+
69
+ ### 🧠 Local Neural Embeddings
70
+ Uses **all-MiniLM-L6-v2** (same model as Continue.dev) running 100% locally via TransformersJS:
71
+ - 384-dimensional vectors
72
+ - Understands semantic meaning ("authentication" ≈ "login" ≈ "sign in")
73
+ - First run downloads ~23MB model, then cached forever
74
+ - No API keys, no cloud, no cost
75
+
76
+ ### 💾 SQLite Storage
77
+ Compact, fast, single-file database:
78
+ - **13MB** for a 206-file repo (vs 39MB JSON in v1)
79
+ - Binary float32 embedding blobs
80
+ - Incremental updates — only re-embeds changed files
81
+ - WAL mode for concurrent reads
82
+
83
+ ### 🔌 Universal AI Tool Support
84
+ One index works with every AI tool:
85
+
86
+ | Tool | Integration | How |
87
+ |------|------------|-----|
88
+ | **Claude Desktop** | MCP (stdio) | `graphra setup --claude` |
89
+ | **Cursor** | MCP (stdio) | `graphra setup --cursor` |
90
+ | **VS Code Copilot** | MCP (stdio) | `graphra setup --vscode` |
91
+ | **ChatGPT** | Clipboard | `graphra context file -t "task" -f clipboard` |
92
+ | **Any AI** | REST API | `graphra serve` → `POST /context` |
93
+ | **Custom tools** | JSON export | `graphra context file -t "task" -f json` |
94
+
95
+ ### 📊 Context Confidence Scoring
96
+ Every response includes a confidence score:
97
+ ```json
98
+ {
99
+ "confidence": 87,
100
+ "confidenceLabel": "high",
101
+ "tokenEstimate": 1240
102
+ }
103
+ ```
104
+
105
+ ### 🎯 Model-Aware Token Budgets
106
+ Automatically optimizes context size for your model. graphra uses a word-level token estimation heuristic (estimates are typically within ~15% of tiktoken's count) and priority-based packing (graph entries get a 1.5x boost; entries are sorted by relevance score).
107
+
108
+ | Model | Token Budget | | Model | Token Budget |
109
+ |-------|-------------|---|-------|-------------|
110
+ | GPT-4o | 8,000 | | Claude 4 Opus | 12,000 |
111
+ | GPT-4o-mini | 4,000 | | Claude 3.5 Sonnet | 10,000 |
112
+ | o3 | 10,000 | | Claude 3 Haiku | 4,000 |
113
+ | o3-mini | 6,000 | | Gemini 2 | 10,000 |
114
+ | Llama 3.1 405B | 6,000 | | DeepSeek v2 | 6,000 |
115
+ | Llama 3 70B | 4,000 | | Qwen 2.5 Coder | 4,000 |
116
+ | Llama 3 8B | 2,000 | | Mistral 7B | 2,000 |
117
+ | Mixtral | 4,000 | | StarCoder 2 | 3,000 |
118
+
119
+ **30+ models** pre-configured. Pass `model: "gpt-4o"` and graphra auto-packs the best context within that budget. Works in CLI (`--tokens`), REST API (`model` field), and MCP tools (`maxTokens` or `model` param).
120
+
121
+ **Token packing is applied everywhere:**
122
+ - ✅ CLI: `graphra context file -t "task" --tokens 2000`
123
+ - ✅ REST API: `POST /context { model: "gpt-4o" }` → auto 8K budget
124
+ - ✅ MCP Server: All MCP tools respect token budgets (default 4K)
125
+ - Architecture entries capped at 40% of budget, remaining 60% for search results
126
+
127
+ ### ⚡ Incremental Re-indexing
128
+ | Scenario | Time |
129
+ |----------|------|
130
+ | Full build (206 files, 2923 chunks) | 23s |
131
+ | No changes | **0.4s** |
132
+ | 1 file changed | **1.1s** |
133
+
134
+ ## Quick Start
135
+
136
+ ### 1. Install
137
+
138
+ ```bash
139
+ npm install -g aidex-graphra
140
+ ```
141
+
142
+ ### 2. Initialize & Index
143
+
144
+ ```bash
145
+ cd your-project
146
+ graphra init # Auto-detects: language, framework, structure, entry points
147
+ graphra generate # Indexes codebase with neural embeddings
148
+ ```
149
+
150
+ ### 3. Use
151
+
152
+ ```bash
153
+ # Search your codebase
154
+ graphra search "authentication login"
155
+ graphra search "database query postgres"
156
+ graphra search "send email notification"
157
+
158
+ # Get context for a task
159
+ graphra context src/auth.js -t "Add rate limiting to login" -f clipboard
160
+ # → Copied to clipboard! Paste into ChatGPT/Claude/any AI.
161
+
162
+ # Architecture overview
163
+ graphra explain
164
+
165
+ # PR/diff context
166
+ graphra diff -b main
167
+
168
+ # Database stats
169
+ graphra stats
170
+ ```
171
+
172
+ ### 4. Connect to AI Tools
173
+
174
+ ```bash
175
+ graphra setup # Generates configs for Claude, Cursor, VS Code
176
+ ```
177
+
178
+ Then restart your AI tool. Graphra's tools appear automatically:
179
+ - `Graphra_search` — Hybrid codebase search
180
+ - `Graphra_context` — Task-specific context with architecture
181
+ - `Graphra_explain` — Architecture overview
182
+ - `Graphra_stats` — Index statistics
183
+
184
+ ## CLI Reference
185
+
186
+ | Command | Description |
187
+ |---------|-------------|
188
+ | `graphra init` | Auto-detect project language, framework, and structure |
189
+ | `graphra generate` | Index codebase (incremental — only re-processes changed files) |
190
+ | `graphra generate --force` | Full rebuild (ignores cache) |
191
+ | `graphra search <query>` | Hybrid search: BM25 + neural + PageRank + git recency |
192
+ | `graphra search <query> -k 20` | Return top 20 results |
193
+ | `graphra context <file> -t <task>` | Build context for a file + task |
194
+ | `graphra context <file> -t <task> -f json` | Export as JSON |
195
+ | `graphra context <file> -t <task> -f markdown` | Export as Markdown |
196
+ | `graphra context <file> -t <task> -f clipboard` | Copy to clipboard |
197
+ | `graphra context <file> -t <task> --tokens 4000` | Pack into 4K token budget |
198
+ | `graphra diff` | Context for changed files in current branch |
199
+ | `graphra diff -b develop` | Diff against a specific branch |
200
+ | `graphra explain` | Auto-generated architecture overview |
201
+ | `graphra stats` | Database statistics |
202
+ | `graphra serve` | Start REST API server (port 4567) |
203
+ | `graphra mcp` | Start MCP stdio server (for Claude/Cursor/VS Code) |
204
+ | `graphra setup` | Generate AI tool configs |
205
+
206
+ ## Architecture
207
+
208
+ ```
209
+ ┌─────────────────────────────────────────────────────────────────┐
210
+ │ CLI (cli.ts) │
211
+ │ init · generate · search · context · diff · explain · stats │
212
+ │ serve · mcp · setup │
213
+ └──────────┬──────────────────────────────────────────────────────┘
214
+
215
+
216
+ ┌─────────────────────────────────────────────────────────────────┐
217
+ │ Core Pipeline │
218
+ │ │
219
+ │ ┌──────────┐ ┌──────────┐ ┌────────────────┐ ┌──────────┐ │
220
+ │ │ Scanner │→ │ Chunker │→ │ Signature │→ │ Neural │ │
221
+ │ │ │ │ (ts-morph)│ │ Extractor │ │ Embedder │ │
222
+ │ │ globby + │ │ │ │ (Aider-style) │ │ (MiniLM) │ │
223
+ │ │ gitignore│ │ AST parse│ │ │ │ 384-dim │ │
224
+ │ └──────────┘ └──────────┘ └────────────────┘ └──────────┘ │
225
+ │ │
226
+ │ ┌──────────┐ ┌──────────────────────────────────────────────┐ │
227
+ │ │ Graph │ │ Hybrid Search │ │
228
+ │ │ Builder │ │ BM25 + Neural + PageRank + Git Recency │ │
229
+ │ │ import + │ │ → Confidence scoring │ │
230
+ │ │ require │ │ → Model-aware token packing │ │
231
+ │ └──────────┘ └──────────────────────────────────────────────┘ │
232
+ └──────────┬──────────────────────────────────────────────────────┘
233
+
234
+
235
+ ┌─────────────────────────────────────────────────────────────────┐
236
+ │ SQLite Storage │
237
+ │ │
238
+ │ ┌──────────┐ ┌────────────┐ ┌─────────┐ ┌──────┐ ┌─────────┐│
239
+ │ │ chunks │ │ embeddings │ │ graph │ │ meta │ │ files ││
240
+ │ │ id,file, │ │ chunk_id, │ │ source, │ │ key, │ │ path, ││
241
+ │ │ name,sig,│ │ vector │ │ target │ │ value│ │ mtime ││
242
+ │ │ code,hash│ │ (float32) │ │ │ │ │ │ ││
243
+ │ └──────────┘ └────────────┘ └─────────┘ └──────┘ └─────────┘│
244
+ │ │
245
+ │ .graphra/graphra.db (single file, WAL mode) │
246
+ └─────────────────────────────────────────────────────────────────┘
247
+
248
+
249
+ ┌─────────────────────────────────────────────────────────────────┐
250
+ │ Output Layer │
251
+ │ │
252
+ │ ┌──────────────────────────────────────────────────────────┐ │
253
+ │ │ Token Budget (tokenBudget.ts) │ │
254
+ │ │ estimateTokens() · getTokenBudget() · packEntries() │ │
255
+ │ │ 30+ model presets · priority-based packing │ │
256
+ │ │ Architecture capped at 40% · search results get 60% │ │
257
+ │ └──────────────────────────────────────────────────────────┘ │
258
+ │ │
259
+ │ ┌──────────────┐ ┌──────────────┐ ┌────────────────────────┐│
260
+ │ │ MCP Server │ │ REST API │ │ CLI Output ││
261
+ │ │ (stdio) │ │ (HTTP) │ │ ││
262
+ │ │ token-aware │ │ token-aware │ │ text · json · markdown ││
263
+ │ │ Claude │ │ /context/ │ │ clipboard ││
264
+ │ │ Cursor │ │ copilot │ │ --tokens flag ││
265
+ │ │ VS Code │ │ cursor │ │ ││
266
+ │ │ │ │ chatgpt │ │ ││
267
+ │ │ │ │ claude │ │ ││
268
+ │ └──────────────┘ └──────────────┘ └────────────────────────┘│
269
+ └─────────────────────────────────────────────────────────────────┘
270
+ ```
271
+
272
+ ### Data Flow
273
+
274
+ ```
275
+ 1. SCAN globby scans files, respects .gitignore + always ignores node_modules
276
+
277
+ 2. CHUNK ts-morph parses AST → extracts functions, classes, methods,
278
+ interfaces, types, arrow functions, constants
279
+
280
+ 3. SIGNATURE Extracts actual code declaration lines (Aider-style)
281
+ "async login(email, password, session)" not "handles login"
282
+
283
+ 4. EMBED all-MiniLM-L6-v2 generates 384-dim vectors locally
284
+ Understands semantic meaning: "auth" ≈ "login" ≈ "sign in"
285
+
286
+ 5. GRAPH Parses import/require statements → builds dependency graph
287
+ Computes PageRank for file importance ranking
288
+
289
+ 6. STORE Everything saved to SQLite (.graphra/graphra.db)
290
+ Embeddings as binary float32 blobs (compact)
291
+ File mtimes tracked for incremental updates
292
+
293
+ 7. SEARCH Query → BM25 + Neural + PageRank + Git Recency
294
+ Results ranked by combined score with confidence
295
+
296
+ 8. PACK Token budget applied (model-aware: GPT-4o→8K, Llama→2K)
297
+ Priority packing: graph entries 1.5x boost, sorted by score
298
+ Architecture capped at 40%, search results get 60%
299
+
300
+ 9. SERVE MCP (stdio) for Claude/Cursor/VS Code — token-aware
301
+ REST API for any HTTP client — model-aware budgets
302
+ CLI for terminal usage — --tokens flag
303
+ ```
304
+
305
+ ### Module Map
306
+
307
+ ```
308
+ src/
309
+ ├── cli.ts # Commander CLI — 10 commands
310
+ ├── mcp.ts # MCP entry point (spawned by AI tools)
311
+ ├── mcpServer.ts # MCP protocol (5 tools: auto, search, context, explain, stats)
312
+ ├── tokenBudget.ts # Token estimation, 30+ model budgets, priority packing
313
+ ├── init.ts # Auto-detect language/framework/structure
314
+ ├── scanner.ts # File discovery (globby + gitignore)
315
+ ├── chunker.ts # AST chunking (ts-morph)
316
+ ├── signatureExtractor.ts # Aider-style signature extraction
317
+ ├── neuralEmbedder.ts # TransformersJS (all-MiniLM-L6-v2)
318
+ ├── search.ts # Hybrid search (BM25 + Neural + PageRank + Git)
319
+ ├── graph.ts # Dependency graph builder (import + require)
320
+ ├── storage.ts # SQLite (better-sqlite3)
321
+ ├── types.ts # Core type definitions
322
+ └── utils/
323
+ └── hash.ts # MD5 hash utility
324
+
325
+ 14 files, ~2,400 lines of TypeScript.
326
+ ```
327
+
328
+ ## How It Compares
329
+
330
+ | Feature | graphra | Aider | Continue.dev | code-review-graph |
331
+ |---------|----------|-------|-------------|-------------------|
332
+ | **Zero config** | ✅ `npm install` + `init` | ❌ Needs LLM | ❌ Needs config | ❌ `pip install` + `build` |
333
+ | **Languages** | JS/TS | Multi (tree-sitter) | Multi | Multi (tree-sitter) |
334
+ | **Code signatures** | ✅ Aider-style | ✅ | ❌ | ✅ |
335
+ | **Neural embeddings** | ✅ Local (MiniLM) | ❌ | ✅ (MiniLM) | ✅ (optional) |
336
+ | **BM25 full-text** | ✅ | ❌ | ✅ | ✅ (FTS5) |
337
+ | **PageRank** | ✅ | ✅ (graph) | ❌ | ❌ |
338
+ | **Git recency** | ✅ | ❌ | ❌ | ❌ |
339
+ | **Incremental** | ✅ mtime-based | ✅ | ✅ | ✅ git-based |
340
+ | **MCP server** | ✅ | ❌ | ❌ | ✅ (22 tools) |
341
+ | **Universal AI export** | ✅ All tools | ❌ Chat only | ❌ IDE only | ❌ Claude only |
342
+ | **Confidence scoring** | ✅ | ❌ | ❌ | ❌ |
343
+ | **Model-aware tokens** | ✅ 30+ models, priority packing | ❌ | ❌ | ✅ (detail levels) |
344
+ | **Clipboard export** | ✅ | ❌ | ❌ | ❌ |
345
+ | **PR/diff context** | ✅ | ❌ | ❌ | ✅ |
346
+ | **Architecture explain** | ✅ | ❌ | ❌ | ✅ |
347
+ | **Storage** | SQLite (13MB) | SQLite | LanceDB | SQLite |
348
+ | **API keys required** | ❌ None | ✅ LLM key | ✅ Embed key | ❌ None |
349
+
350
+ ### Graphra's unique position:
351
+
352
+ > **The only zero-config, local-first codebase context engine that works with every AI tool.**
353
+
354
+ ## REST API
355
+
356
+ Start with `graphra serve` (default port 4567):
357
+
358
+ | Endpoint | Method | Description |
359
+ |----------|--------|-------------|
360
+ | `/health` | GET | Health check + chunk count |
361
+ | `/stats` | GET | Database statistics |
362
+ | `/search` | POST | Hybrid search `{ query, topK? }` |
363
+ | `/context` | POST | Raw context `{ file?, task, topK?, maxTokens?, model? }` |
364
+ | `/context/copilot` | POST | GitHub Copilot optimized format |
365
+ | `/context/cursor` | POST | Cursor optimized format |
366
+ | `/context/chatgpt` | POST | ChatGPT paste-ready format |
367
+ | `/context/claude` | POST | Claude MCP format |
368
+
369
+ ## MCP Tools
370
+
371
+ When connected via `graphra mcp` or `graphra setup`:
372
+
373
+ | Tool | Description |
374
+ |------|-------------|
375
+ | `Graphra_auto` | **Primary tool — called on every coding question.** Takes the user's message + active file, returns ~2K tokens of compact context: current file signatures, dependencies, and related code. |
376
+ | `Graphra_search` | Hybrid codebase search — returns code signatures. Params: `query`, `topK?`, `maxTokens?`, `model?` |
377
+ | `Graphra_context` | Full context with architecture + related code. Token-budget aware. Params: `task`, `file?`, `topK?`, `maxTokens?`, `model?` |
378
+ | `Graphra_explain` | Codebase architecture overview (auto-truncated to budget) |
379
+ | `Graphra_stats` | Index statistics |
380
+
381
+ All MCP tools respect token budgets. Default: 4,000 tokens. `Graphra_auto` uses a compact 2K budget.
382
+
383
+ ## How Context Reaches the AI — 3 Layers
384
+
385
+ graphra delivers context through three complementary layers:
386
+
387
+ ```
388
+ Layer 1: STATIC (every message, zero latency)
389
+ │ .github/copilot-instructions.md
390
+ │ → Auto-generated file listing top 30 files + exports
391
+ │ → Copilot reads this on EVERY chat message automatically
392
+ │ → AI already knows what exists before you even ask
393
+
394
+ Layer 2: AUTO TOOL (called on coding questions)
395
+ │ Graphra_auto(message, activeFile)
396
+ │ → Tool description tells AI: "IMPORTANT: Call BEFORE answering
397
+ │ ANY coding question"
398
+ │ → Returns ~2K tokens: current file, dependencies, related code
399
+ │ → AI sees real signatures, not summaries
400
+
401
+ Layer 3: DEEP TOOLS (called when AI needs more)
402
+ Graphra_search(query) → find specific code
403
+ Graphra_context(task) → full context with architecture
404
+ Graphra_explain() → architecture overview
405
+ ```
406
+
407
+ **Layer 1** works on every single message with zero tool calls. **Layer 2** is called by the AI on most coding questions. **Layer 3** is for deep dives.
408
+
409
+ ## Benchmarks
410
+
411
+ Tested on a real-world 206-file Node.js/Express codebase:
412
+
413
+ | Metric | Value |
414
+ |--------|-------|
415
+ | Files indexed | 206 |
416
+ | Chunks extracted | 2,923 |
417
+ | Embeddings | 2,923 (384-dim) |
418
+ | Graph edges | 585 |
419
+ | Full build time | **23.6s** |
420
+ | Incremental (no changes) | **0.4s** |
421
+ | Incremental (1 file) | **1.1s** |
422
+ | Database size | **13MB** |
423
+ | Search accuracy (top-5) | **92.5%** |
424
+
425
+ ### Search Accuracy (8 queries, top-5 precision)
426
+
427
+ | Query | Precision | #1 Result |
428
+ |-------|-----------|-----------|
429
+ | "authentication login" | 5/5 | `UserController.login` ✅ |
430
+ | "send email notification" | 5/5 | `sendNotificationEmailToInfluencer` ✅ |
431
+ | "database query postgres" | 4/5 | `readAndWriteDataFromSQLiteToPostgres` ✅ |
432
+ | "cron job scheduler" | 5/5 | `CronJobScheduler.init` ✅ |
433
+ | "file upload S3 storage" | 5/5 | `directUploadToS3Bucket` ✅ |
434
+ | "validate user input" | 4/5 | `validatePassword` ✅ |
435
+ | "redis cache session" | 4/5 | `IORedisClient` ✅ |
436
+ | "password hash bcrypt" | 5/5 | `generatePasswordHash` ✅ |
437
+
438
+ **Overall: 37/40 = 92.5% top-5 precision**
439
+
440
+ ## Tech Stack
441
+
442
+ | Component | Technology |
443
+ |-----------|-----------|
444
+ | Language | TypeScript |
445
+ | AST Parser | ts-morph |
446
+ | Embeddings | TransformersJS (all-MiniLM-L6-v2) |
447
+ | Storage | better-sqlite3 |
448
+ | Search | Custom BM25 + cosine similarity + PageRank + git recency |
449
+ | Token optimization | Custom word-level estimator, 30+ model presets, priority packing |
450
+ | CLI | Commander.js |
451
+ | File scanning | globby |
452
+ | MCP Protocol | @modelcontextprotocol/sdk (stdio transport) |
453
+ | Schema validation | Zod |
454
+
455
+ ## License
456
+
457
+ MIT
458
+
459
+ ---
460
+
461
+ <p align="center">
462
+ Built with ❤️ for developers who want AI tools that actually understand their codebase.
463
+ </p>
@@ -0,0 +1,3 @@
1
+ import { Chunk } from "./types";
2
+ export declare function chunkFile(filePath: string): Chunk[];
3
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAE,MAAM,SAAS,CAAC;AAahC,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,KAAK,EAAE,CA4GnD"}
@@ -0,0 +1,116 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.chunkFile = chunkFile;
4
+ const ts_morph_1 = require("ts-morph");
5
+ const hash_1 = require("./utils/hash");
6
// Single ts-morph project shared by every chunkFile() call. tsconfig-driven
// file discovery is disabled, so the project only ever contains the one file
// that chunkFile() adds (and removes again) per call.
const project = new ts_morph_1.Project({ skipAddingFilesFromTsConfig: true });

/**
 * Builds one chunk record from an AST node.
 *
 * @param {string} filePath - Path of the file being chunked; also used as the id prefix.
 * @param {string} type - Chunk kind ("function", "class", "method", "arrow-function",
 *   "constant", "interface" or "type-alias").
 * @param {string} name - Declared name of the node (or "anonymous").
 * @param {{ getFullText(): string }} node - ts-morph node whose trimmed full text becomes the chunk code.
 * @returns {{id: string, file: string, type: string, name: string, code: string, hash: string}}
 */
function buildChunk(filePath, type, name, node) {
    const code = node.getFullText().trim();
    return {
        // NOTE: the id is derived only from path + name, so two declarations
        // sharing a name in the same file produce the same id.
        id: `${filePath}#${name}`,
        file: filePath,
        type,
        name,
        code,
        hash: (0, hash_1.md5)(code),
    };
}

/**
 * ES-2.1 — AST Chunk Extraction
 * Parses a file using ts-morph and extracts its declarations as individual chunks.
 *
 * - Each function/class/method is a separate chunk
 * - Top-level arrow functions, exported constants, interfaces and type aliases
 *   are chunked as well
 * - No full-file chunks
 *
 * Chunks are emitted in this order: functions, classes (each class followed by
 * its methods), variable statements, interfaces, type aliases.
 *
 * @param {string} filePath - Path of the source file to parse.
 * @returns {Array<object>} Chunk records ({ id, file, type, name, code, hash }).
 * @throws Whatever ts-morph throws if the file cannot be added; the shared
 *   project is left clean in every case.
 */
function chunkFile(filePath) {
    // If this throws, nothing was added to the project, so no cleanup is needed.
    const sourceFile = project.addSourceFileAtPath(filePath);
    try {
        const chunks = [];
        // Extract top-level functions
        for (const fn of sourceFile.getFunctions()) {
            chunks.push(buildChunk(filePath, "function", fn.getName() ?? "anonymous", fn));
        }
        // Extract classes and their methods
        for (const cls of sourceFile.getClasses()) {
            const className = cls.getName() ?? "anonymous";
            chunks.push(buildChunk(filePath, "class", className, cls));
            for (const method of cls.getMethods()) {
                chunks.push(buildChunk(filePath, "method", `${className}.${method.getName()}`, method));
            }
        }
        // Extract arrow functions (exported and non-exported top-level) and
        // exported constants. The chunk code is the whole variable statement,
        // so declarations sharing one statement share the same code text.
        for (const varStmt of sourceFile.getVariableStatements()) {
            for (const decl of varStmt.getDeclarations()) {
                const init = decl.getInitializer();
                if (!init) {
                    continue;
                }
                if (init.getKind() === ts_morph_1.SyntaxKind.ArrowFunction) {
                    chunks.push(buildChunk(filePath, "arrow-function", decl.getName(), varStmt));
                }
                else if (varStmt.isExported()) {
                    // Exported constants (objects, arrays, primitives — not arrow functions)
                    chunks.push(buildChunk(filePath, "constant", decl.getName(), varStmt));
                }
            }
        }
        // Extract interfaces
        for (const iface of sourceFile.getInterfaces()) {
            chunks.push(buildChunk(filePath, "interface", iface.getName(), iface));
        }
        // Extract type aliases
        for (const typeAlias of sourceFile.getTypeAliases()) {
            chunks.push(buildChunk(filePath, "type-alias", typeAlias.getName(), typeAlias));
        }
        return chunks;
    }
    finally {
        // Always detach the file from the shared project — even when extraction
        // throws — so repeated calls do not accumulate source files in memory.
        project.removeSourceFile(sourceFile);
    }
}
116
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":";;AAcA,8BA4GC;AA1HD,uCAAiE;AAEjE,uCAAmC;AAEnC;;;;;;GAMG;AAEH,MAAM,OAAO,GAAG,IAAI,kBAAO,CAAC,EAAE,2BAA2B,EAAE,IAAI,EAAE,CAAC,CAAC;AAEnE,SAAgB,SAAS,CAAC,QAAgB;IACxC,MAAM,UAAU,GAAG,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC;IACzD,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,8BAA8B;IAC9B,KAAK,MAAM,EAAE,IAAI,UAAU,CAAC,YAAY,EAAE,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAG,EAAE,CAAC,OAAO,EAAE,IAAI,WAAW,CAAC;QACzC,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,GAAG,QAAQ,IAAI,IAAI,EAAE;YACzB,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,UAAU;YAChB,IAAI;YACJ,IAAI;YACJ,IAAI,EAAE,IAAA,UAAG,EAAC,IAAI,CAAC;SAChB,CAAC,CAAC;IACL,CAAC;IAED,oCAAoC;IACpC,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,UAAU,EAAE,EAAE,CAAC;QAC1C,MAAM,SAAS,GAAG,GAAG,CAAC,OAAO,EAAE,IAAI,WAAW,CAAC;QAC/C,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAC3C,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,GAAG,QAAQ,IAAI,SAAS,EAAE;YAC9B,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,OAAO;YACb,IAAI,EAAE,SAAS;YACf,IAAI,EAAE,SAAS;YACf,IAAI,EAAE,IAAA,UAAG,EAAC,SAAS,CAAC;SACrB,CAAC,CAAC;QAEH,KAAK,MAAM,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,EAAE,CAAC;YACtC,MAAM,UAAU,GAAG,GAAG,SAAS,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;YACtD,MAAM,UAAU,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;YAC/C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,GAAG,QAAQ,IAAI,UAAU,EAAE;gBAC/B,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,UAAU;gBAChB,IAAI,EAAE,UAAU;gBAChB,IAAI,EAAE,IAAA,UAAG,EAAC,UAAU,CAAC;aACtB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,gEAAgE;IAChE,KAAK,MAAM,OAAO,IAAI,UAAU,CAAC,qBAAqB,EAAE,EAAE,CAAC;QACzD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;YAC7C,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;YACnC,IAAI,IAAI,IAAI,IAAI,CAAC,OAAO,EAAE,KAAK,qBAAU,CAAC,aAAa,EAAE,CAAC;gBACxD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;gBAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC1C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,GAAG,QAAQ,IAAI,IAAI,EAAE;oBACzB,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,gBAAgB;o
BACtB,IAAI;oBACJ,IAAI;oBACJ,IAAI,EAAE,IAAA,UAAG,EAAC,IAAI,CAAC;iBAChB,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,OAAO,CAAC,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC;gBACxC,yEAAyE;gBACzE,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;gBAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;gBAC1C,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,GAAG,QAAQ,IAAI,IAAI,EAAE;oBACzB,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,UAAU;oBAChB,IAAI;oBACJ,IAAI;oBACJ,IAAI,EAAE,IAAA,UAAG,EAAC,IAAI,CAAC;iBAChB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,qBAAqB;IACrB,KAAK,MAAM,KAAK,IAAI,UAAU,CAAC,aAAa,EAAE,EAAE,CAAC;QAC/C,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,GAAG,QAAQ,IAAI,IAAI,EAAE;YACzB,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,WAAW;YACjB,IAAI;YACJ,IAAI;YACJ,IAAI,EAAE,IAAA,UAAG,EAAC,IAAI,CAAC;SAChB,CAAC,CAAC;IACL,CAAC;IAED,uBAAuB;IACvB,KAAK,MAAM,SAAS,IAAI,UAAU,CAAC,cAAc,EAAE,EAAE,CAAC;QACpD,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,SAAS,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,GAAG,QAAQ,IAAI,IAAI,EAAE;YACzB,IAAI,EAAE,QAAQ;YACd,IAAI,EAAE,YAAY;YAClB,IAAI;YACJ,IAAI;YACJ,IAAI,EAAE,IAAA,UAAG,EAAC,IAAI,CAAC;SAChB,CAAC,CAAC;IACL,CAAC;IAED,mDAAmD;IACnD,OAAO,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;IAErC,OAAO,MAAM,CAAC;AAChB,CAAC"}
package/dist/cli.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}