seu-claude 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/CHANGELOG.md +37 -0
  2. package/LICENSE +21 -0
  3. package/README.md +296 -0
  4. package/dist/index.d.ts +3 -0
  5. package/dist/index.d.ts.map +1 -0
  6. package/dist/index.js +41 -0
  7. package/dist/index.js.map +1 -0
  8. package/dist/indexer/chunker.d.ts +31 -0
  9. package/dist/indexer/chunker.d.ts.map +1 -0
  10. package/dist/indexer/chunker.js +184 -0
  11. package/dist/indexer/chunker.js.map +1 -0
  12. package/dist/indexer/crawler.d.ts +27 -0
  13. package/dist/indexer/crawler.d.ts.map +1 -0
  14. package/dist/indexer/crawler.js +105 -0
  15. package/dist/indexer/crawler.js.map +1 -0
  16. package/dist/indexer/parser.d.ts +28 -0
  17. package/dist/indexer/parser.d.ts.map +1 -0
  18. package/dist/indexer/parser.js +232 -0
  19. package/dist/indexer/parser.js.map +1 -0
  20. package/dist/server.d.ts +22 -0
  21. package/dist/server.d.ts.map +1 -0
  22. package/dist/server.js +220 -0
  23. package/dist/server.js.map +1 -0
  24. package/dist/tools/index-codebase.d.ts +25 -0
  25. package/dist/tools/index-codebase.d.ts.map +1 -0
  26. package/dist/tools/index-codebase.js +99 -0
  27. package/dist/tools/index-codebase.js.map +1 -0
  28. package/dist/tools/read-context.d.ts +30 -0
  29. package/dist/tools/read-context.d.ts.map +1 -0
  30. package/dist/tools/read-context.js +81 -0
  31. package/dist/tools/read-context.js.map +1 -0
  32. package/dist/tools/search-codebase.d.ts +30 -0
  33. package/dist/tools/search-codebase.d.ts.map +1 -0
  34. package/dist/tools/search-codebase.js +68 -0
  35. package/dist/tools/search-codebase.js.map +1 -0
  36. package/dist/utils/config.d.ts +14 -0
  37. package/dist/utils/config.d.ts.map +1 -0
  38. package/dist/utils/config.js +84 -0
  39. package/dist/utils/config.js.map +1 -0
  40. package/dist/utils/logger.d.ts +16 -0
  41. package/dist/utils/logger.d.ts.map +1 -0
  42. package/dist/utils/logger.js +47 -0
  43. package/dist/utils/logger.js.map +1 -0
  44. package/dist/utils/validation.d.ts +39 -0
  45. package/dist/utils/validation.d.ts.map +1 -0
  46. package/dist/utils/validation.js +114 -0
  47. package/dist/utils/validation.js.map +1 -0
  48. package/dist/vector/embed.d.ts +18 -0
  49. package/dist/vector/embed.d.ts.map +1 -0
  50. package/dist/vector/embed.js +196 -0
  51. package/dist/vector/embed.js.map +1 -0
  52. package/dist/vector/store.d.ts +33 -0
  53. package/dist/vector/store.d.ts.map +1 -0
  54. package/dist/vector/store.js +189 -0
  55. package/dist/vector/store.js.map +1 -0
  56. package/languages/tree-sitter-cpp.wasm +0 -0
  57. package/languages/tree-sitter-go.wasm +0 -0
  58. package/languages/tree-sitter-java.wasm +0 -0
  59. package/languages/tree-sitter-javascript.wasm +0 -0
  60. package/languages/tree-sitter-python.wasm +0 -0
  61. package/languages/tree-sitter-rust.wasm +0 -0
  62. package/languages/tree-sitter-typescript.wasm +0 -0
  63. package/package.json +84 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,37 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [1.0.0] - 2026-01-15
9
+
10
+ ### Added
11
+ - 🎉 Initial stable release
12
+ - **MCP Server** - Full Model Context Protocol implementation for Claude Code/Desktop
13
+ - **Semantic Indexing** - AST-based code chunking using Tree-sitter
14
+ - **Vector Search** - LanceDB-powered similarity search with 384-dimension embeddings
15
+ - **Multi-language Support** - TypeScript, JavaScript, Python, Java, C/C++, Go, Rust
16
+ - **Three MCP Tools**:
17
+ - `index_codebase` - Index entire codebase with incremental updates
18
+ - `search_codebase` - Semantic search across indexed code
19
+ - `read_semantic_context` - Read code with AST-aware context
20
+
21
+ ### Technical Details
22
+ - **Embedding Model**: `Xenova/all-MiniLM-L6-v2` (no authentication required)
23
+ - **Vector Dimensions**: 384
24
+ - **Test Coverage**: 72.72% (214 tests passing)
25
+ - **Memory Usage**: ~100MB idle, ~500MB during indexing
26
+
27
+ ### Performance
28
+ - Indexed 26-file TypeScript project in 5.39 seconds
29
+ - Created 359 semantic chunks
30
+ - Query latency ~50ms
31
+
32
+ ## [0.1.0] - 2026-01-14
33
+
34
+ ### Added
35
+ - Initial development release
36
+ - Core indexing and search functionality
37
+ - Basic MCP integration
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 jardhel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,296 @@
1
+ # seu-claude
2
+
3
+ **S**elf-**E**volving **U**nderstanding for Claude - A Local Codebase RAG MCP Server
4
+
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+ [![Node.js Version](https://img.shields.io/badge/node-%3E%3D20.0.0-brightgreen)](https://nodejs.org)
7
+
8
+ > Give Claude Code deep, proactive understanding of your entire codebase - not just the files it has touched.
9
+
10
+ ## The Problem
11
+
12
+ Current memory plugins for Claude Code suffer from "goldfish memory":
13
+ - They only remember files the AI has explicitly accessed
14
+ - Heavy resource usage (35GB+ RAM with in-memory vector databases)
15
+ - Complex Python dependencies that conflict with your environment
16
+ - Zombie processes that accumulate over time
17
+
18
+ **seu-claude** solves this by implementing **proactive semantic indexing** - your entire codebase is parsed, understood, and made searchable before Claude even asks.
19
+
20
+ ## Features
21
+
22
+ - 🧠 **AST-Based Semantic Chunking** - Uses Tree-sitter to understand code structure, not just text
23
+ - 💾 **Minimal Resource Usage** - LanceDB's disk-based, zero-copy architecture keeps idle RAM around 100MB (roughly 500MB while indexing)
24
+ - 🔒 **100% Local** - All processing happens on your machine, no data leaves
25
+ - ⚡ **Incremental Indexing** - Only re-processes changed files
26
+ - 🌐 **Multi-Language Support** - TypeScript, JavaScript, Python, Rust, Go, Java, C/C++, and more
27
+ - 🔌 **Native MCP Integration** - Works with Claude Code and Claude Desktop
28
+
29
+ ## Quick Start
30
+
31
+ ### Installation
32
+
33
+ ```bash
34
+ # Install globally
35
+ npm install -g seu-claude
36
+
37
+ # Or use npx
38
+ npx seu-claude
39
+ ```
40
+
41
+ ### Configuration
42
+
43
+ #### Claude Code
44
+
45
+ Add to your project's `.claude/settings.json`:
46
+
47
+ ```json
48
+ {
49
+ "mcpServers": {
50
+ "seu-claude": {
51
+ "command": "npx",
52
+ "args": ["seu-claude"],
53
+ "env": {
54
+ "PROJECT_ROOT": "."
55
+ }
56
+ }
57
+ }
58
+ }
59
+ ```
60
+
61
+ #### Claude Desktop
62
+
63
+ Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
64
+
65
+ ```json
66
+ {
67
+ "mcpServers": {
68
+ "seu-claude": {
69
+ "command": "npx",
70
+ "args": ["seu-claude"],
71
+ "env": {
72
+ "PROJECT_ROOT": "/path/to/your/project"
73
+ }
74
+ }
75
+ }
76
+ }
77
+ ```
78
+
79
+ ### First Run
80
+
81
+ Once configured, Claude will have access to three new tools:
82
+
83
+ 1. **Index your codebase** (run once, then incremental):
84
+ > "Index this codebase for semantic search"
85
+
86
+ 2. **Search semantically**:
87
+ > "Where is the user authentication logic?"
88
+ > "Find all database connection handling code"
89
+ > "Show me how API rate limiting is implemented"
90
+
91
+ 3. **Read with context**:
92
+ > "Read the AuthService.login method with its surrounding context"
93
+
94
+ ## How It Works
95
+
96
+ ### Architecture
97
+
98
+ ```
99
+ ┌─────────────────────────────────────────────────────────────┐
100
+ │ Claude Code / Desktop │
101
+ └─────────────────────────────────────────────────────────────┘
102
+ │ MCP Protocol (stdio)
103
+
104
+ ┌─────────────────────────────────────────────────────────────┐
105
+ │ seu-claude MCP Server │
106
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
107
+ │ │ Tools │ │ Indexer │ │ Vector Store │ │
108
+ │ │ - search │ │ - crawler │ │ (LanceDB) │ │
109
+ │ │ - index │ │ - parser │ │ - zero-copy │ │
110
+ │ │ - context │ │ - chunker │ │ - disk-based │ │
111
+ │ └─────────────┘ └─────────────┘ └─────────────────────┘ │
112
+ │ │ │
113
+ │ ┌──────────┴──────────┐ │
114
+ │ │ Embedding Engine │ │
115
+ │ │ (Transformers.js) │ │
116
+ │ │ - local inference │ │
117
+ │ │ - 384-dim vectors │ │
118
+ │ └─────────────────────┘ │
119
+ └─────────────────────────────────────────────────────────────┘
120
+ ```
121
+
122
+ ### Semantic Chunking (cAST)
123
+
124
+ Unlike naive text splitting that breaks code mid-function, seu-claude uses Abstract Syntax Tree analysis to create semantically meaningful chunks:
125
+
126
+ | Code Structure | Chunking Logic | Metadata |
127
+ |----------------|----------------|----------|
128
+ | Function | Complete function with signature | Type, Name, Scope |
129
+ | Class | Header + methods as separate chunks | Type, Name, Methods |
130
+ | Interface | Complete definition | Type, Module |
131
+ | Method | Full body with context | Parent Class, Signature |
132
+
133
+ ### Technology Stack
134
+
135
+ | Component | Technology | Why |
136
+ |-----------|------------|-----|
137
+ | Runtime | Node.js 20+ | Native MCP compatibility |
138
+ | Parser | web-tree-sitter | WASM-based, multi-language |
139
+ | Vector DB | LanceDB | Disk-based, <100MB RAM |
140
+ | Embeddings | Transformers.js | Local, GPU-accelerated |
141
+
142
+ ## MCP Tools
143
+
144
+ ### `index_codebase`
145
+
146
+ Scans and indexes your entire codebase for semantic search.
147
+
148
+ ```typescript
149
+ {
150
+ path?: string; // Project root (defaults to PROJECT_ROOT env)
151
+ force?: boolean; // Force full re-index (default: incremental)
152
+ }
153
+ ```
154
+
155
+ ### `search_codebase`
156
+
157
+ Semantic search across your indexed code.
158
+
159
+ ```typescript
160
+ {
161
+ query: string; // Natural language query
162
+ limit?: number; // Max results (default: 10)
163
+ filter_type?: string; // "function" | "class" | "method" | etc.
164
+ filter_language?: string; // "typescript" | "python" | etc.
165
+ }
166
+ ```
167
+
168
+ ### `read_semantic_context`
169
+
170
+ Read code with AST-aware context.
171
+
172
+ ```typescript
173
+ {
174
+ file_path: string; // Absolute file path
175
+ symbol?: string; // Focus on specific function/class
176
+ context_lines?: number; // Lines of context (default: 5)
177
+ }
178
+ ```
179
+
180
+ ## Configuration
181
+
182
+ ### Environment Variables
183
+
184
+ | Variable | Default | Description |
185
+ |----------|---------|-------------|
186
+ | `PROJECT_ROOT` | Current directory | Root of codebase to index |
187
+ | `DATA_DIR` | `~/.seu-claude` | Where to store index data |
188
+ | `EMBEDDING_MODEL` | `Xenova/all-MiniLM-L6-v2` | HuggingFace model |
189
+ | `EMBEDDING_DIMENSIONS` | `384` | Vector dimensions |
190
+ | `LOG_LEVEL` | `info` | debug, info, warn, error |
191
+
192
+ ### Ignore Patterns
193
+
194
+ Create a `.claudeignore` file in your project root to exclude files:
195
+
196
+ ```
197
+ # Ignore test fixtures
198
+ **/fixtures/**
199
+
200
+ # Ignore generated code
201
+ **/generated/**
202
+
203
+ # Ignore specific large files
204
+ path/to/large/file.ts
205
+ ```
206
+
207
+ ## Performance
208
+
209
+ ### Benchmark Results (seu-claude codebase - 26 TypeScript files)
210
+
211
+ | Metric | Result |
212
+ |--------|--------|
213
+ | Indexing time | 5.39s |
214
+ | Files processed | 26 |
215
+ | Chunks created | 359 |
216
+ | Memory (idle) | ~100MB |
217
+ | Memory (indexing) | ~500MB |
218
+ | Query latency | ~50ms |
219
+
220
+ ### Comparison
221
+
222
+ | Metric | seu-claude | Traditional RAG |
223
+ |--------|------------|-----------------|
224
+ | RAM (idle) | ~100MB | 35GB+ |
225
+ | RAM (indexing) | ~500MB | N/A |
226
+ | Index time (26 files) | ~5s | Minutes |
227
+ | Query latency | ~50ms | <10ms |
228
+ | Startup time | <2s | 30s+ |
229
+ | Dependencies | Node.js only | Python + CUDA |
230
+
231
+ ## Development
232
+
233
+ ### Building from Source
234
+
235
+ ```bash
236
+ git clone https://github.com/jardhel/seu-claude.git
237
+ cd seu-claude
238
+ npm install
239
+ npm run build
240
+ ```
241
+
242
+ ### Project Structure
243
+
244
+ ```
245
+ seu-claude/
246
+ ├── src/
247
+ │ ├── index.ts # Entry point
248
+ │ ├── server.ts # MCP server
249
+ │ ├── indexer/
250
+ │ │ ├── crawler.ts # File enumeration
251
+ │ │ ├── parser.ts # Tree-sitter AST
252
+ │ │ └── chunker.ts # Semantic chunking
253
+ │ ├── vector/
254
+ │ │ ├── store.ts # LanceDB operations
255
+ │ │ └── embed.ts # Transformers.js
256
+ │ └── tools/
257
+ │ ├── index-codebase.ts
258
+ │ ├── search-codebase.ts
259
+ │ └── read-context.ts
260
+ ├── languages/ # Tree-sitter WASM grammars
261
+ └── models/ # Downloaded embedding models
262
+ ```
263
+
264
+ ### Running Tests
265
+
266
+ ```bash
267
+ npm test
268
+ ```
269
+
270
+ ## Roadmap
271
+
272
+ - [ ] Language Server Protocol integration for better symbol resolution
273
+ - [ ] Git-aware indexing (prioritize recent changes)
274
+ - [ ] Cross-reference graph (callers/callees)
275
+ - [ ] VSCode extension for index management
276
+ - [ ] Support for more languages (Kotlin, Swift, PHP)
277
+
278
+ ## Contributing
279
+
280
+ Contributions are welcome! Please read our [Contributing Guidelines](CONTRIBUTING.md) before submitting PRs.
281
+
282
+ ## License
283
+
284
+ MIT License - see [LICENSE](LICENSE) for details.
285
+
286
+ ## Acknowledgments
287
+
288
+ - Inspired by the challenges documented in [claude-mem](https://github.com/anthropics/claude-mem) discussions
289
+ - Built on the excellent [Model Context Protocol](https://modelcontextprotocol.io/)
290
+ - AST parsing powered by [Tree-sitter](https://tree-sitter.github.io/tree-sitter/)
291
+ - Vector search by [LanceDB](https://lancedb.com/)
292
+ - Local embeddings via [Transformers.js](https://huggingface.co/docs/transformers.js)
293
+
294
+ ---
295
+
296
+ **seu-claude** - Because your AI coding assistant should know your codebase as well as you do.
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
package/dist/index.js ADDED
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env node
2
+ import { SeuClaudeServer } from './server.js';
3
+ import { logger } from './utils/logger.js';
4
const log = logger.child('main');

/**
 * Process entry point: creates the MCP server, installs signal and error
 * handlers, then starts the server.
 *
 * Exit codes: 0 after a clean signal-triggered shutdown; 1 when startup
 * fails, when `server.stop()` throws, or when shutdown was triggered by an
 * uncaught exception.
 */
async function main() {
    const server = new SeuClaudeServer();

    // Shared shutdown state: `stopping` makes shutdown idempotent (a second
    // Ctrl-C must not call server.stop() again), and `exitCode` remembers the
    // worst reason seen so far so a crash is never reported as success.
    let stopping = false;
    let exitCode = 0;

    /**
     * Stops the server once and exits the process.
     * @param signal Name of the signal/event that triggered the shutdown (for logging).
     * @param code Exit code to use if the server stops cleanly (default 0).
     */
    const shutdown = async (signal, code = 0) => {
        exitCode = Math.max(exitCode, code);
        if (stopping) {
            return;
        }
        stopping = true;
        log.info(`Received ${signal}, shutting down...`);
        try {
            await server.stop();
            process.exit(exitCode);
        }
        catch (err) {
            log.error('Error during shutdown:', err);
            process.exit(1);
        }
    };

    process.on('SIGINT', () => void shutdown('SIGINT'));
    process.on('SIGTERM', () => void shutdown('SIGTERM'));

    // An uncaught exception leaves the process in an undefined state: stop
    // the server, but report failure to the parent process.
    process.on('uncaughtException', err => {
        log.error('Uncaught exception:', err);
        void shutdown('uncaughtException', 1);
    });

    // Rejections are logged only; the server keeps running.
    process.on('unhandledRejection', (reason, promise) => {
        log.error('Unhandled rejection at:', promise, 'reason:', reason);
    });

    try {
        await server.start();
    }
    catch (err) {
        log.error('Failed to start server:', err);
        process.exit(1);
    }
}

// Safety net for anything main() itself lets escape.
main().catch(err => {
    log.error('Fatal error:', err);
    process.exit(1);
});
41
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;AAEjC,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;IAErC,6BAA6B;IAC7B,MAAM,QAAQ,GAAG,KAAK,EAAE,MAAc,EAAE,EAAE;QACxC,GAAG,CAAC,IAAI,CAAC,YAAY,MAAM,oBAAoB,CAAC,CAAC;QACjD,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,wBAAwB,EAAE,GAAG,CAAC,CAAC;YACzC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC,CAAC;IAEF,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,KAAK,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;IAEtD,yBAAyB;IACzB,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,GAAG,CAAC,EAAE;QACpC,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,GAAG,CAAC,CAAC;QACtC,KAAK,QAAQ,CAAC,mBAAmB,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACnD,GAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,GAAG,CAAC,CAAC;QAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;IACjB,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;IAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,31 @@
1
+ import { Config } from '../utils/config.js';
2
// One indexable piece of a source file, as produced by SemanticChunker.
+ export interface CodeChunk {
3
// First 16 hex characters of a SHA-256 over "filePath:startLine:endLine:content".
+ id: string;
4
// File path exactly as passed to SemanticChunker.chunkFile().
+ filePath: string;
5
// Relative path exactly as passed to chunkFile(); also used as `scope` for fallback chunks.
+ relativePath: string;
6
// Chunk text. AST-derived chunks are prefixed with a "// Scope: ..." line and the
// docstring (when present); fallback chunks contain the raw file lines.
+ code: string;
7
// First line of the chunk. Fallback chunks use 1-based numbering; AST chunks copy
// the parser node's value (presumably also 1-based — confirm against parser.js).
+ startLine: number;
8
// Last line of the chunk (inclusive for fallback chunks).
+ endLine: number;
9
// Language identifier exactly as passed to chunkFile().
+ language: string;
10
// Normalized node category ("function", "class", "method", ...), the raw parser
// node type when no mapping exists, or "block" for fallback chunks.
+ type: string;
11
// Node name; "<name>_part<N>" for pieces of a split chunk; null for fallback chunks.
+ name: string | null;
12
// Dot-joined enclosing scope names for AST chunks; the relative path for fallback chunks.
+ scope: string;
13
// Node docstring, or null. Only the first piece of a split chunk keeps it.
+ docstring: string | null;
14
// Rough size estimate: ceil(characters / 4).
+ tokenEstimate: number;
15
+ }
16
// Splits source files into CodeChunk records: one chunk per extracted AST node,
// with a line-window fallback when parsing yields nothing usable.
+ export declare class SemanticChunker {
17
// ASTParser instance created in the constructor.
+ private parser;
18
// Configuration; the implementation reads maxChunkTokens and minChunkLines.
+ private config;
19
// Child logger named "chunker".
+ private log;
20
// languagesDir is forwarded unchanged to the ASTParser constructor.
+ constructor(config: Config, languagesDir?: string);
21
// Initializes the underlying parser; await this before calling chunkFile().
+ initialize(): Promise<void>;
22
// Parses `content` and returns its chunks. Nodes estimated above maxChunkTokens are
// split into line-based parts; nodes below the minimum size are dropped. Falls back
// to overlapping line windows when parsing fails or no chunk survives.
+ chunkFile(filePath: string, relativePath: string, content: string, language: string): Promise<CodeChunk[]>;
23
+ private nodeToChunk;
24
+ private splitLargeChunk;
25
+ private createSubChunk;
26
+ private fallbackChunk;
27
+ private generateChunkId;
28
+ private estimateTokens;
29
+ private normalizeNodeType;
30
+ }
31
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAI5C,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,GAAG,CAA2B;gBAE1B,MAAM,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM;IAK3C,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAI3B,SAAS,CACb,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,SAAS,EAAE,CAAC;IAsCvB,OAAO,CAAC,WAAW;IA0CnB,OAAO,CAAC,eAAe;IAwCvB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,aAAa;IAsCrB,OAAO,CAAC,eAAe;IAUvB,OAAO,CAAC,cAAc;IAKtB,OAAO,CAAC,iBAAiB;CAiC1B"}
@@ -0,0 +1,184 @@
1
+ import { ASTParser } from './parser.js';
2
+ import { logger } from '../utils/logger.js';
3
+ import { createHash } from 'crypto';
4
+ export class SemanticChunker {
5
+ parser;
6
+ config;
7
+ log = logger.child('chunker');
8
+ constructor(config, languagesDir) {
9
+ this.config = config;
10
+ this.parser = new ASTParser(languagesDir);
11
+ }
12
+ async initialize() {
13
+ await this.parser.initialize();
14
+ }
15
+ async chunkFile(filePath, relativePath, content, language) {
16
+ const tree = await this.parser.parse(content, language);
17
+ if (!tree) {
18
+ // Fallback to simple chunking if parsing fails
19
+ return this.fallbackChunk(filePath, relativePath, content, language);
20
+ }
21
+ const nodes = this.parser.extractNodes(tree, language);
22
+ const chunks = [];
23
+ if (nodes.length === 0) {
24
+ // No extractable nodes, use fallback
25
+ return this.fallbackChunk(filePath, relativePath, content, language);
26
+ }
27
+ for (const node of nodes) {
28
+ const chunk = this.nodeToChunk(node, filePath, relativePath, language);
29
+ // Check if chunk is too large and needs splitting
30
+ if (chunk.tokenEstimate > this.config.maxChunkTokens) {
31
+ const subChunks = this.splitLargeChunk(chunk, content);
32
+ chunks.push(...subChunks);
33
+ }
34
+ else if (chunk.tokenEstimate >= this.config.minChunkLines * 10) {
35
+ // Only include chunks with sufficient content
36
+ chunks.push(chunk);
37
+ }
38
+ }
39
+ // If no chunks were created from AST, use fallback
40
+ if (chunks.length === 0) {
41
+ return this.fallbackChunk(filePath, relativePath, content, language);
42
+ }
43
+ this.log.debug(`Created ${chunks.length} chunks from ${relativePath}`);
44
+ return chunks;
45
+ }
46
+ nodeToChunk(node, filePath, relativePath, language) {
47
+ const scope = node.scope.join('.');
48
+ const tokenEstimate = this.estimateTokens(node.text);
49
+ // Create context-enriched text for better embeddings
50
+ let enrichedText = '';
51
+ // Add scope context
52
+ if (scope) {
53
+ enrichedText += `// Scope: ${scope}\n`;
54
+ }
55
+ // Add docstring if available
56
+ if (node.docstring) {
57
+ enrichedText += node.docstring + '\n';
58
+ }
59
+ enrichedText += node.text;
60
+ const id = this.generateChunkId(filePath, node.startLine, node.endLine, node.text);
61
+ return {
62
+ id,
63
+ filePath,
64
+ relativePath,
65
+ code: enrichedText,
66
+ startLine: node.startLine,
67
+ endLine: node.endLine,
68
+ language,
69
+ type: this.normalizeNodeType(node.type),
70
+ name: node.name,
71
+ scope,
72
+ docstring: node.docstring,
73
+ tokenEstimate,
74
+ };
75
+ }
76
+ splitLargeChunk(chunk, _fullContent) {
77
+ const lines = chunk.code.split('\n');
78
+ const chunks = [];
79
+ const maxLines = Math.floor(this.config.maxChunkTokens / 4); // Rough estimate: 4 tokens per line
80
+ let currentLines = [];
81
+ let currentStartLine = chunk.startLine;
82
+ for (let i = 0; i < lines.length; i++) {
83
+ currentLines.push(lines[i]);
84
+ if (currentLines.length >= maxLines) {
85
+ const subChunk = this.createSubChunk(chunk, currentLines.join('\n'), currentStartLine, currentStartLine + currentLines.length - 1, chunks.length);
86
+ chunks.push(subChunk);
87
+ currentLines = [];
88
+ currentStartLine = chunk.startLine + i + 1;
89
+ }
90
+ }
91
+ // Don't forget remaining lines
92
+ if (currentLines.length > 0) {
93
+ const subChunk = this.createSubChunk(chunk, currentLines.join('\n'), currentStartLine, currentStartLine + currentLines.length - 1, chunks.length);
94
+ chunks.push(subChunk);
95
+ }
96
+ return chunks;
97
+ }
98
+ createSubChunk(parent, code, startLine, endLine, index) {
99
+ return {
100
+ id: this.generateChunkId(parent.filePath, startLine, endLine, code),
101
+ filePath: parent.filePath,
102
+ relativePath: parent.relativePath,
103
+ code,
104
+ startLine,
105
+ endLine,
106
+ language: parent.language,
107
+ type: parent.type,
108
+ name: parent.name ? `${parent.name}_part${index}` : null,
109
+ scope: parent.scope,
110
+ docstring: index === 0 ? parent.docstring : null,
111
+ tokenEstimate: this.estimateTokens(code),
112
+ };
113
+ }
114
+ fallbackChunk(filePath, relativePath, content, language) {
115
+ const lines = content.split('\n');
116
+ const chunks = [];
117
+ const chunkSize = Math.floor(this.config.maxChunkTokens / 4);
118
+ const overlap = Math.floor(chunkSize / 4);
119
+ for (let i = 0; i < lines.length; i += chunkSize - overlap) {
120
+ const chunkLines = lines.slice(i, i + chunkSize);
121
+ const code = chunkLines.join('\n');
122
+ const startLine = i + 1;
123
+ const endLine = Math.min(i + chunkSize, lines.length);
124
+ if (code.trim().length > 0) {
125
+ chunks.push({
126
+ id: this.generateChunkId(filePath, startLine, endLine, code),
127
+ filePath,
128
+ relativePath,
129
+ code,
130
+ startLine,
131
+ endLine,
132
+ language,
133
+ type: 'block',
134
+ name: null,
135
+ scope: relativePath,
136
+ docstring: null,
137
+ tokenEstimate: this.estimateTokens(code),
138
+ });
139
+ }
140
+ }
141
+ return chunks;
142
+ }
143
+ generateChunkId(filePath, startLine, endLine, content) {
144
+ const input = `${filePath}:${startLine}:${endLine}:${content}`;
145
+ return createHash('sha256').update(input).digest('hex').slice(0, 16);
146
+ }
147
+ estimateTokens(text) {
148
+ // Rough estimate: ~4 characters per token for code
149
+ return Math.ceil(text.length / 4);
150
+ }
151
+ normalizeNodeType(type) {
152
+ // Normalize different language-specific types to common categories
153
+ const typeMap = {
154
+ function_declaration: 'function',
155
+ function_definition: 'function',
156
+ function_item: 'function',
157
+ arrow_function: 'function',
158
+ method_definition: 'method',
159
+ method_declaration: 'method',
160
+ class_declaration: 'class',
161
+ class_definition: 'class',
162
+ class_specifier: 'class',
163
+ interface_declaration: 'interface',
164
+ interface_type: 'interface',
165
+ type_alias_declaration: 'type',
166
+ type_declaration: 'type',
167
+ struct_item: 'struct',
168
+ struct_specifier: 'struct',
169
+ enum_declaration: 'enum',
170
+ enum_item: 'enum',
171
+ enum_specifier: 'enum',
172
+ impl_item: 'impl',
173
+ trait_item: 'trait',
174
+ mod_item: 'module',
175
+ module: 'module',
176
+ namespace_definition: 'namespace',
177
+ namespace_declaration: 'namespace',
178
+ export_statement: 'export',
179
+ decorated_definition: 'decorated',
180
+ };
181
+ return typeMap[type] || type;
182
+ }
183
+ }
184
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAc,MAAM,aAAa,CAAC;AAEpD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAiBpC,MAAM,OAAO,eAAe;IAClB,MAAM,CAAY;IAClB,MAAM,CAAS;IACf,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAEtC,YAAY,MAAc,EAAE,YAAqB;QAC/C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,IAAI,SAAS,CAAC,YAAY,CAAC,CAAC;IAC5C,CAAC;IAED,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,SAAS,CACb,QAAgB,EAChB,YAAoB,EACpB,OAAe,EACf,QAAgB;QAEhB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAExD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,+CAA+C;YAC/C,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACvE,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACvD,MAAM,MAAM,GAAgB,EAAE,CAAC;QAE/B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,qCAAqC;YACrC,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACvE,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC;YAEvE,kDAAkD;YAClD,IAAI,KAAK,CAAC,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;gBACrD,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;gBACvD,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;YAC5B,CAAC;iBAAM,IAAI,KAAK,CAAC,aAAa,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,GAAG,EAAE,EAAE,CAAC;gBACjE,8CAA8C;gBAC9C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,mDAAmD;QACnD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACvE,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,MAAM,CAAC,MAAM,gBAAgB,YAAY,EAAE,CAAC,CAAC;QACvE,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,WAAW,CACjB,IAAgB,EAChB,QAAgB,EAChB,YAAoB,EACpB,QAAgB;QAEhB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,IAA
I,CAAC,CAAC;QAErD,qDAAqD;QACrD,IAAI,YAAY,GAAG,EAAE,CAAC;QAEtB,oBAAoB;QACpB,IAAI,KAAK,EAAE,CAAC;YACV,YAAY,IAAI,aAAa,KAAK,IAAI,CAAC;QACzC,CAAC;QAED,6BAA6B;QAC7B,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,YAAY,IAAI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxC,CAAC;QAED,YAAY,IAAI,IAAI,CAAC,IAAI,CAAC;QAE1B,MAAM,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAEnF,OAAO;YACL,EAAE;YACF,QAAQ;YACR,YAAY;YACZ,IAAI,EAAE,YAAY;YAClB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,QAAQ;YACR,IAAI,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;YACvC,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK;YACL,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,aAAa;SACd,CAAC;IACJ,CAAC;IAEO,eAAe,CAAC,KAAgB,EAAE,YAAoB;QAC5D,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,oCAAoC;QAEjG,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,gBAAgB,GAAG,KAAK,CAAC,SAAS,CAAC;QAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAE5B,IAAI,YAAY,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;gBACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAClC,KAAK,EACL,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EACvB,gBAAgB,EAChB,gBAAgB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAC1C,MAAM,CAAC,MAAM,CACd,CAAC;gBACF,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACtB,YAAY,GAAG,EAAE,CAAC;gBAClB,gBAAgB,GAAG,KAAK,CAAC,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC;YAC7C,CAAC;QACH,CAAC;QAED,+BAA+B;QAC/B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAClC,KAAK,EACL,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EACvB,gBAAgB,EAChB,gBAAgB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAC1C,MAAM,CAAC,MAAM,CACd,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACxB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,cAAc,CACpB,MAAiB,EACjB,IAAY,EACZ,SAAiB,EACjB,OAAe,EACf,KAAa;QAEb,OAAO;YACL,EAAE,EAAE,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC;YACnE,QAAQ,EAAE,MAAM,CAAC,QAAQ;YA
CzB,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,IAAI;YACJ,SAAS;YACT,OAAO;YACP,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,QAAQ,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI;YACxD,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,SAAS,EAAE,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI;YAChD,aAAa,EAAE,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;SACzC,CAAC;IACJ,CAAC;IAEO,aAAa,CACnB,QAAgB,EAChB,YAAoB,EACpB,OAAe,EACf,QAAgB;QAEhB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;QAE1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,GAAG,OAAO,EAAE,CAAC;YAC3D,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;YACjD,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC;YACxB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;YAEtD,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC;oBAC5D,QAAQ;oBACR,YAAY;oBACZ,IAAI;oBACJ,SAAS;oBACT,OAAO;oBACP,QAAQ;oBACR,IAAI,EAAE,OAAO;oBACb,IAAI,EAAE,IAAI;oBACV,KAAK,EAAE,YAAY;oBACnB,SAAS,EAAE,IAAI;oBACf,aAAa,EAAE,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;iBACzC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,eAAe,CACrB,QAAgB,EAChB,SAAiB,EACjB,OAAe,EACf,OAAe;QAEf,MAAM,KAAK,GAAG,GAAG,QAAQ,IAAI,SAAS,IAAI,OAAO,IAAI,OAAO,EAAE,CAAC;QAC/D,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACvE,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,mDAAmD;QACnD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAEO,iBAAiB,CAAC,IAAY;QACpC,mEAAmE;QACnE,MAAM,OAAO,GAA2B;YACtC,oBAAoB,EAAE,UAAU;YAChC,mBAAmB,EAAE,UAAU;YAC/B,aAAa,EAAE,UAAU;YACzB,cAAc,EAAE,UAAU;Y
AC1B,iBAAiB,EAAE,QAAQ;YAC3B,kBAAkB,EAAE,QAAQ;YAC5B,iBAAiB,EAAE,OAAO;YAC1B,gBAAgB,EAAE,OAAO;YACzB,eAAe,EAAE,OAAO;YACxB,qBAAqB,EAAE,WAAW;YAClC,cAAc,EAAE,WAAW;YAC3B,sBAAsB,EAAE,MAAM;YAC9B,gBAAgB,EAAE,MAAM;YACxB,WAAW,EAAE,QAAQ;YACrB,gBAAgB,EAAE,QAAQ;YAC1B,gBAAgB,EAAE,MAAM;YACxB,SAAS,EAAE,MAAM;YACjB,cAAc,EAAE,MAAM;YACtB,SAAS,EAAE,MAAM;YACjB,UAAU,EAAE,OAAO;YACnB,QAAQ,EAAE,QAAQ;YAClB,MAAM,EAAE,QAAQ;YAChB,oBAAoB,EAAE,WAAW;YACjC,qBAAqB,EAAE,WAAW;YAClC,gBAAgB,EAAE,QAAQ;YAC1B,oBAAoB,EAAE,WAAW;SAClC,CAAC;QAEF,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;IAC/B,CAAC;CACF"}
@@ -0,0 +1,27 @@
1
+ import { Config } from '../utils/config.js';
2
// Metadata for one file, as listed in CrawlResult.files.
// NOTE(review): the crawler implementation is not visible here; the notes below
// are read off the declared names and types only — confirm against crawler.js.
+ export interface FileInfo {
3
// Presumably the full path of the file — TODO confirm.
+ path: string;
4
// Presumably the path relative to the project root — TODO confirm.
+ relativePath: string;
5
+ language: string;
6
// Presumably a content hash (see Crawler.hashContent) — TODO confirm.
+ hash: string;
7
// Presumably the file size in bytes — TODO confirm.
+ size: number;
8
+ modifiedAt: Date;
9
+ }
10
// Value resolved by Crawler.crawl().
+ export interface CrawlResult {
11
+ files: FileInfo[];
12
+ totalFiles: number;
13
// Presumably the sum of FileInfo.size over `files` — TODO confirm.
+ totalSize: number;
14
// Presumably a per-language file count keyed by language name — TODO confirm.
+ languages: Record<string, number>;
15
+ }
16
// Enumerates project files for indexing.
// NOTE(review): behaviour notes are inferred from the declared signatures only;
// the implementation (crawler.js) is not visible here.
+ export declare class Crawler {
17
+ private config;
18
+ private ignorer;
19
+ private log;
20
+ constructor(config: Config);
21
+ private setupIgnorePatterns;
22
// Presumably adds the project's .gitignore rules to the ignore set — TODO confirm.
+ loadGitignore(): Promise<void>;
23
// Resolves with the discovered files plus aggregate totals.
+ crawl(): Promise<CrawlResult>;
24
// Synchronously maps a content string to a hash string.
+ hashContent(content: string): string;
25
// Resolves with the file's content as a string.
+ getFileContent(filePath: string): Promise<string>;
26
+ }
27
+ //# sourceMappingURL=crawler.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"crawler.d.ts","sourceRoot":"","sources":["../../src/indexer/crawler.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,MAAM,EAA4B,MAAM,oBAAoB,CAAC;AAQtE,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,IAAI,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,GAAG,CAA2B;gBAE1B,MAAM,EAAE,MAAM;IAM1B,OAAO,CAAC,mBAAmB;IAKrB,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB9B,KAAK,IAAI,OAAO,CAAC,WAAW,CAAC;IAqEnC,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM;IAI9B,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAGxD"}