seu-claude 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/LICENSE +21 -0
- package/README.md +296 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/index.js.map +1 -0
- package/dist/indexer/chunker.d.ts +31 -0
- package/dist/indexer/chunker.d.ts.map +1 -0
- package/dist/indexer/chunker.js +184 -0
- package/dist/indexer/chunker.js.map +1 -0
- package/dist/indexer/crawler.d.ts +27 -0
- package/dist/indexer/crawler.d.ts.map +1 -0
- package/dist/indexer/crawler.js +105 -0
- package/dist/indexer/crawler.js.map +1 -0
- package/dist/indexer/parser.d.ts +28 -0
- package/dist/indexer/parser.d.ts.map +1 -0
- package/dist/indexer/parser.js +232 -0
- package/dist/indexer/parser.js.map +1 -0
- package/dist/server.d.ts +22 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +220 -0
- package/dist/server.js.map +1 -0
- package/dist/tools/index-codebase.d.ts +25 -0
- package/dist/tools/index-codebase.d.ts.map +1 -0
- package/dist/tools/index-codebase.js +99 -0
- package/dist/tools/index-codebase.js.map +1 -0
- package/dist/tools/read-context.d.ts +30 -0
- package/dist/tools/read-context.d.ts.map +1 -0
- package/dist/tools/read-context.js +81 -0
- package/dist/tools/read-context.js.map +1 -0
- package/dist/tools/search-codebase.d.ts +30 -0
- package/dist/tools/search-codebase.d.ts.map +1 -0
- package/dist/tools/search-codebase.js +68 -0
- package/dist/tools/search-codebase.js.map +1 -0
- package/dist/utils/config.d.ts +14 -0
- package/dist/utils/config.d.ts.map +1 -0
- package/dist/utils/config.js +84 -0
- package/dist/utils/config.js.map +1 -0
- package/dist/utils/logger.d.ts +16 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +47 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/validation.d.ts +39 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +114 -0
- package/dist/utils/validation.js.map +1 -0
- package/dist/vector/embed.d.ts +18 -0
- package/dist/vector/embed.d.ts.map +1 -0
- package/dist/vector/embed.js +196 -0
- package/dist/vector/embed.js.map +1 -0
- package/dist/vector/store.d.ts +33 -0
- package/dist/vector/store.d.ts.map +1 -0
- package/dist/vector/store.js +189 -0
- package/dist/vector/store.js.map +1 -0
- package/languages/tree-sitter-cpp.wasm +0 -0
- package/languages/tree-sitter-go.wasm +0 -0
- package/languages/tree-sitter-java.wasm +0 -0
- package/languages/tree-sitter-javascript.wasm +0 -0
- package/languages/tree-sitter-python.wasm +0 -0
- package/languages/tree-sitter-rust.wasm +0 -0
- package/languages/tree-sitter-typescript.wasm +0 -0
- package/package.json +84 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [1.0.0] - 2026-01-15
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- 🎉 Initial stable release
|
|
12
|
+
- **MCP Server** - Full Model Context Protocol implementation for Claude Code/Desktop
|
|
13
|
+
- **Semantic Indexing** - AST-based code chunking using Tree-sitter
|
|
14
|
+
- **Vector Search** - LanceDB-powered similarity search with 384-dimension embeddings
|
|
15
|
+
- **Multi-language Support** - TypeScript, JavaScript, Python, Java, C/C++, Go, Rust
|
|
16
|
+
- **Three MCP Tools**:
|
|
17
|
+
- `index_codebase` - Index entire codebase with incremental updates
|
|
18
|
+
- `search_codebase` - Semantic search across indexed code
|
|
19
|
+
- `read_semantic_context` - Read code with AST-aware context
|
|
20
|
+
|
|
21
|
+
### Technical Details
|
|
22
|
+
- **Embedding Model**: `Xenova/all-MiniLM-L6-v2` (no authentication required)
|
|
23
|
+
- **Vector Dimensions**: 384
|
|
24
|
+
- **Test Coverage**: 72.72% (214 tests passing)
|
|
25
|
+
- **Memory Usage**: ~100MB idle, ~500MB during indexing
|
|
26
|
+
|
|
27
|
+
### Performance
|
|
28
|
+
- Indexed 26-file TypeScript project in 5.39 seconds
|
|
29
|
+
- Created 359 semantic chunks
|
|
30
|
+
- Query latency ~50ms
|
|
31
|
+
|
|
32
|
+
## [0.1.0] - 2026-01-14
|
|
33
|
+
|
|
34
|
+
### Added
|
|
35
|
+
- Initial development release
|
|
36
|
+
- Core indexing and search functionality
|
|
37
|
+
- Basic MCP integration
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 jardhel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
# seu-claude
|
|
2
|
+
|
|
3
|
+
**S**elf-**E**volving **U**nderstanding for Claude - A Local Codebase RAG MCP Server
|
|
4
|
+
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Node.js 20+](https://nodejs.org)
|
|
7
|
+
|
|
8
|
+
> Give Claude Code deep, proactive understanding of your entire codebase - not just the files it has touched.
|
|
9
|
+
|
|
10
|
+
## The Problem
|
|
11
|
+
|
|
12
|
+
Current memory plugins for Claude Code suffer from "goldfish memory":
|
|
13
|
+
- They only remember files the AI has explicitly accessed
|
|
14
|
+
- Heavy resource usage (35GB+ RAM with in-memory vector databases)
|
|
15
|
+
- Complex Python dependencies that conflict with your environment
|
|
16
|
+
- Zombie processes that accumulate over time
|
|
17
|
+
|
|
18
|
+
**seu-claude** solves this by implementing **proactive semantic indexing** - your entire codebase is parsed, understood, and made searchable before Claude even asks.
|
|
19
|
+
|
|
20
|
+
## Features
|
|
21
|
+
|
|
22
|
+
- 🧠 **AST-Based Semantic Chunking** - Uses Tree-sitter to understand code structure, not just text
|
|
23
|
+
- 💾 **Minimal Resource Usage** - LanceDB's disk-based, zero-copy architecture keeps idle RAM around 100MB (about 500MB while indexing)
|
|
24
|
+
- 🔒 **100% Local** - All processing happens on your machine, no data leaves
|
|
25
|
+
- ⚡ **Incremental Indexing** - Only re-processes changed files
|
|
26
|
+
- 🌐 **Multi-Language Support** - TypeScript, JavaScript, Python, Rust, Go, Java, C/C++, and more
|
|
27
|
+
- 🔌 **Native MCP Integration** - Works with Claude Code and Claude Desktop
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
### Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Install globally
|
|
35
|
+
npm install -g seu-claude
|
|
36
|
+
|
|
37
|
+
# Or use npx
|
|
38
|
+
npx seu-claude
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Configuration
|
|
42
|
+
|
|
43
|
+
#### Claude Code
|
|
44
|
+
|
|
45
|
+
Add to your project's `.claude/settings.json`:
|
|
46
|
+
|
|
47
|
+
```json
|
|
48
|
+
{
|
|
49
|
+
"mcpServers": {
|
|
50
|
+
"seu-claude": {
|
|
51
|
+
"command": "npx",
|
|
52
|
+
"args": ["seu-claude"],
|
|
53
|
+
"env": {
|
|
54
|
+
"PROJECT_ROOT": "."
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
#### Claude Desktop
|
|
62
|
+
|
|
63
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows):
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"mcpServers": {
|
|
68
|
+
"seu-claude": {
|
|
69
|
+
"command": "npx",
|
|
70
|
+
"args": ["seu-claude"],
|
|
71
|
+
"env": {
|
|
72
|
+
"PROJECT_ROOT": "/path/to/your/project"
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### First Run
|
|
80
|
+
|
|
81
|
+
Once configured, Claude will have access to three new tools:
|
|
82
|
+
|
|
83
|
+
1. **Index your codebase** (run once, then incremental):
|
|
84
|
+
> "Index this codebase for semantic search"
|
|
85
|
+
|
|
86
|
+
2. **Search semantically**:
|
|
87
|
+
> "Where is the user authentication logic?"
|
|
88
|
+
> "Find all database connection handling code"
|
|
89
|
+
> "Show me how API rate limiting is implemented"
|
|
90
|
+
|
|
91
|
+
3. **Read with context**:
|
|
92
|
+
> "Read the AuthService.login method with its surrounding context"
|
|
93
|
+
|
|
94
|
+
## How It Works
|
|
95
|
+
|
|
96
|
+
### Architecture
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
100
|
+
│ Claude Code / Desktop │
|
|
101
|
+
└─────────────────────────────────────────────────────────────┘
|
|
102
|
+
│ MCP Protocol (stdio)
|
|
103
|
+
▼
|
|
104
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
105
|
+
│ seu-claude MCP Server │
|
|
106
|
+
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
|
|
107
|
+
│ │ Tools │ │ Indexer │ │ Vector Store │ │
|
|
108
|
+
│ │ - search │ │ - crawler │ │ (LanceDB) │ │
|
|
109
|
+
│ │ - index │ │ - parser │ │ - zero-copy │ │
|
|
110
|
+
│ │ - context │ │ - chunker │ │ - disk-based │ │
|
|
111
|
+
│ └─────────────┘ └─────────────┘ └─────────────────────┘ │
|
|
112
|
+
│ │ │
|
|
113
|
+
│ ┌──────────┴──────────┐ │
|
|
114
|
+
│ │ Embedding Engine │ │
|
|
115
|
+
│ │ (Transformers.js) │ │
|
|
116
|
+
│ │ - local inference │ │
|
|
117
|
+
│ │ - 384-dim vectors │ │
|
|
118
|
+
│ └─────────────────────┘ │
|
|
119
|
+
└─────────────────────────────────────────────────────────────┘
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Semantic Chunking (cAST)
|
|
123
|
+
|
|
124
|
+
Unlike naive text splitting that breaks code mid-function, seu-claude uses Abstract Syntax Tree analysis to create semantically meaningful chunks:
|
|
125
|
+
|
|
126
|
+
| Code Structure | Chunking Logic | Metadata |
|
|
127
|
+
|----------------|----------------|----------|
|
|
128
|
+
| Function | Complete function with signature | Type, Name, Scope |
|
|
129
|
+
| Class | Header + methods as separate chunks | Type, Name, Methods |
|
|
130
|
+
| Interface | Complete definition | Type, Module |
|
|
131
|
+
| Method | Full body with context | Parent Class, Signature |
|
|
132
|
+
|
|
133
|
+
### Technology Stack
|
|
134
|
+
|
|
135
|
+
| Component | Technology | Why |
|
|
136
|
+
|-----------|------------|-----|
|
|
137
|
+
| Runtime | Node.js 20+ | Native MCP compatibility |
|
|
138
|
+
| Parser | web-tree-sitter | WASM-based, multi-language |
|
|
139
|
+
| Vector DB | LanceDB | Disk-based, <100MB RAM |
|
|
140
|
+
| Embeddings | Transformers.js | Local, GPU-accelerated |
|
|
141
|
+
|
|
142
|
+
## MCP Tools
|
|
143
|
+
|
|
144
|
+
### `index_codebase`
|
|
145
|
+
|
|
146
|
+
Scans and indexes your entire codebase for semantic search.
|
|
147
|
+
|
|
148
|
+
```typescript
|
|
149
|
+
{
|
|
150
|
+
path?: string; // Project root (defaults to PROJECT_ROOT env)
|
|
151
|
+
force?: boolean; // Force full re-index (default: incremental)
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### `search_codebase`
|
|
156
|
+
|
|
157
|
+
Semantic search across your indexed code.
|
|
158
|
+
|
|
159
|
+
```typescript
|
|
160
|
+
{
|
|
161
|
+
query: string; // Natural language query
|
|
162
|
+
limit?: number; // Max results (default: 10)
|
|
163
|
+
filter_type?: string; // "function" | "class" | "method" | etc.
|
|
164
|
+
filter_language?: string; // "typescript" | "python" | etc.
|
|
165
|
+
}
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### `read_semantic_context`
|
|
169
|
+
|
|
170
|
+
Read code with AST-aware context.
|
|
171
|
+
|
|
172
|
+
```typescript
|
|
173
|
+
{
|
|
174
|
+
file_path: string; // Absolute file path
|
|
175
|
+
symbol?: string; // Focus on specific function/class
|
|
176
|
+
context_lines?: number; // Lines of context (default: 5)
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Configuration
|
|
181
|
+
|
|
182
|
+
### Environment Variables
|
|
183
|
+
|
|
184
|
+
| Variable | Default | Description |
|
|
185
|
+
|----------|---------|-------------|
|
|
186
|
+
| `PROJECT_ROOT` | Current directory | Root of codebase to index |
|
|
187
|
+
| `DATA_DIR` | `~/.seu-claude` | Where to store index data |
|
|
188
|
+
| `EMBEDDING_MODEL` | `Xenova/all-MiniLM-L6-v2` | HuggingFace model |
|
|
189
|
+
| `EMBEDDING_DIMENSIONS` | `384` | Vector dimensions |
|
|
190
|
+
| `LOG_LEVEL` | `info` | debug, info, warn, error |
|
|
191
|
+
|
|
192
|
+
### Ignore Patterns
|
|
193
|
+
|
|
194
|
+
Create a `.claudeignore` file in your project root to exclude files:
|
|
195
|
+
|
|
196
|
+
```
|
|
197
|
+
# Ignore test fixtures
|
|
198
|
+
**/fixtures/**
|
|
199
|
+
|
|
200
|
+
# Ignore generated code
|
|
201
|
+
**/generated/**
|
|
202
|
+
|
|
203
|
+
# Ignore specific large files
|
|
204
|
+
path/to/large/file.ts
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Performance
|
|
208
|
+
|
|
209
|
+
### Benchmark Results (seu-claude codebase - 26 TypeScript files)
|
|
210
|
+
|
|
211
|
+
| Metric | Result |
|
|
212
|
+
|--------|--------|
|
|
213
|
+
| Indexing time | 5.39s |
|
|
214
|
+
| Files processed | 26 |
|
|
215
|
+
| Chunks created | 359 |
|
|
216
|
+
| Memory (idle) | ~100MB |
|
|
217
|
+
| Memory (indexing) | ~500MB |
|
|
218
|
+
| Query latency | ~50ms |
|
|
219
|
+
|
|
220
|
+
### Comparison
|
|
221
|
+
|
|
222
|
+
| Metric | seu-claude | Traditional RAG |
|
|
223
|
+
|--------|------------|-----------------|
|
|
224
|
+
| RAM (idle) | ~100MB | 35GB+ |
|
|
225
|
+
| RAM (indexing) | ~500MB | N/A |
|
|
226
|
+
| Index time (26 files) | ~5s | Minutes |
|
|
227
|
+
| Query latency | ~50ms | <10ms |
|
|
228
|
+
| Startup time | <2s | 30s+ |
|
|
229
|
+
| Dependencies | Node.js only | Python + CUDA |
|
|
230
|
+
|
|
231
|
+
## Development
|
|
232
|
+
|
|
233
|
+
### Building from Source
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
git clone https://github.com/jardhel/seu-claude.git
|
|
237
|
+
cd seu-claude
|
|
238
|
+
npm install
|
|
239
|
+
npm run build
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### Project Structure
|
|
243
|
+
|
|
244
|
+
```
|
|
245
|
+
seu-claude/
|
|
246
|
+
├── src/
|
|
247
|
+
│ ├── index.ts # Entry point
|
|
248
|
+
│ ├── server.ts # MCP server
|
|
249
|
+
│ ├── indexer/
|
|
250
|
+
│ │ ├── crawler.ts # File enumeration
|
|
251
|
+
│ │ ├── parser.ts # Tree-sitter AST
|
|
252
|
+
│ │ └── chunker.ts # Semantic chunking
|
|
253
|
+
│ ├── vector/
|
|
254
|
+
│ │ ├── store.ts # LanceDB operations
|
|
255
|
+
│ │ └── embed.ts # Transformers.js
|
|
256
|
+
│ └── tools/
|
|
257
|
+
│ ├── index-codebase.ts
|
|
258
|
+
│ ├── search-codebase.ts
|
|
259
|
+
│ └── read-context.ts
|
|
260
|
+
├── languages/ # Tree-sitter WASM grammars
|
|
261
|
+
└── models/ # Downloaded embedding models
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Running Tests
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
npm test
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
## Roadmap
|
|
271
|
+
|
|
272
|
+
- [ ] Language Server Protocol integration for better symbol resolution
|
|
273
|
+
- [ ] Git-aware indexing (prioritize recent changes)
|
|
274
|
+
- [ ] Cross-reference graph (callers/callees)
|
|
275
|
+
- [ ] VSCode extension for index management
|
|
276
|
+
- [ ] Support for more languages (Kotlin, Swift, PHP)
|
|
277
|
+
|
|
278
|
+
## Contributing
|
|
279
|
+
|
|
280
|
+
Contributions are welcome! Please read our [Contributing Guidelines](CONTRIBUTING.md) before submitting PRs.
|
|
281
|
+
|
|
282
|
+
## License
|
|
283
|
+
|
|
284
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
285
|
+
|
|
286
|
+
## Acknowledgments
|
|
287
|
+
|
|
288
|
+
- Inspired by the challenges documented in [claude-mem](https://github.com/anthropics/claude-mem) discussions
|
|
289
|
+
- Built on the excellent [Model Context Protocol](https://modelcontextprotocol.io/)
|
|
290
|
+
- AST parsing powered by [Tree-sitter](https://tree-sitter.github.io/tree-sitter/)
|
|
291
|
+
- Vector search by [LanceDB](https://lancedb.com/)
|
|
292
|
+
- Local embeddings via [Transformers.js](https://huggingface.co/docs/transformers.js)
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
**seu-claude** - Because your AI coding assistant should know your codebase as well as you do.
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { SeuClaudeServer } from './server.js';
|
|
3
|
+
import { logger } from './utils/logger.js';
|
|
4
|
+
const log = logger.child('main');
|
|
5
|
+
/**
 * Boots the seu-claude MCP server and installs process-level lifecycle handlers.
 *
 * - SIGINT / SIGTERM: stop the server, then exit with code 0.
 * - uncaughtException: log the error, stop the server, then exit with code 1
 *   so that a crash is not reported as a clean exit.
 * - unhandledRejection: logged only; the process keeps running.
 * - A rejection from `server.start()` is logged and exits with code 1.
 *
 * @returns {Promise<void>} Settles once `server.start()` has resolved.
 */
async function main() {
    const server = new SeuClaudeServer();
    // Set once the first shutdown request arrives, so that a repeated signal
    // (or an exception raised while stopping) does not call server.stop() twice.
    let shutdownStarted = false;
    // Highest exit code requested by any shutdown trigger so far.
    let exitCode = 0;
    // Graceful shutdown handler shared by signals and fatal errors.
    const shutdown = async (signal, requestedExitCode = 0) => {
        exitCode = Math.max(exitCode, requestedExitCode);
        if (shutdownStarted) {
            return;
        }
        shutdownStarted = true;
        log.info(`Received ${signal}, shutting down...`);
        try {
            await server.stop();
            process.exit(exitCode);
        }
        catch (err) {
            log.error('Error during shutdown:', err);
            process.exit(1);
        }
    };
    process.on('SIGINT', () => void shutdown('SIGINT'));
    process.on('SIGTERM', () => void shutdown('SIGTERM'));
    // Handle uncaught errors: these are failures, so request a non-zero exit code.
    process.on('uncaughtException', err => {
        log.error('Uncaught exception:', err);
        void shutdown('uncaughtException', 1);
    });
    process.on('unhandledRejection', (reason, promise) => {
        log.error('Unhandled rejection at:', promise, 'reason:', reason);
    });
    try {
        await server.start();
    }
    catch (err) {
        log.error('Failed to start server:', err);
        process.exit(1);
    }
}
|
|
37
|
+
// Entry point: any rejection that escapes main() is logged as fatal and the
// process exits with a failure code.
function exitOnFatalError(fatalError) {
    log.error('Fatal error:', fatalError);
    process.exit(1);
}
main().catch(exitOnFatalError);
|
|
41
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAC9C,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;AAEjC,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;IAErC,6BAA6B;IAC7B,MAAM,QAAQ,GAAG,KAAK,EAAE,MAAc,EAAE,EAAE;QACxC,GAAG,CAAC,IAAI,CAAC,YAAY,MAAM,oBAAoB,CAAC,CAAC;QACjD,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;YACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,wBAAwB,EAAE,GAAG,CAAC,CAAC;YACzC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC,CAAC;IAEF,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,KAAK,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;IAEtD,yBAAyB;IACzB,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,GAAG,CAAC,EAAE;QACpC,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,GAAG,CAAC,CAAC;QACtC,KAAK,QAAQ,CAAC,mBAAmB,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACnD,GAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,GAAG,CAAC,KAAK,CAAC,yBAAyB,EAAE,GAAG,CAAC,CAAC;QAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;IACjB,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;IAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { Config } from '../utils/config.js';
|
|
2
|
+
/**
 * One indexed unit of source code produced by the semantic chunker.
 */
export interface CodeChunk {
    /** First 16 hex characters of a SHA-256 digest over the file path, line range and chunk text. */
    id: string;
    /** File path exactly as it was passed to `chunkFile`. */
    filePath: string;
    /** Relative path exactly as it was passed to `chunkFile`. */
    relativePath: string;
    /** Chunk text; AST-derived chunks may start with a `// Scope: ...` line and the docstring. */
    code: string;
    /** First line covered by the chunk (1-based for line-window fallback chunks). */
    startLine: number;
    /** Last line covered by the chunk, inclusive. */
    endLine: number;
    /** Language identifier exactly as it was passed to `chunkFile`. */
    language: string;
    /** Normalized category such as `function` or `class`; `block` for fallback chunks. */
    type: string;
    /** Symbol name, `<name>_part<index>` for pieces of a split chunk, or `null` when unnamed. */
    name: string | null;
    /** Enclosing scope segments joined with `.`; the relative path for fallback chunks. */
    scope: string;
    /** Docstring reported by the parser, or `null` when there is none. */
    docstring: string | null;
    /** Rough token count, estimated as one token per four characters. */
    tokenEstimate: number;
}
|
|
16
|
+
/**
 * Turns source files into {@link CodeChunk} records, using the AST parser when
 * it yields usable nodes and a line-window fallback otherwise.
 */
export declare class SemanticChunker {
    private parser;
    private config;
    private log;
    /**
     * @param config - Chunking settings (reads `maxChunkTokens` and `minChunkLines`).
     * @param languagesDir - Optional directory forwarded to the AST parser.
     */
    constructor(config: Config, languagesDir?: string);
    /** Initializes the underlying AST parser; await this before chunking. */
    initialize(): Promise<void>;
    /**
     * Splits one file into chunks.
     *
     * @param filePath - Path stored on every resulting chunk.
     * @param relativePath - Relative path stored on every resulting chunk.
     * @param content - Full text of the file.
     * @param language - Language identifier handed to the parser.
     * @returns The chunks created for the file.
     */
    chunkFile(
        filePath: string,
        relativePath: string,
        content: string,
        language: string,
    ): Promise<CodeChunk[]>;
    private nodeToChunk;
    private splitLargeChunk;
    private createSubChunk;
    private fallbackChunk;
    private generateChunkId;
    private estimateTokens;
    private normalizeNodeType;
}
|
|
31
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAI5C,MAAM,WAAW,SAAS;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAY;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,GAAG,CAA2B;gBAE1B,MAAM,EAAE,MAAM,EAAE,YAAY,CAAC,EAAE,MAAM;IAK3C,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAI3B,SAAS,CACb,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,SAAS,EAAE,CAAC;IAsCvB,OAAO,CAAC,WAAW;IA0CnB,OAAO,CAAC,eAAe;IAwCvB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,aAAa;IAsCrB,OAAO,CAAC,eAAe;IAUvB,OAAO,CAAC,cAAc;IAKtB,OAAO,CAAC,iBAAiB;CAiC1B"}
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import { ASTParser } from './parser.js';
|
|
2
|
+
import { logger } from '../utils/logger.js';
|
|
3
|
+
import { createHash } from 'crypto';
|
|
4
|
+
export class SemanticChunker {
|
|
5
|
+
parser;
|
|
6
|
+
config;
|
|
7
|
+
log = logger.child('chunker');
|
|
8
|
+
constructor(config, languagesDir) {
|
|
9
|
+
this.config = config;
|
|
10
|
+
this.parser = new ASTParser(languagesDir);
|
|
11
|
+
}
|
|
12
|
+
async initialize() {
|
|
13
|
+
await this.parser.initialize();
|
|
14
|
+
}
|
|
15
|
+
async chunkFile(filePath, relativePath, content, language) {
|
|
16
|
+
const tree = await this.parser.parse(content, language);
|
|
17
|
+
if (!tree) {
|
|
18
|
+
// Fallback to simple chunking if parsing fails
|
|
19
|
+
return this.fallbackChunk(filePath, relativePath, content, language);
|
|
20
|
+
}
|
|
21
|
+
const nodes = this.parser.extractNodes(tree, language);
|
|
22
|
+
const chunks = [];
|
|
23
|
+
if (nodes.length === 0) {
|
|
24
|
+
// No extractable nodes, use fallback
|
|
25
|
+
return this.fallbackChunk(filePath, relativePath, content, language);
|
|
26
|
+
}
|
|
27
|
+
for (const node of nodes) {
|
|
28
|
+
const chunk = this.nodeToChunk(node, filePath, relativePath, language);
|
|
29
|
+
// Check if chunk is too large and needs splitting
|
|
30
|
+
if (chunk.tokenEstimate > this.config.maxChunkTokens) {
|
|
31
|
+
const subChunks = this.splitLargeChunk(chunk, content);
|
|
32
|
+
chunks.push(...subChunks);
|
|
33
|
+
}
|
|
34
|
+
else if (chunk.tokenEstimate >= this.config.minChunkLines * 10) {
|
|
35
|
+
// Only include chunks with sufficient content
|
|
36
|
+
chunks.push(chunk);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
// If no chunks were created from AST, use fallback
|
|
40
|
+
if (chunks.length === 0) {
|
|
41
|
+
return this.fallbackChunk(filePath, relativePath, content, language);
|
|
42
|
+
}
|
|
43
|
+
this.log.debug(`Created ${chunks.length} chunks from ${relativePath}`);
|
|
44
|
+
return chunks;
|
|
45
|
+
}
|
|
46
|
+
nodeToChunk(node, filePath, relativePath, language) {
|
|
47
|
+
const scope = node.scope.join('.');
|
|
48
|
+
const tokenEstimate = this.estimateTokens(node.text);
|
|
49
|
+
// Create context-enriched text for better embeddings
|
|
50
|
+
let enrichedText = '';
|
|
51
|
+
// Add scope context
|
|
52
|
+
if (scope) {
|
|
53
|
+
enrichedText += `// Scope: ${scope}\n`;
|
|
54
|
+
}
|
|
55
|
+
// Add docstring if available
|
|
56
|
+
if (node.docstring) {
|
|
57
|
+
enrichedText += node.docstring + '\n';
|
|
58
|
+
}
|
|
59
|
+
enrichedText += node.text;
|
|
60
|
+
const id = this.generateChunkId(filePath, node.startLine, node.endLine, node.text);
|
|
61
|
+
return {
|
|
62
|
+
id,
|
|
63
|
+
filePath,
|
|
64
|
+
relativePath,
|
|
65
|
+
code: enrichedText,
|
|
66
|
+
startLine: node.startLine,
|
|
67
|
+
endLine: node.endLine,
|
|
68
|
+
language,
|
|
69
|
+
type: this.normalizeNodeType(node.type),
|
|
70
|
+
name: node.name,
|
|
71
|
+
scope,
|
|
72
|
+
docstring: node.docstring,
|
|
73
|
+
tokenEstimate,
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
splitLargeChunk(chunk, _fullContent) {
|
|
77
|
+
const lines = chunk.code.split('\n');
|
|
78
|
+
const chunks = [];
|
|
79
|
+
const maxLines = Math.floor(this.config.maxChunkTokens / 4); // Rough estimate: 4 tokens per line
|
|
80
|
+
let currentLines = [];
|
|
81
|
+
let currentStartLine = chunk.startLine;
|
|
82
|
+
for (let i = 0; i < lines.length; i++) {
|
|
83
|
+
currentLines.push(lines[i]);
|
|
84
|
+
if (currentLines.length >= maxLines) {
|
|
85
|
+
const subChunk = this.createSubChunk(chunk, currentLines.join('\n'), currentStartLine, currentStartLine + currentLines.length - 1, chunks.length);
|
|
86
|
+
chunks.push(subChunk);
|
|
87
|
+
currentLines = [];
|
|
88
|
+
currentStartLine = chunk.startLine + i + 1;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
// Don't forget remaining lines
|
|
92
|
+
if (currentLines.length > 0) {
|
|
93
|
+
const subChunk = this.createSubChunk(chunk, currentLines.join('\n'), currentStartLine, currentStartLine + currentLines.length - 1, chunks.length);
|
|
94
|
+
chunks.push(subChunk);
|
|
95
|
+
}
|
|
96
|
+
return chunks;
|
|
97
|
+
}
|
|
98
|
+
createSubChunk(parent, code, startLine, endLine, index) {
|
|
99
|
+
return {
|
|
100
|
+
id: this.generateChunkId(parent.filePath, startLine, endLine, code),
|
|
101
|
+
filePath: parent.filePath,
|
|
102
|
+
relativePath: parent.relativePath,
|
|
103
|
+
code,
|
|
104
|
+
startLine,
|
|
105
|
+
endLine,
|
|
106
|
+
language: parent.language,
|
|
107
|
+
type: parent.type,
|
|
108
|
+
name: parent.name ? `${parent.name}_part${index}` : null,
|
|
109
|
+
scope: parent.scope,
|
|
110
|
+
docstring: index === 0 ? parent.docstring : null,
|
|
111
|
+
tokenEstimate: this.estimateTokens(code),
|
|
112
|
+
};
|
|
113
|
+
}
|
|
114
|
+
fallbackChunk(filePath, relativePath, content, language) {
|
|
115
|
+
const lines = content.split('\n');
|
|
116
|
+
const chunks = [];
|
|
117
|
+
const chunkSize = Math.floor(this.config.maxChunkTokens / 4);
|
|
118
|
+
const overlap = Math.floor(chunkSize / 4);
|
|
119
|
+
for (let i = 0; i < lines.length; i += chunkSize - overlap) {
|
|
120
|
+
const chunkLines = lines.slice(i, i + chunkSize);
|
|
121
|
+
const code = chunkLines.join('\n');
|
|
122
|
+
const startLine = i + 1;
|
|
123
|
+
const endLine = Math.min(i + chunkSize, lines.length);
|
|
124
|
+
if (code.trim().length > 0) {
|
|
125
|
+
chunks.push({
|
|
126
|
+
id: this.generateChunkId(filePath, startLine, endLine, code),
|
|
127
|
+
filePath,
|
|
128
|
+
relativePath,
|
|
129
|
+
code,
|
|
130
|
+
startLine,
|
|
131
|
+
endLine,
|
|
132
|
+
language,
|
|
133
|
+
type: 'block',
|
|
134
|
+
name: null,
|
|
135
|
+
scope: relativePath,
|
|
136
|
+
docstring: null,
|
|
137
|
+
tokenEstimate: this.estimateTokens(code),
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
return chunks;
|
|
142
|
+
}
|
|
143
|
+
generateChunkId(filePath, startLine, endLine, content) {
|
|
144
|
+
const input = `${filePath}:${startLine}:${endLine}:${content}`;
|
|
145
|
+
return createHash('sha256').update(input).digest('hex').slice(0, 16);
|
|
146
|
+
}
|
|
147
|
+
estimateTokens(text) {
|
|
148
|
+
// Rough estimate: ~4 characters per token for code
|
|
149
|
+
return Math.ceil(text.length / 4);
|
|
150
|
+
}
|
|
151
|
+
normalizeNodeType(type) {
|
|
152
|
+
// Normalize different language-specific types to common categories
|
|
153
|
+
const typeMap = {
|
|
154
|
+
function_declaration: 'function',
|
|
155
|
+
function_definition: 'function',
|
|
156
|
+
function_item: 'function',
|
|
157
|
+
arrow_function: 'function',
|
|
158
|
+
method_definition: 'method',
|
|
159
|
+
method_declaration: 'method',
|
|
160
|
+
class_declaration: 'class',
|
|
161
|
+
class_definition: 'class',
|
|
162
|
+
class_specifier: 'class',
|
|
163
|
+
interface_declaration: 'interface',
|
|
164
|
+
interface_type: 'interface',
|
|
165
|
+
type_alias_declaration: 'type',
|
|
166
|
+
type_declaration: 'type',
|
|
167
|
+
struct_item: 'struct',
|
|
168
|
+
struct_specifier: 'struct',
|
|
169
|
+
enum_declaration: 'enum',
|
|
170
|
+
enum_item: 'enum',
|
|
171
|
+
enum_specifier: 'enum',
|
|
172
|
+
impl_item: 'impl',
|
|
173
|
+
trait_item: 'trait',
|
|
174
|
+
mod_item: 'module',
|
|
175
|
+
module: 'module',
|
|
176
|
+
namespace_definition: 'namespace',
|
|
177
|
+
namespace_declaration: 'namespace',
|
|
178
|
+
export_statement: 'export',
|
|
179
|
+
decorated_definition: 'decorated',
|
|
180
|
+
};
|
|
181
|
+
return typeMap[type] || type;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAc,MAAM,aAAa,CAAC;AAEpD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAiBpC,MAAM,OAAO,eAAe;IAClB,MAAM,CAAY;IAClB,MAAM,CAAS;IACf,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IAEtC,YAAY,MAAc,EAAE,YAAqB;QAC/C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,MAAM,GAAG,IAAI,SAAS,CAAC,YAAY,CAAC,CAAC;IAC5C,CAAC;IAED,KAAK,CAAC,UAAU;QACd,MAAM,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,SAAS,CACb,QAAgB,EAChB,YAAoB,EACpB,OAAe,EACf,QAAgB;QAEhB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;QAExD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,+CAA+C;YAC/C,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACvE,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACvD,MAAM,MAAM,GAAgB,EAAE,CAAC;QAE/B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,qCAAqC;YACrC,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACvE,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,CAAC;YAEvE,kDAAkD;YAClD,IAAI,KAAK,CAAC,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,CAAC;gBACrD,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;gBACvD,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;YAC5B,CAAC;iBAAM,IAAI,KAAK,CAAC,aAAa,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,GAAG,EAAE,EAAE,CAAC;gBACjE,8CAA8C;gBAC9C,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;QAED,mDAAmD;QACnD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACvE,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,MAAM,CAAC,MAAM,gBAAgB,YAAY,EAAE,CAAC,CAAC;QACvE,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,WAAW,CACjB,IAAgB,EAChB,QAAgB,EAChB,YAAoB,EACpB,QAAgB;QAEhB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,
CAAC,CAAC;QAErD,qDAAqD;QACrD,IAAI,YAAY,GAAG,EAAE,CAAC;QAEtB,oBAAoB;QACpB,IAAI,KAAK,EAAE,CAAC;YACV,YAAY,IAAI,aAAa,KAAK,IAAI,CAAC;QACzC,CAAC;QAED,6BAA6B;QAC7B,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,YAAY,IAAI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACxC,CAAC;QAED,YAAY,IAAI,IAAI,CAAC,IAAI,CAAC;QAE1B,MAAM,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAEnF,OAAO;YACL,EAAE;YACF,QAAQ;YACR,YAAY;YACZ,IAAI,EAAE,YAAY;YAClB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,QAAQ;YACR,IAAI,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC;YACvC,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK;YACL,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,aAAa;SACd,CAAC;IACJ,CAAC;IAEO,eAAe,CAAC,KAAgB,EAAE,YAAoB;QAC5D,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,oCAAoC;QAEjG,IAAI,YAAY,GAAa,EAAE,CAAC;QAChC,IAAI,gBAAgB,GAAG,KAAK,CAAC,SAAS,CAAC;QAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAE5B,IAAI,YAAY,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;gBACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAClC,KAAK,EACL,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EACvB,gBAAgB,EAChB,gBAAgB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAC1C,MAAM,CAAC,MAAM,CACd,CAAC;gBACF,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACtB,YAAY,GAAG,EAAE,CAAC;gBAClB,gBAAgB,GAAG,KAAK,CAAC,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC;YAC7C,CAAC;QACH,CAAC;QAED,+BAA+B;QAC/B,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAClC,KAAK,EACL,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,EACvB,gBAAgB,EAChB,gBAAgB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,EAC1C,MAAM,CAAC,MAAM,CACd,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACxB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,cAAc,CACpB,MAAiB,EACjB,IAAY,EACZ,SAAiB,EACjB,OAAe,EACf,KAAa;QAEb,OAAO;YACL,EAAE,EAAE,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC;YACnE,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACz
B,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,IAAI;YACJ,SAAS;YACT,OAAO;YACP,QAAQ,EAAE,MAAM,CAAC,QAAQ;YACzB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,QAAQ,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI;YACxD,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,SAAS,EAAE,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI;YAChD,aAAa,EAAE,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;SACzC,CAAC;IACJ,CAAC;IAEO,aAAa,CACnB,QAAgB,EAChB,YAAoB,EACpB,OAAe,EACf,QAAgB;QAEhB,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,MAAM,MAAM,GAAgB,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;QAE1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,GAAG,OAAO,EAAE,CAAC;YAC3D,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;YACjD,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC;YACxB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;YAEtD,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC;oBAC5D,QAAQ;oBACR,YAAY;oBACZ,IAAI;oBACJ,SAAS;oBACT,OAAO;oBACP,QAAQ;oBACR,IAAI,EAAE,OAAO;oBACb,IAAI,EAAE,IAAI;oBACV,KAAK,EAAE,YAAY;oBACnB,SAAS,EAAE,IAAI;oBACf,aAAa,EAAE,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;iBACzC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,eAAe,CACrB,QAAgB,EAChB,SAAiB,EACjB,OAAe,EACf,OAAe;QAEf,MAAM,KAAK,GAAG,GAAG,QAAQ,IAAI,SAAS,IAAI,OAAO,IAAI,OAAO,EAAE,CAAC;QAC/D,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACvE,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,mDAAmD;QACnD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAEO,iBAAiB,CAAC,IAAY;QACpC,mEAAmE;QACnE,MAAM,OAAO,GAA2B;YACtC,oBAAoB,EAAE,UAAU;YAChC,mBAAmB,EAAE,UAAU;YAC/B,aAAa,EAAE,UAAU;YACzB,cAAc,EAAE,UAAU;YAC
1B,iBAAiB,EAAE,QAAQ;YAC3B,kBAAkB,EAAE,QAAQ;YAC5B,iBAAiB,EAAE,OAAO;YAC1B,gBAAgB,EAAE,OAAO;YACzB,eAAe,EAAE,OAAO;YACxB,qBAAqB,EAAE,WAAW;YAClC,cAAc,EAAE,WAAW;YAC3B,sBAAsB,EAAE,MAAM;YAC9B,gBAAgB,EAAE,MAAM;YACxB,WAAW,EAAE,QAAQ;YACrB,gBAAgB,EAAE,QAAQ;YAC1B,gBAAgB,EAAE,MAAM;YACxB,SAAS,EAAE,MAAM;YACjB,cAAc,EAAE,MAAM;YACtB,SAAS,EAAE,MAAM;YACjB,UAAU,EAAE,OAAO;YACnB,QAAQ,EAAE,QAAQ;YAClB,MAAM,EAAE,QAAQ;YAChB,oBAAoB,EAAE,WAAW;YACjC,qBAAqB,EAAE,WAAW;YAClC,gBAAgB,EAAE,QAAQ;YAC1B,oBAAoB,EAAE,WAAW;SAClC,CAAC;QAEF,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;IAC/B,CAAC;CACF"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { Config } from '../utils/config.js';
|
|
2
|
+
/**
 * Metadata record describing a single file; instances are carried in
 * `CrawlResult.files`.
 *
 * NOTE(review): this is a generated declaration, so only the field types are
 * established here. The per-field notes below are inferred from the field
 * names and must be confirmed against src/indexer/crawler.ts.
 */
export interface FileInfo {
|
|
3
|
+
path: string; // presumably the on-disk (absolute) path of the file — TODO confirm
|
|
4
|
+
relativePath: string; // presumably the path relative to the crawled project root — TODO confirm
|
|
5
|
+
language: string; // language identifier; presumably the same keys used in CrawlResult.languages — confirm
|
|
6
|
+
hash: string; // presumably the value returned by Crawler.hashContent for this file's content — confirm
|
|
7
|
+
size: number; // presumably the file size in bytes — TODO confirm unit
|
|
8
|
+
modifiedAt: Date; // presumably the file's last-modified timestamp — confirm
|
|
9
|
+
}
|
|
10
|
+
/**
 * Value that `Crawler.crawl()` resolves to: the list of `FileInfo` records
 * plus aggregate numbers.
 *
 * NOTE(review): how the aggregates are computed is not visible in this
 * declaration; the notes below are inferred from names — confirm against
 * src/indexer/crawler.ts.
 */
export interface CrawlResult {
|
|
11
|
+
files: FileInfo[]; // one FileInfo entry per file in the result
|
|
12
|
+
totalFiles: number; // presumably equal to files.length — confirm
|
|
13
|
+
totalSize: number; // presumably the sum of FileInfo.size over all files — confirm
|
|
14
|
+
languages: Record<string, number>; // string-keyed numeric map; presumably file count per language — confirm
|
|
15
|
+
}
|
|
16
|
+
/**
 * Type declaration for the `Crawler` class. It is constructed from a `Config`
 * and exposes `crawl()`, which resolves to a `CrawlResult`, alongside
 * `loadGitignore()`, `hashContent()` and `getFileContent()`.
 *
 * NOTE(review): this is a declaration only — no method bodies are visible, so
 * the behavioural notes below are inferred from names and signatures and
 * should be confirmed against src/indexer/crawler.ts.
 */
export declare class Crawler {
|
|
17
|
+
private config; // private member; presumably holds the Config passed to the constructor — confirm
|
|
18
|
+
private ignorer; // private member; presumably the ignore-pattern matcher — confirm
|
|
19
|
+
private log; // private member; presumably a logger instance — confirm
|
|
20
|
+
constructor(config: Config); // takes the Config type imported from '../utils/config.js'
|
|
21
|
+
private setupIgnorePatterns; // private member (signature erased in the declaration output)
|
|
22
|
+
loadGitignore(): Promise<void>; // async, resolves with no value; presumably reads .gitignore rules into the ignorer — confirm
|
|
23
|
+
crawl(): Promise<CrawlResult>; // async, resolves to a CrawlResult
|
|
24
|
+
hashContent(content: string): string; // synchronous string -> string; hash algorithm and output encoding not visible here
|
|
25
|
+
getFileContent(filePath: string): Promise<string>; // async, resolves to a string; presumably the file's text content — confirm encoding
|
|
26
|
+
}
|
|
27
|
+
//# sourceMappingURL=crawler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawler.d.ts","sourceRoot":"","sources":["../../src/indexer/crawler.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,MAAM,EAA4B,MAAM,oBAAoB,CAAC;AAQtE,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;IACrB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,IAAI,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAED,qBAAa,OAAO;IAClB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,GAAG,CAA2B;gBAE1B,MAAM,EAAE,MAAM;IAM1B,OAAO,CAAC,mBAAmB;IAKrB,aAAa,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB9B,KAAK,IAAI,OAAO,CAAC,WAAW,CAAC;IAqEnC,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM;IAI9B,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;CAGxD"}
|