raggrep 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -6
- package/dist/cli/main.js +539 -36
- package/dist/cli/main.js.map +8 -7
- package/dist/domain/entities/fileSummary.d.ts +18 -0
- package/dist/domain/services/keywords.d.ts +45 -0
- package/dist/index.js +141 -7
- package/dist/index.js.map +7 -6
- package/dist/indexer/index.d.ts +25 -0
- package/dist/indexer/watcher.d.ts +33 -0
- package/package.json +4 -3
package/README.md
CHANGED
|
@@ -1,15 +1,147 @@
|
|
|
1
|
-
#
|
|
1
|
+
# RAGgrep
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Local filesystem-based RAG system for codebases** — semantic search using local embeddings.
|
|
4
|
+
|
|
5
|
+
RAGgrep indexes your code and lets you search it semantically with natural-language queries. Everything runs locally on your machine — no external API calls required.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **🏠 Local-first** — All indexing and search happens locally. No cloud dependencies.
|
|
10
|
+
- **📁 Filesystem-based** — Index stored as readable JSON files alongside your code.
|
|
11
|
+
- **⚡ Tiered search** — Fast keyword filtering + semantic search for efficiency.
|
|
12
|
+
- **🔍 Hybrid scoring** — Combines semantic similarity with BM25 keyword matching.
|
|
13
|
+
- **🔄 Incremental** — Only re-indexes files that have changed.
|
|
14
|
+
- **📝 TypeScript-optimized** — AST-based parsing extracts functions, classes, interfaces, types.
|
|
15
|
+
- **🎯 Zero config** — Works out of the box with sensible defaults.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
4
18
|
|
|
5
19
|
```bash
|
|
6
|
-
|
|
20
|
+
# Install globally
|
|
21
|
+
npm install -g raggrep
|
|
22
|
+
|
|
23
|
+
# Or use without installing
|
|
24
|
+
npx raggrep --help
|
|
7
25
|
```
|
|
8
26
|
|
|
9
|
-
|
|
27
|
+
## Quick Start
|
|
10
28
|
|
|
11
29
|
```bash
|
|
12
|
-
|
|
30
|
+
# Index your project
|
|
31
|
+
cd your-project
|
|
32
|
+
raggrep index
|
|
33
|
+
|
|
34
|
+
# Search your codebase
|
|
35
|
+
raggrep query "user authentication"
|
|
13
36
|
```
|
|
14
37
|
|
|
15
|
-
|
|
38
|
+
### Example Output
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
Found 3 results:
|
|
42
|
+
|
|
43
|
+
1. src/auth/authService.ts:24-55 (login)
|
|
44
|
+
Score: 34.4% | Type: function | exported
|
|
45
|
+
export async function login(credentials: LoginCredentials): Promise<AuthResult> ...
|
|
46
|
+
|
|
47
|
+
2. src/auth/authService.ts:60-62 (logout)
|
|
48
|
+
Score: 27.5% | Type: function | exported
|
|
49
|
+
export async function logout(token: string): Promise<void> {
|
|
50
|
+
|
|
51
|
+
3. src/users/types.ts:3-12 (User)
|
|
52
|
+
Score: 26.0% | Type: interface | exported
|
|
53
|
+
export interface User {
|
|
54
|
+
id: string;
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Programmatic API
|
|
58
|
+
|
|
59
|
+
```typescript
|
|
60
|
+
import raggrep from "raggrep";
|
|
61
|
+
|
|
62
|
+
// Index a directory
|
|
63
|
+
await raggrep.index("./my-project");
|
|
64
|
+
|
|
65
|
+
// Search
|
|
66
|
+
const results = await raggrep.search("./my-project", "user authentication");
|
|
67
|
+
console.log(raggrep.formatSearchResults(results));
|
|
68
|
+
|
|
69
|
+
// Cleanup stale entries
|
|
70
|
+
await raggrep.cleanup("./my-project");
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## CLI Reference
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Index commands
|
|
77
|
+
raggrep index # Index current directory
|
|
78
|
+
raggrep index --watch # Watch mode: re-index on file changes
|
|
79
|
+
raggrep index --model bge-small-en-v1.5 # Use different embedding model
|
|
80
|
+
raggrep index --verbose # Show detailed progress
|
|
81
|
+
|
|
82
|
+
# Search commands
|
|
83
|
+
raggrep query "user login" # Basic search
|
|
84
|
+
raggrep query "error handling" --top 5 # Limit results
|
|
85
|
+
raggrep query "database" --min-score 0.1 # Lower threshold (more results)
|
|
86
|
+
raggrep query "interface" --type ts # Filter by file type
|
|
87
|
+
|
|
88
|
+
# Maintenance
|
|
89
|
+
raggrep cleanup # Remove stale index entries
|
|
90
|
+
raggrep status # Show index status
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## How It Works
|
|
94
|
+
|
|
95
|
+
RAGgrep uses a two-tier index system:
|
|
96
|
+
|
|
97
|
+
1. **Symbolic Index** — Lightweight file summaries with extracted keywords. Used for fast BM25 filtering.
|
|
98
|
+
2. **Embedding Index** — Full chunk embeddings for semantic search. Only loaded for relevant files.
|
|
99
|
+
|
|
100
|
+
This design keeps memory usage low and enables fast search on large codebases.
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
Query → BM25 filter (symbolic) → Load candidates → Semantic search → Results
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## What Gets Indexed
|
|
107
|
+
|
|
108
|
+
**File types:** `.ts`, `.tsx`, `.js`, `.jsx`, `.py`, `.go`, `.rs`, `.java`, `.md`
|
|
109
|
+
|
|
110
|
+
**Code structures:**
|
|
111
|
+
|
|
112
|
+
- Functions (regular, async, arrow)
|
|
113
|
+
- Classes (including abstract)
|
|
114
|
+
- Interfaces
|
|
115
|
+
- Type aliases
|
|
116
|
+
- Enums
|
|
117
|
+
- Exported variables
|
|
118
|
+
|
|
119
|
+
**Automatically ignored:**
|
|
120
|
+
|
|
121
|
+
- `node_modules`, `dist`, `build`, `.git`
|
|
122
|
+
- `.next`, `.nuxt`, `__pycache__`, `venv`
|
|
123
|
+
- See [Configuration](./docs/configuration.md) for the full list
|
|
124
|
+
|
|
125
|
+
## Documentation
|
|
126
|
+
|
|
127
|
+
- [Getting Started](./docs/getting-started.md) — Installation and first steps
|
|
128
|
+
- [CLI Reference](./docs/cli-reference.md) — All commands and options
|
|
129
|
+
- [Configuration](./docs/configuration.md) — Customize indexing behavior
|
|
130
|
+
- [Architecture](./docs/architecture.md) — How RAGgrep works internally
|
|
131
|
+
|
|
132
|
+
## Performance
|
|
133
|
+
|
|
134
|
+
| Operation | Time | Notes |
|
|
135
|
+
| ------------------------ | ---------- | -------------------------------------- |
|
|
136
|
+
| Initial index (1k files) | 1-2 min | Embedding generation is the bottleneck |
|
|
137
|
+
| Incremental update | <2s | Only changed files |
|
|
138
|
+
| Search | ~100-500ms | Depends on codebase size |
|
|
139
|
+
|
|
140
|
+
## Requirements
|
|
141
|
+
|
|
142
|
+
- Node.js 18+
|
|
143
|
+
- ~50MB disk space for models (cached globally)
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
MIT
|