raggrep 0.1.0 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -6
- package/dist/{indexer → app/indexer}/index.d.ts +26 -1
- package/dist/app/indexer/watcher.d.ts +33 -0
- package/dist/{search → app/search}/index.d.ts +1 -1
- package/dist/cli/main.js +1802 -222
- package/dist/cli/main.js.map +26 -15
- package/dist/composition.d.ts +7 -7
- package/dist/domain/entities/fileSummary.d.ts +18 -0
- package/dist/domain/entities/index.d.ts +1 -1
- package/dist/domain/entities/searchResult.d.ts +47 -2
- package/dist/domain/index.d.ts +5 -3
- package/dist/domain/ports/embedding.d.ts +0 -4
- package/dist/domain/ports/index.d.ts +3 -4
- package/dist/domain/services/bm25.d.ts +24 -0
- package/dist/domain/services/index.d.ts +3 -2
- package/dist/domain/services/keywords.d.ts +45 -0
- package/dist/domain/services/similarity.d.ts +23 -0
- package/dist/{application → domain}/usecases/cleanupIndex.d.ts +2 -2
- package/dist/{application → domain}/usecases/indexDirectory.d.ts +2 -2
- package/dist/{application → domain}/usecases/searchIndex.d.ts +2 -2
- package/dist/index.d.ts +5 -5
- package/dist/index.js +1444 -244
- package/dist/index.js.map +26 -15
- package/dist/{utils/config.d.ts → infrastructure/config/configLoader.d.ts} +7 -4
- package/dist/infrastructure/config/index.d.ts +6 -0
- package/dist/infrastructure/embeddings/index.d.ts +3 -1
- package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +16 -0
- package/dist/infrastructure/index.d.ts +4 -3
- package/dist/infrastructure/storage/index.d.ts +4 -1
- package/dist/{utils/tieredIndex.d.ts → infrastructure/storage/symbolicIndex.d.ts} +7 -18
- package/dist/introspection/fileIntrospector.d.ts +14 -0
- package/dist/introspection/index.d.ts +68 -0
- package/dist/introspection/introspection.test.d.ts +4 -0
- package/dist/introspection/projectDetector.d.ts +27 -0
- package/dist/introspection/types.d.ts +70 -0
- package/dist/modules/core/index.d.ts +69 -0
- package/dist/modules/core/symbols.d.ts +27 -0
- package/dist/modules/core/symbols.test.d.ts +4 -0
- package/dist/modules/{semantic → language/typescript}/index.d.ts +11 -12
- package/dist/types.d.ts +4 -1
- package/package.json +7 -6
- package/dist/application/index.d.ts +0 -7
- package/dist/utils/bm25.d.ts +0 -9
- package/dist/utils/embeddings.d.ts +0 -46
- package/dist/{cli → app/cli}/main.d.ts +0 -0
- package/dist/{application → domain}/usecases/index.d.ts +0 -0
- package/dist/{utils → infrastructure/embeddings}/embeddings.test.d.ts +0 -0
- package/dist/modules/{semantic → language/typescript}/parseCode.d.ts +0 -0
- package/dist/modules/{semantic → language/typescript}/parseCode.test.d.ts +0 -0
package/README.md
CHANGED
|
@@ -1,15 +1,147 @@
|
|
|
1
|
-
#
|
|
1
|
+
# RAGgrep
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Local filesystem-based RAG system for codebases** — semantic search using local embeddings.
|
|
4
|
+
|
|
5
|
+
RAGgrep indexes your code and allows semantic search using natural language queries. Everything runs locally on your machine — no external API calls required.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **🏠 Local-first** — All indexing and search happens locally. No cloud dependencies.
|
|
10
|
+
- **📁 Filesystem-based** — Index stored as readable JSON files alongside your code.
|
|
11
|
+
- **⚡ Tiered search** — Fast keyword filtering + semantic search for efficiency.
|
|
12
|
+
- **🔍 Hybrid scoring** — Combines semantic similarity with BM25 keyword matching.
|
|
13
|
+
- **🔄 Incremental** — Only re-indexes files that have changed.
|
|
14
|
+
- **📝 TypeScript-optimized** — AST-based parsing extracts functions, classes, interfaces, types.
|
|
15
|
+
- **🎯 Zero config** — Works out of the box with sensible defaults.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
4
18
|
|
|
5
19
|
```bash
|
|
6
|
-
|
|
20
|
+
# Install globally
|
|
21
|
+
npm install -g raggrep
|
|
22
|
+
|
|
23
|
+
# Or use without installing
|
|
24
|
+
npx raggrep --help
|
|
7
25
|
```
|
|
8
26
|
|
|
9
|
-
|
|
27
|
+
## Quick Start
|
|
10
28
|
|
|
11
29
|
```bash
|
|
12
|
-
|
|
30
|
+
# Index your project
|
|
31
|
+
cd your-project
|
|
32
|
+
raggrep index
|
|
33
|
+
|
|
34
|
+
# Search your codebase
|
|
35
|
+
raggrep query "user authentication"
|
|
13
36
|
```
|
|
14
37
|
|
|
15
|
-
|
|
38
|
+
### Example Output
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
Found 3 results:
|
|
42
|
+
|
|
43
|
+
1. src/auth/authService.ts:24-55 (login)
|
|
44
|
+
Score: 34.4% | Type: function | exported
|
|
45
|
+
export async function login(credentials: LoginCredentials): Promise<AuthResult> ...
|
|
46
|
+
|
|
47
|
+
2. src/auth/authService.ts:60-62 (logout)
|
|
48
|
+
Score: 27.5% | Type: function | exported
|
|
49
|
+
export async function logout(token: string): Promise<void> {
|
|
50
|
+
|
|
51
|
+
3. src/users/types.ts:3-12 (User)
|
|
52
|
+
Score: 26.0% | Type: interface | exported
|
|
53
|
+
export interface User {
|
|
54
|
+
id: string;
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Programmatic API
|
|
58
|
+
|
|
59
|
+
```typescript
|
|
60
|
+
import raggrep from "raggrep";
|
|
61
|
+
|
|
62
|
+
// Index a directory
|
|
63
|
+
await raggrep.index("./my-project");
|
|
64
|
+
|
|
65
|
+
// Search
|
|
66
|
+
const results = await raggrep.search("./my-project", "user authentication");
|
|
67
|
+
console.log(raggrep.formatSearchResults(results));
|
|
68
|
+
|
|
69
|
+
// Cleanup stale entries
|
|
70
|
+
await raggrep.cleanup("./my-project");
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## CLI Reference
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Index commands
|
|
77
|
+
raggrep index # Index current directory
|
|
78
|
+
raggrep index --watch # Watch mode: re-index on file changes
|
|
79
|
+
raggrep index --model bge-small-en-v1.5 # Use different embedding model
|
|
80
|
+
raggrep index --verbose # Show detailed progress
|
|
81
|
+
|
|
82
|
+
# Search commands
|
|
83
|
+
raggrep query "user login" # Basic search
|
|
84
|
+
raggrep query "error handling" --top 5 # Limit results
|
|
85
|
+
raggrep query "database" --min-score 0.1 # Lower threshold (more results)
|
|
86
|
+
raggrep query "interface" --type ts # Filter by file type
|
|
87
|
+
|
|
88
|
+
# Maintenance
|
|
89
|
+
raggrep cleanup # Remove stale index entries
|
|
90
|
+
raggrep status # Show index status
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## How It Works
|
|
94
|
+
|
|
95
|
+
RAGgrep uses a two-tier index system:
|
|
96
|
+
|
|
97
|
+
1. **Symbolic Index** — Lightweight file summaries with extracted keywords. Used for fast BM25 filtering.
|
|
98
|
+
2. **Embedding Index** — Full chunk embeddings for semantic search. Only loaded for relevant files.
|
|
99
|
+
|
|
100
|
+
This design keeps memory usage low and enables fast search on large codebases.
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
Query → BM25 filter (symbolic) → Load candidates → Semantic search → Results
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## What Gets Indexed
|
|
107
|
+
|
|
108
|
+
**File types:** `.ts`, `.tsx`, `.js`, `.jsx`, `.py`, `.go`, `.rs`, `.java`, `.md`
|
|
109
|
+
|
|
110
|
+
**Code structures:**
|
|
111
|
+
|
|
112
|
+
- Functions (regular, async, arrow)
|
|
113
|
+
- Classes (including abstract)
|
|
114
|
+
- Interfaces
|
|
115
|
+
- Type aliases
|
|
116
|
+
- Enums
|
|
117
|
+
- Exported variables
|
|
118
|
+
|
|
119
|
+
**Automatically ignored:**
|
|
120
|
+
|
|
121
|
+
- `node_modules`, `dist`, `build`, `.git`
|
|
122
|
+
- `.next`, `.nuxt`, `__pycache__`, `venv`
|
|
123
|
+
- See [Configuration](./docs/configuration.md) for full list
|
|
124
|
+
|
|
125
|
+
## Documentation
|
|
126
|
+
|
|
127
|
+
- [Getting Started](./docs/getting-started.md) — Installation and first steps
|
|
128
|
+
- [CLI Reference](./docs/cli-reference.md) — All commands and options
|
|
129
|
+
- [Configuration](./docs/configuration.md) — Customize indexing behavior
|
|
130
|
+
- [Architecture](./docs/architecture.md) — How RAGgrep works internally
|
|
131
|
+
|
|
132
|
+
## Performance
|
|
133
|
+
|
|
134
|
+
| Operation | Time | Notes |
|
|
135
|
+
| ------------------------ | ---------- | -------------------------------------- |
|
|
136
|
+
| Initial index (1k files) | 1-2 min | Embedding generation is the bottleneck |
|
|
137
|
+
| Incremental update | <2s | Only changed files |
|
|
138
|
+
| Search | ~100-500ms | Depends on codebase size |
|
|
139
|
+
|
|
140
|
+
## Requirements
|
|
141
|
+
|
|
142
|
+
- Node.js 18+
|
|
143
|
+
- ~50MB disk space for models (cached globally)
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
MIT
|
|
package/dist/{indexer → app/indexer}/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { EmbeddingModelName } from '
|
|
1
|
+
import type { EmbeddingModelName } from '../../domain/ports';
|
|
2
2
|
export interface IndexResult {
|
|
3
3
|
moduleId: string;
|
|
4
4
|
indexed: number;
|
|
@@ -18,6 +18,24 @@ export interface CleanupResult {
|
|
|
18
18
|
/** Number of valid entries kept */
|
|
19
19
|
kept: number;
|
|
20
20
|
}
|
|
21
|
+
export interface IndexStatus {
|
|
22
|
+
/** Whether an index exists */
|
|
23
|
+
exists: boolean;
|
|
24
|
+
/** Root directory path */
|
|
25
|
+
rootDir: string;
|
|
26
|
+
/** Index directory path */
|
|
27
|
+
indexDir: string;
|
|
28
|
+
/** Last time the index was updated */
|
|
29
|
+
lastUpdated?: string;
|
|
30
|
+
/** Active modules and their file counts */
|
|
31
|
+
modules: Array<{
|
|
32
|
+
id: string;
|
|
33
|
+
fileCount: number;
|
|
34
|
+
lastUpdated: string;
|
|
35
|
+
}>;
|
|
36
|
+
/** Total number of indexed files */
|
|
37
|
+
totalFiles: number;
|
|
38
|
+
}
|
|
21
39
|
/**
|
|
22
40
|
* Index a directory using all enabled modules
|
|
23
41
|
*/
|
|
@@ -31,3 +49,10 @@ export declare function indexDirectory(rootDir: string, options?: IndexOptions):
|
|
|
31
49
|
export declare function cleanupIndex(rootDir: string, options?: {
|
|
32
50
|
verbose?: boolean;
|
|
33
51
|
}): Promise<CleanupResult[]>;
|
|
52
|
+
/**
|
|
53
|
+
* Get the current status of the index
|
|
54
|
+
* @param rootDir - Root directory of the project
|
|
55
|
+
* @returns Index status information
|
|
56
|
+
*/
|
|
57
|
+
export declare function getIndexStatus(rootDir: string): Promise<IndexStatus>;
|
|
58
|
+
export { watchDirectory, type WatchOptions, type FileWatcher } from './watcher';
|
|
package/dist/app/indexer/watcher.d.ts
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* File watcher for incremental indexing
|
|
3
|
+
*
|
|
4
|
+
* Best practices implemented:
|
|
5
|
+
* - Debouncing: Batches rapid file changes (e.g., IDE saves, git operations)
|
|
6
|
+
* - Queuing: Prevents concurrent index operations
|
|
7
|
+
* - Efficient filtering: Only watches relevant file types
|
|
8
|
+
* - Graceful shutdown: Proper cleanup on SIGINT/SIGTERM
|
|
9
|
+
* - Error recovery: Continues watching after index errors
|
|
10
|
+
*/
|
|
11
|
+
import { type IndexOptions, type IndexResult } from './index';
|
|
12
|
+
export interface WatchOptions extends IndexOptions {
|
|
13
|
+
/** Debounce delay in milliseconds (default: 300) */
|
|
14
|
+
debounceMs?: number;
|
|
15
|
+
/** Callback when indexing starts */
|
|
16
|
+
onIndexStart?: (files: string[]) => void;
|
|
17
|
+
/** Callback when indexing completes */
|
|
18
|
+
onIndexComplete?: (results: IndexResult[]) => void;
|
|
19
|
+
/** Callback when a file change is detected */
|
|
20
|
+
onFileChange?: (event: 'add' | 'change' | 'unlink', filepath: string) => void;
|
|
21
|
+
/** Callback for errors */
|
|
22
|
+
onError?: (error: Error) => void;
|
|
23
|
+
}
|
|
24
|
+
export interface FileWatcher {
|
|
25
|
+
/** Stop watching and clean up */
|
|
26
|
+
stop: () => Promise<void>;
|
|
27
|
+
/** Whether the watcher is currently running */
|
|
28
|
+
isRunning: () => boolean;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Start watching a directory for file changes and index incrementally
|
|
32
|
+
*/
|
|
33
|
+
export declare function watchDirectory(rootDir: string, options?: WatchOptions): Promise<FileWatcher>;
|