raggrep 0.17.1 → 0.18.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/README.md +25 -40
- package/dist/app/search/index.d.ts +2 -1
- package/dist/cli/main.js +445 -79
- package/dist/cli/main.js.map +20 -17
- package/dist/domain/entities/index.d.ts +3 -1
- package/dist/domain/entities/rankingWeights.d.ts +84 -0
- package/dist/domain/entities/searchResult.d.ts +28 -1
- package/dist/domain/services/bm25.d.ts +5 -0
- package/dist/domain/services/discriminativeTerms.d.ts +28 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/literalScorer.d.ts +9 -23
- package/dist/domain/services/matchScales.d.ts +19 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +433 -75
- package/dist/index.js.map +20 -17
- package/dist/types.d.ts +1 -1
- package/package.json +7 -4
package/README.md
CHANGED
|
@@ -13,6 +13,7 @@ RAGgrep indexes your code and lets you search it using natural language. Everyth
|
|
|
13
13
|
- **Incremental** — Only re-indexes files that have changed. Instant search when nothing changed.
|
|
14
14
|
- **Watch mode** — Keep the index fresh in real-time as you code.
|
|
15
15
|
- **Hybrid search** — Combines semantic similarity, keyword matching, and exact text matching for best results.
|
|
16
|
+
- **Structured vs semantic** — Each hit shows **Structured** and **Semantic** match strength. Result order defaults to **structured-first**; use `raggrep query --rank-by semantic` (or `combined` for fused score only) to change ordering.
|
|
16
17
|
- **Exact match track** — Finds identifiers in ANY file type (YAML, .env, config, not just code) with grep-like precision.
|
|
17
18
|
- **Fusion boosting** — Semantic results containing exact matches get boosted (1.5x) for better ranking.
|
|
18
19
|
- **Literal boosting** — Exact identifier matches get priority. Use backticks for precise matching: `` `AuthService` ``.
|
|
@@ -54,16 +55,16 @@ Searching for: "user authentication"
|
|
|
54
55
|
Found 3 results:
|
|
55
56
|
|
|
56
57
|
1. src/auth/authService.ts:24-55 (login)
|
|
57
|
-
Score: 34.4% | Type: function | via TypeScript | exported
|
|
58
|
+
Score: 34.4% | Structured: 42.0% | Semantic: 31.0% | Type: function | via TypeScript | exported
|
|
58
59
|
export async function login(credentials: LoginCredentials): Promise<AuthResult> {
|
|
59
60
|
const { email, password } = credentials;
|
|
60
61
|
|
|
61
62
|
2. src/auth/session.ts:10-25 (createSession)
|
|
62
|
-
Score: 28.2% | Type: function | via TypeScript | exported
|
|
63
|
+
Score: 28.2% | Structured: 35.0% | Semantic: 22.0% | Type: function | via TypeScript | exported
|
|
63
64
|
export function createSession(user: User): Session {
|
|
64
65
|
|
|
65
66
|
3. src/users/types.ts:3-12 (User)
|
|
66
|
-
Score: 26.0% | Type: interface | via TypeScript | exported
|
|
67
|
+
Score: 26.0% | Structured: 30.0% | Semantic: 23.0% | Type: interface | via TypeScript | exported
|
|
67
68
|
export interface User {
|
|
68
69
|
id: string;
|
|
69
70
|
```
|
|
@@ -92,34 +93,10 @@ Searching for: "AUTH_SERVICE_URL"
|
|
|
92
93
|
└─────────────────────────────────────────────────────────────────────┘
|
|
93
94
|
|
|
94
95
|
1. src/auth/authService.ts:2-10 (AuthService)
|
|
95
|
-
Score: 45.2% | Type: class | via TypeScript | exported | exact match
|
|
96
|
+
Score: 45.2% | Structured: 48.0% | Semantic: 43.0% | Type: class | via TypeScript | exported | exact match
|
|
96
97
|
export class AuthService {
|
|
97
98
|
private baseUrl = AUTH_SERVICE_URL;
|
|
98
99
|
```
|
|
99
|
-
Index updated: 42 indexed
|
|
100
|
-
|
|
101
|
-
RAGgrep Search
|
|
102
|
-
==============
|
|
103
|
-
|
|
104
|
-
Searching for: "user authentication"
|
|
105
|
-
|
|
106
|
-
Found 3 results:
|
|
107
|
-
|
|
108
|
-
1. src/auth/authService.ts:24-55 (login)
|
|
109
|
-
Score: 34.4% | Type: function | via TypeScript | exported
|
|
110
|
-
export async function login(credentials: LoginCredentials): Promise<AuthResult> {
|
|
111
|
-
const { email, password } = credentials;
|
|
112
|
-
|
|
113
|
-
2. src/auth/session.ts:10-25 (createSession)
|
|
114
|
-
Score: 28.2% | Type: function | via TypeScript | exported
|
|
115
|
-
export function createSession(user: User): Session {
|
|
116
|
-
|
|
117
|
-
3. src/users/types.ts:3-12 (User)
|
|
118
|
-
Score: 26.0% | Type: interface | via TypeScript | exported
|
|
119
|
-
export interface User {
|
|
120
|
-
id: string;
|
|
121
|
-
```
|
|
122
|
-
|
|
123
100
|
### Watch Mode
|
|
124
101
|
|
|
125
102
|
Keep your index fresh in real-time while you code:
|
|
@@ -158,19 +135,24 @@ raggrep query "AUTH_SERVICE_URL" # Exact identifier (auto-triggers e
|
|
|
158
135
|
raggrep query "\`AuthService\`" # Backticks force exact match
|
|
159
136
|
raggrep query "error handling" --top 5 # Limit results
|
|
160
137
|
raggrep query "database" --min-score 0.2 # Set minimum score threshold
|
|
138
|
+
raggrep query "login flow" --rank-by semantic # Order by semantic similarity first
|
|
139
|
+
raggrep query "auth" --rank-by combined # Order by fused score only
|
|
140
|
+
raggrep query "debug" --timing # Print timing breakdown
|
|
161
141
|
raggrep query "interface" --type ts # Filter by file extension
|
|
162
142
|
raggrep query "auth" --filter src/auth # Filter by path
|
|
163
143
|
raggrep query "api" -f src/api -f src/routes # Multiple path filters
|
|
164
144
|
```
|
|
165
145
|
|
|
166
|
-
| Flag
|
|
167
|
-
|
|
|
168
|
-
| `--dir <path>`
|
|
169
|
-
| `--top <n>`
|
|
170
|
-
| `--min-score <n>`
|
|
171
|
-
| `--
|
|
172
|
-
| `--
|
|
173
|
-
| `--
|
|
146
|
+
| Flag | Short | Description |
|
|
147
|
+
| ------------------ | ----- | ----------------------------------------------------------- |
|
|
148
|
+
| `--dir <path>` | `-C` | Project directory to search (default: current directory) |
|
|
149
|
+
| `--top <n>` | `-k` | Number of results to return (default: 10) |
|
|
150
|
+
| `--min-score <n>` | `-s` | Minimum similarity score 0–1 (default: 0.15) |
|
|
151
|
+
| `--rank-by <mode>` | | Sort order: `structured` (default), `semantic`, or `combined` |
|
|
152
|
+
| `--timing` | `-T` | Print timing breakdown for profiling |
|
|
153
|
+
| `--type <ext>` | `-t` | Filter by file extension (e.g., ts, tsx, js) |
|
|
154
|
+
| `--filter <path>` | `-f` | Filter by path or glob pattern (can be used multiple times) |
|
|
155
|
+
| `--help` | `-h` | Show help message |
|
|
174
156
|
|
|
175
157
|
### Filtering by File Type
|
|
176
158
|
|
|
@@ -284,10 +266,13 @@ Indexing uses Transformers.js–style **local ONNX** models. Unless you change `
|
|
|
284
266
|
|
|
285
267
|
| Command | What it measures | Source |
|
|
286
268
|
|--------|------------------|--------|
|
|
287
|
-
| `bun run bench:embeddings` | Embedding throughput (runtime × model matrix; **nomic** omitted from the harness for now) | [`
|
|
288
|
-
| `bun run bench:retrieval` | Index + hybrid search time and accuracy vs golden queries | [`
|
|
269
|
+
| `bun run bench:embeddings` | Embedding throughput (runtime × model matrix; **nomic** omitted from the harness for now) | [`research/bench/benchmark-embedding-runtimes.ts`](./research/bench/benchmark-embedding-runtimes.ts) |
|
|
270
|
+
| `bun run bench:retrieval` | Index + hybrid search time and accuracy vs golden queries | [`research/bench/benchmark-retrieval-quality.ts`](./research/bench/benchmark-retrieval-quality.ts) |
|
|
271
|
+
| `bun run eval:golden` | Accuracy-only golden eval against a checkout | [`research/eval/run-golden-queries.ts`](./research/eval/run-golden-queries.ts) |
|
|
272
|
+
| `bun run bench:golden-hillclimb` | Parameter tuning sweep vs golden set | [`research/bench/benchmark-raggrep-hillclimb.ts`](./research/bench/benchmark-raggrep-hillclimb.ts) |
|
|
273
|
+
| `bun run bench:golden-convex` | Wave-style benchmark vs Convex starter (`--fresh`, `--passes`, etc.) | [`research/bench/benchmark-raggrep-golden-queries.ts`](./research/bench/benchmark-raggrep-golden-queries.ts) |
|
|
289
274
|
|
|
290
|
-
Golden query
|
|
275
|
+
Golden query sets: [`research/eval/golden-queries-next-convex.json`](./research/eval/golden-queries-next-convex.json) (10 queries), [`research/eval/golden-queries-next-convex-50.json`](./research/eval/golden-queries-next-convex-50.json) (50 queries). Benchmark scripts write **`research/results/<name>.result.md`** (versioned in git for reference) and resumable **`research/results/*.cache.json`** (ignored).
|
|
291
276
|
|
|
292
277
|
## What Gets Indexed
|
|
293
278
|
|
|
@@ -376,7 +361,7 @@ raggrep opencode install --tool
|
|
|
376
361
|
#### Tool Usage
|
|
377
362
|
Once installed as a tool, RAGgrep provides direct search functionality:
|
|
378
363
|
- Natural language queries: "user authentication flow"
|
|
379
|
-
- All CLI options: `--top`, `--min-score`, `--type`, `--filter`
|
|
364
|
+
- All CLI options: `--top`, `--min-score`, `--rank-by`, `--type`, `--filter`, `--timing`
|
|
380
365
|
- Context-aware results with scores and file locations
|
|
381
366
|
|
|
382
367
|
#### Skill Usage
|
|
package/dist/app/search/index.d.ts
CHANGED
|
@@ -8,7 +8,8 @@ export declare function search(rootDir: string, query: string, options?: SearchO
|
|
|
8
8
|
* Hybrid search with both semantic and exact match tracks.
|
|
9
9
|
*
|
|
10
10
|
* Returns:
|
|
11
|
-
* - results:
|
|
11
|
+
* - results: Enriched with `semanticMatch` / `structuredMatch` in [0,1]; ordering follows
|
|
12
|
+
* `SearchOptions.rankBy` (default `structured`).
|
|
12
13
|
* - exactMatches: Exact match results for identifier queries (optional)
|
|
13
14
|
*/
|
|
14
15
|
export declare function hybridSearch(rootDir: string, query: string, options?: SearchOptions): Promise<HybridSearchResults>;
|