raggrep 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -78,19 +78,60 @@ This monitors file changes and re-indexes automatically. Useful during active de
78
78
  [Watch] language/typescript: 2 indexed, 0 errors
79
79
  ```
80
80
 
81
- ## CLI Quick Reference
81
+ ## CLI Reference
82
+
83
+ ### Commands
82
84
 
83
85
  ```bash
84
- # Search (auto-indexes if needed)
85
- raggrep query "user login"
86
- raggrep query "error handling" --top 5
87
- raggrep query "database" --type ts
86
+ raggrep query <query> # Search the codebase
87
+ raggrep index # Build/update the index
88
+ raggrep status # Show index status
89
+ raggrep reset # Clear the index
90
+ ```
88
91
 
89
- # Watch mode
90
- raggrep index --watch
92
+ ### Query Options
93
+
94
+ ```bash
95
+ raggrep query "user login" # Basic search
96
+ raggrep query "error handling" --top 5 # Limit results
97
+ raggrep query "database" --min-score 0.2 # Set minimum score threshold
98
+ raggrep query "interface" --type ts # Filter by file extension
99
+ raggrep query "auth" --filter src/auth # Filter by path
100
+ raggrep query "api" -f src/api -f src/routes # Multiple path filters
101
+ ```
102
+
103
+ | Flag | Short | Description |
104
+ |------|-------|-------------|
105
+ | `--top <n>` | `-k` | Number of results to return (default: 10) |
106
+ | `--min-score <n>` | `-s` | Minimum similarity score 0-1 (default: 0.15) |
107
+ | `--type <ext>` | `-t` | Filter by file extension (e.g., ts, tsx, js) |
108
+ | `--filter <path>` | `-f` | Filter by path prefix (can be used multiple times) |
109
+ | `--help` | `-h` | Show help message |
110
+
111
+ ### Index Options
91
112
 
92
- # Check index status
93
- raggrep status
113
+ ```bash
114
+ raggrep index # Index current directory
115
+ raggrep index --watch # Watch mode - re-index on file changes
116
+ raggrep index --verbose # Show detailed progress
117
+ raggrep index --concurrency 8 # Set parallel workers (default: auto)
118
+ raggrep index --model bge-small-en-v1.5 # Use specific embedding model
119
+ ```
120
+
121
+ | Flag | Short | Description |
122
+ |------|-------|-------------|
123
+ | `--watch` | `-w` | Watch for file changes and re-index automatically |
124
+ | `--verbose` | `-v` | Show detailed progress |
125
+ | `--concurrency <n>` | `-c` | Number of parallel workers (default: auto based on CPU) |
126
+ | `--model <name>` | `-m` | Embedding model to use |
127
+ | `--help` | `-h` | Show help message |
128
+
129
+ ### Other Commands
130
+
131
+ ```bash
132
+ raggrep status # Show index status and statistics
133
+ raggrep reset # Clear the index completely
134
+ raggrep --version # Show version
94
135
  ```
95
136
 
96
137
  ## How It Works
@@ -104,9 +145,13 @@ The index is stored in a system temp directory, keeping your project clean.
104
145
 
105
146
  ## What Gets Indexed
106
147
 
107
- **File types:** `.ts`, `.tsx`, `.js`, `.jsx`, `.py`, `.go`, `.rs`, `.java`, `.md`, `.txt`
148
+ **TypeScript/JavaScript:** `.ts`, `.tsx`, `.js`, `.jsx`, `.mjs`, `.cjs` — AST-parsed for functions, classes, interfaces, types, enums
149
+
150
+ **Documentation:** `.md`, `.txt` — Section-aware parsing with heading extraction
151
+
152
+ **Data:** `.json` — Structure-aware with key/value extraction
108
153
 
109
- **Code structures:** Functions, classes, interfaces, types, enums, exports
154
+ **Other languages:** `.py`, `.go`, `.rs`, `.java`, `.yaml`, `.yml`, `.toml`, `.sql` — Symbol extraction and keyword search
110
155
 
111
156
  **Automatically ignored:** `node_modules`, `dist`, `build`, `.git`, and other common directories
112
157
 
@@ -4,6 +4,8 @@ export interface IndexResult {
4
4
  indexed: number;
5
5
  skipped: number;
6
6
  errors: number;
7
+ /** Time taken in milliseconds */
8
+ durationMs?: number;
7
9
  }
8
10
  export interface IndexOptions {
9
11
  /** Override the embedding model (semantic module) */
@@ -14,7 +16,7 @@ export interface IndexOptions {
14
16
  quiet?: boolean;
15
17
  /** Logger for progress reporting. If not provided, uses console by default (quiet mode uses silent logger) */
16
18
  logger?: Logger;
17
- /** Number of files to process in parallel (default: 4) */
19
+ /** Number of files to process in parallel (default: auto based on CPU cores) */
18
20
  concurrency?: number;
19
21
  }
20
22
  export interface EnsureFreshResult {
package/dist/cli/main.js CHANGED
@@ -325,6 +325,7 @@ var init_searchResult = __esm(() => {
325
325
  topK: 10,
326
326
  minScore: 0.15,
327
327
  filePatterns: [],
328
+ pathFilter: [],
328
329
  ensureFresh: true
329
330
  };
330
331
  });
@@ -2087,6 +2088,9 @@ class CoreModule {
2087
2088
  name = "Core Search";
2088
2089
  description = "Language-agnostic text search with symbol extraction";
2089
2090
  version = "1.0.0";
2091
+ supportsFile(_filepath) {
2092
+ return true;
2093
+ }
2090
2094
  symbolIndex = new Map;
2091
2095
  bm25Index = null;
2092
2096
  rootDir = "";
@@ -2979,6 +2983,7 @@ var init_storage = __esm(() => {
2979
2983
  // src/modules/language/typescript/index.ts
2980
2984
  var exports_typescript = {};
2981
2985
  __export(exports_typescript, {
2986
+ supportsFile: () => supportsFile,
2982
2987
  isTypeScriptFile: () => isTypeScriptFile,
2983
2988
  TypeScriptModule: () => TypeScriptModule,
2984
2989
  TYPESCRIPT_EXTENSIONS: () => TYPESCRIPT_EXTENSIONS,
@@ -3017,6 +3022,9 @@ class TypeScriptModule {
3017
3022
  name = "TypeScript Search";
3018
3023
  description = "TypeScript-aware code search with AST parsing and semantic embeddings";
3019
3024
  version = "1.0.0";
3025
+ supportsFile(filepath) {
3026
+ return isTypeScriptFile(filepath);
3027
+ }
3020
3028
  embeddingConfig = null;
3021
3029
  symbolicIndex = null;
3022
3030
  pendingSummaries = new Map;
@@ -3240,7 +3248,7 @@ class TypeScriptModule {
3240
3248
  return references;
3241
3249
  }
3242
3250
  }
3243
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS;
3251
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, TYPESCRIPT_EXTENSIONS, supportsFile;
3244
3252
  var init_typescript = __esm(() => {
3245
3253
  init_embeddings();
3246
3254
  init_services();
@@ -3257,11 +3265,13 @@ var init_typescript = __esm(() => {
3257
3265
  ".mts",
3258
3266
  ".cts"
3259
3267
  ];
3268
+ supportsFile = isTypeScriptFile;
3260
3269
  });
3261
3270
 
3262
3271
  // src/modules/data/json/index.ts
3263
3272
  var exports_json = {};
3264
3273
  __export(exports_json, {
3274
+ supportsFile: () => supportsFile2,
3265
3275
  isJsonFile: () => isJsonFile,
3266
3276
  JsonModule: () => JsonModule,
3267
3277
  JSON_EXTENSIONS: () => JSON_EXTENSIONS,
@@ -3318,6 +3328,9 @@ class JsonModule {
3318
3328
  name = "JSON Search";
3319
3329
  description = "JSON file search with structure-aware indexing";
3320
3330
  version = "1.0.0";
3331
+ supportsFile(filepath) {
3332
+ return isJsonFile(filepath);
3333
+ }
3321
3334
  embeddingConfig = null;
3322
3335
  symbolicIndex = null;
3323
3336
  pendingSummaries = new Map;
@@ -3479,18 +3492,20 @@ class JsonModule {
3479
3492
  return results.slice(0, topK);
3480
3493
  }
3481
3494
  }
3482
- var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS;
3495
+ var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS, supportsFile2;
3483
3496
  var init_json = __esm(() => {
3484
3497
  init_embeddings();
3485
3498
  init_services();
3486
3499
  init_config2();
3487
3500
  init_storage();
3488
3501
  JSON_EXTENSIONS = [".json"];
3502
+ supportsFile2 = isJsonFile;
3489
3503
  });
3490
3504
 
3491
3505
  // src/modules/docs/markdown/index.ts
3492
3506
  var exports_markdown = {};
3493
3507
  __export(exports_markdown, {
3508
+ supportsFile: () => supportsFile3,
3494
3509
  isMarkdownFile: () => isMarkdownFile,
3495
3510
  MarkdownModule: () => MarkdownModule,
3496
3511
  MARKDOWN_EXTENSIONS: () => MARKDOWN_EXTENSIONS,
@@ -3596,6 +3611,9 @@ class MarkdownModule {
3596
3611
  name = "Markdown Search";
3597
3612
  description = "Markdown documentation search with section-aware indexing";
3598
3613
  version = "1.0.0";
3614
+ supportsFile(filepath) {
3615
+ return isMarkdownFile(filepath);
3616
+ }
3599
3617
  embeddingConfig = null;
3600
3618
  symbolicIndex = null;
3601
3619
  pendingSummaries = new Map;
@@ -3765,13 +3783,14 @@ ${section.content}` : section.content,
3765
3783
  return results.slice(0, topK);
3766
3784
  }
3767
3785
  }
3768
- var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS;
3786
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
3769
3787
  var init_markdown = __esm(() => {
3770
3788
  init_embeddings();
3771
3789
  init_services();
3772
3790
  init_config2();
3773
3791
  init_storage();
3774
3792
  MARKDOWN_EXTENSIONS = [".md", ".txt"];
3793
+ supportsFile3 = isMarkdownFile;
3775
3794
  });
3776
3795
 
3777
3796
  // src/modules/registry.ts
@@ -4256,6 +4275,7 @@ __export(exports_indexer, {
4256
4275
  import { glob } from "glob";
4257
4276
  import * as fs6 from "fs/promises";
4258
4277
  import * as path15 from "path";
4278
+ import * as os3 from "os";
4259
4279
  async function parallelMap(items, processor, concurrency) {
4260
4280
  const results = new Array(items.length);
4261
4281
  let nextIndex = 0;
@@ -4275,6 +4295,23 @@ async function parallelMap(items, processor, concurrency) {
4275
4295
  await Promise.all(workers);
4276
4296
  return results;
4277
4297
  }
4298
/**
 * Format an elapsed time for log output.
 *
 * - under one second: whole milliseconds, e.g. "250ms"
 * - under one minute: seconds with one decimal, e.g. "12.3s"
 * - otherwise: minutes plus seconds with one decimal, e.g. "2m 5.0s"
 *
 * @param {number} ms - Elapsed time in milliseconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(ms) {
  if (ms < 1000) {
    return `${ms}ms`;
  }
  // Round to tenths of a second BEFORE choosing the unit. Rounding afterwards
  // (via toFixed) lets values just below a minute boundary print as "60.0s"
  // or "1m 60.0s" instead of rolling over into the next minute.
  const tenths = Math.round(ms / 100);
  if (tenths < 600) {
    return `${(tenths / 10).toFixed(1)}s`;
  }
  const minutes = Math.floor(tenths / 600);
  const remainingSeconds = (tenths % 600) / 10;
  return `${minutes}m ${remainingSeconds.toFixed(1)}s`;
}
+ }
4310
/**
 * Choose the default number of files to index in parallel.
 *
 * Uses roughly 75% of the parallelism available to the process, clamped to
 * the range [2, 16].
 *
 * @returns {number} Worker count, an integer between 2 and 16 inclusive.
 */
function getOptimalConcurrency() {
  // os.availableParallelism() is the API Node documents for sizing worker
  // pools; fall back to the logical CPU count on runtimes that lack it.
  const parallelism = typeof os3.availableParallelism === "function"
    ? os3.availableParallelism()
    : os3.cpus().length;
  return Math.max(2, Math.min(16, Math.floor(parallelism * 0.75)));
}
+ }
4278
4315
  async function indexDirectory(rootDir, options = {}) {
4279
4316
  const verbose = options.verbose ?? false;
4280
4317
  const quiet = options.quiet ?? false;
@@ -4301,8 +4338,10 @@ async function indexDirectory(rootDir, options = {}) {
4301
4338
  logger.info(`Enabled modules: ${enabledModules.map((m) => m.id).join(", ")}`);
4302
4339
  const files = await findFiles(rootDir, config);
4303
4340
  logger.info(`Found ${files.length} files to index`);
4341
+ const overallStart = Date.now();
4304
4342
  const results = [];
4305
4343
  for (const module of enabledModules) {
4344
+ const moduleStart = Date.now();
4306
4345
  logger.info(`
4307
4346
  [${module.name}] Starting indexing...`);
4308
4347
  const moduleConfig = getModuleConfig(config, module.id);
@@ -4320,7 +4359,9 @@ async function indexDirectory(rootDir, options = {}) {
4320
4359
  };
4321
4360
  await module.initialize(configWithOverrides);
4322
4361
  }
4323
- const result = await indexWithModule(rootDir, files, module, config, verbose, introspection, logger, concurrency);
4362
+ const moduleFiles = module.supportsFile ? files.filter((f) => module.supportsFile(f)) : files;
4363
+ logger.info(` Processing ${moduleFiles.length} files...`);
4364
+ const result = await indexWithModule(rootDir, moduleFiles, module, config, verbose, introspection, logger, concurrency);
4324
4365
  results.push(result);
4325
4366
  if (module.finalize) {
4326
4367
  logger.info(`[${module.name}] Building secondary indexes...`);
@@ -4339,9 +4380,18 @@ async function indexDirectory(rootDir, options = {}) {
4339
4380
  };
4340
4381
  await module.finalize(ctx);
4341
4382
  }
4342
- logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors`);
4383
+ const moduleDuration = Date.now() - moduleStart;
4384
+ result.durationMs = moduleDuration;
4385
+ logger.info(`[${module.name}] Complete: ${result.indexed} indexed, ${result.skipped} skipped, ${result.errors} errors (${formatDuration(moduleDuration)})`);
4343
4386
  }
4344
4387
  await introspection.save(config);
4388
+ const overallDuration = Date.now() - overallStart;
4389
+ logger.info(`
4390
+ Indexing complete in ${formatDuration(overallDuration)}`);
4391
+ const totalIndexed = results.reduce((sum, r) => sum + r.indexed, 0);
4392
+ const totalSkipped = results.reduce((sum, r) => sum + r.skipped, 0);
4393
+ const totalErrors = results.reduce((sum, r) => sum + r.errors, 0);
4394
+ logger.info(`Total: ${totalIndexed} indexed, ${totalSkipped} skipped, ${totalErrors} errors`);
4345
4395
  await updateGlobalManifest(rootDir, enabledModules, config);
4346
4396
  return results;
4347
4397
  }
@@ -4805,13 +4855,14 @@ async function getIndexStatus(rootDir) {
4805
4855
  }
4806
4856
  return status;
4807
4857
  }
4808
- var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
4858
+ var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY;
4809
4859
  var init_indexer = __esm(() => {
4810
4860
  init_config2();
4811
4861
  init_registry();
4812
4862
  init_introspection2();
4813
4863
  init_logger();
4814
4864
  init_watcher();
4865
+ DEFAULT_CONCURRENCY = getOptimalConcurrency();
4815
4866
  });
4816
4867
 
4817
4868
  // src/types.ts
@@ -4862,9 +4913,17 @@ async function search(rootDir, query, options = {}) {
4862
4913
  const moduleResults = await module.search(query, ctx, options);
4863
4914
  allResults.push(...moduleResults);
4864
4915
  }
4865
- allResults.sort((a, b) => b.score - a.score);
4916
+ let filteredResults = allResults;
4917
+ if (options.pathFilter && options.pathFilter.length > 0) {
4918
+ const normalizedFilters = options.pathFilter.map((p) => p.replace(/\\/g, "/").replace(/^\//, "").replace(/\/$/, ""));
4919
+ filteredResults = allResults.filter((result) => {
4920
+ const normalizedPath = result.filepath.replace(/\\/g, "/");
4921
+ return normalizedFilters.some((filter) => normalizedPath.startsWith(filter + "/") || normalizedPath === filter || normalizedPath.startsWith("./" + filter + "/") || normalizedPath === "./" + filter);
4922
+ });
4923
+ }
4924
+ filteredResults.sort((a, b) => b.score - a.score);
4866
4925
  const topK = options.topK ?? 10;
4867
- return allResults.slice(0, topK);
4926
+ return filteredResults.slice(0, topK);
4868
4927
  }
4869
4928
  function createSearchContext(rootDir, moduleId, config) {
4870
4929
  const indexPath = getModuleIndexPath(rootDir, moduleId, config);
@@ -4973,7 +5032,7 @@ init_logger();
4973
5032
  // package.json
4974
5033
  var package_default = {
4975
5034
  name: "raggrep",
4976
- version: "0.5.0",
5035
+ version: "0.5.2",
4977
5036
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
4978
5037
  type: "module",
4979
5038
  main: "./dist/index.js",
@@ -5117,6 +5176,17 @@ function parseFlags(args2) {
5117
5176
  console.error(`Invalid concurrency: ${args2[i]}. Must be a positive integer.`);
5118
5177
  process.exit(1);
5119
5178
  }
5179
+ } else if (arg === "--filter" || arg === "-f") {
5180
+ const filterPath = args2[++i];
5181
+ if (filterPath) {
5182
+ if (!flags.pathFilter) {
5183
+ flags.pathFilter = [];
5184
+ }
5185
+ flags.pathFilter.push(filterPath);
5186
+ } else {
5187
+ console.error("--filter requires a path (e.g., src/auth)");
5188
+ process.exit(1);
5189
+ }
5120
5190
  } else if (!arg.startsWith("-")) {
5121
5191
  flags.remaining.push(arg);
5122
5192
  }
@@ -5138,7 +5208,7 @@ Usage:
5138
5208
  Options:
5139
5209
  -w, --watch Watch for file changes and re-index automatically
5140
5210
  -m, --model <name> Embedding model to use (default: all-MiniLM-L6-v2)
5141
- -c, --concurrency <n> Number of files to process in parallel (default: 4)
5211
+ -c, --concurrency <n> Number of files to process in parallel (default: auto)
5142
5212
  -v, --verbose Show detailed progress
5143
5213
  -h, --help Show this help message
5144
5214
 
@@ -5225,6 +5295,7 @@ Options:
5225
5295
  -k, --top <n> Number of results to return (default: 10)
5226
5296
  -s, --min-score <n> Minimum similarity score 0-1 (default: 0.15)
5227
5297
  -t, --type <ext> Filter by file extension (e.g., ts, tsx, js)
5298
+ -f, --filter <path> Filter by path prefix (can be used multiple times)
5228
5299
  -h, --help Show this help message
5229
5300
 
5230
5301
  Note:
@@ -5239,6 +5310,8 @@ Examples:
5239
5310
  raggrep query "handle errors" --top 5
5240
5311
  raggrep query "database" --min-score 0.1
5241
5312
  raggrep query "interface" --type ts
5313
+ raggrep query "login" --filter src/auth
5314
+ raggrep query "api" --filter src/api --filter src/routes
5242
5315
  `);
5243
5316
  process.exit(0);
5244
5317
  }
@@ -5279,6 +5352,7 @@ Examples:
5279
5352
  topK: flags.topK ?? 10,
5280
5353
  minScore: flags.minScore,
5281
5354
  filePatterns,
5355
+ pathFilter: flags.pathFilter,
5282
5356
  ensureFresh: false
5283
5357
  });
5284
5358
  console.log(formatSearchResults2(results));
@@ -5419,4 +5493,4 @@ Run 'raggrep <command> --help' for more information.
5419
5493
  }
5420
5494
  main();
5421
5495
 
5422
- //# debugId=5CA623D9974ACF4364756E2164756E21
5496
+ //# debugId=3B601F4F22C997A464756E2164756E21