raggrep 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -78,19 +78,60 @@ This monitors file changes and re-indexes automatically. Useful during active de
78
78
  [Watch] language/typescript: 2 indexed, 0 errors
79
79
  ```
80
80
 
81
- ## CLI Quick Reference
81
+ ## CLI Reference
82
+
83
+ ### Commands
82
84
 
83
85
  ```bash
84
- # Search (auto-indexes if needed)
85
- raggrep query "user login"
86
- raggrep query "error handling" --top 5
87
- raggrep query "database" --type ts
86
+ raggrep query <query> # Search the codebase
87
+ raggrep index # Build/update the index
88
+ raggrep status # Show index status
89
+ raggrep reset # Clear the index
90
+ ```
88
91
 
89
- # Watch mode
90
- raggrep index --watch
92
+ ### Query Options
93
+
94
+ ```bash
95
+ raggrep query "user login" # Basic search
96
+ raggrep query "error handling" --top 5 # Limit results
97
+ raggrep query "database" --min-score 0.2 # Set minimum score threshold
98
+ raggrep query "interface" --type ts # Filter by file extension
99
+ raggrep query "auth" --filter src/auth # Filter by path
100
+ raggrep query "api" -f src/api -f src/routes # Multiple path filters
101
+ ```
102
+
103
+ | Flag | Short | Description |
104
+ | ----------------- | ----- | -------------------------------------------------- |
105
+ | `--top <n>` | `-k` | Number of results to return (default: 10) |
106
+ | `--min-score <n>` | `-s` | Minimum similarity score 0-1 (default: 0.15) |
107
+ | `--type <ext>` | `-t` | Filter by file extension (e.g., ts, tsx, js) |
108
+ | `--filter <path>` | `-f` | Filter by path prefix (can be used multiple times) |
109
+ | `--help` | `-h` | Show help message |
110
+
111
+ ### Index Options
91
112
 
92
- # Check index status
93
- raggrep status
113
+ ```bash
114
+ raggrep index # Index current directory
115
+ raggrep index --watch # Watch mode - re-index on file changes
116
+ raggrep index --verbose # Show detailed progress
117
+ raggrep index --concurrency 8 # Set parallel workers (default: auto)
118
+ raggrep index --model bge-small-en-v1.5 # Use specific embedding model
119
+ ```
120
+
121
+ | Flag | Short | Description |
122
+ | ------------------- | ----- | ------------------------------------------------------- |
123
+ | `--watch` | `-w` | Watch for file changes and re-index automatically |
124
+ | `--verbose` | `-v` | Show detailed progress |
125
+ | `--concurrency <n>` | `-c` | Number of parallel workers (default: auto based on CPU) |
126
+ | `--model <name>` | `-m` | Embedding model to use |
127
+ | `--help` | `-h` | Show help message |
128
+
129
+ ### Other Commands
130
+
131
+ ```bash
132
+ raggrep status # Show index status and statistics
133
+ raggrep reset # Clear the index completely
134
+ raggrep --version # Show version
94
135
  ```
95
136
 
96
137
  ## How It Works
@@ -104,9 +145,13 @@ The index is stored in a system temp directory, keeping your project clean.
104
145
 
105
146
  ## What Gets Indexed
106
147
 
107
- **File types:** `.ts`, `.tsx`, `.js`, `.jsx`, `.py`, `.go`, `.rs`, `.java`, `.md`, `.txt`
148
+ **TypeScript/JavaScript:** `.ts`, `.tsx`, `.js`, `.jsx`, `.mjs`, `.cjs` — AST-parsed for functions, classes, interfaces, types, enums
149
+
150
+ **Documentation:** `.md`, `.txt` — Section-aware parsing with heading extraction
151
+
152
+ **Data:** `.json` — Structure-aware with key/value extraction
108
153
 
109
- **Code structures:** Functions, classes, interfaces, types, enums, exports
154
+ **Other languages:** `.py`, `.go`, `.rs`, `.java`, `.yaml`, `.yml`, `.toml`, `.sql` — Symbol extraction and keyword search
110
155
 
111
156
  **Automatically ignored:** `node_modules`, `dist`, `build`, `.git`, and other common directories
112
157
 
@@ -16,7 +16,7 @@ export interface IndexOptions {
16
16
  quiet?: boolean;
17
17
  /** Logger for progress reporting. If not provided, uses console by default (quiet mode uses silent logger) */
18
18
  logger?: Logger;
19
- /** Number of files to process in parallel (default: 4) */
19
+ /** Number of files to process in parallel (default: auto based on CPU cores) */
20
20
  concurrency?: number;
21
21
  }
22
22
  export interface EnsureFreshResult {
package/dist/cli/main.js CHANGED
@@ -43,7 +43,7 @@ class TransformersEmbeddingProvider {
43
43
  initPromise = null;
44
44
  constructor(config) {
45
45
  this.config = {
46
- model: config?.model ?? "all-MiniLM-L6-v2",
46
+ model: config?.model ?? "bge-small-en-v1.5",
47
47
  showProgress: config?.showProgress ?? false,
48
48
  logger: config?.logger
49
49
  };
@@ -157,7 +157,7 @@ class TransformersEmbeddingProvider {
157
157
  return results;
158
158
  }
159
159
  getDimension() {
160
- return EMBEDDING_DIMENSION;
160
+ return EMBEDDING_DIMENSIONS[this.config.model];
161
161
  }
162
162
  getModelName() {
163
163
  return this.config.model;
@@ -206,7 +206,7 @@ async function getEmbeddings(texts) {
206
206
  const provider = await ensureGlobalProvider();
207
207
  return provider.getEmbeddings(texts);
208
208
  }
209
- var CACHE_DIR, EMBEDDING_MODELS, EMBEDDING_DIMENSION = 384, BATCH_SIZE = 32, globalProvider = null, globalConfig;
209
+ var CACHE_DIR, EMBEDDING_MODELS, EMBEDDING_DIMENSIONS, BATCH_SIZE = 32, globalProvider = null, globalConfig;
210
210
  var init_transformersEmbedding = __esm(() => {
211
211
  CACHE_DIR = path.join(os.homedir(), ".cache", "raggrep", "models");
212
212
  env.cacheDir = CACHE_DIR;
@@ -215,10 +215,18 @@ var init_transformersEmbedding = __esm(() => {
215
215
  "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
216
216
  "all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
217
217
  "bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
218
- "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
218
+ "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2",
219
+ "nomic-embed-text-v1.5": "nomic-ai/nomic-embed-text-v1.5"
220
+ };
221
+ EMBEDDING_DIMENSIONS = {
222
+ "all-MiniLM-L6-v2": 384,
223
+ "all-MiniLM-L12-v2": 384,
224
+ "bge-small-en-v1.5": 384,
225
+ "paraphrase-MiniLM-L3-v2": 384,
226
+ "nomic-embed-text-v1.5": 768
219
227
  };
220
228
  globalConfig = {
221
- model: "all-MiniLM-L6-v2",
229
+ model: "bge-small-en-v1.5",
222
230
  showProgress: false,
223
231
  logger: undefined
224
232
  };
@@ -481,10 +489,10 @@ function getModuleConfig(config, moduleId) {
481
489
  }
482
490
  function getEmbeddingConfigFromModule(moduleConfig) {
483
491
  const options = moduleConfig.options || {};
484
- const modelName = options.embeddingModel || "all-MiniLM-L6-v2";
492
+ const modelName = options.embeddingModel || "bge-small-en-v1.5";
485
493
  if (!(modelName in EMBEDDING_MODELS2)) {
486
- console.warn(`Unknown embedding model: ${modelName}, falling back to all-MiniLM-L6-v2`);
487
- return { model: "all-MiniLM-L6-v2" };
494
+ console.warn(`Unknown embedding model: ${modelName}, falling back to bge-small-en-v1.5`);
495
+ return { model: "bge-small-en-v1.5" };
488
496
  }
489
497
  return {
490
498
  model: modelName,
@@ -500,7 +508,8 @@ var init_configLoader = __esm(() => {
500
508
  "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
501
509
  "all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
502
510
  "bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
503
- "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
511
+ "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2",
512
+ "nomic-embed-text-v1.5": "nomic-ai/nomic-embed-text-v1.5"
504
513
  };
505
514
  });
506
515
 
@@ -4275,6 +4284,7 @@ __export(exports_indexer, {
4275
4284
  import { glob } from "glob";
4276
4285
  import * as fs6 from "fs/promises";
4277
4286
  import * as path15 from "path";
4287
+ import * as os3 from "os";
4278
4288
  async function parallelMap(items, processor, concurrency) {
4279
4289
  const results = new Array(items.length);
4280
4290
  let nextIndex = 0;
@@ -4306,6 +4316,11 @@ function formatDuration(ms) {
4306
4316
  const remainingSeconds = seconds % 60;
4307
4317
  return `${minutes}m ${remainingSeconds.toFixed(1)}s`;
4308
4318
  }
4319
+ function getOptimalConcurrency() {
4320
+ const cpuCount = os3.cpus().length;
4321
+ const optimal = Math.max(2, Math.min(16, Math.floor(cpuCount * 0.75)));
4322
+ return optimal;
4323
+ }
4309
4324
  async function indexDirectory(rootDir, options = {}) {
4310
4325
  const verbose = options.verbose ?? false;
4311
4326
  const quiet = options.quiet ?? false;
@@ -4849,13 +4864,14 @@ async function getIndexStatus(rootDir) {
4849
4864
  }
4850
4865
  return status;
4851
4866
  }
4852
- var INDEX_SCHEMA_VERSION = "1.0.0", DEFAULT_CONCURRENCY = 4;
4867
+ var INDEX_SCHEMA_VERSION = "1.1.0", DEFAULT_CONCURRENCY;
4853
4868
  var init_indexer = __esm(() => {
4854
4869
  init_config2();
4855
4870
  init_registry();
4856
4871
  init_introspection2();
4857
4872
  init_logger();
4858
4873
  init_watcher();
4874
+ DEFAULT_CONCURRENCY = getOptimalConcurrency();
4859
4875
  });
4860
4876
 
4861
4877
  // src/types.ts
@@ -5025,7 +5041,7 @@ init_logger();
5025
5041
  // package.json
5026
5042
  var package_default = {
5027
5043
  name: "raggrep",
5028
- version: "0.5.1",
5044
+ version: "0.6.0",
5029
5045
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
5030
5046
  type: "module",
5031
5047
  main: "./dist/index.js",
@@ -5201,7 +5217,7 @@ Usage:
5201
5217
  Options:
5202
5218
  -w, --watch Watch for file changes and re-index automatically
5203
5219
  -m, --model <name> Embedding model to use (default: bge-small-en-v1.5)
5204
- -c, --concurrency <n> Number of files to process in parallel (default: 4)
5220
+ -c, --concurrency <n> Number of files to process in parallel (default: auto)
5205
5221
  -v, --verbose Show detailed progress
5206
5222
  -h, --help Show this help message
5207
5223
 
@@ -5486,4 +5502,4 @@ Run 'raggrep <command> --help' for more information.
5486
5502
  }
5487
5503
  main();
5488
5504
 
5489
- //# debugId=E73618F0DDE8326264756E2164756E21
5505
+ //# debugId=5883D798B8D780D764756E2164756E21