raggrep 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ import type { Logger } from "./logger";
9
9
  /**
10
10
  * Available embedding model names
11
11
  */
12
- export type EmbeddingModelName = "all-MiniLM-L6-v2" | "all-MiniLM-L12-v2" | "bge-small-en-v1.5" | "paraphrase-MiniLM-L3-v2";
12
+ export type EmbeddingModelName = "all-MiniLM-L6-v2" | "all-MiniLM-L12-v2" | "bge-small-en-v1.5" | "paraphrase-MiniLM-L3-v2" | "nomic-embed-text-v1.5";
13
13
  /**
14
14
  * Configuration for embedding provider
15
15
  */
package/dist/index.js CHANGED
@@ -189,10 +189,10 @@ function getModuleConfig(config, moduleId) {
189
189
  }
190
190
  function getEmbeddingConfigFromModule(moduleConfig) {
191
191
  const options = moduleConfig.options || {};
192
- const modelName = options.embeddingModel || "all-MiniLM-L6-v2";
192
+ const modelName = options.embeddingModel || "bge-small-en-v1.5";
193
193
  if (!(modelName in EMBEDDING_MODELS)) {
194
- console.warn(`Unknown embedding model: ${modelName}, falling back to all-MiniLM-L6-v2`);
195
- return { model: "all-MiniLM-L6-v2" };
194
+ console.warn(`Unknown embedding model: ${modelName}, falling back to bge-small-en-v1.5`);
195
+ return { model: "bge-small-en-v1.5" };
196
196
  }
197
197
  return {
198
198
  model: modelName,
@@ -208,7 +208,8 @@ var init_configLoader = __esm(() => {
208
208
  "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
209
209
  "all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
210
210
  "bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
211
- "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
211
+ "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2",
212
+ "nomic-embed-text-v1.5": "nomic-ai/nomic-embed-text-v1.5"
212
213
  };
213
214
  });
214
215
 
@@ -2035,7 +2036,7 @@ class TransformersEmbeddingProvider {
2035
2036
  initPromise = null;
2036
2037
  constructor(config) {
2037
2038
  this.config = {
2038
- model: config?.model ?? "all-MiniLM-L6-v2",
2039
+ model: config?.model ?? "bge-small-en-v1.5",
2039
2040
  showProgress: config?.showProgress ?? false,
2040
2041
  logger: config?.logger
2041
2042
  };
@@ -2149,7 +2150,7 @@ class TransformersEmbeddingProvider {
2149
2150
  return results;
2150
2151
  }
2151
2152
  getDimension() {
2152
- return EMBEDDING_DIMENSION;
2153
+ return EMBEDDING_DIMENSIONS[this.config.model];
2153
2154
  }
2154
2155
  getModelName() {
2155
2156
  return this.config.model;
@@ -2195,7 +2196,7 @@ async function getEmbeddings(texts) {
2195
2196
  const provider = await ensureGlobalProvider();
2196
2197
  return provider.getEmbeddings(texts);
2197
2198
  }
2198
- var CACHE_DIR, EMBEDDING_MODELS2, EMBEDDING_DIMENSION = 384, BATCH_SIZE = 32, globalProvider = null, globalConfig;
2199
+ var CACHE_DIR, EMBEDDING_MODELS2, EMBEDDING_DIMENSIONS, BATCH_SIZE = 32, globalProvider = null, globalConfig;
2199
2200
  var init_transformersEmbedding = __esm(() => {
2200
2201
  CACHE_DIR = path6.join(os2.homedir(), ".cache", "raggrep", "models");
2201
2202
  env.cacheDir = CACHE_DIR;
@@ -2204,10 +2205,18 @@ var init_transformersEmbedding = __esm(() => {
2204
2205
  "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
2205
2206
  "all-MiniLM-L12-v2": "Xenova/all-MiniLM-L12-v2",
2206
2207
  "bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
2207
- "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2"
2208
+ "paraphrase-MiniLM-L3-v2": "Xenova/paraphrase-MiniLM-L3-v2",
2209
+ "nomic-embed-text-v1.5": "nomic-ai/nomic-embed-text-v1.5"
2210
+ };
2211
+ EMBEDDING_DIMENSIONS = {
2212
+ "all-MiniLM-L6-v2": 384,
2213
+ "all-MiniLM-L12-v2": 384,
2214
+ "bge-small-en-v1.5": 384,
2215
+ "paraphrase-MiniLM-L3-v2": 384,
2216
+ "nomic-embed-text-v1.5": 768
2208
2217
  };
2209
2218
  globalConfig = {
2210
- model: "all-MiniLM-L6-v2",
2219
+ model: "bge-small-en-v1.5",
2211
2220
  showProgress: false,
2212
2221
  logger: undefined
2213
2222
  };
@@ -3704,6 +3713,7 @@ init_config2();
3704
3713
  import { glob } from "glob";
3705
3714
  import * as fs6 from "fs/promises";
3706
3715
  import * as path14 from "path";
3716
+ import * as os3 from "os";
3707
3717
 
3708
3718
  // src/modules/registry.ts
3709
3719
  class ModuleRegistryImpl {
@@ -4099,7 +4109,7 @@ async function parallelMap(items, processor, concurrency) {
4099
4109
  await Promise.all(workers);
4100
4110
  return results;
4101
4111
  }
4102
- var INDEX_SCHEMA_VERSION = "1.0.0";
4112
+ var INDEX_SCHEMA_VERSION = "1.1.0";
4103
4113
  function formatDuration(ms) {
4104
4114
  if (ms < 1000) {
4105
4115
  return `${ms}ms`;
@@ -4112,7 +4122,12 @@ function formatDuration(ms) {
4112
4122
  const remainingSeconds = seconds % 60;
4113
4123
  return `${minutes}m ${remainingSeconds.toFixed(1)}s`;
4114
4124
  }
4115
- var DEFAULT_CONCURRENCY = 4;
4125
+ function getOptimalConcurrency() {
4126
+ const cpuCount = os3.cpus().length;
4127
+ const optimal = Math.max(2, Math.min(16, Math.floor(cpuCount * 0.75)));
4128
+ return optimal;
4129
+ }
4130
+ var DEFAULT_CONCURRENCY = getOptimalConcurrency();
4116
4131
  async function indexDirectory(rootDir, options = {}) {
4117
4132
  const verbose = options.verbose ?? false;
4118
4133
  const quiet = options.quiet ?? false;
@@ -4844,4 +4859,4 @@ export {
4844
4859
  ConsoleLogger
4845
4860
  };
4846
4861
 
4847
- //# debugId=C1C754EB0A3147DB64756E2164756E21
4862
+ //# debugId=4915A936C06DA9B864756E2164756E21