raggrep 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -5
- package/dist/cli/main.js +713 -458
- package/dist/cli/main.js.map +19 -13
- package/dist/domain/ports/embedding.d.ts +10 -0
- package/dist/domain/ports/index.d.ts +1 -1
- package/dist/index.js +524 -327
- package/dist/index.js.map +18 -12
- package/dist/infrastructure/config/configLoader.d.ts +9 -11
- package/dist/infrastructure/config/index.d.ts +1 -1
- package/dist/infrastructure/embeddings/embeddingPaths.d.ts +6 -0
- package/dist/infrastructure/embeddings/embeddingProviderFactory.d.ts +9 -0
- package/dist/infrastructure/embeddings/globalEmbeddings.d.ts +28 -0
- package/dist/infrastructure/embeddings/huggingfaceEmbeddingProvider.d.ts +21 -0
- package/dist/infrastructure/embeddings/index.d.ts +9 -2
- package/dist/infrastructure/embeddings/modelCache.d.ts +10 -0
- package/dist/infrastructure/embeddings/modelCatalog.d.ts +23 -0
- package/dist/infrastructure/embeddings/xenovaEmbeddingProvider.d.ts +23 -0
- package/dist/infrastructure/index.d.ts +1 -1
- package/package.json +7 -3
- package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +0 -52
|
@@ -5,21 +5,20 @@
|
|
|
5
5
|
* Handles file I/O operations for configuration management.
|
|
6
6
|
*/
|
|
7
7
|
import type { Config, ModuleConfig } from "../../domain/entities";
|
|
8
|
-
import type { EmbeddingConfig
|
|
8
|
+
import type { EmbeddingConfig } from "../../domain/ports";
|
|
9
|
+
import { EMBEDDING_MODELS } from "../embeddings/modelCatalog";
|
|
10
|
+
export { EMBEDDING_MODELS };
|
|
9
11
|
/** Default configuration instance */
|
|
10
12
|
export declare const DEFAULT_CONFIG: Config;
|
|
11
|
-
/**
|
|
12
|
-
export declare const
|
|
13
|
+
/** Directory name for index data under the project (or CLI cwd) root */
|
|
14
|
+
export declare const RAGGREP_INDEX_DIR = ".raggrep";
|
|
13
15
|
/**
|
|
14
16
|
* Get the index storage directory path.
|
|
15
17
|
*
|
|
16
|
-
* Index data is stored
|
|
17
|
-
*
|
|
18
|
-
* a hash of the project's absolute path to ensure uniqueness.
|
|
18
|
+
* Index data is stored under `{rootDir}/.raggrep/`, where `rootDir` is the
|
|
19
|
+
* directory being indexed (for the CLI this is the current working directory).
|
|
19
20
|
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
* @param rootDir - Absolute path to the project root
|
|
21
|
+
* @param rootDir - Absolute or resolved path to the project root
|
|
23
22
|
* @returns Absolute path to the index storage directory
|
|
24
23
|
*/
|
|
25
24
|
export declare function getRaggrepDir(rootDir: string, _config?: Config): string;
|
|
@@ -45,8 +44,7 @@ export declare function getModuleManifestPath(rootDir: string, moduleId: string,
|
|
|
45
44
|
*/
|
|
46
45
|
export declare function getGlobalManifestPath(rootDir: string, config?: Config): string;
|
|
47
46
|
/**
|
|
48
|
-
* Get the config file path.
|
|
49
|
-
* Note: Config is still stored in the temp index directory, not the project.
|
|
47
|
+
* Get the config file path (inside `.raggrep` under the project root).
|
|
50
48
|
*/
|
|
51
49
|
export declare function getConfigPath(rootDir: string, config?: Config): string;
|
|
52
50
|
/**
|
|
@@ -3,4 +3,4 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Handles loading and saving RAGgrep configuration from the filesystem.
|
|
5
5
|
*/
|
|
6
|
-
export { DEFAULT_CONFIG, EMBEDDING_MODELS, getRaggrepDir, getIndexLocation, getModuleIndexPath, getModuleManifestPath, getGlobalManifestPath, getConfigPath, loadConfig, saveConfig, getModuleConfig, getEmbeddingConfigFromModule, } from "./configLoader";
|
|
6
|
+
export { DEFAULT_CONFIG, EMBEDDING_MODELS, RAGGREP_INDEX_DIR, getRaggrepDir, getIndexLocation, getModuleIndexPath, getModuleManifestPath, getGlobalManifestPath, getConfigPath, loadConfig, saveConfig, getModuleConfig, getEmbeddingConfigFromModule, } from "./configLoader";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory for {@link EmbeddingProvider} implementations (composition root helper).
|
|
3
|
+
*/
|
|
4
|
+
import type { EmbeddingConfig, EmbeddingProvider } from "../../domain/ports";
|
|
5
|
+
/**
|
|
6
|
+
* Instantiate the embedding adapter matching {@link EmbeddingConfig.runtime}.
|
|
7
|
+
* Defaults to `@huggingface/transformers` when `runtime` is omitted.
|
|
8
|
+
*/
|
|
9
|
+
export declare function createEmbeddingProvider(config: EmbeddingConfig): EmbeddingProvider;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Process-wide embedding facade (global provider + convenience functions).
|
|
3
|
+
*
|
|
4
|
+
* Modules call {@link configureEmbeddings} before embedding I/O; the active
|
|
5
|
+
* {@link EmbeddingProvider} is chosen via {@link createEmbeddingProvider}.
|
|
6
|
+
*/
|
|
7
|
+
import type { EmbeddingConfig } from "../../domain/ports";
|
|
8
|
+
/**
|
|
9
|
+
* Configure the global embedding provider. Resets the underlying adapter when
|
|
10
|
+
* model, runtime, or logger reference changes.
|
|
11
|
+
*/
|
|
12
|
+
export declare function configureEmbeddings(config: Partial<EmbeddingConfig>): void;
|
|
13
|
+
/**
|
|
14
|
+
* Current global embedding configuration (shallow copy).
|
|
15
|
+
*/
|
|
16
|
+
export declare function getEmbeddingConfig(): EmbeddingConfig;
|
|
17
|
+
/**
|
|
18
|
+
* Drop the global provider so the next call loads a fresh adapter.
|
|
19
|
+
* Intended for benchmarks and tests.
|
|
20
|
+
*/
|
|
21
|
+
export declare function resetGlobalEmbeddingProvider(): Promise<void>;
|
|
22
|
+
export declare function getEmbedding(text: string): Promise<number[]>;
|
|
23
|
+
export declare function getEmbeddings(texts: string[]): Promise<number[][]>;
|
|
24
|
+
/**
|
|
25
|
+
* Model cache directory on disk (shared by all runtimes when configured this way).
|
|
26
|
+
*/
|
|
27
|
+
export declare function getCacheDir(): string;
|
|
28
|
+
export { isEmbeddingModelCached as isModelCached } from "./modelCache";
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding adapter using `@huggingface/transformers` (Transformers.js v3+ line).
|
|
3
|
+
*/
|
|
4
|
+
import type { EmbeddingProvider, EmbeddingConfig } from "../../domain/ports";
|
|
5
|
+
/**
|
|
6
|
+
* {@link EmbeddingProvider} backed by `@huggingface/transformers`.
|
|
7
|
+
*/
|
|
8
|
+
export declare class HuggingFaceTransformersEmbeddingProvider implements EmbeddingProvider {
|
|
9
|
+
private extractor;
|
|
10
|
+
private config;
|
|
11
|
+
private isInitializing;
|
|
12
|
+
private initPromise;
|
|
13
|
+
constructor(config?: Partial<EmbeddingConfig>);
|
|
14
|
+
initialize(config?: EmbeddingConfig): Promise<void>;
|
|
15
|
+
private ensureExtractor;
|
|
16
|
+
getEmbedding(text: string): Promise<number[]>;
|
|
17
|
+
getEmbeddings(texts: string[]): Promise<number[][]>;
|
|
18
|
+
getDimension(): number;
|
|
19
|
+
getModelName(): string;
|
|
20
|
+
dispose(): Promise<void>;
|
|
21
|
+
}
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Embedding Infrastructure
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Local ONNX embedding adapters plus a global facade for module convenience.
|
|
5
5
|
*/
|
|
6
|
-
export {
|
|
6
|
+
export { EMBEDDING_MODEL_IDS, EMBEDDING_MODELS, ALL_EMBEDDING_MODEL_NAMES, BENCHMARK_MODEL_NAMES, EMBEDDING_DIMENSIONS, getEmbeddingModelId, getEmbeddingDimension, } from "./modelCatalog";
|
|
7
|
+
export { RAGGREP_MODEL_CACHE_DIR } from "./embeddingPaths";
|
|
8
|
+
export { XenovaTransformersEmbeddingProvider } from "./xenovaEmbeddingProvider";
|
|
9
|
+
/** @deprecated Use {@link XenovaTransformersEmbeddingProvider} */
|
|
10
|
+
export { TransformersEmbeddingProvider } from "./xenovaEmbeddingProvider";
|
|
11
|
+
export { HuggingFaceTransformersEmbeddingProvider } from "./huggingfaceEmbeddingProvider";
|
|
12
|
+
export { createEmbeddingProvider } from "./embeddingProviderFactory";
|
|
13
|
+
export { configureEmbeddings, getEmbeddingConfig, getEmbedding, getEmbeddings, getCacheDir, isModelCached, resetGlobalEmbeddingProvider, } from "./globalEmbeddings";
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detect whether a Transformers.js ONNX model appears fully cached on disk.
|
|
3
|
+
*/
|
|
4
|
+
import type { EmbeddingModelName } from "../../domain/ports";
|
|
5
|
+
/**
|
|
6
|
+
* Returns true when the quantized ONNX weights exist for the given logical model.
|
|
7
|
+
* Both `@xenova/transformers` and `@huggingface/transformers` use the same cache layout
|
|
8
|
+
* when `env.cacheDir` points at {@link RAGGREP_MODEL_CACHE_DIR}.
|
|
9
|
+
*/
|
|
10
|
+
export declare function isEmbeddingModelCached(model: EmbeddingModelName): Promise<boolean>;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical mapping of logical embedding model names to Hugging Face model IDs
|
|
3
|
+
* and output dimensions. Shared by all embedding adapters and config validation.
|
|
4
|
+
*/
|
|
5
|
+
import type { EmbeddingModelName } from "../../domain/ports";
|
|
6
|
+
/** Hugging Face hub IDs (ONNX-converted models for Transformers.js) */
|
|
7
|
+
export declare const EMBEDDING_MODEL_IDS: Record<EmbeddingModelName, string>;
|
|
8
|
+
/** Alias for imports that expect the historical name `EMBEDDING_MODELS` */
|
|
9
|
+
export declare const EMBEDDING_MODELS: Record<EmbeddingModelName, string>;
|
|
10
|
+
/**
|
|
11
|
+
* Every {@link EmbeddingModelName}, in harness order.
|
|
12
|
+
* Used when a full model list is required; benchmarks use {@link BENCHMARK_MODEL_NAMES}.
|
|
13
|
+
*/
|
|
14
|
+
export declare const ALL_EMBEDDING_MODEL_NAMES: readonly EmbeddingModelName[];
|
|
15
|
+
/**
|
|
16
|
+
* Models run by `bench:embeddings` and `bench:retrieval` matrix.
|
|
17
|
+
* Omits `nomic-embed-text-v1.5` for now (heavy in the local harness).
|
|
18
|
+
*/
|
|
19
|
+
export declare const BENCHMARK_MODEL_NAMES: readonly EmbeddingModelName[];
|
|
20
|
+
/** Embedding vector dimension per model */
|
|
21
|
+
export declare const EMBEDDING_DIMENSIONS: Record<EmbeddingModelName, number>;
|
|
22
|
+
export declare function getEmbeddingModelId(model: EmbeddingModelName): string;
|
|
23
|
+
export declare function getEmbeddingDimension(model: EmbeddingModelName): number;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding adapter using `@xenova/transformers` (Transformers.js v2 line).
|
|
3
|
+
*/
|
|
4
|
+
import type { EmbeddingProvider, EmbeddingConfig } from "../../domain/ports";
|
|
5
|
+
/**
|
|
6
|
+
* {@link EmbeddingProvider} backed by `@xenova/transformers`.
|
|
7
|
+
*/
|
|
8
|
+
export declare class XenovaTransformersEmbeddingProvider implements EmbeddingProvider {
|
|
9
|
+
private extractor;
|
|
10
|
+
private config;
|
|
11
|
+
private isInitializing;
|
|
12
|
+
private initPromise;
|
|
13
|
+
constructor(config?: Partial<EmbeddingConfig>);
|
|
14
|
+
initialize(config?: EmbeddingConfig): Promise<void>;
|
|
15
|
+
private ensureExtractor;
|
|
16
|
+
getEmbedding(text: string): Promise<number[]>;
|
|
17
|
+
getEmbeddings(texts: string[]): Promise<number[][]>;
|
|
18
|
+
getDimension(): number;
|
|
19
|
+
getModelName(): string;
|
|
20
|
+
dispose(): Promise<void>;
|
|
21
|
+
}
|
|
22
|
+
/** @deprecated Use {@link XenovaTransformersEmbeddingProvider} */
|
|
23
|
+
export declare const TransformersEmbeddingProvider: typeof XenovaTransformersEmbeddingProvider;
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* These connect the domain to external systems (filesystem, ML models, etc.)
|
|
6
6
|
*/
|
|
7
7
|
export { NodeFileSystem, nodeFileSystem } from "./filesystem";
|
|
8
|
-
export { TransformersEmbeddingProvider, getCacheDir, isModelCached, } from "./embeddings";
|
|
8
|
+
export { TransformersEmbeddingProvider, XenovaTransformersEmbeddingProvider, HuggingFaceTransformersEmbeddingProvider, createEmbeddingProvider, configureEmbeddings, getEmbedding, getEmbeddings, getEmbeddingConfig, getCacheDir, isModelCached, resetGlobalEmbeddingProvider, EMBEDDING_MODEL_IDS, EMBEDDING_DIMENSIONS, } from "./embeddings";
|
|
9
9
|
export { FileIndexStorage, SymbolicIndex, getSymbolicPath } from "./storage";
|
|
10
10
|
export { DEFAULT_CONFIG, EMBEDDING_MODELS, getRaggrepDir, getModuleIndexPath, getModuleManifestPath, getGlobalManifestPath, getConfigPath, loadConfig, saveConfig, getModuleConfig, getEmbeddingConfigFromModule, } from "./config";
|
|
11
11
|
export { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger, } from "./logger";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "raggrep",
|
|
3
|
-
"version": "0.16.0",
|
|
3
|
+
"version": "0.17.0",
|
|
4
4
|
"description": "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -22,12 +22,15 @@
|
|
|
22
22
|
"scripts": {
|
|
23
23
|
"build": "bun run build:clean && bun run build:bundle && bun run build:types && bun run build:shebang",
|
|
24
24
|
"build:clean": "rm -rf dist",
|
|
25
|
-
"build:bundle": "bun build src/index.ts --outdir dist --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript' --external 'chokidar' --external 'web-tree-sitter' && bun build src/app/cli/main.ts --outdir dist/cli --target node --sourcemap=external --external '@xenova/transformers' --external 'glob' --external 'typescript' --external 'chokidar' --external 'web-tree-sitter'",
|
|
26
|
-
"build:types": "tsc --emitDeclarationOnly --outDir dist",
|
|
25
|
+
"build:bundle": "bun build src/index.ts --outdir dist --target node --sourcemap=external --external '@xenova/transformers' --external '@huggingface/transformers' --external 'glob' --external 'typescript' --external 'chokidar' --external 'web-tree-sitter' && bun build src/app/cli/main.ts --outdir dist/cli --target node --sourcemap=external --external '@xenova/transformers' --external '@huggingface/transformers' --external 'glob' --external 'typescript' --external 'chokidar' --external 'web-tree-sitter'",
|
|
26
|
+
"build:types": "tsc --project tsconfig.json --noEmit false --declaration true --emitDeclarationOnly true --rootDir ./src --outDir ./dist",
|
|
27
27
|
"build:shebang": "echo '#!/usr/bin/env node' | cat - dist/cli/main.js > temp && mv temp dist/cli/main.js && chmod +x dist/cli/main.js",
|
|
28
28
|
"prepublishOnly": "bun run build",
|
|
29
29
|
"raggrep": "bun run src/app/cli/main.ts",
|
|
30
30
|
"test": "bun test",
|
|
31
|
+
"typecheck": "tsc --noEmit -p tsconfig.json && tsc --noEmit -p scripts/tsconfig.json",
|
|
32
|
+
"bench:embeddings": "bun run scripts/benchmark-embedding-runtimes.ts",
|
|
33
|
+
"bench:retrieval": "bun run scripts/benchmark-retrieval-quality.ts",
|
|
31
34
|
"dev": "bun run src/app/cli/main.ts"
|
|
32
35
|
},
|
|
33
36
|
"keywords": [
|
|
@@ -55,6 +58,7 @@
|
|
|
55
58
|
"node": ">=18.0.0"
|
|
56
59
|
},
|
|
57
60
|
"dependencies": {
|
|
61
|
+
"@huggingface/transformers": "^4.0.0",
|
|
58
62
|
"@xenova/transformers": "^2.17.0",
|
|
59
63
|
"chokidar": "^5.0.0",
|
|
60
64
|
"fdir": "^6.5.0",
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Transformers.js Embedding Adapter
|
|
3
|
-
*
|
|
4
|
-
* Implements the EmbeddingProvider port using Transformers.js for local embeddings.
|
|
5
|
-
* Models are automatically downloaded and cached on first use.
|
|
6
|
-
*/
|
|
7
|
-
import type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName } from "../../domain/ports";
|
|
8
|
-
/** Available embedding models and their Hugging Face IDs */
|
|
9
|
-
export declare const EMBEDDING_MODELS: Record<EmbeddingModelName, string>;
|
|
10
|
-
/** Embedding dimensions per model */
|
|
11
|
-
export declare const EMBEDDING_DIMENSIONS: Record<EmbeddingModelName, number>;
|
|
12
|
-
/**
|
|
13
|
-
* Embedding provider using Transformers.js (local inference).
|
|
14
|
-
*/
|
|
15
|
-
export declare class TransformersEmbeddingProvider implements EmbeddingProvider {
|
|
16
|
-
private pipeline;
|
|
17
|
-
private config;
|
|
18
|
-
private isInitializing;
|
|
19
|
-
private initPromise;
|
|
20
|
-
constructor(config?: Partial<EmbeddingConfig>);
|
|
21
|
-
initialize(config?: EmbeddingConfig): Promise<void>;
|
|
22
|
-
private ensurePipeline;
|
|
23
|
-
getEmbedding(text: string): Promise<number[]>;
|
|
24
|
-
getEmbeddings(texts: string[]): Promise<number[][]>;
|
|
25
|
-
getDimension(): number;
|
|
26
|
-
getModelName(): string;
|
|
27
|
-
dispose(): Promise<void>;
|
|
28
|
-
}
|
|
29
|
-
/**
|
|
30
|
-
* Get the cache directory path
|
|
31
|
-
*/
|
|
32
|
-
export declare function getCacheDir(): string;
|
|
33
|
-
/**
|
|
34
|
-
* Check if a model is already cached
|
|
35
|
-
*/
|
|
36
|
-
export declare function isModelCached(model: EmbeddingModelName): Promise<boolean>;
|
|
37
|
-
/**
|
|
38
|
-
* Configure the global embedding provider.
|
|
39
|
-
*/
|
|
40
|
-
export declare function configureEmbeddings(config: Partial<EmbeddingConfig>): void;
|
|
41
|
-
/**
|
|
42
|
-
* Get current embedding configuration.
|
|
43
|
-
*/
|
|
44
|
-
export declare function getEmbeddingConfig(): EmbeddingConfig;
|
|
45
|
-
/**
|
|
46
|
-
* Get embedding for a single text using the global provider.
|
|
47
|
-
*/
|
|
48
|
-
export declare function getEmbedding(text: string): Promise<number[]>;
|
|
49
|
-
/**
|
|
50
|
-
* Get embeddings for multiple texts using the global provider.
|
|
51
|
-
*/
|
|
52
|
-
export declare function getEmbeddings(texts: string[]): Promise<number[][]>;
|