viberag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -0
- package/README.md +219 -0
- package/dist/cli/__tests__/mcp-setup.test.d.ts +6 -0
- package/dist/cli/__tests__/mcp-setup.test.js +597 -0
- package/dist/cli/app.d.ts +2 -0
- package/dist/cli/app.js +238 -0
- package/dist/cli/commands/handlers.d.ts +57 -0
- package/dist/cli/commands/handlers.js +231 -0
- package/dist/cli/commands/index.d.ts +2 -0
- package/dist/cli/commands/index.js +2 -0
- package/dist/cli/commands/mcp-setup.d.ts +107 -0
- package/dist/cli/commands/mcp-setup.js +509 -0
- package/dist/cli/commands/useRagCommands.d.ts +23 -0
- package/dist/cli/commands/useRagCommands.js +180 -0
- package/dist/cli/components/CleanWizard.d.ts +17 -0
- package/dist/cli/components/CleanWizard.js +169 -0
- package/dist/cli/components/InitWizard.d.ts +20 -0
- package/dist/cli/components/InitWizard.js +370 -0
- package/dist/cli/components/McpSetupWizard.d.ts +37 -0
- package/dist/cli/components/McpSetupWizard.js +387 -0
- package/dist/cli/components/SearchResultsDisplay.d.ts +13 -0
- package/dist/cli/components/SearchResultsDisplay.js +130 -0
- package/dist/cli/components/WelcomeBanner.d.ts +10 -0
- package/dist/cli/components/WelcomeBanner.js +26 -0
- package/dist/cli/components/index.d.ts +1 -0
- package/dist/cli/components/index.js +1 -0
- package/dist/cli/data/mcp-editors.d.ts +80 -0
- package/dist/cli/data/mcp-editors.js +270 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +26 -0
- package/dist/cli-bundle.cjs +5269 -0
- package/dist/common/commands/terminalSetup.d.ts +2 -0
- package/dist/common/commands/terminalSetup.js +144 -0
- package/dist/common/components/CommandSuggestions.d.ts +9 -0
- package/dist/common/components/CommandSuggestions.js +20 -0
- package/dist/common/components/StaticWithResize.d.ts +23 -0
- package/dist/common/components/StaticWithResize.js +62 -0
- package/dist/common/components/StatusBar.d.ts +8 -0
- package/dist/common/components/StatusBar.js +64 -0
- package/dist/common/components/TextInput.d.ts +12 -0
- package/dist/common/components/TextInput.js +239 -0
- package/dist/common/components/index.d.ts +3 -0
- package/dist/common/components/index.js +3 -0
- package/dist/common/hooks/index.d.ts +4 -0
- package/dist/common/hooks/index.js +4 -0
- package/dist/common/hooks/useCommandHistory.d.ts +7 -0
- package/dist/common/hooks/useCommandHistory.js +51 -0
- package/dist/common/hooks/useCtrlC.d.ts +9 -0
- package/dist/common/hooks/useCtrlC.js +40 -0
- package/dist/common/hooks/useKittyKeyboard.d.ts +10 -0
- package/dist/common/hooks/useKittyKeyboard.js +26 -0
- package/dist/common/hooks/useStaticOutputBuffer.d.ts +31 -0
- package/dist/common/hooks/useStaticOutputBuffer.js +58 -0
- package/dist/common/hooks/useTerminalResize.d.ts +28 -0
- package/dist/common/hooks/useTerminalResize.js +51 -0
- package/dist/common/hooks/useTextBuffer.d.ts +13 -0
- package/dist/common/hooks/useTextBuffer.js +165 -0
- package/dist/common/index.d.ts +13 -0
- package/dist/common/index.js +17 -0
- package/dist/common/types.d.ts +162 -0
- package/dist/common/types.js +1 -0
- package/dist/mcp/index.d.ts +12 -0
- package/dist/mcp/index.js +66 -0
- package/dist/mcp/server.d.ts +25 -0
- package/dist/mcp/server.js +837 -0
- package/dist/mcp/watcher.d.ts +86 -0
- package/dist/mcp/watcher.js +334 -0
- package/dist/rag/__tests__/grammar-smoke.test.d.ts +9 -0
- package/dist/rag/__tests__/grammar-smoke.test.js +161 -0
- package/dist/rag/__tests__/helpers.d.ts +30 -0
- package/dist/rag/__tests__/helpers.js +67 -0
- package/dist/rag/__tests__/merkle.test.d.ts +5 -0
- package/dist/rag/__tests__/merkle.test.js +161 -0
- package/dist/rag/__tests__/metadata-extraction.test.d.ts +10 -0
- package/dist/rag/__tests__/metadata-extraction.test.js +202 -0
- package/dist/rag/__tests__/multi-language.test.d.ts +13 -0
- package/dist/rag/__tests__/multi-language.test.js +535 -0
- package/dist/rag/__tests__/rag.test.d.ts +10 -0
- package/dist/rag/__tests__/rag.test.js +311 -0
- package/dist/rag/__tests__/search-exhaustive.test.d.ts +9 -0
- package/dist/rag/__tests__/search-exhaustive.test.js +87 -0
- package/dist/rag/__tests__/search-filters.test.d.ts +10 -0
- package/dist/rag/__tests__/search-filters.test.js +250 -0
- package/dist/rag/__tests__/search-modes.test.d.ts +8 -0
- package/dist/rag/__tests__/search-modes.test.js +133 -0
- package/dist/rag/config/index.d.ts +61 -0
- package/dist/rag/config/index.js +111 -0
- package/dist/rag/constants.d.ts +41 -0
- package/dist/rag/constants.js +57 -0
- package/dist/rag/embeddings/fastembed.d.ts +62 -0
- package/dist/rag/embeddings/fastembed.js +124 -0
- package/dist/rag/embeddings/gemini.d.ts +26 -0
- package/dist/rag/embeddings/gemini.js +116 -0
- package/dist/rag/embeddings/index.d.ts +10 -0
- package/dist/rag/embeddings/index.js +9 -0
- package/dist/rag/embeddings/local-4b.d.ts +28 -0
- package/dist/rag/embeddings/local-4b.js +51 -0
- package/dist/rag/embeddings/local.d.ts +29 -0
- package/dist/rag/embeddings/local.js +119 -0
- package/dist/rag/embeddings/mistral.d.ts +22 -0
- package/dist/rag/embeddings/mistral.js +85 -0
- package/dist/rag/embeddings/openai.d.ts +22 -0
- package/dist/rag/embeddings/openai.js +85 -0
- package/dist/rag/embeddings/types.d.ts +37 -0
- package/dist/rag/embeddings/types.js +1 -0
- package/dist/rag/gitignore/index.d.ts +57 -0
- package/dist/rag/gitignore/index.js +178 -0
- package/dist/rag/index.d.ts +15 -0
- package/dist/rag/index.js +25 -0
- package/dist/rag/indexer/chunker.d.ts +129 -0
- package/dist/rag/indexer/chunker.js +1352 -0
- package/dist/rag/indexer/index.d.ts +6 -0
- package/dist/rag/indexer/index.js +6 -0
- package/dist/rag/indexer/indexer.d.ts +73 -0
- package/dist/rag/indexer/indexer.js +356 -0
- package/dist/rag/indexer/types.d.ts +68 -0
- package/dist/rag/indexer/types.js +47 -0
- package/dist/rag/logger/index.d.ts +20 -0
- package/dist/rag/logger/index.js +75 -0
- package/dist/rag/manifest/index.d.ts +50 -0
- package/dist/rag/manifest/index.js +97 -0
- package/dist/rag/merkle/diff.d.ts +26 -0
- package/dist/rag/merkle/diff.js +95 -0
- package/dist/rag/merkle/hash.d.ts +34 -0
- package/dist/rag/merkle/hash.js +165 -0
- package/dist/rag/merkle/index.d.ts +68 -0
- package/dist/rag/merkle/index.js +298 -0
- package/dist/rag/merkle/node.d.ts +51 -0
- package/dist/rag/merkle/node.js +69 -0
- package/dist/rag/search/filters.d.ts +21 -0
- package/dist/rag/search/filters.js +100 -0
- package/dist/rag/search/fts.d.ts +32 -0
- package/dist/rag/search/fts.js +61 -0
- package/dist/rag/search/hybrid.d.ts +17 -0
- package/dist/rag/search/hybrid.js +58 -0
- package/dist/rag/search/index.d.ts +89 -0
- package/dist/rag/search/index.js +367 -0
- package/dist/rag/search/types.d.ts +130 -0
- package/dist/rag/search/types.js +4 -0
- package/dist/rag/search/vector.d.ts +25 -0
- package/dist/rag/search/vector.js +44 -0
- package/dist/rag/storage/index.d.ts +92 -0
- package/dist/rag/storage/index.js +287 -0
- package/dist/rag/storage/lancedb-native.d.ts +7 -0
- package/dist/rag/storage/lancedb-native.js +10 -0
- package/dist/rag/storage/schema.d.ts +23 -0
- package/dist/rag/storage/schema.js +50 -0
- package/dist/rag/storage/types.d.ts +100 -0
- package/dist/rag/storage/types.js +68 -0
- package/package.json +67 -0
- package/scripts/check-node-version.js +37 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
 * Gemini embedding provider using Google's Generative AI API.
 *
 * Uses gemini-embedding-001 model with 1536 dimensions.
 * Note: The model defaults to 3072 dims but we explicitly request 1536 for:
 * - Good balance of quality and storage
 * - Matches OpenAI text-embedding-3-small dimensions
 *
 * Free tier available with generous limits.
 */
const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
const MODEL = 'gemini-embedding-001';
const BATCH_SIZE = 100; // Gemini supports up to 100 texts per request
/**
 * Gemini embedding provider.
 * Uses gemini-embedding-001 model via Google's Generative AI API.
 */
export class GeminiEmbeddingProvider {
    /**
     * @param {string} [apiKey] - Google Generative AI API key. May be omitted
     *   at construction time, but initialize() throws without one.
     */
    constructor(apiKey) {
        /** Width of the embedding vectors requested from the API. */
        this.dimensions = 1536;
        this.apiKey = apiKey ?? '';
        this.initialized = false;
    }
    /**
     * Validate configuration. No network call is made; the key is only
     * checked for presence, not validity.
     * @throws {Error} when no API key is configured.
     */
    async initialize(_onProgress) {
        if (!this.apiKey) {
            throw new Error('Gemini API key required. Run /init to configure your API key.');
        }
        this.initialized = true;
    }
    /**
     * Embed many texts, issuing one API request per BATCH_SIZE chunk.
     * @param {string[]} texts - Texts to embed.
     * @returns One vector per input text, in input order.
     */
    async embed(texts) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const results = [];
        for (let i = 0; i < texts.length; i += BATCH_SIZE) {
            const batch = texts.slice(i, i + BATCH_SIZE);
            const batchResults = await this.embedBatch(batch);
            results.push(...batchResults);
        }
        return results;
    }
    /**
     * POST a JSON payload to `<model>:<endpoint>` and return the parsed body.
     *
     * Fix: the API key is sent via the `x-goog-api-key` header instead of a
     * `?key=` query parameter, so the secret no longer leaks into server
     * logs, proxies, or any error message that includes the request URL.
     * @throws {Error} with status and response text on non-2xx responses.
     */
    async postJson(endpoint, payload) {
        const response = await fetch(`${GEMINI_API_BASE}/${MODEL}:${endpoint}`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-goog-api-key': this.apiKey,
            },
            body: JSON.stringify(payload),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Gemini API error: ${response.status} - ${error}`);
        }
        return response.json();
    }
    /** Embed one batch (at most BATCH_SIZE texts) as RETRIEVAL_DOCUMENT. */
    async embedBatch(texts) {
        const data = await this.postJson('batchEmbedContents', {
            requests: texts.map(text => ({
                model: `models/${MODEL}`,
                content: {
                    parts: [{ text }],
                },
                taskType: 'RETRIEVAL_DOCUMENT',
                outputDimensionality: this.dimensions,
            })),
        });
        return data.embeddings.map(e => e.values);
    }
    /**
     * Embed a single query string as RETRIEVAL_QUERY (asymmetric retrieval:
     * queries and documents use different task types).
     */
    async embedSingle(text) {
        if (!this.initialized) {
            await this.initialize();
        }
        const data = await this.postJson('embedContent', {
            model: `models/${MODEL}`,
            content: {
                parts: [{ text }],
            },
            taskType: 'RETRIEVAL_QUERY',
            outputDimensionality: this.dimensions,
        });
        return data.embedding.values;
    }
    close() {
        this.initialized = false;
    }
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings module for generating vector embeddings.
|
|
3
|
+
* Supports both local (ONNX) and cloud API providers.
|
|
4
|
+
*/
|
|
5
|
+
export { GeminiEmbeddingProvider } from './gemini.js';
|
|
6
|
+
export { Local4BEmbeddingProvider } from './local-4b.js';
|
|
7
|
+
export { LocalEmbeddingProvider } from './local.js';
|
|
8
|
+
export { MistralEmbeddingProvider } from './mistral.js';
|
|
9
|
+
export { OpenAIEmbeddingProvider } from './openai.js';
|
|
10
|
+
export type { EmbeddingProvider, ModelProgressCallback } from './types.js';
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings module for generating vector embeddings.
|
|
3
|
+
* Supports both local (ONNX) and cloud API providers.
|
|
4
|
+
*/
|
|
5
|
+
export { GeminiEmbeddingProvider } from './gemini.js';
|
|
6
|
+
export { Local4BEmbeddingProvider } from './local-4b.js';
|
|
7
|
+
export { LocalEmbeddingProvider } from './local.js';
|
|
8
|
+
export { MistralEmbeddingProvider } from './mistral.js';
|
|
9
|
+
export { OpenAIEmbeddingProvider } from './openai.js';
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Local embedding provider using Qwen3-Embedding-4B.
 *
 * ⚠️ NOT CURRENTLY AVAILABLE
 *
 * No transformers.js-compatible ONNX version exists yet.
 * The zhiqing/Qwen3-Embedding-4B-ONNX model has files in root instead of onnx/ subfolder.
 * Waiting for onnx-community to release a properly structured version.
 *
 * When available:
 * - 2560 dimensions
 * - ~8GB download (full precision)
 * - ~8GB RAM usage
 * - 32K context window
 * - +5 MTEB points over 0.6B (69.45 vs 64.33)
 */
import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
/**
 * Local embedding provider using Qwen3-Embedding-4B FP32.
 * Currently throws an error - no compatible ONNX model available.
 */
export declare class Local4BEmbeddingProvider implements EmbeddingProvider {
    /** Vector width the 4B model would produce once available. */
    readonly dimensions = 2560;
    /** Always rejects with a "not available yet" error. */
    initialize(_onProgress?: ModelProgressCallback): Promise<void>;
    /** Always rejects with a "not available yet" error. */
    embed(_texts: string[]): Promise<number[][]>;
    /** Always rejects with a "not available yet" error. */
    embedSingle(_text: string): Promise<number[]>;
    /** No-op; this placeholder holds no resources. */
    close(): void;
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
 * Local embedding provider using Qwen3-Embedding-4B.
 *
 * ⚠️ NOT CURRENTLY AVAILABLE
 *
 * No transformers.js-compatible ONNX version exists yet.
 * The zhiqing/Qwen3-Embedding-4B-ONNX model has files in root instead of onnx/ subfolder.
 * Waiting for onnx-community to release a properly structured version.
 *
 * When available:
 * - 2560 dimensions
 * - ~8GB download (full precision)
 * - ~8GB RAM usage
 * - 32K context window
 * - +5 MTEB points over 0.6B (69.45 vs 64.33)
 */
const DIMENSIONS = 2560;
// User-facing explanation raised by every operation on this placeholder.
const NOT_AVAILABLE_ERROR = [
    'local-4b is not available yet.',
    '',
    'No transformers.js-compatible ONNX version of Qwen3-Embedding-4B exists.',
    'The zhiqing/Qwen3-Embedding-4B-ONNX model has incorrect file structure.',
    '',
    'Options:',
    ' 1. Use "local" (0.6B Q8) - works now, ~1.2GB RAM',
    ' 2. Use "gemini" - free API, best quality',
    ' 3. Wait for onnx-community to release 4B version',
    '',
    'Run "viberag /init" to choose a different provider.',
].join('\n');
/**
 * Placeholder provider for Qwen3-Embedding-4B FP32.
 * Every operation rejects until a compatible ONNX model is published.
 */
export class Local4BEmbeddingProvider {
    constructor() {
        // Advertised vector width; no model is ever actually loaded.
        this.dimensions = DIMENSIONS;
    }
    async initialize(_onProgress) {
        throw new Error(NOT_AVAILABLE_ERROR);
    }
    async embed(_texts) {
        throw new Error(NOT_AVAILABLE_ERROR);
    }
    async embedSingle(_text) {
        throw new Error(NOT_AVAILABLE_ERROR);
    }
    close() {
        // Nothing was ever opened, so nothing to release.
    }
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
 * Local embedding provider using Qwen3-Embedding-0.6B.
 *
 * Uses Qwen3-Embedding-0.6B Q8 via @huggingface/transformers (ONNX Runtime).
 * - 1024 dimensions
 * - ~700MB download (Q8 quantized)
 * - ~10GB RAM usage
 * - 32K context window
 *
 * Benefits:
 * - Works completely offline
 * - No API key required
 * - No per-token costs
 * - Data never leaves your machine
 */
import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
/**
 * Local embedding provider using Qwen3-Embedding-0.6B Q8.
 */
export declare class LocalEmbeddingProvider implements EmbeddingProvider {
    /** Vector width produced by the model. */
    readonly dimensions = 1024;
    /** Feature-extraction pipeline; null until initialize() completes. */
    private extractor;
    /** True once the model has been loaded. */
    private initialized;
    /** Download (first run) and load the model; reports progress via the callback. */
    initialize(onProgress?: ModelProgressCallback): Promise<void>;
    /** Embed many texts; initializes on demand. Returns one vector per text. */
    embed(texts: string[]): Promise<number[][]>;
    private embedBatch;
    /** Embed a single text; initializes on demand. */
    embedSingle(text: string): Promise<number[]>;
    /** Drop the pipeline reference and reset initialization state. */
    close(): void;
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local embedding provider using Qwen3-Embedding-0.6B.
|
|
3
|
+
*
|
|
4
|
+
* Uses Qwen3-Embedding-0.6B Q8 via @huggingface/transformers (ONNX Runtime).
|
|
5
|
+
* - 1024 dimensions
|
|
6
|
+
* - ~700MB download (Q8 quantized)
|
|
7
|
+
* - ~10GB RAM usage
|
|
8
|
+
* - 32K context window
|
|
9
|
+
*
|
|
10
|
+
* Benefits:
|
|
11
|
+
* - Works completely offline
|
|
12
|
+
* - No API key required
|
|
13
|
+
* - No per-token costs
|
|
14
|
+
* - Data never leaves your machine
|
|
15
|
+
*/
|
|
16
|
+
import { pipeline } from '@huggingface/transformers';
|
|
17
|
+
const MODEL_NAME = 'onnx-community/Qwen3-Embedding-0.6B-ONNX';
const DIMENSIONS = 1024;
const BATCH_SIZE = 8;
/**
 * On-device embedding provider backed by Qwen3-Embedding-0.6B (Q8 ONNX)
 * through the transformers.js feature-extraction pipeline.
 */
export class LocalEmbeddingProvider {
    constructor() {
        // Vector width produced by the model.
        this.dimensions = DIMENSIONS;
        // Feature-extraction pipeline, created lazily by initialize().
        this.extractor = null;
        // Guards against loading the model twice.
        this.initialized = false;
    }
    /**
     * Load the model (downloading it on first use, ~700MB) with int8
     * quantization. Progress is forwarded to the optional callback.
     */
    async initialize(onProgress) {
        if (this.initialized)
            return;
        // Only report whole-percent changes to keep UI updates sparse.
        let previousPct = 0;
        const forwardProgress = onProgress
            ? (info) => {
                if (info.status === 'progress' &&
                    info.progress !== undefined) {
                    const pct = Math.round(info.progress);
                    if (pct !== previousPct) {
                        previousPct = pct;
                        onProgress('downloading', pct, info.file);
                    }
                }
                else if (info.status === 'ready') {
                    onProgress('loading');
                }
            }
            : undefined;
        // Signal that loading has started before the (possibly long) download.
        onProgress?.('loading');
        this.extractor = await pipeline('feature-extraction', MODEL_NAME, {
            dtype: 'q8', // int8 quantization: smaller download, faster inference
            progress_callback: forwardProgress,
        });
        onProgress?.('ready');
        this.initialized = true;
    }
    /**
     * Embed many texts, processing BATCH_SIZE at a time for memory safety.
     * Returns one vector per input text, in input order.
     */
    async embed(texts) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const vectors = [];
        for (let start = 0; start < texts.length; start += BATCH_SIZE) {
            const chunk = texts.slice(start, start + BATCH_SIZE);
            vectors.push(...(await this.embedBatch(chunk)));
        }
        return vectors;
    }
    /** Run the pipeline on each text, mean-pooled and L2-normalized. */
    async embedBatch(texts) {
        const vectors = [];
        for (const text of texts) {
            const tensor = await this.extractor(text, {
                pooling: 'mean',
                normalize: true,
            });
            vectors.push(Array.from(tensor.data));
        }
        return vectors;
    }
    /** Embed a single text (initializes the model on first call). */
    async embedSingle(text) {
        if (!this.initialized) {
            await this.initialize();
        }
        const tensor = await this.extractor(text, {
            pooling: 'mean',
            normalize: true,
        });
        return Array.from(tensor.data);
    }
    /** Drop the pipeline so it can be garbage-collected. */
    close() {
        this.extractor = null;
        this.initialized = false;
    }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Mistral embedding provider using Mistral AI API.
 *
 * Uses codestral-embed model (1024 dimensions).
 * Optimized for code and technical content.
 */
import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
/**
 * Mistral embedding provider.
 * Uses codestral-embed model via Mistral AI API.
 */
export declare class MistralEmbeddingProvider {
    /** Vector width returned by codestral-embed. */
    readonly dimensions = 1024;
    /** API key; empty string means not configured. */
    private apiKey;
    /** True once initialize() has validated the key's presence. */
    private initialized;
    constructor(apiKey?: string);
    /** Rejects when no API key is configured; makes no network call. */
    initialize(_onProgress?: ModelProgressCallback): Promise<void>;
    /** Embed many texts in batches. Returns one vector per text, in input order. */
    embed(texts: string[]): Promise<number[][]>;
    private embedBatch;
    /** Embed a single text (delegates to embed()). */
    embedSingle(text: string): Promise<number[]>;
    /** Reset initialization state; no resources to free. */
    close(): void;
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mistral embedding provider using Mistral AI API.
|
|
3
|
+
*
|
|
4
|
+
* Uses codestral-embed model (1024 dimensions).
|
|
5
|
+
* Optimized for code and technical content.
|
|
6
|
+
*/
|
|
7
|
+
const MISTRAL_API_BASE = 'https://api.mistral.ai/v1';
const MODEL = 'codestral-embed';
const BATCH_SIZE = 64; // Mistral supports batching
/**
 * Embedding provider backed by the Mistral AI embeddings endpoint
 * (codestral-embed, 1024-dimensional vectors).
 */
export class MistralEmbeddingProvider {
    constructor(apiKey) {
        // Vector width returned by codestral-embed.
        this.dimensions = 1024;
        // Empty string means "not configured"; initialize() rejects in that case.
        this.apiKey = apiKey ?? '';
        this.initialized = false;
    }
    /** Check that an API key is configured; no network call is made. */
    async initialize(_onProgress) {
        if (!this.apiKey) {
            throw new Error('Mistral API key required. Run /init to configure your API key.');
        }
        this.initialized = true;
    }
    /**
     * Embed many texts, issuing one request per BATCH_SIZE chunk.
     * Returns one vector per input text, in input order.
     */
    async embed(texts) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const vectors = [];
        for (let start = 0; start < texts.length; start += BATCH_SIZE) {
            const chunk = texts.slice(start, start + BATCH_SIZE);
            vectors.push(...(await this.embedBatch(chunk)));
        }
        return vectors;
    }
    /** POST one batch to /embeddings and return vectors in input order. */
    async embedBatch(texts) {
        const response = await fetch(`${MISTRAL_API_BASE}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                model: MODEL,
                input: texts,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`Mistral API error: ${response.status} - ${error}`);
        }
        const data = (await response.json());
        // Items may arrive out of order; restore input order by index.
        return data.data.sort((a, b) => a.index - b.index).map(d => d.embedding);
    }
    /** Embed one text by delegating to the batch path. */
    async embedSingle(text) {
        const [vector] = await this.embed([text]);
        return vector;
    }
    close() {
        this.initialized = false;
    }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * OpenAI embedding provider using OpenAI API.
 *
 * Uses text-embedding-3-small model (1536 dimensions).
 * Good quality with fast API responses and low cost ($0.02/1M tokens).
 */
import type { EmbeddingProvider, ModelProgressCallback } from './types.js';
/**
 * OpenAI embedding provider.
 * Uses text-embedding-3-small model via OpenAI API.
 */
export declare class OpenAIEmbeddingProvider implements EmbeddingProvider {
    /** Vector width returned by text-embedding-3-small. */
    readonly dimensions = 1536;
    /** API key; empty string means not configured. */
    private apiKey;
    /** True once initialize() has validated the key's presence. */
    private initialized;
    constructor(apiKey?: string);
    /** Rejects when no API key is configured; makes no network call. */
    initialize(_onProgress?: ModelProgressCallback): Promise<void>;
    /** Embed many texts in batches. Returns one vector per text, in input order. */
    embed(texts: string[]): Promise<number[][]>;
    private embedBatch;
    /** Embed a single text (delegates to embed()). */
    embedSingle(text: string): Promise<number[]>;
    /** Reset initialization state; no resources to free. */
    close(): void;
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI embedding provider using OpenAI API.
|
|
3
|
+
*
|
|
4
|
+
* Uses text-embedding-3-small model (1536 dimensions).
|
|
5
|
+
* Good quality with fast API responses and low cost ($0.02/1M tokens).
|
|
6
|
+
*/
|
|
7
|
+
const OPENAI_API_BASE = 'https://api.openai.com/v1';
const MODEL = 'text-embedding-3-small';
const BATCH_SIZE = 2048; // OpenAI supports up to 2048 texts per request
/**
 * Embedding provider backed by the OpenAI embeddings endpoint
 * (text-embedding-3-small, 1536-dimensional vectors).
 */
export class OpenAIEmbeddingProvider {
    constructor(apiKey) {
        // Vector width returned by text-embedding-3-small.
        this.dimensions = 1536;
        // Empty string means "not configured"; initialize() rejects in that case.
        this.apiKey = apiKey ?? '';
        this.initialized = false;
    }
    /** Check that an API key is configured; no network call is made. */
    async initialize(_onProgress) {
        if (!this.apiKey) {
            throw new Error('OpenAI API key required. Run /init to configure your API key.');
        }
        this.initialized = true;
    }
    /**
     * Embed many texts, issuing one request per BATCH_SIZE chunk.
     * Returns one vector per input text, in input order.
     */
    async embed(texts) {
        if (!this.initialized) {
            await this.initialize();
        }
        if (texts.length === 0) {
            return [];
        }
        const vectors = [];
        for (let start = 0; start < texts.length; start += BATCH_SIZE) {
            const chunk = texts.slice(start, start + BATCH_SIZE);
            vectors.push(...(await this.embedBatch(chunk)));
        }
        return vectors;
    }
    /** POST one batch to /embeddings and return vectors in input order. */
    async embedBatch(texts) {
        const response = await fetch(`${OPENAI_API_BASE}/embeddings`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                model: MODEL,
                input: texts,
            }),
        });
        if (!response.ok) {
            const error = await response.text();
            throw new Error(`OpenAI API error: ${response.status} - ${error}`);
        }
        const data = (await response.json());
        // Items may arrive out of order; restore input order by index.
        return data.data.sort((a, b) => a.index - b.index).map(d => d.embedding);
    }
    /** Embed one text by delegating to the batch path. */
    async embedSingle(text) {
        const [vector] = await this.embed([text]);
        return vector;
    }
    close() {
        this.initialized = false;
    }
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
 * Progress callback for model loading/downloading.
 * @param status - Current status: 'downloading', 'loading', 'ready'
 * @param progress - Download progress 0-100 (only for 'downloading')
 * @param message - Optional message (e.g., file being downloaded)
 */
export type ModelProgressCallback = (status: 'downloading' | 'loading' | 'ready', progress?: number, message?: string) => void;
/**
 * Embedding provider interface for generating vector embeddings.
 */
export interface EmbeddingProvider {
    /** Number of dimensions in the embedding vectors */
    readonly dimensions: number;
    /**
     * Initialize the provider (load model, etc.)
     * Must be called before using embed() or embedSingle().
     * @param onProgress - Optional callback for download/loading progress
     */
    initialize(onProgress?: ModelProgressCallback): Promise<void>;
    /**
     * Generate embeddings for multiple texts.
     * @param texts - Array of text strings to embed
     * @returns Array of embedding vectors (one per text, in input order)
     */
    embed(texts: string[]): Promise<number[][]>;
    /**
     * Generate embedding for a single text.
     * Optimized for query embedding.
     * @param text - Text string to embed
     * @returns Embedding vector
     */
    embedSingle(text: string): Promise<number[]>;
    /**
     * Close the provider and free resources.
     */
    close(): void;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
 * Gitignore-based file filtering.
 *
 * Uses the `ignore` package to parse .gitignore files and filter paths.
 * This replaces the hardcoded excludePatterns approach.
 */
interface Ignore {
    /** Add one or more gitignore-style patterns; returns this for chaining. */
    add(patterns: string | string[]): this;
    /** True if the given relative path matches an ignore pattern. */
    ignores(pathname: string): boolean;
    /** Return the subset of paths NOT matched by the ignore rules. */
    filter(pathnames: string[]): string[];
    /** Create a predicate that returns true for paths that are kept (not ignored). */
    createFilter(): (pathname: string) => boolean;
}
/**
 * Load and parse .gitignore file from project root.
 * Returns an Ignore instance that can filter paths.
 *
 * @param projectRoot - Project root directory
 * @returns Ignore instance for filtering
 */
export declare function loadGitignore(projectRoot: string): Promise<Ignore>;
/**
 * Check if a path should be ignored based on .gitignore rules.
 *
 * @param projectRoot - Project root directory
 * @param relativePath - Path relative to project root
 * @returns true if the path should be ignored
 */
export declare function shouldIgnore(projectRoot: string, relativePath: string): Promise<boolean>;
/**
 * Create a filter function for use with file listing.
 * The filter returns true for files that should be INCLUDED (not ignored).
 *
 * @param projectRoot - Project root directory
 * @returns Filter function that returns true for non-ignored files
 */
export declare function createGitignoreFilter(projectRoot: string): Promise<(relativePath: string) => boolean>;
/**
 * Clear the cache for a specific project root.
 * Call this if .gitignore has been modified.
 *
 * @param projectRoot - Project root directory
 */
export declare function clearGitignoreCache(projectRoot: string): void;
/**
 * Clear all cached Ignore instances.
 */
export declare function clearAllGitignoreCache(): void;
/**
 * Convert gitignore patterns to fast-glob ignore patterns.
 * This allows fast-glob to skip directories upfront instead of
 * scanning them and filtering later.
 *
 * @param projectRoot - Project root directory
 * @returns Array of fast-glob compatible ignore patterns
 */
export declare function getGlobIgnorePatterns(projectRoot: string): Promise<string[]>;
export {};
|