vecbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,292 @@
+ /**
+  * Llama.cpp Provider - Local embeddings using llama.cpp
+  * Checks the local llama-embedding binary and GGUF model on disk, then requests embeddings from a local llama.cpp server over HTTP
+  */
+
+ import { access, constants } from 'fs/promises';
+ import { join, resolve } from 'path';
+ import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
+ import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@src/types/index';
+ import { logger } from '@src/util/logger';
+ import * as http from 'http';
+
+ // Extend EmbedConfig to include llamaPath
+ interface LlamaCppConfig extends EmbedConfig {
+   llamaPath?: string;
+ }
+
+ export class LlamaCppProvider extends EmbeddingProvider {
+   private llamaPath: string;
+   private modelPath: string;
+
+   constructor(config: LlamaCppConfig) {
+     super({ ...config, provider: 'llamacpp' });
+     this.modelPath = config.model || 'nomic-embed-text-v1.5.Q4_K_M.gguf';
+     this.llamaPath = config.llamaPath || './llama.cpp/build/bin/llama-embedding';
+     logger.info(`Llama.cpp provider initialized with model: ${this.modelPath}`);
+   }
+
+   // Public API methods
+   getProviderName(): string {
+     return 'Llama.cpp';
+   }
+
+   getDimensions(): number {
+     // Known dimensions for common models
+     const model = this.getModel();
+     if (model.includes('nomic-embed-text-v1.5')) return 768;
+     if (model.includes('nomic-embed-text-v1')) return 768;
+     if (model.includes('all-MiniLM-L6-v2')) return 384;
+     if (model.includes('bge-base')) return 768;
+     if (model.includes('bert-base')) return 768;
+     return 768; // default
+   }
+
+   async isReady(): Promise<boolean> {
+     try {
+       // Check if llama-embedding exists and is executable
+       await access(this.llamaPath, constants.F_OK);
+       await access(this.llamaPath, constants.X_OK);
+
+       // Check if model file exists
+       const modelPath = await this.getModelPath();
+       await access(modelPath, constants.F_OK);
+
+       logger.debug('Llama.cpp provider is ready');
+       return true;
+     } catch (error: unknown) {
+       logger.error(`Llama.cpp readiness check failed: ${(error instanceof Error ? error.message : String(error))}`);
+       return false;
+     }
+   }
+
+   async embed(input: EmbedInput): Promise<EmbedResult> {
+     try {
+       logger.debug(`Embedding text with llama.cpp: ${this.getModel()}`);
+
+       const text = await this.readInput(input);
+       if (!text.trim()) {
+         throw new Error('Text input cannot be empty');
+       }
+
+       // Use HTTP API instead of CLI arguments
+       const requestBody = {
+         input: text,
+         model: await this.getModelPath(),
+         pooling: 'mean',
+         normalize: 2
+       };
+
+       // Execute HTTP request to llama.cpp server
+       const result = await this.executeLlamaEmbedding([JSON.stringify(requestBody)]);
+
+       // Parse output to extract embedding
+       const embedding = this.parseRawOutput(result.stdout);
+
+       return {
+         embedding,
+         dimensions: embedding.length,
+         model: this.getModel(),
+         provider: 'llamacpp',
+       };
+     } catch (error: unknown) {
+       logger.error(`Llama.cpp embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
+       throw error;
+     }
+   }
+
+   async embedBatch(inputs: EmbedInput[]): Promise<BatchEmbedResult> {
+     try {
+       logger.debug(`Batch embedding ${inputs.length} texts with llama.cpp`);
+
+       const texts: string[] = [];
+       for (const input of inputs) {
+         const text = await this.readInput(input);
+         if (text.trim()) {
+           texts.push(text);
+         }
+       }
+
+       if (texts.length === 0) {
+         throw new Error('No valid texts to embed');
+       }
+
+       // For batch processing, use HTTP API on the filtered, resolved texts
+       const modelPath = await this.getModelPath();
+       const requests = texts.map(text => ({
+         input: text,
+         model: modelPath,
+         pooling: 'mean',
+         normalize: 2
+       }));
+
+       // Execute batch requests (for now, do individual requests)
+       const embeddings: number[][] = [];
+       for (const request of requests) {
+         const result = await this.executeLlamaEmbedding([JSON.stringify(request)]);
+         const embedding = this.parseRawOutput(result.stdout);
+         embeddings.push(embedding);
+       }
+
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: this.getModel(),
+         provider: 'llamacpp',
+       };
+     } catch (error: unknown) {
+       logger.error(`Llama.cpp batch embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
+       throw error;
+     }
+   }
+
+   // Protected methods
+   protected getModel(): string {
+     return this.modelPath;
+   }
+
+   // Private helper methods
+   private async getModelPath(): Promise<string> {
+     // Try different model paths
+     const possiblePaths = [
+       this.modelPath, // As provided
+       join('./llama.cpp/models', this.modelPath), // In llama.cpp/models
+       join('./llama.cpp', this.modelPath) // In llama.cpp root
+     ];
+
+     for (const path of possiblePaths) {
+       try {
+         await access(path, constants.F_OK);
+         return resolve(path);
+       } catch {
+         continue;
+       }
+     }
+
+     throw new Error(`Model file not found: ${this.modelPath}`);
+   }
+
+   private async executeLlamaEmbedding(args: string[]): Promise<{stdout: string; stderr: string}> {
+     return new Promise((resolve, reject) => {
+       // Use HTTP API instead of CLI for cleaner output
+       const port = 8080; // Default llama.cpp server port
+
+       // Parse the request body from args[0] (JSON string)
+       let requestBody;
+       try {
+         requestBody = JSON.parse(args[0] || '{}');
+       } catch {
+         reject(new Error('Invalid request body for HTTP API'));
+         return;
+       }
+
+       const postData = JSON.stringify(requestBody);
+
+       const options = {
+         hostname: 'localhost',
+         port: port,
+         path: '/embedding',
+         method: 'POST',
+         headers: {
+           'Content-Type': 'application/json',
+           'Content-Length': Buffer.byteLength(postData)
+         }
+       };
+
+       const req = http.request(options, (res: http.IncomingMessage) => {
+         let data = '';
+
+         res.on('data', (chunk: Buffer | string) => {
+           data += chunk;
+         });
+
+         res.on('end', () => {
+           if (res.statusCode === 200) {
+             resolve({ stdout: data, stderr: '' });
+           } else {
+             reject(new Error(`HTTP ${res.statusCode}: ${data}`));
+           }
+         });
+       });
+
+       req.on('error', (error: Error) => {
+         reject(new Error(`Failed to connect to llama.cpp server: ${error.message}`));
+       });
+
+       req.write(postData);
+       req.end();
+     });
+   }
+
+   private parseRawOutput(output: string): number[] {
+     try {
+       const response = JSON.parse(output);
+
+       logger.debug(`PARSE DEBUG: Response type: ${typeof response}`);
+       logger.debug(`PARSE DEBUG: Is Array: ${Array.isArray(response)}`);
+
+       // CASE 1: Array of objects with nested embedding
+       // Format: [{index: 0, embedding: [[...]]}]
+       if (Array.isArray(response) && response.length > 0) {
+         const first = response[0];
+
+         if (first && first.embedding && Array.isArray(first.embedding)) {
+           const emb = first.embedding;
+
+           // Check if nested: [[...]]
+           if (Array.isArray(emb[0])) {
+             const flat = emb[0]; // ← Take the inner array
+             logger.debug(`Parsed ${flat.length} dimensions (nested)`);
+             return flat;
+           }
+
+           // Not nested: [...]
+           logger.debug(`Parsed ${emb.length} dimensions (direct)`);
+           return emb;
+         }
+       }
+
+       // CASE 2: Direct object {embedding: [...]}
+       if (response.embedding && Array.isArray(response.embedding)) {
+         const emb = response.embedding;
+
+         // Check nested
+         if (Array.isArray(emb[0])) {
+           return emb[0];
+         }
+
+         return emb;
+       }
+
+       // CASE 3: Direct array of numbers
+       if (Array.isArray(response) && typeof response[0] === 'number') {
+         logger.debug(`Parsed ${response.length} dimensions (flat array)`);
+         return response;
+       }
+
+       throw new Error(`Unexpected format: ${JSON.stringify(Object.keys(response))}`);
+
+     } catch (error: unknown) {
+       const errorMessage = error instanceof Error ? error.message : String(error);
+       throw new Error(`Parse failed: ${errorMessage}`, { cause: error });
+     }
+   }
+
+   private parseArrayOutput(output: string): number[][] {
+     // Parse array format: [[val1,val2,...], [val1,val2,...], ...]
+     const arrayPattern = /\[([^\]]+)\]/g;
+     const matches = [...output.matchAll(arrayPattern)];
+
+     if (matches.length === 0) {
+       throw new Error('No array embeddings found in output');
+     }
+
+     const embeddings = matches.map(match => {
+       const values = match[1]?.split(',').map(v => v.trim()) || [];
+       return values.map(v => parseFloat(v)).filter(v => !isNaN(v));
+     }).filter(embedding => embedding.length > 0);
+
+     return embeddings;
+   }
+ }
@@ -0,0 +1,113 @@
+ import { Mistral } from '@mistralai/mistralai';
+ import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
+ import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@src/types/index';
+ import { Logger } from '@src/util/logger';
+
+ const logger = Logger.createModuleLogger('mistral');
+
+ export class MistralProvider extends EmbeddingProvider {
+   private client: Mistral;
+
+   constructor(config: EmbedConfig) {
+     super(config);
+
+     if (!config.apiKey) {
+       throw new Error('Mistral API key is required');
+     }
+
+     this.client = new Mistral({
+       apiKey: config.apiKey,
+       serverURL: config.baseUrl,
+       timeoutMs: config.timeout || 30000,
+     });
+
+     logger.info('Mistral provider initialized');
+   }
+
+   async embed(input: EmbedInput): Promise<EmbedResult> {
+     try {
+       const text = await this.readInput(input);
+       logger.debug(`Embedding text with model: ${this.getModel()}`);
+
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         inputs: [text],
+       });
+
+       const embedding = response.data[0];
+       if (!embedding) {
+         throw new Error('No embedding returned from Mistral API');
+       }
+
+       return {
+         embedding: embedding.embedding || [],
+         dimensions: embedding.embedding?.length || 0,
+         model: response.model,
+         provider: 'mistral',
+         usage: response.usage?.promptTokens && response.usage?.totalTokens ? {
+           promptTokens: response.usage.promptTokens,
+           totalTokens: response.usage.totalTokens,
+         } : undefined,
+       };
+     } catch (error: unknown) {
+       logger.error(`Mistral embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
+       throw error;
+     }
+   }
+
+   async embedBatch(inputs: EmbedInput[]): Promise<BatchEmbedResult> {
+     try {
+       const texts = await Promise.all(inputs.map(input => this.readInput(input)));
+       logger.debug(`Batch embedding ${texts.length} texts with model: ${this.getModel()}`);
+
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         inputs: texts,
+       });
+
+       const embeddings = response.data.map((item) => {
+         if (!item.embedding) throw new Error('No embedding returned from Mistral API');
+         return item.embedding as number[];
+       });
+
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: response.model,
+         provider: 'mistral',
+         usage: response.usage?.promptTokens && response.usage?.totalTokens ? {
+           promptTokens: response.usage.promptTokens,
+           totalTokens: response.usage.totalTokens,
+         } : undefined,
+       };
+     } catch (error: unknown) {
+       logger.error(`Mistral batch embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
+       throw error;
+     }
+   }
+
+   getDimensions(): number {
+     // Mistral embedding dimensions
+     const model = this.getModel();
+     if (model.includes('mistral-embed')) return 1024;
+     return 1024; // default for Mistral
+   }
+
+   getProviderName(): string {
+     return 'Mistral AI';
+   }
+
+   async isReady(): Promise<boolean> {
+     try {
+       // Test with a simple embedding request
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         inputs: ['test'],
+       });
+       return response.data.length > 0;
+     } catch (error: unknown) {
+       logger.error(`Mistral readiness check failed: ${(error instanceof Error ? error.message : String(error))}`);
+       return false;
+     }
+   }
+ }
@@ -0,0 +1,108 @@
+ import OpenAI from 'openai';
+ import { EmbeddingProvider } from '@providers/base/EmbeddingProvider';
+ import type { EmbedConfig, EmbedInput, EmbedResult, BatchEmbedResult } from '@src/types/index';
+ import { Logger } from '@src/util/logger';
+
+ const logger = Logger.createModuleLogger('openai');
+
+ export class OpenAIProvider extends EmbeddingProvider {
+   private client: OpenAI;
+
+   constructor(config: EmbedConfig) {
+     super(config);
+
+     if (!config.apiKey) {
+       throw new Error('OpenAI API key is required');
+     }
+
+     this.client = new OpenAI({
+       apiKey: config.apiKey,
+       baseURL: config.baseUrl,
+       timeout: config.timeout || 30000,
+     });
+
+     logger.info('OpenAI provider initialized');
+   }
+
+   async embed(input: EmbedInput): Promise<EmbedResult> {
+     try {
+       const text = await this.readInput(input);
+       logger.debug(`Embedding text with model: ${this.getModel()}`);
+
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         input: text,
+       });
+
+       const embedding = response.data[0];
+       if (!embedding) {
+         throw new Error('No embedding returned from OpenAI API');
+       }
+
+       return {
+         embedding: embedding.embedding || [],
+         dimensions: embedding.embedding?.length || 0,
+         model: response.model,
+         provider: 'openai',
+         usage: response.usage ? {
+           promptTokens: response.usage.prompt_tokens,
+           totalTokens: response.usage.total_tokens,
+         } : undefined,
+       };
+     } catch (error: unknown) {
+       logger.error(`OpenAI embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
+       throw error;
+     }
+   }
+
+   async embedBatch(inputs: EmbedInput[]): Promise<BatchEmbedResult> {
+     try {
+       const texts = await Promise.all(inputs.map(input => this.readInput(input)));
+       logger.debug(`Batch embedding ${texts.length} texts with model: ${this.getModel()}`);
+
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         input: texts,
+       });
+
+       const embeddings = response.data.map(item => item.embedding);
+
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: response.model,
+         provider: 'openai',
+         usage: response.usage ? {
+           promptTokens: response.usage.prompt_tokens,
+           totalTokens: response.usage.total_tokens,
+         } : undefined,
+       };
+     } catch (error: unknown) {
+       logger.error(`OpenAI batch embedding failed: ${(error instanceof Error ? error.message : String(error))}`);
+       throw error;
+     }
+   }
+
+   getDimensions(): number {
+     // Common OpenAI embedding dimensions
+     const model = this.getModel();
+     if (model.includes('text-embedding-3-large')) return 3072;
+     if (model.includes('text-embedding-3-small')) return 1536;
+     if (model.includes('text-embedding-ada-002')) return 1536;
+     return 1536; // default
+   }
+
+   getProviderName(): string {
+     return 'OpenAI';
+   }
+
+   async isReady(): Promise<boolean> {
+     try {
+       await this.client.models.list();
+       return true;
+     } catch (error: unknown) {
+       logger.error(`OpenAI readiness check failed: ${(error instanceof Error ? error.message : String(error))}`);
+       return false;
+     }
+   }
+ }
@@ -0,0 +1,15 @@
+ declare module 'deepseek' {
+   export class DeepSeek {
+     constructor(options: { apiKey: string; baseURL?: string; timeout?: number });
+     embeddings: {
+       create: (options: { model: string; input: string | string[] }) => Promise<{
+         data: Array<{ embedding: number[]; model: string }>;
+         model: string;
+         usage?: {
+           prompt_tokens: number;
+           total_tokens: number;
+         };
+       }>;
+     };
+   }
+ }
@@ -0,0 +1,43 @@
+ export type ProviderType =
+   | 'openai'
+   | 'gemini'
+   | 'claude'
+   | 'mistral'
+   | 'deepseek'
+   | 'llamacpp';
+
+ export interface EmbedConfig {
+   provider: ProviderType;
+   model?: string;
+   apiKey?: string;
+   baseUrl?: string;
+   timeout?: number;
+   maxRetries?: number;
+ }
+
+ export interface EmbedInput {
+   text?: string;
+   filePath?: string;
+ }
+
+ export interface EmbedResult {
+   embedding: number[];
+   dimensions: number;
+   model: string;
+   provider: string;
+   usage?: {
+     promptTokens?: number;
+     totalTokens?: number;
+   } | undefined;
+ }
+
+ export interface BatchEmbedResult {
+   embeddings: number[][];
+   dimensions: number;
+   model: string;
+   provider: string;
+   usage?: {
+     promptTokens?: number;
+     totalTokens?: number;
+   } | undefined;
+ }
@@ -0,0 +1,43 @@
+ export type ProviderType =
+   | 'openai'
+   | 'gemini'
+   | 'claude'
+   | 'mistral'
+   | 'deepseek'
+   | 'llamacpp';
+
+ export interface EmbedConfig {
+   provider: ProviderType;
+   model?: string;
+   apiKey?: string;
+   baseUrl?: string;
+   timeout?: number;
+   maxRetries?: number;
+ }
+
+ export interface EmbedInput {
+   text?: string;
+   filePath?: string;
+ }
+
+ export interface EmbedResult {
+   embedding: number[];
+   dimensions: number;
+   model: string;
+   provider: string;
+   usage?: {
+     promptTokens?: number;
+     totalTokens?: number;
+   } | undefined;
+ }
+
+ export interface BatchEmbedResult {
+   embeddings: number[][];
+   dimensions: number;
+   model: string;
+   provider: string;
+   usage?: {
+     promptTokens?: number;
+     totalTokens?: number;
+   } | undefined;
+ }
@@ -0,0 +1 @@
+ {"name": "@types", "version": "1.0.0", "type": "module", "main": "index.js", "types": "index.d.ts"}
@@ -0,0 +1,7 @@
+ declare module '@xenova/transformers' {
+   export function pipeline(task: string, model: string): Promise<unknown>;
+   export const env: {
+     cacheDir: string;
+     allowLocalModels: boolean;
+   };
+ }
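
A minimal usage sketch of the provider API added above, assuming the package's entry point re-exports OpenAIProvider together with the EmbedConfig and EmbedInput types; the 'vecbox' import path and export names are assumptions, not something this diff confirms.

import { OpenAIProvider } from 'vecbox'; // assumed export path
import type { EmbedConfig, EmbedInput } from 'vecbox'; // assumed export path

async function demo(): Promise<void> {
  // Fields follow the EmbedConfig interface in the types file above
  const config: EmbedConfig = {
    provider: 'openai',
    model: 'text-embedding-3-small',
    apiKey: process.env.OPENAI_API_KEY ?? '',
  };

  const provider = new OpenAIProvider(config);

  // EmbedInput accepts either inline text or a filePath
  const input: EmbedInput = { text: 'hello embeddings' };
  const result = await provider.embed(input);
  console.log(result.dimensions, result.provider);
}

demo().catch(console.error);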