vecbox 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,874 @@
+ var __defProp = Object.defineProperty;
+ var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
+ var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
+
+ // main.ts
+ import * as dotenv from "dotenv";
+
+ // src/providers/openai.ts
+ import OpenAI from "openai";
+
+ // src/providers/base/EmbeddingProvider.ts
+ var EmbeddingProvider = class {
+   constructor(config2) {
+     __publicField(this, "config");
+     this.config = config2;
+   }
+   getModel() {
+     return this.config.model || "default";
+   }
+   async readInput(input) {
+     if (input.text) {
+       return input.text;
+     }
+     if (input.filePath) {
+       const fs = await import("fs/promises");
+       return await fs.readFile(input.filePath, "utf-8");
+     }
+     throw new Error("Either text or filePath must be provided");
+   }
+ };
+
+ // src/util/logger.ts
+ var _Logger = class _Logger {
+   constructor(moduleName = "embedbox", level = 1 /* INFO */) {
+     __publicField(this, "currentLevel");
+     __publicField(this, "moduleName");
+     this.moduleName = moduleName;
+     this.currentLevel = level;
+   }
+   static getInstance(moduleName, level) {
+     if (!_Logger.instance) {
+       _Logger.instance = new _Logger(moduleName || "embedbox", level);
+     }
+     return _Logger.instance;
+   }
+   setLevel(level) {
+     this.currentLevel = level;
+   }
+   getLevel() {
+     return this.currentLevel;
+   }
+   formatMessage(level, message) {
+     const levelName = _Logger.LEVEL_NAMES[level];
+     const color = _Logger.COLORS[levelName];
+     const reset = _Logger.COLORS.RESET;
+     return `${color}[${levelName}(${this.moduleName})]${reset} ${message}\n\n`;
+   }
+   log(level, message) {
+     if (level < this.currentLevel) {
+       return;
+     }
+     const formattedMessage = this.formatMessage(level, message);
+     process.stdout.write(formattedMessage);
+   }
+   debug(message) {
+     this.log(0 /* DEBUG */, message);
+   }
+   info(message) {
+     this.log(1 /* INFO */, message);
+   }
+   warn(message) {
+     this.log(2 /* WARN */, message);
+   }
+   error(message) {
+     this.log(3 /* ERROR */, message);
+   }
+   // Static methods for quick access
+   static debug(message, moduleName) {
+     const logger9 = new _Logger(moduleName || "embedbox");
+     logger9.debug(message);
+   }
+   static info(message, moduleName) {
+     const logger9 = new _Logger(moduleName || "embedbox");
+     logger9.info(message);
+   }
+   static warn(message, moduleName) {
+     const logger9 = new _Logger(moduleName || "embedbox");
+     logger9.warn(message);
+   }
+   static error(message, moduleName) {
+     const logger9 = new _Logger(moduleName || "embedbox");
+     logger9.error(message);
+   }
+   // Method to create a logger instance for a specific module
+   static createModuleLogger(moduleName, level) {
+     return new _Logger(`embedbox:${moduleName}`, level);
+   }
+ };
+ __publicField(_Logger, "instance");
+ // ANSI color codes - simplified for better readability
+ __publicField(_Logger, "COLORS", {
+   RESET: "\x1B[0m",
+   DEBUG: "\x1B[36m",
+   // Cyan
+   INFO: "\x1B[32m",
+   // Green
+   WARN: "\x1B[33m",
+   // Yellow
+   ERROR: "\x1B[31m"
+   // Red
+ });
+ __publicField(_Logger, "LEVEL_NAMES", {
+   [0 /* DEBUG */]: "DEBUG",
+   [1 /* INFO */]: "INFO",
+   [2 /* WARN */]: "WARN",
+   [3 /* ERROR */]: "ERROR"
+ });
+ var Logger = _Logger;
+ var logger = Logger.getInstance();
+
+ // src/providers/openai.ts
+ var logger2 = Logger.createModuleLogger("openai");
+ var OpenAIProvider = class extends EmbeddingProvider {
+   constructor(config2) {
+     super(config2);
+     __publicField(this, "client");
+     if (!config2.apiKey) {
+       throw new Error("OpenAI API key is required");
+     }
+     this.client = new OpenAI({
+       apiKey: config2.apiKey,
+       baseURL: config2.baseUrl,
+       timeout: config2.timeout || 3e4
+     });
+     logger2.info("OpenAI provider initialized");
+   }
+   async embed(input) {
+     try {
+       const text = await this.readInput(input);
+       logger2.debug(`Embedding text with model: ${this.getModel()}`);
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         input: text
+       });
+       const embedding = response.data[0];
+       if (!embedding) {
+         throw new Error("No embedding returned from OpenAI API");
+       }
+       return {
+         embedding: embedding.embedding || [],
+         dimensions: embedding.embedding?.length || 0,
+         model: response.model,
+         provider: "openai",
+         usage: response.usage ? {
+           promptTokens: response.usage.prompt_tokens,
+           totalTokens: response.usage.total_tokens
+         } : void 0
+       };
+     } catch (error) {
+       logger2.error(`OpenAI embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   async embedBatch(inputs) {
+     try {
+       const texts = await Promise.all(inputs.map((input) => this.readInput(input)));
+       logger2.debug(`Batch embedding ${texts.length} texts with model: ${this.getModel()}`);
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         input: texts
+       });
+       const embeddings = response.data.map((item) => item.embedding);
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: response.model,
+         provider: "openai",
+         usage: response.usage ? {
+           promptTokens: response.usage.prompt_tokens,
+           totalTokens: response.usage.total_tokens
+         } : void 0
+       };
+     } catch (error) {
+       logger2.error(`OpenAI batch embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   getDimensions() {
+     const model = this.getModel();
+     if (model.includes("text-embedding-3-large")) return 3072;
+     if (model.includes("text-embedding-3-small")) return 1536;
+     if (model.includes("text-embedding-ada-002")) return 1536;
+     return 1536;
+   }
+   getProviderName() {
+     return "OpenAI";
+   }
+   async isReady() {
+     try {
+       await this.client.models.list();
+       return true;
+     } catch (error) {
+       logger2.error(`OpenAI readiness check failed: ${error instanceof Error ? error.message : String(error)}`);
+       return false;
+     }
+   }
+ };
+
+ // src/providers/gemini.ts
+ import { GoogleGenerativeAI } from "@google/generative-ai";
+ var logger3 = Logger.createModuleLogger("gemini");
+ var GeminiProvider = class extends EmbeddingProvider {
+   constructor(config2) {
+     super(config2);
+     __publicField(this, "client");
+     if (!config2.apiKey) {
+       throw new Error("Google API key is required");
+     }
+     this.client = new GoogleGenerativeAI(config2.apiKey);
+     logger3.info("Gemini provider initialized");
+   }
+   async embed(input) {
+     try {
+       const text = await this.readInput(input);
+       logger3.debug(`Embedding text with model: ${this.getModel()}`);
+       const model = this.client.getGenerativeModel({
+         model: this.getModel()
+       });
+       const result = await model.embedContent(text);
+       const embedding = result.embedding;
+       return {
+         embedding: embedding.values,
+         dimensions: embedding.values.length,
+         model: this.getModel(),
+         provider: "gemini"
+       };
+     } catch (error) {
+       logger3.error(`Gemini embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   async embedBatch(inputs) {
+     try {
+       const texts = await Promise.all(inputs.map((input) => this.readInput(input)));
+       logger3.debug(`Batch embedding ${texts.length} texts with model: ${this.getModel()}`);
+       const model = this.client.getGenerativeModel({
+         model: this.getModel()
+       });
+       const results = await Promise.all(
+         texts.map((text) => model.embedContent(text))
+       );
+       const embeddings = results.map((result) => result.embedding.values);
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: this.getModel(),
+         provider: "gemini"
+       };
+     } catch (error) {
+       logger3.error(`Gemini batch embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   getDimensions() {
+     const model = this.getModel();
+     if (model.includes("gemini-embedding-001")) return 768;
+     if (model.includes("text-embedding-004")) return 768;
+     if (model.includes("embedding-001")) return 768;
+     if (model.includes("multimodalembedding")) return 768;
+     return 768;
+   }
+   getProviderName() {
+     return "Google Gemini";
+   }
+   getModel() {
+     return this.config.model || "gemini-embedding-001";
+   }
+   async isReady() {
+     try {
+       const model = this.client.getGenerativeModel({
+         model: this.getModel()
+       });
+       await model.embedContent("test");
+       return true;
+     } catch (error) {
+       logger3.error(`Gemini readiness check failed: ${error instanceof Error ? error.message : String(error)}`);
+       return false;
+     }
+   }
+ };
+
+ // src/providers/claude.ts
+ import Anthropic from "@anthropic-ai/sdk";
+ var logger4 = Logger.createModuleLogger("claude");
+ var ClaudeProvider = class extends EmbeddingProvider {
+   constructor(config2) {
+     super(config2);
+     __publicField(this, "client");
+     if (!config2.apiKey) {
+       throw new Error("Anthropic API key is required");
+     }
+     this.client = new Anthropic({
+       apiKey: config2.apiKey,
+       baseURL: config2.baseUrl,
+       timeout: config2.timeout || 3e4
+     });
+     logger4.info("Claude provider initialized");
+   }
+   async embed() {
+     try {
+       logger4.debug(`Embedding text with model: ${this.getModel()}`);
+       throw new Error("Claude embeddings API not yet available. Please use another provider.");
+     } catch (error) {
+       const errorMessage = error instanceof Error ? error.message : "Unknown error";
+       logger4.error(`Claude embedding failed: ${errorMessage}`);
+       throw error;
+     }
+   }
+   async embedBatch() {
+     try {
+       throw new Error("Claude embeddings API not yet available. Please use another provider.");
+     } catch (error) {
+       const errorMessage = error instanceof Error ? error.message : "Unknown error";
+       logger4.error(`Claude batch embedding failed: ${errorMessage}`);
+       throw error;
+     }
+   }
+   getDimensions() {
+     return 0;
+   }
+   getProviderName() {
+     return "Anthropic Claude";
+   }
+   async isReady() {
+     try {
+       await this.client.messages.create({
+         model: "claude-3-haiku-20240307",
+         max_tokens: 10,
+         messages: [{ role: "user", content: "test" }]
+       });
+       return true;
+     } catch (error) {
+       const errorMessage = error instanceof Error ? error.message : "Unknown error";
+       logger4.error(`Claude readiness check failed: ${errorMessage}`);
+       return false;
+     }
+   }
+ };
+
+ // src/providers/mistral.ts
+ import { Mistral } from "@mistralai/mistralai";
+ var logger5 = Logger.createModuleLogger("mistral");
+ var MistralProvider = class extends EmbeddingProvider {
+   constructor(config2) {
+     super(config2);
+     __publicField(this, "client");
+     if (!config2.apiKey) {
+       throw new Error("Mistral API key is required");
+     }
+     this.client = new Mistral({
+       apiKey: config2.apiKey,
+       serverURL: config2.baseUrl,
+       timeoutMs: config2.timeout || 3e4
+     });
+     logger5.info("Mistral provider initialized");
+   }
+   async embed(input) {
+     try {
+       const text = await this.readInput(input);
+       logger5.debug(`Embedding text with model: ${this.getModel()}`);
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         inputs: [text]
+       });
+       const embedding = response.data[0];
+       if (!embedding) {
+         throw new Error("No embedding returned from Mistral API");
+       }
+       return {
+         embedding: embedding.embedding || [],
+         dimensions: embedding.embedding?.length || 0,
+         model: response.model,
+         provider: "mistral",
+         usage: response.usage?.promptTokens && response.usage?.totalTokens ? {
+           promptTokens: response.usage.promptTokens,
+           totalTokens: response.usage.totalTokens
+         } : void 0
+       };
+     } catch (error) {
+       logger5.error(`Mistral embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   async embedBatch(inputs) {
+     try {
+       const texts = await Promise.all(inputs.map((input) => this.readInput(input)));
+       logger5.debug(`Batch embedding ${texts.length} texts with model: ${this.getModel()}`);
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         inputs: texts
+       });
+       const embeddings = response.data.map((item) => {
+         if (!item.embedding) throw new Error("No embedding returned from Mistral API");
+         return item.embedding;
+       });
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: response.model,
+         provider: "mistral",
+         usage: response.usage?.promptTokens && response.usage?.totalTokens ? {
+           promptTokens: response.usage.promptTokens,
+           totalTokens: response.usage.totalTokens
+         } : void 0
+       };
+     } catch (error) {
+       logger5.error(`Mistral batch embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   getDimensions() {
+     const model = this.getModel();
+     if (model.includes("mistral-embed")) return 1024;
+     return 1024;
+   }
+   getProviderName() {
+     return "Mistral AI";
+   }
+   async isReady() {
+     try {
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         inputs: ["test"]
+       });
+       return response.data.length > 0;
+     } catch (error) {
+       logger5.error(`Mistral readiness check failed: ${error instanceof Error ? error.message : String(error)}`);
+       return false;
+     }
+   }
+ };
+
+ // src/providers/deepseek.ts
+ import { DeepSeek } from "deepseek";
+ var logger6 = Logger.createModuleLogger("deepseek");
+ var DeepSeekProvider = class extends EmbeddingProvider {
+   constructor(config2) {
+     super(config2);
+     __publicField(this, "client");
+     if (!config2.apiKey) {
+       throw new Error("DeepSeek API key is required");
+     }
+     const clientOptions = {
+       apiKey: config2.apiKey,
+       timeout: config2.timeout || 3e4
+     };
+     if (config2.baseUrl) {
+       clientOptions.baseURL = config2.baseUrl;
+     }
+     this.client = new DeepSeek(clientOptions);
+     logger6.info("DeepSeek provider initialized");
+   }
+   async embed(input) {
+     try {
+       const text = await this.readInput(input);
+       logger6.debug(`Embedding text with model: ${this.getModel()}`);
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         input: text
+       });
+       const embedding = response.data[0];
+       if (!embedding) {
+         throw new Error("No embedding returned from DeepSeek API");
+       }
+       return {
+         embedding: embedding.embedding || [],
+         dimensions: embedding.embedding?.length || 0,
+         model: embedding.model || this.getModel(),
+         provider: "deepseek",
+         usage: response.usage ? {
+           promptTokens: response.usage.prompt_tokens,
+           totalTokens: response.usage.total_tokens
+         } : void 0
+       };
+     } catch (error) {
+       logger6.error(`DeepSeek embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   async embedBatch(inputs) {
+     try {
+       const texts = await Promise.all(inputs.map((input) => this.readInput(input)));
+       logger6.debug(`Batch embedding ${texts.length} texts with model: ${this.getModel()}`);
+       const response = await this.client.embeddings.create({
+         model: this.getModel(),
+         input: texts
+       });
+       const embeddings = response.data.map((item) => item.embedding);
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: response.model,
+         provider: "deepseek",
+         usage: response.usage ? {
+           promptTokens: response.usage.prompt_tokens,
+           totalTokens: response.usage.total_tokens
+         } : void 0
+       };
+     } catch (error) {
+       logger6.error(`DeepSeek batch embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   getDimensions() {
+     const model = this.getModel();
+     if (model.includes("deepseek-chat")) return 4096;
+     return 4096;
+   }
+   getProviderName() {
+     return "DeepSeek";
+   }
+   async isReady() {
+     try {
+       await this.client.embeddings.create({
+         model: this.getModel(),
+         input: "test"
+       });
+       return true;
+     } catch (error) {
+       logger6.error(`DeepSeek readiness check failed: ${error instanceof Error ? error.message : String(error)}`);
+       return false;
+     }
+   }
+ };
+
+ // src/providers/llamacpp.ts
+ import { access, constants } from "fs/promises";
+ import { join, resolve } from "path";
+ import * as http from "http";
+ var LlamaCppProvider = class extends EmbeddingProvider {
+   constructor(config2) {
+     super({ ...config2, provider: "llamacpp" });
+     __publicField(this, "llamaPath");
+     __publicField(this, "modelPath");
+     this.modelPath = config2.model || "nomic-embed-text-v1.5.Q4_K_M.gguf";
+     this.llamaPath = config2.llamaPath || "./llama.cpp/build/bin/llama-embedding";
+     logger.info(`Llama.cpp provider initialized with model: ${this.modelPath}`);
+   }
+   // Public API methods
+   getProviderName() {
+     return "Llama.cpp";
+   }
+   getDimensions() {
+     const model = this.getModel();
+     if (model.includes("nomic-embed-text-v1.5")) return 768;
+     if (model.includes("nomic-embed-text-v1")) return 768;
+     if (model.includes("all-MiniLM-L6-v2")) return 384;
+     if (model.includes("bge-base")) return 768;
+     if (model.includes("bert-base")) return 768;
+     return 768;
+   }
+   async isReady() {
+     try {
+       await access(this.llamaPath, constants.F_OK);
+       await access(this.llamaPath, constants.X_OK);
+       const modelPath = await this.getModelPath();
+       await access(modelPath, constants.F_OK);
+       logger.debug("Llama.cpp provider is ready");
+       return true;
+     } catch (error) {
+       logger.error(`Llama.cpp readiness check failed: ${error instanceof Error ? error.message : String(error)}`);
+       return false;
+     }
+   }
+   async embed(input) {
+     try {
+       logger.debug(`Embedding text with llama.cpp: ${this.getModel()}`);
+       const text = await this.readInput(input);
+       if (!text.trim()) {
+         throw new Error("Text input cannot be empty");
+       }
+       const requestBody = {
+         input: text,
+         model: await this.getModelPath(),
+         pooling: "mean",
+         normalize: 2
+       };
+       const result = await this.executeLlamaEmbedding([JSON.stringify(requestBody)]);
+       const embedding = this.parseRawOutput(result.stdout);
+       return {
+         embedding,
+         dimensions: embedding.length,
+         model: this.getModel(),
+         provider: "llamacpp"
+       };
+     } catch (error) {
+       logger.error(`Llama.cpp embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   async embedBatch(inputs) {
+     try {
+       logger.debug(`Batch embedding ${inputs.length} texts with llama.cpp`);
+       const texts = [];
+       for (const input of inputs) {
+         const text = await this.readInput(input);
+         if (text.trim()) {
+           texts.push(text);
+         }
+       }
+       if (texts.length === 0) {
+         throw new Error("No valid texts to embed");
+       }
+       const modelPath = await this.getModelPath();
+       const requests = texts.map((text) => ({
+         input: text,
+         model: modelPath,
+         pooling: "mean",
+         normalize: 2
+       }));
+       const embeddings = [];
+       for (const request2 of requests) {
+         const result = await this.executeLlamaEmbedding([JSON.stringify(request2)]);
+         const embedding = this.parseRawOutput(result.stdout);
+         embeddings.push(embedding);
+       }
+       return {
+         embeddings,
+         dimensions: embeddings[0]?.length || 0,
+         model: this.getModel(),
+         provider: "llamacpp"
+       };
+     } catch (error) {
+       logger.error(`Llama.cpp batch embedding failed: ${error instanceof Error ? error.message : String(error)}`);
+       throw error;
+     }
+   }
+   // Protected methods
+   getModel() {
+     return this.modelPath;
+   }
+   // Private helper methods
+   async getModelPath() {
+     const possiblePaths = [
+       this.modelPath,
+       // As provided
+       join("./llama.cpp/models", this.modelPath),
+       // In llama.cpp/models
+       join("./llama.cpp", this.modelPath),
+       // In llama.cpp root
+       this.modelPath
+       // Fallback
+     ];
+     for (const path of possiblePaths) {
+       try {
+         await access(path, constants.F_OK);
+         return resolve(path);
+       } catch {
+         continue;
+       }
+     }
+     throw new Error(`Model file not found: ${this.modelPath}`);
+   }
+   async executeLlamaEmbedding(args) {
+     return new Promise((resolve2, reject) => {
+       const port = 8080;
+       let requestBody;
+       try {
+         requestBody = JSON.parse(args[0] || "{}");
+       } catch {
+         reject(new Error("Invalid request body for HTTP API"));
+         return;
+       }
+       const postData = JSON.stringify(requestBody);
+       const options = {
+         hostname: "localhost",
+         port,
+         path: "/embedding",
+         method: "POST",
+         headers: {
+           "Content-Type": "application/json",
+           "Content-Length": Buffer.byteLength(postData)
+         }
+       };
+       const req = http.request(options, (res) => {
+         let data = "";
+         res.on("data", (chunk) => {
+           data += chunk;
+         });
+         res.on("end", () => {
+           if (res.statusCode === 200) {
+             resolve2({ stdout: data, stderr: "" });
+           } else {
+             reject(new Error(`HTTP ${res.statusCode}: ${data}`));
+           }
+         });
+       });
+       req.on("error", (error) => {
+         reject(new Error(`Failed to connect to llama.cpp server: ${error instanceof Error ? error.message : String(error)}`));
+       });
+       req.write(postData);
+       req.end();
+     });
+   }
+   parseRawOutput(output) {
+     try {
+       const response = JSON.parse(output);
+       logger.debug(`PARSE DEBUG: Response type: ${typeof response}`);
+       logger.debug(`PARSE DEBUG: Is Array: ${Array.isArray(response)}`);
+       if (Array.isArray(response) && response.length > 0) {
+         const first = response[0];
+         if (first && first.embedding && Array.isArray(first.embedding)) {
+           const emb = first.embedding;
+           if (Array.isArray(emb[0])) {
+             const flat = emb[0];
+             logger.debug(`Parsed ${flat.length} dimensions (nested)`);
+             return flat;
+           }
+           logger.debug(`Parsed ${emb.length} dimensions (direct)`);
+           return emb;
+         }
+       }
+       if (response.embedding && Array.isArray(response.embedding)) {
+         const emb = response.embedding;
+         if (Array.isArray(emb[0])) {
+           return emb[0];
+         }
+         return emb;
+       }
+       if (Array.isArray(response) && typeof response[0] === "number") {
+         logger.debug(`Parsed ${response.length} dimensions (flat array)`);
+         return response;
+       }
+       throw new Error(`Unexpected format: ${JSON.stringify(Object.keys(response))}`);
+     } catch (error) {
+       const errorMessage = error instanceof Error ? error.message : "Unknown error";
+       throw new Error(`Parse failed: ${errorMessage}`, { cause: error });
+     }
+   }
+   parseArrayOutput(output) {
+     const arrayPattern = /\[([^\]]+)\]/g;
+     const matches = [...output.matchAll(arrayPattern)];
+     if (matches.length === 0) {
+       throw new Error("No array embeddings found in output");
+     }
+     const embeddings = matches.map((match) => {
+       const values = match[1]?.split(",").map((v) => v.trim()) || [];
+       return values.map((v) => parseFloat(v)).filter((v) => !isNaN(v));
+     }).filter((embedding) => embedding.length > 0);
+     return embeddings;
+   }
+ };
+
+ // src/factory/EmbeddingFactory.ts
+ var logger7 = Logger.createModuleLogger("factory");
+ var EmbeddingFactory = class {
+   static create(config2) {
+     logger7.info(`Creating provider: ${config2.provider}`);
+     const ProviderClass = this.providers.get(config2.provider);
+     if (!ProviderClass) {
+       throw new Error(`Unsupported provider: ${config2.provider}`);
+     }
+     return new ProviderClass(config2);
+   }
+   static getSupportedProviders() {
+     return Array.from(this.providers.keys());
+   }
+ };
+ __publicField(EmbeddingFactory, "providers", /* @__PURE__ */ new Map([
+   ["openai", OpenAIProvider],
+   ["gemini", GeminiProvider],
+   ["claude", ClaudeProvider],
+   ["mistral", MistralProvider],
+   ["deepseek", DeepSeekProvider],
+   ["llamacpp", LlamaCppProvider]
+   // Local embeddings with llama.cpp
+ ]));
+
+ // main.ts
+ dotenv.config();
+ var logger8 = Logger.createModuleLogger("main");
+ async function embed(config2, input) {
+   try {
+     logger8.info(`Starting embedding with provider: ${config2.provider}`);
+     const provider = EmbeddingFactory.create(config2);
+     const isReady = await provider.isReady();
+     if (!isReady) {
+       throw new Error(`Provider ${config2.provider} is not ready`);
+     }
+     if (Array.isArray(input)) {
+       logger8.debug(`Processing batch of ${input.length} items`);
+       return await provider.embedBatch(input);
+     } else {
+       logger8.debug(`Processing single item`);
+       return await provider.embed(input);
+     }
+   } catch (error) {
+     const errorMessage = error instanceof Error ? error.message : String(error);
+     logger8.error(`Embedding failed: ${errorMessage}`);
+     throw error;
+   }
+ }
+ async function autoEmbed(input) {
+   logger8.info("Auto-detecting best provider...");
+   const providers = [
+     { provider: "llamacpp", model: "nomic-embed-text-v1.5.Q4_K_M.gguf" },
+     // Local & free (llama.cpp)
+     { provider: "openai", model: "text-embedding-3-small", apiKey: process.env.OPENAI_API_KEY || void 0 },
+     { provider: "gemini", model: "gemini-embedding-001", apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY || void 0 },
+     { provider: "mistral", model: "mistral-embed", apiKey: process.env.MISTRAL_API_KEY || void 0 },
+     { provider: "deepseek", model: "deepseek-chat", apiKey: process.env.DEEPSEEK_API_KEY || void 0 }
+   ];
+   for (const config2 of providers) {
+     try {
+       if (config2.provider === "llamacpp" || config2.apiKey) {
+         logger8.info(`Trying provider: ${config2.provider}`);
+         const cleanConfig = {
+           provider: config2.provider,
+           model: config2.model
+         };
+         if (config2.apiKey) {
+           cleanConfig.apiKey = config2.apiKey;
+         }
+         return await embed(cleanConfig, input);
+       }
+     } catch (error) {
+       const errorMessage = error instanceof Error ? error.message : String(error);
+       logger8.warn(`Provider ${config2.provider} failed: ${errorMessage}`);
+       continue;
+     }
+   }
+   throw new Error("No available embedding provider found");
+ }
+ function getSupportedProviders() {
+   return EmbeddingFactory.getSupportedProviders();
+ }
+ function createProvider(config2) {
+   return EmbeddingFactory.create(config2);
+ }
+
+ // index.ts
+ var VERSION = "1.0.0";
+ function getVersion() {
+   return VERSION;
+ }
+ var LIB_INFO = {
+   name: "embedbox",
+   version: VERSION,
+   description: "A minimal and powerful embedding library",
+   homepage: "https://embedbox.dev",
+   repository: "https://github.com/embedbox/embedbox.git",
+   supportedProviders: [
+     "openai",
+     "gemini",
+     "claude",
+     "mistral",
+     "deepseek",
+     "llamacpp"
+   ]
+ };
+ export {
+   EmbeddingFactory,
+   EmbeddingProvider,
+   LIB_INFO,
+   VERSION,
+   autoEmbed,
+   createProvider,
+   embed,
+   getSupportedProviders,
+   getVersion
+ };
+ //# sourceMappingURL=index.js.map
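
The bundle's exported surface is small: embed(config, input), autoEmbed(input), createProvider(config), getSupportedProviders(), plus the EmbeddingFactory and EmbeddingProvider classes. A minimal usage sketch of that surface follows; the import specifier "vecbox" (the package name above, even though the bundled LIB_INFO calls itself "embedbox") and the presence of an OPENAI_API_KEY are assumptions, not something this diff confirms:

// usage-sketch.mjs -- illustrative only, not part of the published bundle
import { embed, autoEmbed, getSupportedProviders } from "vecbox"; // assumed install name

// Single input with an explicit provider config; input may be { text } or { filePath }.
const single = await embed(
  { provider: "openai", model: "text-embedding-3-small", apiKey: process.env.OPENAI_API_KEY },
  { text: "hello world" }
);
console.log(single.provider, single.dimensions);

// An array of inputs routes to embedBatch() and yields { embeddings, dimensions, ... }.
const batch = await embed(
  { provider: "openai", model: "text-embedding-3-small", apiKey: process.env.OPENAI_API_KEY },
  [{ text: "first" }, { text: "second" }]
);
console.log(batch.embeddings.length, batch.dimensions);

// autoEmbed() walks a fallback list: llamacpp first (which expects a llama.cpp server
// answering on localhost:8080/embedding), then each hosted provider whose API key is set.
const auto = await autoEmbed({ text: "hello world" });
console.log(getSupportedProviders(), auto.model);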