voyageai-cli 1.33.2 → 1.33.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voyageai-cli",
3
- "version": "1.33.2",
3
+ "version": "1.33.4",
4
4
  "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
5
  "_comment": "This package contains the CLI + web playground. The electron/ directory is excluded via .npmignore and distributed via GitHub Releases.",
6
6
  "bin": {
@@ -316,7 +316,10 @@ function createPlaygroundServer() {
316
316
 
317
317
  // Handle RAG API requests
318
318
  if (req.url.startsWith('/api/rag/')) {
319
- const handled = await handleRAGRequest(req, res, { generateEmbeddings });
319
+ const handled = await handleRAGRequest(req, res, {
320
+ generateEmbeddings,
321
+ generateLocalEmbeddings: require('../nano/nano-local.js').generateLocalEmbeddings,
322
+ });
320
323
  if (handled) return;
321
324
  }
322
325
 
@@ -25,6 +25,34 @@ async function extractTextFromPDF(buffer) {
25
25
  const RAG_DB = 'vai_rag';
26
26
  const KBS_COLLECTION = 'knowledge_bases';
27
27
 
28
/**
 * Compute live statistics for a knowledge base from its docs collection.
 *
 * Runs a single `$group` aggregation over every document in the collection and
 * reduces it to the three counters stored on the KB metadata record.
 *
 * @param {Object} docsCollection - Collection-like object exposing
 *   `aggregate(pipeline).toArray()`.
 * @returns {Promise<{ size: number, chunkCount: number, docCount: number }>}
 *   `size` is the summed UTF-8 byte length of every string `content` field,
 *   `chunkCount` is the number of documents, and `docCount` is the number of
 *   distinct truthy `fileName` values. All three are 0 when the aggregation
 *   returns no rows.
 */
async function computeKBStatsFromCollection(docsCollection) {
  // $strLenBytes raises an aggregation error for any non-string argument, so
  // gate it on $type. Null, missing and non-string content all count as 0
  // bytes instead of failing the stats query for the whole collection.
  const contentBytes = {
    $cond: [
      { $eq: [{ $type: '$content' }, 'string'] },
      { $strLenBytes: '$content' },
      0
    ]
  };

  const rows = await docsCollection.aggregate([
    { $group: {
      _id: null,
      totalSize: { $sum: contentBytes },
      chunkCount: { $sum: 1 },
      files: { $addToSet: '$fileName' }
    } }
  ]).toArray();

  // An empty collection yields no group row at all.
  const liveStats = rows[0] || { totalSize: 0, chunkCount: 0, files: [] };
  return {
    size: liveStats.totalSize,
    chunkCount: liveStats.chunkCount,
    // Chunks stored without a fileName must not count as a document.
    docCount: liveStats.files.filter(Boolean).length
  };
}
45
+
46
/**
 * Compute live statistics for the knowledge base called `kbName`.
 *
 * Looks up the `kb_<kbName>_docs` collection on `db` and delegates to
 * computeKBStatsFromCollection.
 *
 * @param {Object} db - Database handle exposing `collection(name)`.
 * @param {string} kbName - Knowledge base name used to build the collection name.
 * @returns {Promise<Object>} Whatever computeKBStatsFromCollection resolves to.
 */
async function computeKBStats(db, kbName) {
  const docsCollection = db.collection(`kb_${kbName}_docs`);
  return computeKBStatsFromCollection(docsCollection);
}
49
+
50
/**
 * Split `content` with chunkText and keep only the usable chunks.
 *
 * String chunks are trimmed; non-string chunks and chunks that are empty
 * after trimming are dropped.
 *
 * @param {*} content - Value passed straight through to chunkText.
 * @returns {string[]} Trimmed, non-empty chunks in their original order.
 */
function normalizeChunks(content) {
  return chunkText(content).reduce((kept, piece) => {
    const text = typeof piece === 'string' ? piece.trim() : '';
    if (text) {
      kept.push(text);
    }
    return kept;
  }, []);
}
55
+
28
56
  // ── Friendly KB name generator ──
29
57
  const KB_ADJECTIVES = [
30
58
  'swift', 'bright', 'calm', 'bold', 'keen',
@@ -76,26 +104,86 @@ function generateKBName() {
76
104
  return `${adj}-${noun}-${suffix}`;
77
105
  }
78
106
 
107
+ /**
108
+ * Resolve the correct embedding function based on the selected model.
109
+ * When embeddingModel is 'voyage-4-nano', uses local nano embeddings.
110
+ * Otherwise, uses the remote Voyage API.
111
+ *
112
+ * @param {string} embeddingModel - Selected embedding model name
113
+ * @param {Function} remoteEmbed - Remote generateEmbeddings function
114
+ * @param {Function} localEmbed - Local generateLocalEmbeddings function
115
+ * @returns {{ embedFn: Function, model: string, isLocal: boolean }}
116
+ */
117
+ function resolveEmbedFn(embeddingModel, remoteEmbed, localEmbed) {
118
+ if (embeddingModel === 'voyage-4-nano' && localEmbed) {
119
+ return {
120
+ embedFn: (texts, opts) => localEmbed(texts, {
121
+ inputType: opts.inputType || 'document',
122
+ dimensions: 1024,
123
+ }),
124
+ model: 'voyage-4-nano',
125
+ isLocal: true,
126
+ };
127
+ }
128
+ return {
129
+ embedFn: (texts, opts) => remoteEmbed(texts, {
130
+ model: embeddingModel || 'voyage-4-large',
131
+ inputType: opts.inputType || 'document',
132
+ }),
133
+ model: embeddingModel || 'voyage-4-large',
134
+ isLocal: false,
135
+ };
136
+ }
137
+
79
138
  /**
80
139
  * Handle RAG API requests
81
140
  * Returns true if handled, false otherwise
82
141
  * @param {http.IncomingMessage} req
83
142
  * @param {http.ServerResponse} res
84
- * @param {Object} context - API context (generateEmbeddings, etc.)
143
+ * @param {Object} context - API context (generateEmbeddings, generateLocalEmbeddings)
85
144
  */
86
145
  async function handleRAGRequest(req, res, context) {
87
- const { generateEmbeddings } = context;
146
+ const { generateEmbeddings, generateLocalEmbeddings } = context;
88
147
 
89
148
  // GET /api/rag/kbs - List all knowledge bases
90
149
  if (req.method === 'GET' && req.url === '/api/rag/kbs') {
91
150
  try {
92
151
  const { client, collection: kbsCollection } = await getMongoCollection(RAG_DB, KBS_COLLECTION);
152
+ const db = client.db(RAG_DB);
93
153
  const kbs = await kbsCollection.find({}).toArray();
154
+ const metadataFixes = [];
155
+ const hydratedKbs = await Promise.all(kbs.map(async (kb) => {
156
+ const liveStats = await computeKBStats(db, kb.name);
157
+ if (
158
+ (kb.docCount || 0) !== liveStats.docCount ||
159
+ (kb.chunkCount || 0) !== liveStats.chunkCount ||
160
+ (kb.size || 0) !== liveStats.size
161
+ ) {
162
+ metadataFixes.push({
163
+ updateOne: {
164
+ filter: { _id: kb._id },
165
+ update: {
166
+ $set: {
167
+ docCount: liveStats.docCount,
168
+ chunkCount: liveStats.chunkCount,
169
+ size: liveStats.size
170
+ }
171
+ }
172
+ }
173
+ });
174
+ }
175
+ return { ...kb, ...liveStats };
176
+ }));
177
+
178
+ if (metadataFixes.length > 0) {
179
+ await kbsCollection.bulkWrite(metadataFixes, { ordered: false });
180
+ }
181
+
94
182
  client.close();
95
183
 
96
184
  res.writeHead(200, { 'Content-Type': 'application/json' });
97
185
  res.end(JSON.stringify({
98
- kbs: kbs.map(kb => ({
186
+ kbs: hydratedKbs.map(kb => ({
99
187
  name: kb.name,
100
188
  displayName: kb.displayName || kb.name,
101
189
  docCount: kb.docCount || 0,
@@ -223,6 +311,7 @@ async function handleRAGRequest(req, res, context) {
223
311
  const headerSep = Buffer.from('\r\n\r\n');
224
312
  const crlf = Buffer.from('\r\n');
225
313
  let kbName = null;
314
+ let embeddingModel = null;
226
315
 
227
316
  // Find all boundary positions in the raw Buffer
228
317
  let searchStart = 0;
@@ -267,6 +356,8 @@ async function handleRAGRequest(req, res, context) {
267
356
  files.push({ name: filename, path: filepath });
268
357
  } else if (nameMatch && nameMatch[1] === 'kbName') {
269
358
  kbName = body.slice(contentStart, contentEnd).toString('utf8').trim();
359
+ } else if (nameMatch && nameMatch[1] === 'embeddingModel') {
360
+ embeddingModel = body.slice(contentStart, contentEnd).toString('utf8').trim();
270
361
  }
271
362
  }
272
363
 
@@ -323,6 +414,9 @@ async function handleRAGRequest(req, res, context) {
323
414
  }
324
415
  }
325
416
 
417
+ // Resolve embedding function (local nano vs remote API)
418
+ const { embedFn } = resolveEmbedFn(embeddingModel, generateEmbeddings, generateLocalEmbeddings);
419
+
326
420
  // Ingest files
327
421
  res.writeHead(200, {
328
422
  'Content-Type': 'application/x-ndjson',
@@ -355,10 +449,10 @@ async function handleRAGRequest(req, res, context) {
355
449
  } else {
356
450
  content = fs.readFileSync(file.path, 'utf8');
357
451
  }
358
- totalSize += Buffer.byteLength(content, 'utf8');
452
+ const contentSize = Buffer.byteLength(content, 'utf8');
359
453
 
360
454
  // Stage: chunking
361
- const chunks = chunkText(content);
455
+ const chunks = normalizeChunks(content);
362
456
  res.write(JSON.stringify({
363
457
  type: 'progress',
364
458
  stage: 'chunking',
@@ -368,7 +462,18 @@ async function handleRAGRequest(req, res, context) {
368
462
  fileCount: files.length
369
463
  }) + '\n');
370
464
 
465
+ if (chunks.length === 0) {
466
+ res.write(JSON.stringify({
467
+ type: 'warning',
468
+ file: file.name,
469
+ warning: `No text content could be extracted from ${file.name}.`
470
+ }) + '\n');
471
+ continue;
472
+ }
473
+
371
474
  // Stage: embedding (per-chunk progress)
475
+ let persistedChunks = 0;
476
+ let lastEmbedError = null;
372
477
  for (let c = 0; c < chunks.length; c++) {
373
478
  try {
374
479
  res.write(JSON.stringify({
@@ -381,7 +486,7 @@ async function handleRAGRequest(req, res, context) {
381
486
  fileCount: files.length
382
487
  }) + '\n');
383
488
 
384
- const embedding = await generateEmbeddings(chunks[c], 'voyage-4-large');
489
+ const embedding = await embedFn([chunks[c]], { inputType: 'document' });
385
490
  const doc = {
386
491
  _id: crypto.randomUUID(),
387
492
  kbName,
@@ -391,8 +496,10 @@ async function handleRAGRequest(req, res, context) {
391
496
  createdAt: new Date()
392
497
  };
393
498
  await docsCollection.insertOne(doc);
499
+ persistedChunks++;
394
500
  totalChunks++;
395
501
  } catch (embedErr) {
502
+ lastEmbedError = embedErr;
396
503
  console.warn(`Failed to embed chunk from ${file.name}:`, embedErr.message);
397
504
  }
398
505
  }
@@ -406,7 +513,26 @@ async function handleRAGRequest(req, res, context) {
406
513
  fileCount: files.length
407
514
  }) + '\n');
408
515
 
409
- totalDocs++;
516
+ if (persistedChunks > 0) {
517
+ totalDocs++;
518
+ totalSize += contentSize;
519
+ } else {
520
+ const detail = lastEmbedError?.message ? ` ${lastEmbedError.message}` : '';
521
+ res.write(JSON.stringify({
522
+ type: 'warning',
523
+ file: file.name,
524
+ warning: `No chunks were stored for ${file.name}.${detail}`.trim()
525
+ }) + '\n');
526
+ }
527
+
528
+ if (persistedChunks > 0 && persistedChunks < chunks.length) {
529
+ res.write(JSON.stringify({
530
+ type: 'warning',
531
+ file: file.name,
532
+ warning: `Only ${persistedChunks}/${chunks.length} chunks were stored for ${file.name}.`
533
+ }) + '\n');
534
+ }
535
+
410
536
  try {
411
537
  fs.unlinkSync(file.path);
412
538
  } catch (e) {
@@ -414,18 +540,13 @@ async function handleRAGRequest(req, res, context) {
414
540
  }
415
541
  }
416
542
 
417
- // Update KB metadata (use $inc so size accumulates across uploads)
543
+ // Recompute live stats so counters stay accurate even when some files
544
+ // produce zero persisted chunks or partial embeddings succeed.
545
+ const liveStats = await computeKBStatsFromCollection(docsCollection);
418
546
  await kbsCollection.updateOne(
419
547
  { name: kbName },
420
548
  {
421
- $inc: {
422
- docCount: totalDocs,
423
- chunkCount: totalChunks,
424
- size: totalSize
425
- },
426
- $set: {
427
- updatedAt: new Date()
428
- }
549
+ $set: { ...liveStats, updatedAt: new Date() }
429
550
  }
430
551
  );
431
552
 
@@ -510,20 +631,8 @@ async function handleRAGRequest(req, res, context) {
510
631
  }
511
632
 
512
633
  // Compute live stats from docs collection (more accurate than stored metadata)
513
- const { collection: docsCollection } = await getMongoCollection(RAG_DB, `kb_${kbName}_docs`);
514
- const stats = await docsCollection.aggregate([
515
- { $group: {
516
- _id: null,
517
- totalSize: { $sum: { $strLenBytes: { $ifNull: ['$content', ''] } } },
518
- chunkCount: { $sum: 1 },
519
- files: { $addToSet: '$fileName' }
520
- }}
521
- ]).toArray();
522
-
523
- const liveStats = stats[0] || { totalSize: 0, chunkCount: 0, files: [] };
524
- kb.size = liveStats.totalSize;
525
- kb.chunkCount = liveStats.chunkCount;
526
- kb.docCount = liveStats.files.length;
634
+ const db = client.db(RAG_DB);
635
+ Object.assign(kb, await computeKBStats(db, kbName));
527
636
 
528
637
  client.close();
529
638
  res.writeHead(200, { 'Content-Type': 'application/json' });
@@ -645,11 +754,10 @@ async function handleRAGRequest(req, res, context) {
645
754
 
646
755
  await docsCollection.deleteOne({ _id: docId });
647
756
 
648
- // Update KB doc count
649
- const docCount = await docsCollection.countDocuments();
757
+ const liveStats = await computeKBStatsFromCollection(docsCollection);
650
758
  await kbsCollection.updateOne(
651
759
  { name: kbName },
652
- { $set: { chunkCount: docCount, updatedAt: new Date() } }
760
+ { $set: { ...liveStats, updatedAt: new Date() } }
653
761
  );
654
762
 
655
763
  kbClient.close();
@@ -670,7 +778,7 @@ async function handleRAGRequest(req, res, context) {
670
778
  req.on('data', chunk => { body += chunk; });
671
779
  req.on('end', async () => {
672
780
  try {
673
- const { text, kbName, title } = JSON.parse(body);
781
+ const { text, kbName, title, embeddingModel } = JSON.parse(body);
674
782
 
675
783
  if (!text || typeof text !== 'string' || !text.trim()) {
676
784
  res.writeHead(400, { 'Content-Type': 'application/json' });
@@ -700,17 +808,28 @@ async function handleRAGRequest(req, res, context) {
700
808
  'Cache-Control': 'no-cache'
701
809
  });
702
810
 
703
- const chunks = chunkText(text.trim());
811
+ const chunks = normalizeChunks(text.trim());
704
812
  const fileName = title && title.trim() ? title.trim().slice(0, 80) : `pasted-text-${Date.now()}`;
705
813
  const totalSize = Buffer.byteLength(text, 'utf8');
706
814
 
815
+ // Resolve embedding function (local nano vs remote API)
816
+ const { embedFn } = resolveEmbedFn(embeddingModel, generateEmbeddings, generateLocalEmbeddings);
817
+
707
818
  res.write(JSON.stringify({ type: 'progress', stage: 'chunking', current: chunks.length, total: chunks.length }) + '\n');
708
819
 
820
+ if (chunks.length === 0) {
821
+ res.write(JSON.stringify({ type: 'error', error: 'No text content could be chunked from the pasted text.' }) + '\n');
822
+ res.end();
823
+ kbClient.close();
824
+ return;
825
+ }
826
+
709
827
  let totalChunks = 0;
828
+ let lastEmbedError = null;
710
829
  for (let i = 0; i < chunks.length; i++) {
711
830
  res.write(JSON.stringify({ type: 'progress', stage: 'embedding', current: i + 1, total: chunks.length }) + '\n');
712
831
  try {
713
- const embedding = await generateEmbeddings(chunks[i], 'voyage-4-large');
832
+ const embedding = await embedFn([chunks[i]], { inputType: 'document' });
714
833
  const doc = {
715
834
  _id: crypto.randomUUID(),
716
835
  kbName,
@@ -722,15 +841,24 @@ async function handleRAGRequest(req, res, context) {
722
841
  await docsCollection.insertOne(doc);
723
842
  totalChunks++;
724
843
  } catch (embedErr) {
844
+ lastEmbedError = embedErr;
725
845
  console.warn(`Failed to embed chunk from pasted text:`, embedErr.message);
726
846
  }
727
847
  }
728
848
 
849
+ if (totalChunks === 0) {
850
+ const detail = lastEmbedError?.message ? ` ${lastEmbedError.message}` : '';
851
+ res.write(JSON.stringify({ type: 'error', error: `No chunks were stored for the pasted text.${detail}`.trim() }) + '\n');
852
+ res.end();
853
+ kbClient.close();
854
+ return;
855
+ }
856
+
857
+ const liveStats = await computeKBStatsFromCollection(docsCollection);
729
858
  await kbsCollection.updateOne(
730
859
  { name: kbName },
731
860
  {
732
- $inc: { docCount: 1, chunkCount: totalChunks, size: totalSize },
733
- $set: { updatedAt: new Date() }
861
+ $set: { ...liveStats, updatedAt: new Date() }
734
862
  }
735
863
  );
736
864
 
@@ -752,7 +880,7 @@ async function handleRAGRequest(req, res, context) {
752
880
  req.on('data', chunk => { body += chunk; });
753
881
  req.on('end', async () => {
754
882
  try {
755
- const { url, kbName } = JSON.parse(body);
883
+ const { url, kbName, embeddingModel } = JSON.parse(body);
756
884
 
757
885
  if (!url || typeof url !== 'string' || !/^https?:\/\//i.test(url)) {
758
886
  res.writeHead(400, { 'Content-Type': 'application/json' });
@@ -826,20 +954,31 @@ async function handleRAGRequest(req, res, context) {
826
954
  return;
827
955
  }
828
956
 
829
- const chunks = chunkText(content);
957
+ const chunks = normalizeChunks(content);
830
958
  // Build fileName from URL hostname + path, truncated to 80 chars
831
959
  let parsedUrl;
832
960
  try { parsedUrl = new URL(url); } catch { parsedUrl = { hostname: 'unknown', pathname: '' }; }
833
961
  const fileName = (parsedUrl.hostname + parsedUrl.pathname).slice(0, 80);
834
962
  const totalSize = Buffer.byteLength(content, 'utf8');
835
963
 
964
+ // Resolve embedding function (local nano vs remote API)
965
+ const { embedFn } = resolveEmbedFn(embeddingModel, generateEmbeddings, generateLocalEmbeddings);
966
+
836
967
  res.write(JSON.stringify({ type: 'progress', stage: 'chunking', current: chunks.length, total: chunks.length }) + '\n');
837
968
 
969
+ if (chunks.length === 0) {
970
+ res.write(JSON.stringify({ type: 'error', error: 'No text content could be chunked from the fetched URL.' }) + '\n');
971
+ res.end();
972
+ kbClient.close();
973
+ return;
974
+ }
975
+
838
976
  let totalChunks = 0;
977
+ let lastEmbedError = null;
839
978
  for (let i = 0; i < chunks.length; i++) {
840
979
  res.write(JSON.stringify({ type: 'progress', stage: 'embedding', current: i + 1, total: chunks.length }) + '\n');
841
980
  try {
842
- const embedding = await generateEmbeddings(chunks[i], 'voyage-4-large');
981
+ const embedding = await embedFn([chunks[i]], { inputType: 'document' });
843
982
  const doc = {
844
983
  _id: crypto.randomUUID(),
845
984
  kbName,
@@ -851,15 +990,24 @@ async function handleRAGRequest(req, res, context) {
851
990
  await docsCollection.insertOne(doc);
852
991
  totalChunks++;
853
992
  } catch (embedErr) {
993
+ lastEmbedError = embedErr;
854
994
  console.warn(`Failed to embed chunk from URL ${url}:`, embedErr.message);
855
995
  }
856
996
  }
857
997
 
998
+ if (totalChunks === 0) {
999
+ const detail = lastEmbedError?.message ? ` ${lastEmbedError.message}` : '';
1000
+ res.write(JSON.stringify({ type: 'error', error: `No chunks were stored for the fetched URL.${detail}`.trim() }) + '\n');
1001
+ res.end();
1002
+ kbClient.close();
1003
+ return;
1004
+ }
1005
+
1006
+ const liveStats = await computeKBStatsFromCollection(docsCollection);
858
1007
  await kbsCollection.updateOne(
859
1008
  { name: kbName },
860
1009
  {
861
- $inc: { docCount: 1, chunkCount: totalChunks, size: totalSize },
862
- $set: { updatedAt: new Date() }
1010
+ $set: { ...liveStats, updatedAt: new Date() }
863
1011
  }
864
1012
  );
865
1013
 
@@ -14,7 +14,7 @@ Really wrestled with this... if you have better ideas, please let me know.
14
14
  import json
15
15
  import sys
16
16
 
17
- BRIDGE_VERSION = "1.33.2"
17
+ BRIDGE_VERSION = "1.33.4"
18
18
  MODEL_NAME = "voyageai/voyage-4-nano"
19
19
 
20
20
  # Lazy-loaded on first embed request