voyageai-cli 1.22.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voyageai-cli",
3
- "version": "1.22.0",
3
+ "version": "1.23.0",
4
4
  "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
5
  "bin": {
6
6
  "vai": "./src/cli.js"
@@ -41,12 +41,14 @@
41
41
  },
42
42
  "dependencies": {
43
43
  "@clack/prompts": "^1.0.0",
44
+ "@modelcontextprotocol/sdk": "^1.26.0",
44
45
  "commander": "^12.0.0",
45
46
  "dotenv": "^17.2.3",
46
47
  "mongodb": "^6.0.0",
47
48
  "ora": "^9.1.0",
48
49
  "picocolors": "^1.1.1",
49
- "update-notifier": "^7.3.1"
50
+ "update-notifier": "^7.3.1",
51
+ "zod": "^4.3.6"
50
52
  },
51
53
  "devDependencies": {
52
54
  "playwright": "^1.58.1"
package/src/cli.js CHANGED
@@ -35,6 +35,8 @@ const { registerAbout } = require('./commands/about');
35
35
  const { register: registerDoctor } = require('./commands/doctor');
36
36
  const { register: registerQuickstart } = require('./commands/quickstart');
37
37
  const { registerBug } = require('./commands/bug');
38
+ const { registerChat } = require('./commands/chat');
39
+ const { registerMcpServer } = require('./commands/mcp-server');
38
40
  const { showBanner, showQuickStart, getVersion } = require('./lib/banner');
39
41
 
40
42
  const version = getVersion();
@@ -74,6 +76,8 @@ registerAbout(program);
74
76
  registerDoctor(program);
75
77
  registerQuickstart(program);
76
78
  registerBug(program);
79
+ registerChat(program);
80
+ registerMcpServer(program);
77
81
 
78
82
  // Append disclaimer to all help output
79
83
  program.addHelpText('after', `
@@ -0,0 +1,503 @@
1
+ 'use strict';
2
+
3
+ const readline = require('readline');
4
+ const { createLLMProvider, resolveLLMConfig } = require('../lib/llm');
5
+ const { ChatHistory } = require('../lib/history');
6
+ const { chatTurn } = require('../lib/chat');
7
+ const { loadProject } = require('../lib/project');
8
+ const { getMongoCollection } = require('../lib/mongo');
9
+ const { setConfigValue } = require('../lib/config');
10
+ const { runWizard } = require('../lib/wizard');
11
+ const { createCLIRenderer } = require('../lib/wizard-cli');
12
+ const { chatSetupSteps } = require('../lib/wizard-steps-chat');
13
+ const ui = require('../lib/ui');
14
+ const pc = require('picocolors');
15
+ const fs = require('fs');
16
+
17
/**
 * Register the `chat` command on the CLI program.
 *
 * `--max-context-docs` and `--max-turns` deliberately have NO commander-level
 * default. runChat resolves them as: CLI flag, then the project's `chat`
 * config, then the built-in default (5 / 20). A commander default is always
 * present on `opts`, so it would permanently mask the project config.
 *
 * @param {import('commander').Command} program - Root commander program.
 */
function registerChat(program) {
  // Commander hands option values over as strings; parse them as base-10 ints.
  const toInt = (value) => parseInt(value, 10);

  program
    .command('chat')
    .description('RAG-powered conversational interface — chat with your knowledge base')
    .option('--db <name>', 'MongoDB database name')
    .option('--collection <name>', 'Collection with embedded documents')
    .option('--session <id>', 'Resume a previous chat session')
    .option('--llm-provider <name>', 'LLM provider: anthropic, openai, ollama')
    .option('--llm-model <name>', 'Specific LLM model to use')
    .option('--llm-api-key <key>', 'LLM API key')
    .option('--llm-base-url <url>', 'LLM API base URL (Ollama)')
    .option('--max-context-docs <n>', 'Max retrieved documents for context (default: 5)', toInt)
    .option('--max-turns <n>', 'Max conversation turns before truncation (default: 20)', toInt)
    .option('--no-history', 'Disable MongoDB persistence (in-memory only)')
    .option('--no-rerank', 'Skip reranking step')
    .option('--no-stream', 'Wait for complete response instead of streaming')
    .option('--system-prompt <text>', 'Override the system prompt')
    .option('--text-field <name>', 'Document text field name', 'text')
    .option('--filter <json>', 'MongoDB pre-filter for vector search')
    .option('--estimate', 'Show estimated per-turn cost breakdown and exit')
    .option('--json', 'Output JSON per turn (for scripting)')
    .option('-q, --quiet', 'Suppress decorative output')
    .action(async (opts) => {
      try {
        await runChat(opts);
      } catch (err) {
        // Any failure while setting up or running the chat is fatal for the command.
        console.error(ui.error(err.message));
        process.exit(1);
      }
    });
}
52
+
53
/**
 * Run the RAG chat REPL (interactive, or one JSON object per turn with --json).
 *
 * Steps: resolve db/collection and chat settings, resolve the LLM config
 * (running the setup wizard when no provider is configured), optionally print
 * a cost estimate and return, run preflight checks, open the optional
 * MongoDB-backed history, then read lines from stdin until /quit, EOF or
 * Ctrl+C.
 *
 * Input lines are processed strictly one at a time. readline emits 'line'
 * (and 'close') without waiting for an async listener, so with piped input
 * such as "question\n/quit\n" an unserialized handler would exit the process
 * while the first turn is still awaiting its answer.
 *
 * @param {object} opts - Parsed commander options of `vai chat`.
 * @returns {Promise<void>} Resolves once the REPL is listening (or after the
 *   --estimate output); the process itself exits from the REPL handlers.
 */
async function runChat(opts) {
  const { config: proj } = loadProject();
  const chatConf = proj.chat || {};

  // `||` on purpose: 0 / NaN (unparseable number) fall through to the next source.
  const db = opts.db || proj.db;
  const collection = opts.collection || proj.collection;
  const maxDocs = opts.maxContextDocs || chatConf.maxContextDocs || 5;
  const maxTurns = opts.maxTurns || chatConf.maxConversationTurns || 20;
  const textField = opts.textField || 'text';
  const doRerank = opts.rerank !== false;
  const doStream = opts.stream !== false;
  const systemPrompt = opts.systemPrompt || chatConf.systemPrompt;
  const decorate = !opts.quiet && !opts.json;

  // Validate DB + collection
  if (!db || !collection) {
    console.error(ui.error('Database and collection required.'));
    console.error('');
    console.error(' Use --db and --collection, or configure .vai.json:');
    console.error(' vai init');
    console.error('');
    process.exit(1);
  }

  // Resolve LLM config — run interactive setup if missing
  let llmConfig = resolveLLMConfig(opts);
  if (!llmConfig.provider) {
    if (opts.json) {
      // Non-interactive mode — can't run wizard
      console.error(JSON.stringify({ error: 'No LLM provider configured. Run vai chat interactively to set up.' }));
      process.exit(1);
    }

    const { answers, cancelled } = await runWizard({
      steps: chatSetupSteps,
      config: llmConfig,
      renderer: createCLIRenderer({
        title: 'vai chat — LLM Setup',
        doneMessage: 'Configuration saved. Starting chat...',
      }),
    });

    if (cancelled) {
      process.exit(0);
    }

    // Persist to ~/.vai/config.json
    if (answers.provider) setConfigValue('llmProvider', answers.provider);
    if (answers.apiKey) setConfigValue('llmApiKey', answers.apiKey);
    if (answers.model) setConfigValue('llmModel', answers.model);
    if (answers.ollamaBaseUrl && answers.ollamaBaseUrl !== 'http://localhost:11434') {
      setConfigValue('llmBaseUrl', answers.ollamaBaseUrl);
    }

    // Re-resolve with new config
    llmConfig = resolveLLMConfig(opts);
  }

  // --estimate: show per-turn cost breakdown and exit
  if (opts.estimate) {
    const { estimateChatCost, formatChatCostBreakdown } = require('../lib/cost');
    const breakdown = estimateChatCost({
      query: 'How does authentication work?', // sample question
      contextDocs: maxDocs,
      embeddingModel: proj.model || 'voyage-4-large',
      rerankModel: doRerank ? 'rerank-2.5' : null,
      llmProvider: llmConfig.provider,
      llmModel: llmConfig.model,
      historyTurns: 0,
    });
    if (opts.json) {
      console.log(JSON.stringify(breakdown, null, 2));
    } else {
      console.log('');
      console.log(formatChatCostBreakdown(breakdown));
      console.log('');
    }
    return;
  }

  const llm = createLLMProvider(opts);

  // Preflight: verify the RAG pipeline is ready (skipped in JSON mode)
  if (!opts.json) {
    const { runPreflight, formatPreflight, waitForIndex } = require('../lib/preflight');
    const { checks, ready } = await runPreflight({
      db, collection,
      field: proj.field || 'embedding',
      llmConfig,
      textField,
    });

    console.log('');
    console.log(formatPreflight(checks));
    console.log('');

    if (!ready) {
      // Check if the only blocker is an index that's building
      const indexCheck = checks.find((c) => c.id === 'vectorIndex');
      const otherFailures = checks.filter((c) => !c.ok && c.id !== 'vectorIndex');

      if (indexCheck?.building && otherFailures.length === 0) {
        // Wait for it with a spinner
        const p = require('@clack/prompts');
        const spinner = p.spinner();
        spinner.start(`Index '${indexCheck.indexName}' is building — waiting for it to be ready...`);

        const result = await waitForIndex({
          db, collection,
          indexName: indexCheck.indexName,
          timeoutMs: 300000, // 5 minutes
        });

        if (result.ready) {
          const secs = Math.round(result.elapsed / 1000);
          spinner.stop(`Index ready (${secs}s)`);
        } else {
          spinner.stop(`Index not ready after ${Math.round(result.elapsed / 1000)}s (status: ${result.status})`);
          console.log('');
          console.log(pc.dim(' The index may need more time. Try again in a few minutes.'));
          console.log('');
          process.exit(1);
        }
      } else if (otherFailures.length > 0) {
        process.exit(1);
      }
      // NOTE(review): when the vector index check is the only failure and it is
      // not "building", execution continues into the chat — confirm intended.
    }
  }

  // Initialize history (best-effort: chat still works in memory without it)
  let historyMongo = null;
  if (opts.history !== false) {
    let client;
    try {
      const historyCollection = chatConf.historyCollection ||
        process.env.VAI_CHAT_HISTORY || 'vai_chat_history';
      const conn = await getMongoCollection(db, historyCollection);
      client = conn.client;
      await ChatHistory.ensureIndexes(conn.collection);
      historyMongo = { client, collection: conn.collection };
    } catch (err) {
      // MongoDB persistence failure is non-fatal, but do not leak a client
      // that connected before a later step (e.g. index creation) failed.
      await cleanup({ client });
      historyMongo = null;
      if (decorate) {
        console.log(ui.warn(`Chat history unavailable (${err.message}). Continuing in-memory only.`));
      }
    }
  }

  const history = new ChatHistory({
    sessionId: opts.session || undefined,
    maxTurns,
    mongo: historyMongo,
  });

  // Load existing session if resuming
  if (opts.session) {
    const loaded = await history.load();
    if (!loaded && decorate) {
      console.log(ui.warn(`Session ${opts.session} not found. Starting new conversation.`));
    }
  }

  // Print header
  if (decorate) {
    console.log('');
    console.log(`${pc.bold('vai chat')} v${getVersion()}`);
    console.log(ui.label('Provider', `${llmConfig.provider} (${llmConfig.model})`));
    console.log(ui.label('Knowledge base', `${db}.${collection}`));
    console.log(ui.label('Session', pc.dim(history.sessionId)));
    console.log(pc.dim('Type /help for commands, /quit to exit.'));
    console.log('');
  }

  // Start REPL
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    prompt: pc.green('> '),
    terminal: !opts.json,
  });

  let inputClosed = false; // set once readline reports end of input
  let exiting = false;     // set once shutdown has begun

  // Never prompt on a closed interface or while the process is exiting.
  const promptIfOpen = () => {
    if (!inputClosed && !exiting) rl.prompt();
  };

  // Close the history connection (if any) and exit; safe to call repeatedly.
  const shutdown = async () => {
    if (exiting) return;
    exiting = true;
    await cleanup(historyMongo);
    process.exit(0);
  };

  const turnOptions = (stream) => ({
    maxDocs, rerank: doRerank, stream, systemPrompt, textField, filter: opts.filter,
  });

  // JSON mode — collect the whole turn, then print a single JSON object.
  const runJsonTurn = async (input, turnNum) => {
    let fullResponse = '';
    let sources = [];
    let metadata = {};

    for await (const event of chatTurn({
      query: input, db, collection, llm, history,
      opts: turnOptions(false),
    })) {
      if (event.type === 'chunk') fullResponse += event.data;
      if (event.type === 'done') {
        sources = event.data.sources;
        metadata = event.data.metadata;
      }
    }

    console.log(JSON.stringify({
      sessionId: history.sessionId,
      turn: turnNum,
      query: input,
      response: fullResponse,
      sources,
      metadata,
    }));
  };

  // Interactive mode — write chunks as they arrive, then list the sources.
  const runInteractiveTurn = async (input) => {
    let retrievalShown = false;

    for await (const event of chatTurn({
      query: input, db, collection, llm, history,
      opts: turnOptions(doStream),
    })) {
      if (event.type === 'retrieval' && !opts.quiet) {
        const { docs, timeMs } = event.data;
        if (!retrievalShown) {
          console.log(pc.dim(` [${docs.length} docs retrieved in ${timeMs}ms]`));
          console.log('');
          retrievalShown = true;
        }
      }

      if (event.type === 'chunk') {
        process.stdout.write(event.data);
      }

      if (event.type === 'done') {
        console.log(''); // End the streamed response line

        // Show sources
        const { sources } = event.data;
        if (sources.length > 0 && chatConf.showSources !== false) {
          console.log('');
          console.log(pc.dim('Sources:'));
          for (let i = 0; i < sources.length; i++) {
            const s = sources[i];
            console.log(pc.dim(` [${i + 1}] ${s.source} (relevance: ${s.score?.toFixed(2) || 'N/A'})`));
          }
        }
        console.log('');
      }
    }
  };

  // Process one input line: blank, slash command, or a chat turn.
  const handleLine = async (line) => {
    if (exiting) return;

    const input = line.trim();
    if (!input) {
      promptIfOpen();
      return;
    }

    try {
      if (input.startsWith('/')) {
        const handled = await handleSlashCommand(input, { history, opts, db, collection, llm, rl, historyMongo });
        if (handled === 'quit') {
          await shutdown();
          return;
        }
      } else {
        const turnNum = Math.floor(history.turns.length / 2) + 1;
        if (opts.json) {
          await runJsonTurn(input, turnNum);
        } else {
          await runInteractiveTurn(input);
        }
      }
    } catch (err) {
      // A failed command or turn must not take the REPL down.
      console.error('');
      console.error(ui.error(err.message));
      console.error('');
    }

    promptIfOpen();
  };

  // Serialize all work: each task starts only after the previous one settled.
  let queue = Promise.resolve();
  const enqueue = (task) => {
    queue = queue.then(task).catch((err) => {
      console.error(ui.error(err.message));
    });
  };

  rl.on('line', (line) => {
    enqueue(() => handleLine(line));
  });

  // End of input (EOF / Ctrl+D): finish queued lines first, then exit.
  rl.on('close', () => {
    inputClosed = true;
    enqueue(shutdown);
  });

  // Handle Ctrl+C gracefully — exit right away, without draining the queue.
  rl.on('SIGINT', () => {
    console.log('');
    void shutdown();
  });

  rl.prompt();
}
337
+
338
/**
 * Handle a slash command typed into the chat REPL.
 *
 * @param {string} input - Trimmed input line that starts with '/'.
 * @param {object} ctx - REPL context. This function reads `history`, `llm`
 *   and `historyMongo`; other properties are ignored.
 * @returns {Promise<'quit'|true>} 'quit' when the REPL should exit, otherwise
 *   true. Unknown commands print a hint and also return true.
 */
async function handleSlashCommand(input, ctx) {
  const { history, llm, historyMongo } = ctx;
  const parts = input.split(/\s+/);
  const cmd = parts[0].toLowerCase();

  switch (cmd) {
    case '/quit':
    case '/exit':
    case '/q':
      return 'quit';

    case '/help':
      console.log('');
      console.log(pc.bold('Commands:'));
      console.log(' /sources Show sources from last response');
      console.log(' /session Display current session ID');
      console.log(' /history List recent chat sessions');
      console.log(' /context Show retrieved context from last query');
      console.log(' /clear Clear conversation history');
      console.log(' /model Show or switch LLM model (/model <name>)');
      console.log(' /export Export conversation (markdown or json)');
      console.log(' /help Show this help');
      console.log(' /quit Exit chat');
      console.log('');
      return true;

    case '/sources': {
      const sources = history.getLastSources();
      // An empty list is reported the same way as "nothing yet".
      if (!sources || sources.length === 0) {
        console.log(pc.dim(' No sources available yet.'));
      } else {
        console.log('');
        for (let i = 0; i < sources.length; i++) {
          console.log(` [${i + 1}] ${sources[i].source} (${sources[i].score?.toFixed(2) || 'N/A'})`);
        }
        console.log('');
      }
      return true;
    }

    case '/session':
      console.log(` Session: ${history.sessionId}`);
      // history.turns is halved for display — presumably one user and one
      // assistant entry per turn; confirm against ChatHistory.
      console.log(` Turns: ${Math.floor(history.turns.length / 2)}`);
      return true;

    case '/context': {
      const docs = history.getLastContext();
      if (!docs || docs.length === 0) {
        console.log(pc.dim(' No context available yet.'));
      } else {
        console.log('');
        for (const doc of docs) {
          console.log(pc.bold(` [${doc.source}]`));
          const preview = (doc.text || '').substring(0, 300);
          console.log(` ${preview}${doc.text?.length > 300 ? '...' : ''}`);
          console.log('');
        }
      }
      return true;
    }

    case '/history': {
      if (!historyMongo) {
        console.log(pc.dim(' History persistence is disabled (--no-history or no MongoDB).'));
        return true;
      }
      try {
        const { listSessions } = require('../lib/history');
        const sessions = await listSessions(historyMongo.collection, 10);
        if (sessions.length === 0) {
          console.log(pc.dim(' No previous sessions found.'));
        } else {
          console.log('');
          for (const s of sessions) {
            const active = s.sessionId === history.sessionId ? pc.green(' ← current') : '';
            const date = s.lastActivity ? new Date(s.lastActivity).toLocaleString() : 'unknown';
            const preview = (s.firstMessage || '').substring(0, 60);
            console.log(` ${pc.bold(s.sessionId.slice(0, 8))} ${pc.dim(date)} ${s.turnCount} turns${active}`);
            if (preview) console.log(` ${pc.dim(preview)}${s.firstMessage?.length > 60 ? '...' : ''}`);
          }
          console.log('');
          console.log(pc.dim(' Resume with: vai chat --session <id>'));
        }
      } catch (err) {
        console.log(pc.dim(` Error listing sessions: ${err.message}`));
      }
      return true;
    }

    case '/clear':
      history.clear();
      console.log(pc.dim(' Conversation cleared.'));
      return true;

    case '/model':
    case '/models': {
      if (parts.length > 1) {
        llm.model = parts[1];
        console.log(` Model switched to: ${parts[1]}`);
      } else {
        console.log(` Current model: ${pc.bold(llm.model)}`);
        console.log(` Provider: ${llm.name}`);
        try {
          const { listModels } = require('../lib/llm');
          const models = await listModels(llm.name);
          if (models.length > 0) {
            console.log('');
            console.log(` Available models:`);
            for (const m of models) {
              const current = m.id === llm.model ? pc.green(' ← current') : '';
              let info = m.name || m.id;
              if (m.size) info += pc.dim(` (${m.size})`);
              if (m.parameterSize) info += pc.dim(` [${m.parameterSize}]`);
              if (m.context) info += pc.dim(` ctx:${m.context}`);
              console.log(` ${info}${current}`);
            }
            console.log('');
            console.log(pc.dim(' Switch with: /model <name>'));
          }
        } catch {
          // Listing models is a best-effort extra; the current model was already shown.
        }
      }
      return true;
    }

    case '/export': {
      const format = (parts[1] || 'md').toLowerCase();
      const asJson = format === 'json';
      if (!asJson && format !== 'md' && format !== 'markdown') {
        // Reject typos instead of silently writing markdown.
        console.log(pc.dim(` Unknown export format: ${format}. Use /export md or /export json.`));
        return true;
      }

      const filename = `chat-${history.sessionId.slice(0, 8)}.${asJson ? 'json' : 'md'}`;
      try {
        const body = asJson
          ? JSON.stringify(history.exportJSON(), null, 2) + '\n'
          : history.exportMarkdown();
        fs.writeFileSync(filename, body);
        console.log(ui.success(`Exported to ${filename}`));
      } catch (err) {
        // e.g. unwritable working directory — report it and keep the REPL alive.
        console.error(ui.error(`Export failed: ${err.message}`));
      }
      return true;
    }

    default:
      console.log(pc.dim(` Unknown command: ${cmd}. Type /help for available commands.`));
      return true;
  }
}
487
+
488
/**
 * Close the MongoDB client held by a history handle, if there is one.
 * Errors raised while closing are deliberately ignored (best-effort teardown).
 *
 * @param {{ client?: { close: Function } } | null | undefined} mongo
 * @returns {Promise<void>}
 */
async function cleanup(mongo) {
  const client = mongo?.client;
  if (!client) {
    return;
  }

  try {
    await client.close();
  } catch {
    /* ignore */
  }
}
493
+
494
/**
 * Read the CLI version from the package manifest two directories up.
 *
 * @returns {string} The manifest's `version`, or '0.0.0' when it cannot be read.
 */
function getVersion() {
  let version;
  try {
    version = require('../../package.json').version;
  } catch {
    version = '0.0.0';
  }
  return version;
}
502
+
503
+ module.exports = { registerChat };
@@ -337,6 +337,76 @@ function registerDemo(program) {
337
337
  }
338
338
  }
339
339
 
340
+ // ── Step 6: Chat (optional) ──
341
+ const { resolveLLMConfig } = require('../lib/llm');
342
+ const llmConfig = resolveLLMConfig();
343
+ const hasLLM = !!llmConfig.provider;
344
+ const hasCollection = !skipPipeline;
345
+
346
+ if (hasLLM && hasCollection) {
347
+ stepHeader(skipPipeline ? 5 : 6, 'RAG Chat');
348
+ console.log(' Now the grand finale — let\'s chat with the documents we just stored.');
349
+ console.log(' This combines everything: embed the question → vector search → rerank → LLM answers.');
350
+ console.log('');
351
+ console.log(` Provider: ${pc.cyan(llmConfig.provider)} (${llmConfig.model})`);
352
+ console.log('');
353
+
354
+ await waitForEnter(noPause);
355
+
356
+ // Run a single non-interactive chat turn
357
+ const db = 'test';
358
+ const collection = 'demo_voyage_test';
359
+ const chatQuery = 'What cloud database options are available for AI applications?';
360
+
361
+ console.log(` ${pc.bold('Question:')} ${chatQuery}`);
362
+ console.log('');
363
+
364
+ // Run chat non-interactively by piping the question via stdin
365
+ const chatResult = spawnSync(process.execPath, [
366
+ CLI_PATH, 'chat',
367
+ '--db', db, '--collection', collection,
368
+ '--no-history', '--quiet',
369
+ ], {
370
+ input: chatQuery + '\n/quit\n',
371
+ stdio: ['pipe', 'inherit', 'inherit'],
372
+ env: process.env,
373
+ timeout: 60000,
374
+ });
375
+
376
+ ok = chatResult.status === 0;
377
+
378
+ if (!ok) {
379
+ console.log('');
380
+ console.log(pc.dim(' Chat step had an issue — this requires an LLM provider and'));
381
+ console.log(pc.dim(' the demo collection to still exist with a vector index.'));
382
+ }
383
+
384
+ console.log('');
385
+ console.log(' That\'s the complete RAG pipeline: documents → embeddings → vector search');
386
+ console.log(' → reranking → LLM generation — all in one tool.');
387
+
388
+ await waitForEnter(noPause);
389
+
390
+ } else {
391
+ // Show what they're missing
392
+ const stepNum = skipPipeline ? 5 : 6;
393
+ stepHeader(stepNum, 'RAG Chat (skipped)');
394
+ if (!hasLLM) {
395
+ console.log(pc.dim(' Skipping chat demo — no LLM provider configured.'));
396
+ console.log(pc.dim(' Set one up to try it:'));
397
+ console.log('');
398
+ console.log(` ${pc.cyan('vai config set llm-provider anthropic')}`);
399
+ console.log(` ${pc.cyan('vai config set llm-api-key YOUR_KEY')}`);
400
+ console.log('');
401
+ console.log(pc.dim(' Or use Ollama for free local inference:'));
402
+ console.log(` ${pc.cyan('vai config set llm-provider ollama')}`);
403
+ } else {
404
+ console.log(pc.dim(' Skipping chat demo — pipeline step was skipped (no MongoDB URI).'));
405
+ console.log(pc.dim(' Set MONGODB_URI to try the full flow including chat.'));
406
+ }
407
+ console.log('');
408
+ }
409
+
340
410
  // ── Done ──
341
411
  console.log('');
342
412
  console.log('─'.repeat(60));
@@ -347,6 +417,11 @@ function registerDemo(program) {
347
417
  console.log(` • Explore models: ${pc.cyan('vai models')}`);
348
418
  console.log(` • Configure: ${pc.cyan('vai config set api-key <your-key>')}`);
349
419
  console.log(` • Full pipeline: ${pc.cyan('vai store → vai index create → vai search')}`);
420
+ if (!hasLLM) {
421
+ console.log(` • Enable chat: ${pc.cyan('vai config set llm-provider anthropic')}`);
422
+ } else {
423
+ console.log(` • Chat: ${pc.cyan('vai chat --db myapp --collection knowledge')}`);
424
+ }
350
425
  console.log('');
351
426
  console.log(' Happy searching! 🚀');
352
427
  console.log('');
@@ -21,12 +21,22 @@ function registerEmbed(program) {
21
21
  .option('--no-truncation', 'Disable truncation')
22
22
  .option('--output-dtype <type>', 'Output data type: float, int8, uint8, binary, ubinary', 'float')
23
23
  .option('-o, --output-format <format>', 'Output format: json or array', 'json')
24
+ .option('--estimate', 'Show estimated tokens and cost without calling the API')
24
25
  .option('--json', 'Machine-readable JSON output')
25
26
  .option('-q, --quiet', 'Suppress non-essential output')
26
27
  .action(async (text, opts) => {
27
28
  try {
28
29
  const texts = await resolveTextInput(text, opts.file);
29
30
 
31
+ // --estimate: show cost comparison, optionally switch model
32
+ if (opts.estimate) {
33
+ const { estimateTokensForTexts, confirmOrSwitchModel } = require('../lib/cost');
34
+ const tokens = estimateTokensForTexts(texts);
35
+ const chosenModel = await confirmOrSwitchModel(tokens, opts.model, { json: opts.json });
36
+ if (!chosenModel) return; // cancelled
37
+ opts.model = chosenModel;
38
+ }
39
+
30
40
  const useColor = !opts.json;
31
41
  const useSpinner = useColor && !opts.quiet;
32
42
 
@@ -22,7 +22,7 @@ function registerIndex(program) {
22
22
  .requiredOption('--field <name>', 'Embedding field name')
23
23
  .option('-d, --dimensions <n>', 'Vector dimensions', (v) => parseInt(v, 10), getDefaultDimensions())
24
24
  .option('-s, --similarity <type>', 'Similarity function: cosine, dotProduct, euclidean', 'cosine')
25
- .option('-n, --index-name <name>', 'Index name', 'default')
25
+ .option('-n, --index-name <name>', 'Index name', 'vector_index')
26
26
  .option('--json', 'Machine-readable JSON output')
27
27
  .option('-q, --quiet', 'Suppress non-essential output')
28
28
  .action(async (opts) => {