voyageai-cli 1.20.6 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/CHANGELOG.md +142 -26
  2. package/README.md +130 -2
  3. package/package.json +3 -2
  4. package/src/cli.js +10 -0
  5. package/src/commands/bug.js +249 -0
  6. package/src/commands/eval.js +420 -10
  7. package/src/commands/generate.js +220 -0
  8. package/src/commands/playground.js +93 -0
  9. package/src/commands/purge.js +271 -0
  10. package/src/commands/refresh.js +322 -0
  11. package/src/commands/scaffold.js +217 -0
  12. package/src/lib/codegen.js +339 -0
  13. package/src/lib/explanations.js +155 -0
  14. package/src/lib/scaffold-structure.js +114 -0
  15. package/src/lib/templates/nextjs/README.md.tpl +106 -0
  16. package/src/lib/templates/nextjs/env.example.tpl +8 -0
  17. package/src/lib/templates/nextjs/layout.jsx.tpl +29 -0
  18. package/src/lib/templates/nextjs/lib-mongo.js.tpl +111 -0
  19. package/src/lib/templates/nextjs/lib-voyage.js.tpl +103 -0
  20. package/src/lib/templates/nextjs/package.json.tpl +33 -0
  21. package/src/lib/templates/nextjs/page-search.jsx.tpl +147 -0
  22. package/src/lib/templates/nextjs/route-ingest.js.tpl +114 -0
  23. package/src/lib/templates/nextjs/route-search.js.tpl +97 -0
  24. package/src/lib/templates/nextjs/theme.js.tpl +84 -0
  25. package/src/lib/templates/python/README.md.tpl +145 -0
  26. package/src/lib/templates/python/app.py.tpl +221 -0
  27. package/src/lib/templates/python/chunker.py.tpl +127 -0
  28. package/src/lib/templates/python/env.example.tpl +12 -0
  29. package/src/lib/templates/python/mongo_client.py.tpl +125 -0
  30. package/src/lib/templates/python/requirements.txt.tpl +10 -0
  31. package/src/lib/templates/python/voyage_client.py.tpl +124 -0
  32. package/src/lib/templates/vanilla/README.md.tpl +156 -0
  33. package/src/lib/templates/vanilla/client.js.tpl +103 -0
  34. package/src/lib/templates/vanilla/connection.js.tpl +126 -0
  35. package/src/lib/templates/vanilla/env.example.tpl +11 -0
  36. package/src/lib/templates/vanilla/ingest.js.tpl +231 -0
  37. package/src/lib/templates/vanilla/package.json.tpl +31 -0
  38. package/src/lib/templates/vanilla/retrieval.js.tpl +100 -0
  39. package/src/lib/templates/vanilla/search-api.js.tpl +175 -0
  40. package/src/lib/templates/vanilla/server.js.tpl +81 -0
  41. package/src/lib/zip.js +130 -0
  42. package/src/playground/index.html +708 -3
@@ -114,6 +114,99 @@ function createPlaygroundServer() {
114
114
  return;
115
115
  }
116
116
 
117
+ // API: Generate code
118
+ if (req.method === 'POST' && req.url === '/api/generate') {
119
+ let body = '';
120
+ req.on('data', chunk => { body += chunk; });
121
+ req.on('end', () => {
122
+ try {
123
+ const { target, component, config } = JSON.parse(body);
124
+ const codegen = require('../lib/codegen');
125
+
126
+ const templateMap = {
127
+ vanilla: { client: 'client.js', connection: 'connection.js', retrieval: 'retrieval.js', ingest: 'ingest.js', 'search-api': 'search-api.js' },
128
+ nextjs: { client: 'lib-voyage.js', connection: 'lib-mongo.js', retrieval: 'route-search.js', ingest: 'route-ingest.js', 'search-page': 'page-search.jsx' },
129
+ python: { client: 'voyage_client.py', connection: 'mongo_client.py', retrieval: 'app.py', ingest: 'chunker.py' },
130
+ };
131
+
132
+ const templateName = (templateMap[target] || {})[component];
133
+ if (!templateName) {
134
+ res.writeHead(400, { 'Content-Type': 'application/json' });
135
+ res.end(JSON.stringify({ error: `Unknown component: ${component}` }));
136
+ return;
137
+ }
138
+
139
+ const context = codegen.buildContext(config || {}, { projectName: 'my-app' });
140
+ const code = codegen.renderTemplate(target, templateName.replace(/\.(js|jsx|py)$/, ''), context);
141
+
142
+ res.writeHead(200, { 'Content-Type': 'application/json' });
143
+ res.end(JSON.stringify({ code, filename: templateName }));
144
+ } catch (err) {
145
+ res.writeHead(500, { 'Content-Type': 'application/json' });
146
+ res.end(JSON.stringify({ error: err.message }));
147
+ }
148
+ });
149
+ return;
150
+ }
151
+
152
+ // API: Scaffold project (returns ZIP for web mode)
153
+ if (req.method === 'POST' && req.url === '/api/scaffold') {
154
+ let body = '';
155
+ req.on('data', chunk => { body += chunk; });
156
+ req.on('end', () => {
157
+ try {
158
+ const { projectName, target, config } = JSON.parse(body);
159
+ const codegen = require('../lib/codegen');
160
+ const { PROJECT_STRUCTURE } = require('../lib/scaffold-structure');
161
+ const { createZip } = require('../lib/zip');
162
+
163
+ const structure = PROJECT_STRUCTURE[target];
164
+ if (!structure) {
165
+ res.writeHead(400, { 'Content-Type': 'application/json' });
166
+ res.end(JSON.stringify({ error: `Unknown target: ${target}` }));
167
+ return;
168
+ }
169
+
170
+ const context = codegen.buildContext(config || {}, { projectName: projectName || 'my-app' });
171
+ const files = [];
172
+
173
+ // Render template files
174
+ for (const file of structure.files) {
175
+ const content = codegen.renderTemplate(target, file.template.replace(/\.(js|jsx|py|json|md|txt)$/, ''), context);
176
+ files.push({
177
+ name: `${projectName}/${file.output}`,
178
+ content,
179
+ });
180
+ }
181
+
182
+ // Add extra static files
183
+ if (structure.extraFiles) {
184
+ for (const file of structure.extraFiles) {
185
+ const content = typeof file.content === 'function' ? file.content(context) : file.content;
186
+ files.push({
187
+ name: `${projectName}/${file.output}`,
188
+ content,
189
+ });
190
+ }
191
+ }
192
+
193
+ // Create ZIP
194
+ const zipBuffer = createZip(files);
195
+
196
+ res.writeHead(200, {
197
+ 'Content-Type': 'application/zip',
198
+ 'Content-Disposition': `attachment; filename="${projectName}.zip"`,
199
+ 'Content-Length': zipBuffer.length,
200
+ });
201
+ res.end(zipBuffer);
202
+ } catch (err) {
203
+ res.writeHead(500, { 'Content-Type': 'application/json' });
204
+ res.end(JSON.stringify({ error: err.message }));
205
+ }
206
+ });
207
+ return;
208
+ }
209
+
117
210
  // API: Concepts (from vai explain)
118
211
  if (req.method === 'GET' && req.url === '/api/concepts') {
119
212
  const { concepts } = require('../lib/explanations');
@@ -0,0 +1,271 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const p = require('@clack/prompts');
6
+ const { loadProject } = require('../lib/project');
7
+ const { connect, close } = require('../lib/mongo');
8
+ const ui = require('../lib/ui');
9
+
10
/**
 * Build a MongoDB filter from the provided purge criteria.
 *
 * Every criterion that is present contributes one condition. A single
 * condition is returned as-is; several are combined with `$and`.
 *
 * @param {object} options
 * @param {string} [options.source] - Glob-like pattern matched against
 *   `metadata.source`: `*` matches any run of characters, `?` matches one
 *   character, everything else is matched literally. The resulting regex is
 *   not anchored, so the pattern may match anywhere inside the source value.
 * @param {string} [options.before] - Date string; selects documents whose
 *   `_embeddedAt` is earlier than this date.
 * @param {string} [options.model] - Exact value of the `_model` field.
 * @param {string} [options.filter] - Raw MongoDB filter, as JSON object text.
 * @returns {object} `{}` when no criteria were given, otherwise the filter.
 * @throws {Error} If `before` is not a parseable date, or `filter` is not
 *   valid JSON describing an object.
 */
function buildFilter(options) {
  const conditions = [];

  // Filter by source pattern (glob-like)
  if (options.source) {
    // Translate the glob wildcards and escape every other regex
    // metacharacter, so paths such as "c++/notes(1).md" are matched literally
    // instead of being interpreted as regex syntax.
    const pattern = options.source.replace(/[.*+?^${}()|[\]\\]/g, (ch) => {
      if (ch === '*') return '.*';
      if (ch === '?') return '.';
      return `\\${ch}`;
    });
    conditions.push({ 'metadata.source': { $regex: pattern } });
  }

  // Filter by embedded date
  if (options.before) {
    const date = new Date(options.before);
    if (Number.isNaN(date.getTime())) {
      throw new Error(`Invalid date format: ${options.before}`);
    }
    conditions.push({ _embeddedAt: { $lt: date } });
  }

  // Filter by model
  if (options.model) {
    conditions.push({ _model: options.model });
  }

  // Raw MongoDB filter
  if (options.filter) {
    let rawFilter;
    try {
      rawFilter = JSON.parse(options.filter);
    } catch (err) {
      throw new Error(`Invalid JSON filter: ${err.message}`);
    }
    // `null`, arrays and scalars are valid JSON but not valid query documents.
    if (rawFilter === null || typeof rawFilter !== 'object' || Array.isArray(rawFilter)) {
      throw new Error('Invalid JSON filter: expected a JSON object');
    }
    conditions.push(rawFilter);
  }

  // Combine conditions with $and
  if (conditions.length === 0) {
    return {};
  }
  if (conditions.length === 1) {
    return conditions[0];
  }
  return { $and: conditions };
}
59
+
60
/**
 * Find documents whose source file no longer exists on disk.
 *
 * Only `_id` and `metadata.source` are fetched: the check needs nothing else,
 * and pulling complete documents would load every stored field into memory.
 *
 * @param {object} collection - Collection handle; must provide
 *   `find(filter, options)` returning a cursor with `toArray()`.
 * @param {string} baseDir - Directory that relative source paths are resolved
 *   against.
 * @returns {Promise<Array>} `_id` values of the documents whose source path
 *   does not exist.
 */
async function findStaleDocuments(collection, baseDir) {
  const docs = await collection
    .find(
      { 'metadata.source': { $exists: true } },
      { projection: { 'metadata.source': 1 } },
    )
    .toArray();
  const staleIds = [];

  for (const doc of docs) {
    const source = doc.metadata?.source;
    // A missing, empty, or non-string source cannot be mapped to a file, so
    // the document is never reported as stale on that basis.
    if (typeof source !== 'string' || source === '') {
      continue;
    }

    // Resolve relative to baseDir or treat as absolute
    const filePath = path.isAbsolute(source) ? source : path.join(baseDir, source);
    if (!fs.existsSync(filePath)) {
      staleIds.push(doc._id);
    }
  }

  return staleIds;
}
80
+
81
/**
 * Format a sample of documents for display, one bullet line per document.
 *
 * @param {Array<object>} docs - Documents to describe.
 * @param {number} [limit=5] - Maximum number of documents to include.
 * @returns {string} Newline-joined lines; an empty string for an empty list.
 */
function formatSample(docs, limit = 5) {
  return docs
    .slice(0, limit)
    .map((doc) => {
      const source = doc.metadata?.source || doc._id?.toString() || 'unknown';
      const model = doc._model || 'unknown';

      // toISOString() throws on an invalid Date; a malformed stored value
      // must not abort the preview, so it is reported as "unknown" instead.
      let date = 'unknown';
      if (doc._embeddedAt) {
        const embeddedAt = new Date(doc._embeddedAt);
        if (!Number.isNaN(embeddedAt.getTime())) {
          date = embeddedAt.toISOString().split('T')[0];
        }
      }

      return ` • ${source} (model: ${model}, date: ${date})`;
    })
    .join('\n');
}
93
+
94
/**
 * Execute the purge command: delete documents from the embeddings collection
 * that match the supplied criteria.
 *
 * Flow: validate criteria, connect, resolve the filter (optionally restricted
 * to documents whose source file is missing), show a preview, confirm, delete.
 *
 * @param {object} [options]
 * @param {string} [options.db] - Database name; falls back to the project
 *   config, then `VAI_DB`, then `'vai'`.
 * @param {string} [options.collection] - Collection name; falls back to the
 *   project config, then `VAI_COLLECTION`, then `'embeddings'`.
 * @param {string} [options.source] - See `buildFilter`.
 * @param {string} [options.before] - See `buildFilter`.
 * @param {string} [options.model] - See `buildFilter`.
 * @param {string} [options.filter] - See `buildFilter`.
 * @param {boolean} [options.stale] - Only consider documents whose source
 *   file no longer exists. When combined with other criteria, a document must
 *   satisfy both.
 * @param {boolean} [options.force] - Skip the confirmation prompt.
 * @param {boolean} [options.dryRun] - Report what would be deleted, then stop.
 * @param {boolean} [options.json] - Print JSON instead of interactive output.
 *   NOTE(review): JSON mode never prompts, so it deletes without `--force`.
 * @param {boolean} [options.quiet] - Suppress intro/step/info output.
 * @returns {Promise<object>} Result object with `success` plus one of
 *   `deleted`, `count`, `dryRun`, `cancelled` or `error`. Failures are
 *   reported through the result rather than thrown.
 */
async function purge(options = {}) {
  // Number of documents fetched and listed in the preview.
  const SAMPLE_SIZE = 5;
  const quiet = options.quiet || options.json;

  // Load project config
  const project = loadProject();
  const db = options.db || project.db || process.env.VAI_DB || 'vai';
  const collectionName = options.collection || project.collection || process.env.VAI_COLLECTION || 'embeddings';

  if (!quiet) {
    p.intro(ui.title('vai purge'));
  }

  const hasCriteria = Boolean(options.source || options.before || options.model || options.filter);

  // Validate that at least one filter is provided
  if (!hasCriteria && !options.stale) {
    if (options.json) {
      console.log(JSON.stringify({ error: 'No filter criteria provided. Use --source, --before, --model, --filter, or --stale.' }));
    } else {
      p.log.error('No filter criteria provided.');
      p.log.info('Use --source, --before, --model, --filter, or --stale to specify what to purge.');
    }
    return { success: false, error: 'No filter criteria' };
  }

  let client;
  try {
    // Build the criteria filter first so malformed input fails before a
    // connection is opened.
    const criteriaFilter = hasCriteria ? buildFilter(options) : {};

    // Connect to MongoDB
    if (!quiet) {
      p.log.step(`Connecting to database: ${db}`);
    }
    client = await connect(db);
    const collection = client.db(db).collection(collectionName);

    let filter = criteriaFilter;
    // Known up front only when --stale is the sole criterion.
    let knownCount = null;

    if (options.stale) {
      // Find documents with stale source files
      if (!quiet) {
        p.log.step('Scanning for stale documents (source files that no longer exist)...');
      }
      const baseDir = project.root || process.cwd();
      const staleIds = await findStaleDocuments(collection, baseDir);

      if (staleIds.length === 0) {
        if (options.json) {
          console.log(JSON.stringify({ success: true, count: 0, message: 'No stale documents found' }));
        } else {
          p.log.success('No stale documents found.');
          p.outro('Nothing to purge.');
        }
        return { success: true, count: 0 };
      }

      const staleFilter = { _id: { $in: staleIds } };
      if (hasCriteria) {
        // Honour the other criteria too: a document must be stale AND match
        // them, otherwise "--stale --source x" would delete every stale doc.
        filter = { $and: [staleFilter, criteriaFilter] };
      } else {
        filter = staleFilter;
        knownCount = staleIds.length;
      }
    }

    // Count matching documents
    const count = knownCount ?? await collection.countDocuments(filter);

    if (count === 0) {
      if (options.json) {
        console.log(JSON.stringify({ success: true, count: 0, message: 'No matching documents found' }));
      } else {
        p.log.success('No matching documents found.');
        p.outro('Nothing to purge.');
      }
      return { success: true, count: 0 };
    }

    // Get sample for display
    const sampleDocs = await collection.find(filter).limit(SAMPLE_SIZE).toArray();

    if (options.json) {
      if (options.dryRun) {
        console.log(JSON.stringify({
          dryRun: true,
          count,
          sample: sampleDocs.map((d) => ({
            id: d._id?.toString(),
            source: d.metadata?.source,
            model: d._model,
            embeddedAt: d._embeddedAt,
          })),
        }));
        return { success: true, dryRun: true, count };
      }
    } else {
      // Show what will be deleted
      p.log.warn(`Found ${count} document${count === 1 ? '' : 's'} matching criteria:`);
      console.log(formatSample(sampleDocs));
      if (count > SAMPLE_SIZE) {
        console.log(` ... and ${count - SAMPLE_SIZE} more`);
      }
      console.log();
    }

    // Dry run - stop here
    if (options.dryRun) {
      if (!quiet) {
        p.log.info('Dry run - no documents deleted.');
        p.outro(`Would delete ${count} document${count === 1 ? '' : 's'}.`);
      }
      return { success: true, dryRun: true, count };
    }

    // Confirm unless --force (JSON mode cannot prompt and proceeds directly)
    if (!options.force && !options.json) {
      const confirmed = await p.confirm({
        message: `Delete ${count} document${count === 1 ? '' : 's'}? This cannot be undone.`,
        initialValue: false,
      });

      if (p.isCancel(confirmed) || !confirmed) {
        p.log.info('Purge cancelled.');
        p.outro('No documents deleted.');
        return { success: false, cancelled: true };
      }
    }

    // Delete documents
    if (!quiet) {
      p.log.step('Deleting documents...');
    }

    const result = await collection.deleteMany(filter);
    const deleted = result.deletedCount;

    if (options.json) {
      console.log(JSON.stringify({ success: true, deleted }));
    } else {
      p.log.success(`Deleted ${deleted} document${deleted === 1 ? '' : 's'}.`);
      p.outro('Purge complete.');
    }

    return { success: true, deleted };
  } catch (err) {
    if (options.json) {
      console.log(JSON.stringify({ error: err.message }));
    } else {
      p.log.error(`Purge failed: ${err.message}`);
    }
    return { success: false, error: err.message };
  } finally {
    // Only close when a connection was actually opened.
    if (client) {
      await close();
    }
  }
}
249
+
250
/**
 * Attach the `purge` subcommand, its options and its action handler to the
 * given Commander program.
 *
 * @param {object} program - Commander program (or command) to extend.
 */
function register(program) {
  // [flags, description] pairs, in the order they appear in the help output.
  const optionSpecs = [
    ['--db <database>', 'Database name'],
    ['--collection <name>', 'Collection name'],
    ['--source <glob>', 'Filter by metadata.source pattern'],
    ['--before <date>', 'Filter by _embeddedAt before date (ISO 8601)'],
    ['-m, --model <model>', 'Filter by _model field'],
    ['--filter <json>', 'Raw MongoDB filter (JSON)'],
    ['--stale', 'Remove docs whose source files no longer exist'],
    ['--force', 'Skip confirmation prompt'],
    ['--dry-run', 'Show what would be deleted without acting'],
    ['--json', 'Machine-readable output'],
    ['-q, --quiet', 'Suppress non-essential output'],
  ];

  const described = program
    .command('purge')
    .description('Remove embeddings from MongoDB based on criteria');

  // Thread the chain through each option call, exactly as a fluent chain would.
  const configured = optionSpecs.reduce(
    (command, [flags, description]) => command.option(flags, description),
    described,
  );

  configured.action(purge);
}
270
+
271
+ module.exports = { register, purge, buildFilter };
@@ -0,0 +1,322 @@
1
+ 'use strict';
2
+
3
+ const p = require('@clack/prompts');
4
+ const { loadProject, saveProject } = require('../lib/project');
5
+ const { connect, close } = require('../lib/mongo');
6
+ const { generateEmbeddings } = require('../lib/api');
7
+ const { chunkText } = require('../lib/chunker');
8
+ const ui = require('../lib/ui');
9
+
10
/**
 * Embed one batch of documents and return copies carrying the new vectors.
 *
 * @param {Array<object>} docs - Documents to embed. The text is taken from
 *   `text`, falling back to `content`.
 * @param {function(string[]): Promise<Array>} embedder - Receives the texts
 *   and resolves to one embedding per text, in the same order.
 * @param {object} options
 * @param {string} options.field - Property name that receives the embedding.
 * @param {string} options.model - Value stored in `_model`.
 * @returns {Promise<Array<object>>} Shallow copies of `docs` with the
 *   embedding field, `_model` and `_embeddedAt` set.
 * @throws {Error} If the embedder does not return exactly one embedding per
 *   document.
 */
async function processBatch(docs, embedder, options) {
  // Nothing to embed: avoid a pointless embedder call.
  if (docs.length === 0) {
    return [];
  }

  // Accept `content` as well as `text`, matching what rechunkDocument reads.
  const texts = docs.map((d) => d.text || d.content || '');
  const embeddings = await embedder(texts);

  // A short or malformed response would otherwise write `undefined` vectors.
  if (!Array.isArray(embeddings) || embeddings.length !== docs.length) {
    const received = Array.isArray(embeddings) ? embeddings.length : 0;
    throw new Error(`Embedding count mismatch: expected ${docs.length}, received ${received}`);
  }

  return docs.map((doc, i) => ({
    ...doc,
    [options.field]: embeddings[i],
    _model: options.model,
    _embeddedAt: new Date(),
  }));
}
24
+
25
/**
 * Re-chunk a document's text into one new document per chunk.
 *
 * @param {object} doc - Source document; text is read from `text`, falling
 *   back to `content`.
 * @param {object} options
 * @param {string} [options.strategy='recursive'] - Chunking strategy.
 * @param {number} [options.chunkSize=512] - Chunk size.
 * @param {number} [options.overlap=50] - Chunk overlap; `0` is honoured.
 * @returns {Array<object>} `[doc]` (the same object) when the document has no
 *   text. Otherwise one copy of the document per chunk, without `_id`, with
 *   `text` replaced by the chunk text and chunk bookkeeping added.
 */
function rechunkDocument(doc, options) {
  const text = doc.text || doc.content || '';
  if (!text) return [doc];

  // `||` would turn an explicit overlap of 0 into 50, so only fall back when
  // no usable non-negative integer was supplied.
  const overlap = Number.isInteger(options.overlap) && options.overlap >= 0
    ? options.overlap
    : 50;

  const chunks = chunkText(text, {
    strategy: options.strategy || 'recursive',
    chunkSize: options.chunkSize || 512,
    overlap,
  });

  // Chunks are new documents: carrying the original `_id` over would give
  // every chunk the same key as the original (and as each other), which
  // makes inserting them fail with a duplicate-key error.
  const { _id: originalId, ...fields } = doc;

  return chunks.map((chunk, i) => ({
    ...fields,
    text: chunk.text,
    _chunkIndex: i,
    _chunkCount: chunks.length,
    metadata: {
      ...doc.metadata,
      chunkIndex: i,
      chunkCount: chunks.length,
      // Link back to the document this chunk was derived from.
      originalId: originalId?.toString(),
    },
  }));
}
51
+
52
/**
 * Execute the refresh command: re-embed documents with the configured model
 * and, optionally, re-chunk their text first.
 *
 * Without `rechunk`, each document is updated in place (embedding field,
 * `_model`, `_embeddedAt`). With `rechunk`, each document with text is
 * replaced by new chunk documents; the original is deleted only after every
 * one of its chunks has been written, and partially written chunk sets are
 * removed again so no document ends up stored twice.
 *
 * @param {object} [options]
 * @param {string} [options.db] - Database name; falls back to the project
 *   config, then `VAI_DB`, then `'vai'`.
 * @param {string} [options.collection] - Collection name; falls back to the
 *   project config, then `VAI_COLLECTION`, then `'embeddings'`.
 * @param {string} [options.field] - Embedding field name (default
 *   `'embedding'`).
 * @param {string} [options.model] - Embedding model (default
 *   `'voyage-3.5-lite'`).
 * @param {number} [options.dimensions] - Embedding dimensions.
 * @param {number} [options.batchSize] - Texts per embedding call (default 25).
 * @param {boolean} [options.rechunk] - Re-chunk text before embedding.
 * @param {string} [options.strategy] - Passed to `rechunkDocument`.
 * @param {number} [options.chunkSize] - Passed to `rechunkDocument`.
 * @param {number} [options.overlap] - Passed to `rechunkDocument`.
 * @param {string} [options.filter] - JSON object text selecting documents.
 * @param {boolean} [options.force] - Skip the confirmation prompt.
 * @param {boolean} [options.dryRun] - Print the plan and stop.
 * @param {boolean} [options.json] - Print JSON instead of interactive output;
 *   never prompts.
 * @param {boolean} [options.quiet] - Suppress intro/step/info output.
 * @returns {Promise<object>} Result object with `success` plus `processed`
 *   and `errors`, or `count`, `dryRun`, `cancelled` or `error`. In rechunk
 *   mode `processed`/`errors` count chunks rather than source documents.
 *   Failures are reported through the result rather than thrown.
 */
async function refresh(options = {}) {
  // Upper bound on the number of ids sent in a single `$in` delete.
  const DELETE_SLICE = 1000;
  const quiet = options.quiet || options.json;

  // Load project config
  const project = loadProject();
  const db = options.db || project.db || process.env.VAI_DB || 'vai';
  const collectionName = options.collection || project.collection || process.env.VAI_COLLECTION || 'embeddings';
  const field = options.field || project.field || 'embedding';
  const model = options.model || project.model || 'voyage-3.5-lite';
  const dimensions = options.dimensions || project.dimensions;
  // Rejects NaN, zero and negative values, which would break batching.
  const batchSize = options.batchSize > 0 ? options.batchSize : 25;

  if (!quiet) {
    p.intro(ui.title('vai refresh'));
  }

  let client;
  // Declared outside `try` so the error path can stop a running spinner.
  let spinner = null;
  try {
    // Connect to MongoDB
    if (!quiet) {
      p.log.step(`Connecting to database: ${db}`);
    }
    client = await connect(db);
    const collection = client.db(db).collection(collectionName);

    // Build filter
    let filter = {};
    if (options.filter) {
      try {
        filter = JSON.parse(options.filter);
      } catch (err) {
        throw new Error(`Invalid JSON filter: ${err.message}`);
      }
      // `null`, arrays and scalars are valid JSON but not query documents.
      if (filter === null || typeof filter !== 'object' || Array.isArray(filter)) {
        throw new Error('Invalid JSON filter: expected a JSON object');
      }
    }

    // Count documents
    const totalCount = await collection.countDocuments(filter);

    if (totalCount === 0) {
      if (options.json) {
        console.log(JSON.stringify({ success: true, count: 0, message: 'No documents to refresh' }));
      } else {
        p.log.success('No documents to refresh.');
        p.outro('Nothing to do.');
      }
      return { success: true, count: 0 };
    }

    // Show plan
    const rechunkLabel = options.rechunk ? ` (re-chunking with ${options.strategy || 'recursive'})` : '';
    const dimLabel = dimensions ? ` @ ${dimensions}d` : '';

    if (options.json && options.dryRun) {
      console.log(JSON.stringify({
        dryRun: true,
        count: totalCount,
        model,
        dimensions: dimensions || 'default',
        rechunk: !!options.rechunk,
      }));
      return { success: true, dryRun: true, count: totalCount };
    }

    if (!quiet) {
      p.log.info(`Found ${totalCount} document${totalCount === 1 ? '' : 's'} to refresh`);
      p.log.info(`Target model: ${model}${dimLabel}${rechunkLabel}`);
    }

    // Dry run - stop here
    if (options.dryRun) {
      if (!quiet) {
        p.log.info('Dry run - no documents modified.');
        p.outro(`Would refresh ${totalCount} document${totalCount === 1 ? '' : 's'}.`);
      }
      return { success: true, dryRun: true, count: totalCount };
    }

    // Confirm unless --force
    if (!options.force && !options.json) {
      const confirmed = await p.confirm({
        message: `Re-embed ${totalCount} document${totalCount === 1 ? '' : 's'}? This will update the embeddings in-place.`,
        initialValue: true,
      });

      if (p.isCancel(confirmed) || !confirmed) {
        p.log.info('Refresh cancelled.');
        p.outro('No documents modified.');
        return { success: false, cancelled: true };
      }
    }

    // Create embedder function
    const embedder = async (texts) => {
      const result = await generateEmbeddings(texts, {
        model,
        dimensions,
        inputType: 'document',
      });
      return result.embeddings;
    };

    let processed = 0;
    let errors = 0;

    // Rechunk bookkeeping.
    // insertedChunkIds: ids of chunks written during this run, so they are
    //   skipped if the live cursor hands them back.
    // origins: one record per rechunked source document, used to decide at
    //   the end whether the original may be deleted.
    const insertedChunkIds = new Set();
    const origins = [];

    // Embed and persist one batch of entries ({ doc, origin }); `origin` is
    // null for documents that are updated in place.
    const flushBatch = async (entries) => {
      try {
        const updated = await processBatch(entries.map((entry) => entry.doc), embedder, { field, model });

        for (let i = 0; i < updated.length; i += 1) {
          const updatedDoc = updated[i];
          const { origin } = entries[i];

          if (origin) {
            // A chunk is a new document: drop any inherited `_id` so the
            // insert cannot collide with the original or a sibling chunk.
            const { _id: inheritedId, ...chunkDoc } = updatedDoc;
            const { insertedId } = await collection.insertOne(chunkDoc);
            insertedChunkIds.add(String(insertedId));
            origin.chunkIds.push(insertedId);
          } else {
            // Update in place
            await collection.updateOne(
              { _id: updatedDoc._id },
              { $set: { [field]: updatedDoc[field], _model: model, _embeddedAt: new Date() } }
            );
          }
        }

        for (const { origin } of entries) {
          if (origin) origin.counted += 1;
        }
        processed += entries.length;
        if (spinner) spinner.message(`Processed ${processed}/${totalCount} documents...`);
      } catch (err) {
        errors += entries.length;
        // Any source document with a chunk in a failed batch is incomplete.
        for (const { origin } of entries) {
          if (origin) origin.failed = true;
        }
        if (!quiet) {
          p.log.warn(`Batch error: ${err.message}`);
        }
      }
    };

    // Delete by id in slices to keep each command document small.
    const deleteByIds = async (ids) => {
      for (let start = 0; start < ids.length; start += DELETE_SLICE) {
        await collection.deleteMany({ _id: { $in: ids.slice(start, start + DELETE_SLICE) } });
      }
    };

    // Process documents
    const cursor = collection.find(filter);
    const pending = [];

    spinner = !quiet ? p.spinner() : null;
    if (spinner) spinner.start('Processing documents...');

    while (await cursor.hasNext()) {
      const doc = await cursor.next();

      if (options.rechunk) {
        // Chunks inserted during this run may match the filter and be
        // returned by the cursor; re-chunking them would never terminate.
        if (insertedChunkIds.has(String(doc._id))) {
          continue;
        }

        const chunks = rechunkDocument(doc, options);
        const unchanged = chunks.length === 1 && chunks[0] === doc;

        if (unchanged) {
          // No text to chunk: re-embed the document in place.
          pending.push({ doc, origin: null });
        } else if (chunks.length > 0) {
          const origin = { id: doc._id, expected: chunks.length, chunkIds: [], counted: 0, failed: false };
          origins.push(origin);
          for (const chunk of chunks) {
            pending.push({ doc: chunk, origin });
          }
        }
        // An empty chunk list leaves the document untouched.
      } else {
        pending.push({ doc, origin: null });
      }

      // One document can yield many chunks, so drain in batchSize slices to
      // keep every embedding call within the configured size.
      while (pending.length >= batchSize) {
        await flushBatch(pending.splice(0, batchSize));
      }
    }

    // Process remaining batch
    if (pending.length > 0) {
      await flushBatch(pending.splice(0, pending.length));
    }

    // If rechunking, swap originals for their chunks
    if (options.rechunk) {
      const completedOriginals = [];
      const orphanChunks = [];

      for (const origin of origins) {
        if (!origin.failed && origin.chunkIds.length === origin.expected) {
          completedOriginals.push(origin.id);
        } else {
          // Keep the original and remove its partial chunk set, so the text
          // is neither lost nor stored twice.
          for (const chunkId of origin.chunkIds) {
            orphanChunks.push(chunkId);
          }
          processed -= origin.counted;
          errors += origin.counted;
        }
      }

      await deleteByIds(orphanChunks);
      await deleteByIds(completedOriginals);
    }

    if (spinner) {
      spinner.stop('Processing complete.');
      spinner = null;
    }

    // Update project config if model/dimensions changed
    const configUpdated = (model !== project.model) || (dimensions && dimensions !== project.dimensions);
    if (configUpdated && !options.json) {
      try {
        saveProject({
          ...project,
          model,
          ...(dimensions && { dimensions }),
        });
        if (!quiet) {
          p.log.info('Updated .vai.json with new model/dimensions.');
        }
      } catch {
        // Best effort: the embeddings are already written, so a config save
        // failure must not turn a finished refresh into an error.
      }
    }

    if (options.json) {
      console.log(JSON.stringify({ success: true, processed, errors }));
    } else {
      if (errors > 0) {
        p.log.warn(`Refreshed ${processed} documents with ${errors} errors.`);
      } else {
        p.log.success(`Refreshed ${processed} document${processed === 1 ? '' : 's'}.`);
      }
      p.outro('Refresh complete.');
    }

    return { success: true, processed, errors };
  } catch (err) {
    // Do not leave the spinner animating over the error output.
    if (spinner) {
      spinner.stop('Refresh failed.');
      spinner = null;
    }
    if (options.json) {
      console.log(JSON.stringify({ error: err.message }));
    } else {
      p.log.error(`Refresh failed: ${err.message}`);
    }
    return { success: false, error: err.message };
  } finally {
    // Only close when a connection was actually opened.
    if (client) {
      await close();
    }
  }
}
296
+
297
/**
 * Register the refresh command with Commander.
 *
 * @param {object} program - Commander program (or command) to extend.
 */
function register(program) {
  // Commander calls an option parser as (value, previousValue). Passing
  // `parseInt` directly would treat the previous value as the radix, so a
  // repeated option such as `-d 8 -d 10` would be parsed as 8. Always parse
  // base 10 and ignore the second argument.
  const toInt = (value) => Number.parseInt(value, 10);

  program
    .command('refresh')
    .description('Re-embed documents with a new model, dimensions, or chunk settings')
    .option('--db <database>', 'Database name')
    .option('--collection <name>', 'Collection name')
    .option('--field <name>', 'Embedding field name')
    .option('-m, --model <model>', 'New embedding model')
    .option('-d, --dimensions <n>', 'New dimensions', toInt)
    .option('--rechunk', 'Re-chunk text before re-embedding')
    .option('-s, --strategy <strategy>', 'Chunk strategy (with --rechunk)')
    .option('-c, --chunk-size <n>', 'Chunk size (with --rechunk)', toInt)
    .option('--overlap <n>', 'Chunk overlap (with --rechunk)', toInt)
    .option('--batch-size <n>', 'Texts per API call (default: 25)', toInt)
    .option('--filter <json>', 'Only refresh matching documents (JSON)')
    .option('--force', 'Skip confirmation prompt')
    .option('--dry-run', 'Show plan without executing')
    .option('--json', 'Machine-readable output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(refresh);
}
321
+
322
+ module.exports = { register, refresh };