voyageai-cli 1.13.0 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
-  "version": "1.13.0",
+  "version": "1.16.0",
   "name": "voyageai-cli",
   "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
   "bin": {
     "vai": "./src/cli.js"
package/src/cli.js CHANGED
@@ -20,6 +20,9 @@ const { registerIngest } = require('./commands/ingest');
 const { registerCompletions } = require('./commands/completions');
 const { registerPlayground } = require('./commands/playground');
 const { registerBenchmark } = require('./commands/benchmark');
+const { registerEstimate } = require('./commands/estimate');
+const { registerInit } = require('./commands/init');
+const { registerChunk } = require('./commands/chunk');
 const { registerAbout } = require('./commands/about');
 const { showBanner, showQuickStart, getVersion } = require('./lib/banner');
 
@@ -45,6 +48,9 @@ registerIngest(program);
 registerCompletions(program);
 registerPlayground(program);
 registerBenchmark(program);
+registerEstimate(program);
+registerInit(program);
+registerChunk(program);
 registerAbout(program);
 
 // Append disclaimer to all help output
package/src/commands/benchmark.js CHANGED
@@ -1212,6 +1212,170 @@ function registerBenchmark(program) {
     .option('--json', 'Machine-readable JSON output')
     .option('-q, --quiet', 'Suppress non-essential output')
     .action(benchmarkAsymmetric);
+
+  // ── benchmark space ──
+  bench
+    .command('space')
+    .description('Validate shared embedding space — embed same text with all Voyage 4 models')
+    .option('--text <text>', 'Text to embed across models')
+    .option('--texts <texts>', 'Comma-separated texts to compare')
+    .option('--models <models>', 'Comma-separated models', 'voyage-4-large,voyage-4,voyage-4-lite')
+    .option('-d, --dimensions <n>', 'Output dimensions (must be supported by all models)')
+    .option('--json', 'Machine-readable JSON output')
+    .option('-q, --quiet', 'Suppress non-essential output')
+    .action(benchmarkSpace);
+}
+
+/**
+ * benchmark space — Validate shared embedding space across Voyage 4 models.
+ * Embeds the same text(s) with multiple models, then computes pairwise cosine
+ * similarities to prove they produce compatible embeddings.
+ */
+async function benchmarkSpace(opts) {
+  const models = opts.models
+    ? parseModels(opts.models)
+    : ['voyage-4-large', 'voyage-4', 'voyage-4-lite'];
+
+  const texts = opts.texts
+    ? opts.texts.split(',').map(t => t.trim())
+    : opts.text
+      ? [opts.text]
+      : [
+          'MongoDB Atlas provides a fully managed cloud database with vector search.',
+          'Machine learning models transform raw data into semantic embeddings.',
+          'The quick brown fox jumps over the lazy dog.',
+        ];
+
+  const dimensions = opts.dimensions ? parseInt(opts.dimensions, 10) : undefined;
+
+  if (!opts.json && !opts.quiet) {
+    console.log('');
+    console.log(ui.bold(' 🔮 Shared Embedding Space Validation'));
+    console.log(ui.dim(` Models: ${models.join(', ')}`));
+    console.log(ui.dim(` Texts: ${texts.length}${dimensions ? `, dimensions: ${dimensions}` : ''}`));
+    console.log('');
+  }
+
+  // Embed all texts with all models
+  const embeddings = {}; // { model: [[embedding for text 0], [embedding for text 1], ...] }
+
+  for (const model of models) {
+    const spin = (!opts.json && !opts.quiet) ? ui.spinner(` Embedding with ${model}...`) : null;
+    if (spin) spin.start();
+
+    try {
+      const embedOpts = { model, inputType: 'document' };
+      if (dimensions) embedOpts.dimensions = dimensions;
+      const result = await generateEmbeddings(texts, embedOpts);
+      embeddings[model] = result.data.map(d => d.embedding);
+      if (spin) spin.stop();
+    } catch (err) {
+      if (spin) spin.stop();
+      console.error(ui.warn(` ${model}: ${err.message} — skipping`));
+    }
+  }
+
+  const validModels = Object.keys(embeddings);
+  if (validModels.length < 2) {
+    console.error(ui.error('Need at least 2 models to compare embedding spaces.'));
+    process.exit(1);
+  }
+
+  // Compute pairwise cross-model similarities for each text
+  const results = [];
+
+  for (let t = 0; t < texts.length; t++) {
+    const textResult = {
+      text: texts[t],
+      pairs: [],
+    };
+
+    for (let i = 0; i < validModels.length; i++) {
+      for (let j = i + 1; j < validModels.length; j++) {
+        const modelA = validModels[i];
+        const modelB = validModels[j];
+        const sim = cosineSimilarity(embeddings[modelA][t], embeddings[modelB][t]);
+        textResult.pairs.push({
+          modelA,
+          modelB,
+          similarity: sim,
+        });
+      }
+    }
+
+    results.push(textResult);
+  }
+
+  // Also compute within-model similarity across different texts (baseline)
+  const withinModelSims = [];
+  if (texts.length >= 2) {
+    for (const model of validModels) {
+      const sim = cosineSimilarity(embeddings[model][0], embeddings[model][1]);
+      withinModelSims.push({ model, text0: texts[0], text1: texts[1], similarity: sim });
+    }
+  }
+
+  if (opts.json) {
+    console.log(JSON.stringify({ benchmark: 'space', models: validModels, texts, results, withinModelSims }, null, 2));
+    return;
+  }
+
+  // Display results
+  console.log(ui.bold(' Cross-Model Similarity (same text, different models):'));
+  console.log(ui.dim(' High similarity (>0.95) = shared embedding space confirmed'));
+  console.log('');
+
+  let allHigh = true;
+  for (const r of results) {
+    const preview = r.text.substring(0, 55) + (r.text.length > 55 ? '...' : '');
+    console.log(` ${ui.dim('Text:')} "${preview}"`);
+
+    for (const p of r.pairs) {
+      const simStr = p.similarity.toFixed(4);
+      const quality = p.similarity >= 0.98 ? ui.green('●')
+        : p.similarity >= 0.95 ? ui.cyan('●')
+        : p.similarity >= 0.90 ? ui.yellow('●')
+        : ui.red('●');
+      if (p.similarity < 0.95) allHigh = false;
+      console.log(` ${quality} ${rpad(p.modelA, 18)} ↔ ${rpad(p.modelB, 18)} ${ui.bold(simStr)}`);
+    }
+    console.log('');
+  }
+
+  // Show within-model cross-text similarity for context
+  if (withinModelSims.length > 0) {
+    console.log(ui.bold(' Within-Model Similarity (different texts, same model):'));
+    console.log(ui.dim(' Shows that cross-model same-text similarity is much higher'));
+    console.log('');
+
+    for (const w of withinModelSims) {
+      console.log(` ${ui.dim(rpad(w.model, 18))} text₀ ↔ text₁ ${ui.dim(w.similarity.toFixed(4))}`);
+    }
+    console.log('');
+  }
+
+  // Summary
+  const avgCrossModel = results.flatMap(r => r.pairs).reduce((sum, p) => sum + p.similarity, 0)
+    / results.flatMap(r => r.pairs).length;
+  const avgWithin = withinModelSims.length > 0
+    ? withinModelSims.reduce((sum, w) => sum + w.similarity, 0) / withinModelSims.length
+    : null;
+
+  if (allHigh) {
+    console.log(ui.success(`Shared embedding space confirmed! Avg cross-model similarity: ${avgCrossModel.toFixed(4)}`));
+  } else {
+    console.log(ui.warn(`Cross-model similarity lower than expected. Avg: ${avgCrossModel.toFixed(4)}`));
+  }
+
+  if (avgWithin !== null) {
+    const ratio = (avgCrossModel / avgWithin).toFixed(1);
+    console.log(ui.dim(` Cross-model same-text similarity is ${ratio}× higher than same-model different-text similarity.`));
+  }
+
+  console.log('');
+  console.log(ui.dim(' This means you can embed docs with voyage-4-large and query with voyage-4-lite'));
+  console.log(ui.dim(' — the embeddings live in the same space. See "vai explain shared-space".'));
+  console.log('');
 }
 
 module.exports = { registerBenchmark };
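
Note: the benchmarkSpace hunk above calls a cosineSimilarity helper that lives elsewhere in the package and is not part of this diff. For readers following the math, cosine similarity is the dot product of two vectors divided by the product of their magnitudes; a minimal sketch of such a helper (an illustration, not the package's actual implementation) looks like:

  function cosineSimilarity(a, b) {
    // dot(a, b) / (|a| * |b|); assumes equal-length numeric arrays
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
  }

With the default three models, the pairwise loop yields three comparisons per text (large ↔ 4, large ↔ lite, 4 ↔ lite); scores near 1.0 across all pairs are what the command treats as evidence of a shared embedding space.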
package/src/commands/chunk.js ADDED
@@ -0,0 +1,277 @@
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+const { chunk, estimateTokens, STRATEGIES, DEFAULTS } = require('../lib/chunker');
+const { readFile, scanDirectory, isSupported, getReaderType } = require('../lib/readers');
+const { loadProject, mergeOptions } = require('../lib/project');
+const ui = require('../lib/ui');
+
+/**
+ * Format a number with commas.
+ */
+function fmtNum(n) {
+  return n.toLocaleString('en-US');
+}
+
+/**
+ * Build chunk metadata for a source file.
+ * @param {string} filePath - Source file path
+ * @param {string} basePath - Base directory for relative paths
+ * @param {number} index - Chunk index within the file
+ * @param {number} total - Total chunks from this file
+ * @returns {object}
+ */
+function buildMetadata(filePath, basePath, index, total) {
+  return {
+    source: path.relative(basePath, filePath),
+    chunk_index: index,
+    total_chunks: total,
+  };
+}
+
+/**
+ * Register the chunk command on a Commander program.
+ * @param {import('commander').Command} program
+ */
+function registerChunk(program) {
+  program
+    .command('chunk [input]')
+    .description('Chunk documents for embedding — files, directories, or stdin')
+    .option('-s, --strategy <strategy>', `Chunking strategy: ${STRATEGIES.join(', ')}`)
+    .option('-c, --chunk-size <n>', 'Target chunk size in characters', (v) => parseInt(v, 10))
+    .option('--overlap <n>', 'Overlap between chunks in characters', (v) => parseInt(v, 10))
+    .option('--min-size <n>', 'Minimum chunk size (drop smaller)', (v) => parseInt(v, 10))
+    .option('-o, --output <path>', 'Output file (JSONL). Omit for stdout')
+    .option('--text-field <name>', 'Text field name for JSON/JSONL input', 'text')
+    .option('--extensions <exts>', 'Comma-separated file extensions to include when scanning directories')
+    .option('--ignore <dirs>', 'Comma-separated directory names to skip', 'node_modules,.git,__pycache__')
+    .option('--dry-run', 'Show what would be chunked without processing')
+    .option('--stats', 'Show chunking statistics after processing')
+    .option('--json', 'Machine-readable JSON output')
+    .option('-q, --quiet', 'Suppress non-essential output')
+    .action(async (input, opts) => {
+      try {
+        // Load project config, merge with CLI opts
+        const { config: projectConfig } = loadProject();
+        const chunkConfig = projectConfig.chunk || {};
+
+        const strategy = opts.strategy || chunkConfig.strategy || DEFAULTS.strategy || 'recursive';
+        const chunkSize = opts.chunkSize || chunkConfig.size || DEFAULTS.size;
+        const overlap = opts.overlap != null ? opts.overlap : (chunkConfig.overlap != null ? chunkConfig.overlap : DEFAULTS.overlap);
+        const minSize = opts.minSize || chunkConfig.minSize || DEFAULTS.minSize;
+        const textField = opts.textField || 'text';
+
+        if (!STRATEGIES.includes(strategy)) {
+          console.error(ui.error(`Unknown strategy: "${strategy}". Available: ${STRATEGIES.join(', ')}`));
+          process.exit(1);
+        }
+
+        // Resolve input files
+        const files = resolveInput(input, opts);
+
+        if (files.length === 0) {
+          console.error(ui.error('No supported files found. Supported types: .txt, .md, .html, .json, .jsonl, .pdf'));
+          process.exit(1);
+        }
+
+        // Dry run
+        if (opts.dryRun) {
+          if (opts.json) {
+            console.log(JSON.stringify({ files: files.map(f => path.relative(process.cwd(), f)), strategy, chunkSize, overlap }, null, 2));
+          } else {
+            console.log(ui.bold(`Would chunk ${files.length} file(s) with strategy: ${strategy}`));
+            console.log(ui.dim(` Chunk size: ${chunkSize} chars, overlap: ${overlap} chars`));
+            console.log('');
+            for (const f of files) {
+              const size = fs.statSync(f).size;
+              console.log(` ${ui.dim(path.relative(process.cwd(), f))} (${fmtNum(size)} bytes)`);
+            }
+          }
+          return;
+        }
+
+        // Process files
+        const basePath = input && fs.existsSync(input) && fs.statSync(input).isDirectory()
+          ? path.resolve(input)
+          : process.cwd();
+
+        const allChunks = [];
+        const fileStats = [];
+
+        const showProgress = !opts.json && !opts.quiet && files.length > 1;
+        if (showProgress) {
+          console.log(ui.bold(`Chunking ${files.length} file(s) with strategy: ${strategy}`));
+          console.log(ui.dim(` Chunk size: ${chunkSize}, overlap: ${overlap}, min: ${minSize}`));
+          console.log('');
+        }
+
+        for (let fi = 0; fi < files.length; fi++) {
+          const filePath = files[fi];
+          const relPath = path.relative(basePath, filePath);
+          const readerType = getReaderType(filePath);
+
+          try {
+            const content = await readFile(filePath, { textField });
+
+            // readFile returns string for text/html/pdf, array for json/jsonl
+            let textsToChunk = [];
+
+            if (typeof content === 'string') {
+              textsToChunk = [{ text: content, metadata: {} }];
+            } else if (Array.isArray(content)) {
+              textsToChunk = content;
+            }
+
+            let fileChunkCount = 0;
+            for (const item of textsToChunk) {
+              const effectiveStrategy = readerType === 'text' && filePath.endsWith('.md') ? 'markdown' : strategy;
+              // Auto-detect markdown for .md files when using default strategy
+              const useStrategy = (strategy === 'recursive' && filePath.endsWith('.md')) ? 'markdown' : strategy;
+
+              const chunks = chunk(item.text, {
+                strategy: useStrategy,
+                size: chunkSize,
+                overlap,
+                minSize,
+              });
+
+              for (let ci = 0; ci < chunks.length; ci++) {
+                allChunks.push({
+                  text: chunks[ci],
+                  metadata: {
+                    ...item.metadata,
+                    ...buildMetadata(filePath, basePath, ci, chunks.length),
+                  },
+                });
+              }
+              fileChunkCount += chunks.length;
+            }
+
+            fileStats.push({
+              file: relPath,
+              inputChars: textsToChunk.reduce((sum, t) => sum + t.text.length, 0),
+              chunks: fileChunkCount,
+            });
+
+            if (showProgress) {
+              console.log(` ${ui.green('✓')} ${relPath} → ${fileChunkCount} chunks`);
+            }
+          } catch (err) {
+            fileStats.push({ file: relPath, error: err.message, chunks: 0 });
+            if (!opts.quiet) {
+              console.error(` ${ui.red('✗')} ${relPath}: ${err.message}`);
+            }
+          }
+        }
+
+        // Output
+        if (opts.json) {
+          const output = {
+            totalChunks: allChunks.length,
+            totalTokens: allChunks.reduce((sum, c) => sum + estimateTokens(c.text), 0),
+            strategy,
+            chunkSize,
+            overlap,
+            files: fileStats,
+            chunks: allChunks,
+          };
+          const jsonStr = JSON.stringify(output, null, 2);
+          if (opts.output) {
+            fs.writeFileSync(opts.output, jsonStr + '\n');
+          } else {
+            console.log(jsonStr);
+          }
+        } else {
+          // JSONL output
+          const lines = allChunks.map(c => JSON.stringify(c));
+          const jsonlStr = lines.join('\n') + '\n';
+
+          if (opts.output) {
+            fs.writeFileSync(opts.output, jsonlStr);
+            if (!opts.quiet) {
+              console.log('');
+              console.log(ui.success(`Wrote ${fmtNum(allChunks.length)} chunks to ${opts.output}`));
+            }
+          } else if (opts.quiet || !showProgress) {
+            // Stdout — write JSONL directly
+            process.stdout.write(jsonlStr);
+          } else {
+            // Progress was shown, write to stdout with separator
+            console.log('');
+            process.stdout.write(jsonlStr);
+          }
+        }
+
+        // Stats summary
+        if ((opts.stats || showProgress) && !opts.json) {
+          const totalChars = fileStats.reduce((sum, f) => sum + (f.inputChars || 0), 0);
+          const totalTokens = allChunks.reduce((sum, c) => sum + estimateTokens(c.text), 0);
+          const avgChunkSize = allChunks.length > 0
+            ? Math.round(allChunks.reduce((sum, c) => sum + c.text.length, 0) / allChunks.length)
+            : 0;
+          const errors = fileStats.filter(f => f.error).length;
+
+          console.log('');
+          console.log(ui.bold('Summary'));
+          console.log(ui.label('Files', `${fmtNum(files.length)}${errors ? ` (${errors} failed)` : ''}`));
+          console.log(ui.label('Input', `${fmtNum(totalChars)} chars`));
+          console.log(ui.label('Chunks', fmtNum(allChunks.length)));
+          console.log(ui.label('Avg chunk', `${fmtNum(avgChunkSize)} chars (~${fmtNum(Math.round(avgChunkSize / 4))} tokens)`));
+          console.log(ui.label('Est. tokens', `~${fmtNum(totalTokens)}`));
+
+          // Cost hint
+          const pricePerMToken = 0.12; // voyage-4-large default
+          const cost = (totalTokens / 1e6) * pricePerMToken;
+          if (cost > 0) {
+            console.log(ui.label('Est. cost', ui.dim(`~$${cost < 0.01 ? cost.toFixed(4) : cost.toFixed(2)} with voyage-4-large`)));
+          }
+        }
+      } catch (err) {
+        console.error(ui.error(err.message));
+        process.exit(1);
+      }
+    });
+}
+
+/**
+ * Resolve input to a list of file paths.
+ * @param {string} input - File path, directory path, or glob
+ * @param {object} opts
+ * @returns {string[]}
+ */
+function resolveInput(input, opts) {
+  if (!input) {
+    console.error(ui.error('Please provide a file or directory path.'));
+    console.error(ui.dim(' Usage: vai chunk <file-or-directory> [options]'));
+    process.exit(1);
+  }
+
+  const resolved = path.resolve(input);
+
+  if (!fs.existsSync(resolved)) {
+    console.error(ui.error(`Not found: ${input}`));
+    process.exit(1);
+  }
+
+  const stat = fs.statSync(resolved);
+
+  if (stat.isFile()) {
+    return [resolved];
+  }
+
+  if (stat.isDirectory()) {
+    const scanOpts = {};
+    if (opts.extensions) {
+      scanOpts.extensions = opts.extensions.split(',').map(e => e.trim());
+    }
+    if (opts.ignore) {
+      scanOpts.ignore = opts.ignore.split(',').map(d => d.trim());
+    }
+    return scanDirectory(resolved, scanOpts);
+  }
+
+  return [];
+}
+
+module.exports = { registerChunk };
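
Note: two heuristics drive the chunk command's summary output above. Token counts are approximated at roughly 4 characters per token (the `avgChunkSize / 4` line, presumably mirroring what `estimateTokens` does), and the cost hint multiplies estimated tokens by the hard-coded $0.12 per million tokens for voyage-4-large. A standalone sketch of that arithmetic (illustrative only; check current Voyage AI pricing):

  // Mirrors the summary math above; the 4-chars-per-token heuristic and
  // the $0.12/M rate are the values hard-coded in this version of the
  // package, not authoritative pricing.
  function estimateCostUSD(totalChars, pricePerMToken = 0.12) {
    const tokens = Math.round(totalChars / 4); // ~4 chars per token
    return (tokens / 1e6) * pricePerMToken;
  }
  // Example: 2,000,000 input chars ≈ 500,000 tokens ≈ $0.06

Each emitted JSONL record pairs the chunk text with provenance metadata from buildMetadata, e.g. {"text":"...","metadata":{"source":"docs/guide.md","chunk_index":0,"total_chunks":12}} (file name hypothetical).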
package/src/commands/completions.js CHANGED
@@ -19,7 +19,7 @@ _vai_completions() {
   prev="\${COMP_WORDS[COMP_CWORD-1]}"
 
   # Top-level commands
-  commands="embed rerank store search index models ping config demo explain similarity ingest completions help"
+  commands="embed rerank store search index models ping config demo explain similarity ingest estimate init chunk completions help"
 
   # Subcommands
   local index_subs="create list delete"
@@ -102,6 +102,18 @@ _vai_completions() {
       COMPREPLY=( \$(compgen -W "--file --db --collection --field --model --input-type --dimensions --batch-size --text-field --text-column --strict --dry-run --json --quiet --help" -- "\$cur") )
       return 0
       ;;
+    estimate)
+      COMPREPLY=( \$(compgen -W "--docs --queries --doc-tokens --query-tokens --doc-model --query-model --months --json --quiet --help" -- "\$cur") )
+      return 0
+      ;;
+    init)
+      COMPREPLY=( \$(compgen -W "--yes --force --json --quiet --help" -- "\$cur") )
+      return 0
+      ;;
+    chunk)
+      COMPREPLY=( \$(compgen -W "--strategy --chunk-size --overlap --min-size --output --text-field --extensions --ignore --dry-run --stats --json --quiet --help" -- "\$cur") )
+      return 0
+      ;;
     completions)
       COMPREPLY=( \$(compgen -W "bash zsh --help" -- "\$cur") )
       return 0
@@ -172,6 +184,9 @@ _vai() {
     'explain:Learn about AI and vector search concepts'
     'similarity:Compute cosine similarity between texts'
     'ingest:Bulk import documents with progress'
+    'estimate:Estimate embedding costs — symmetric vs asymmetric'
+    'init:Initialize project with .vai.json'
+    'chunk:Chunk documents for embedding'
     'completions:Generate shell completion scripts'
     'help:Display help for command'
   )
@@ -375,6 +390,41 @@ _vai() {
       '--json[Machine-readable JSON output]' \\
       '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
       ;;
+    estimate)
+      _arguments \\
+        '--docs[Number of documents]:count:' \\
+        '--queries[Queries per month]:count:' \\
+        '--doc-tokens[Avg tokens per document]:tokens:' \\
+        '--query-tokens[Avg tokens per query]:tokens:' \\
+        '--doc-model[Document embedding model]:model:(\$models)' \\
+        '--query-model[Query embedding model]:model:(\$models)' \\
+        '--months[Months to project]:months:' \\
+        '--json[Machine-readable JSON output]' \\
+        '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
+      ;;
+    init)
+      _arguments \\
+        '(-y --yes)'{-y,--yes}'[Accept all defaults]' \\
+        '--force[Overwrite existing .vai.json]' \\
+        '--json[Output config as JSON]' \\
+        '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
+      ;;
+    chunk)
+      _arguments \\
+        '1:input:_files' \\
+        '(-s --strategy)'{-s,--strategy}'[Chunking strategy]:strategy:(fixed sentence paragraph recursive markdown)' \\
+        '(-c --chunk-size)'{-c,--chunk-size}'[Target chunk size]:size:' \\
+        '--overlap[Overlap between chunks]:chars:' \\
+        '--min-size[Minimum chunk size]:chars:' \\
+        '(-o --output)'{-o,--output}'[Output file]:file:_files' \\
+        '--text-field[Text field for JSON]:field:' \\
+        '--extensions[File extensions]:exts:' \\
+        '--ignore[Dirs to skip]:dirs:' \\
+        '--dry-run[Preview without processing]' \\
+        '--stats[Show statistics]' \\
+        '--json[JSON output]' \\
+        '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
+      ;;
     completions)
       _arguments \\
         '1:shell:(bash zsh)'