voyageai-cli 1.15.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voyageai-cli",
3
- "version": "1.15.0",
3
+ "version": "1.18.0",
4
4
  "description": "CLI for Voyage AI embeddings, reranking, and MongoDB Atlas Vector Search",
5
5
  "bin": {
6
6
  "vai": "./src/cli.js"
package/src/cli.js CHANGED
@@ -21,6 +21,10 @@ const { registerCompletions } = require('./commands/completions');
21
21
  const { registerPlayground } = require('./commands/playground');
22
22
  const { registerBenchmark } = require('./commands/benchmark');
23
23
  const { registerEstimate } = require('./commands/estimate');
24
+ const { registerInit } = require('./commands/init');
25
+ const { registerChunk } = require('./commands/chunk');
26
+ const { registerQuery } = require('./commands/query');
27
+ const { registerPipeline } = require('./commands/pipeline');
24
28
  const { registerAbout } = require('./commands/about');
25
29
  const { showBanner, showQuickStart, getVersion } = require('./lib/banner');
26
30
 
@@ -47,6 +51,10 @@ registerCompletions(program);
47
51
  registerPlayground(program);
48
52
  registerBenchmark(program);
49
53
  registerEstimate(program);
54
+ registerInit(program);
55
+ registerChunk(program);
56
+ registerQuery(program);
57
+ registerPipeline(program);
50
58
  registerAbout(program);
51
59
 
52
60
  // Append disclaimer to all help output
@@ -0,0 +1,277 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const { chunk, estimateTokens, STRATEGIES, DEFAULTS } = require('../lib/chunker');
6
+ const { readFile, scanDirectory, isSupported, getReaderType } = require('../lib/readers');
7
+ const { loadProject, mergeOptions } = require('../lib/project');
8
+ const ui = require('../lib/ui');
9
+
10
/**
 * Render a number with en-US digit grouping (thousands separated by commas).
 * @param {number} n - Value to format
 * @returns {string} The grouped representation, e.g. 1234567 -> "1,234,567"
 */
function fmtNum(n) {
  const grouped = n.toLocaleString('en-US');
  return grouped;
}
16
+
17
/**
 * Assemble the metadata record attached to a single chunk.
 * @param {string} filePath - Path of the file the chunk came from
 * @param {string} basePath - Directory that `source` is made relative to
 * @param {number} index - Position of the chunk, stored as `chunk_index`
 * @param {number} total - Chunk count, stored as `total_chunks`
 * @returns {{source: string, chunk_index: number, total_chunks: number}}
 */
function buildMetadata(filePath, basePath, index, total) {
  const source = path.relative(basePath, filePath);
  const metadata = { source };
  metadata.chunk_index = index;
  metadata.total_chunks = total;
  return metadata;
}
32
+
33
/**
 * Validate an integer option that was supplied on the command line.
 * The option parsers use parseInt, so a non-numeric value arrives here as NaN.
 * @param {number|undefined} value - Parsed option value (undefined when the flag was omitted)
 * @param {string} flag - Flag name used in the error message
 * @param {number} min - Smallest accepted value
 * @returns {number|undefined} The validated value, or undefined when the flag was omitted
 * @throws {Error} When the value is not an integer or is below `min`
 */
function parseCliInt(value, flag, min) {
  if (value == null) return undefined;
  if (!Number.isInteger(value) || value < min) {
    throw new Error(`Invalid value for ${flag}: expected an integer >= ${min}`);
  }
  return value;
}

/**
 * Register the chunk command on a Commander program.
 *
 * The action resolves its settings from CLI flags, then the project config's
 * `chunk` section, then DEFAULTS; reads every resolved input file; chunks its
 * text; and emits the chunks as JSONL (default) or as one JSON document
 * (--json), either to stdout or to the --output file.
 *
 * When chunks are streamed to stdout, progress and summary text is written to
 * stderr so that piped JSONL stays machine-readable.
 *
 * NOTE(review): the description advertises stdin input, but resolveInput exits
 * with a usage error when no input path is given.
 *
 * @param {import('commander').Command} program
 */
function registerChunk(program) {
  program
    .command('chunk [input]')
    .description('Chunk documents for embedding — files, directories, or stdin')
    .option('-s, --strategy <strategy>', `Chunking strategy: ${STRATEGIES.join(', ')}`)
    .option('-c, --chunk-size <n>', 'Target chunk size in characters', (v) => parseInt(v, 10))
    .option('--overlap <n>', 'Overlap between chunks in characters', (v) => parseInt(v, 10))
    .option('--min-size <n>', 'Minimum chunk size (drop smaller)', (v) => parseInt(v, 10))
    .option('-o, --output <path>', 'Output file (JSONL). Omit for stdout')
    .option('--text-field <name>', 'Text field name for JSON/JSONL input', 'text')
    .option('--extensions <exts>', 'Comma-separated file extensions to include when scanning directories')
    .option('--ignore <dirs>', 'Comma-separated directory names to skip', 'node_modules,.git,__pycache__')
    .option('--dry-run', 'Show what would be chunked without processing')
    .option('--stats', 'Show chunking statistics after processing')
    .option('--json', 'Machine-readable JSON output')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (input, opts) => {
      try {
        // Load project config, merge with CLI opts
        const { config: projectConfig } = loadProject();
        const chunkConfig = projectConfig.chunk || {};

        // Reject garbage numeric flags up front instead of silently ignoring them.
        const cliChunkSize = parseCliInt(opts.chunkSize, '--chunk-size', 1);
        const cliOverlap = parseCliInt(opts.overlap, '--overlap', 0);
        const cliMinSize = parseCliInt(opts.minSize, '--min-size', 0);

        const strategy = opts.strategy || chunkConfig.strategy || DEFAULTS.strategy || 'recursive';
        const chunkSize = cliChunkSize || chunkConfig.size || DEFAULTS.size;
        const overlap = cliOverlap != null
          ? cliOverlap
          : (chunkConfig.overlap != null ? chunkConfig.overlap : DEFAULTS.overlap);
        // 0 is a meaningful minimum (keep every chunk), so use null checks rather than `||`.
        const minSize = cliMinSize != null
          ? cliMinSize
          : (chunkConfig.minSize != null ? chunkConfig.minSize : DEFAULTS.minSize);
        const textField = opts.textField || 'text';

        if (!STRATEGIES.includes(strategy)) {
          console.error(ui.error(`Unknown strategy: "${strategy}". Available: ${STRATEGIES.join(', ')}`));
          process.exit(1);
        }

        // Resolve input files
        const files = resolveInput(input, opts);

        if (files.length === 0) {
          console.error(ui.error('No supported files found. Supported types: .txt, .md, .html, .json, .jsonl, .pdf'));
          process.exit(1);
        }

        // Dry run: report the plan and stop before reading any file content.
        if (opts.dryRun) {
          if (opts.json) {
            console.log(JSON.stringify({ files: files.map(f => path.relative(process.cwd(), f)), strategy, chunkSize, overlap }, null, 2));
          } else {
            console.log(ui.bold(`Would chunk ${files.length} file(s) with strategy: ${strategy}`));
            console.log(ui.dim(` Chunk size: ${chunkSize} chars, overlap: ${overlap} chars`));
            console.log('');
            for (const f of files) {
              const size = fs.statSync(f).size;
              console.log(` ${ui.dim(path.relative(process.cwd(), f))} (${fmtNum(size)} bytes)`);
            }
          }
          return;
        }

        // Human-readable text shares stdout only when the chunks go to a file;
        // otherwise it is sent to stderr so the JSONL stream stays clean.
        const note = opts.output ? console.log : console.error;

        // Source paths are relative to the input directory, or to cwd for a single file.
        const basePath = input && fs.existsSync(input) && fs.statSync(input).isDirectory()
          ? path.resolve(input)
          : process.cwd();

        const allChunks = [];
        const fileStats = [];

        const showProgress = !opts.json && !opts.quiet && files.length > 1;
        if (showProgress) {
          note(ui.bold(`Chunking ${files.length} file(s) with strategy: ${strategy}`));
          note(ui.dim(` Chunk size: ${chunkSize}, overlap: ${overlap}, min: ${minSize}`));
          note('');
        }

        for (let fi = 0; fi < files.length; fi++) {
          const filePath = files[fi];
          const relPath = path.relative(basePath, filePath);
          // Auto-detect markdown for .md files when using the default strategy
          const useStrategy = (strategy === 'recursive' && filePath.endsWith('.md')) ? 'markdown' : strategy;

          try {
            const content = await readFile(filePath, { textField });

            // readFile returns string for text/html/pdf, array for json/jsonl
            let textsToChunk = [];

            if (typeof content === 'string') {
              textsToChunk = [{ text: content, metadata: {} }];
            } else if (Array.isArray(content)) {
              textsToChunk = content;
            }

            let fileChunkCount = 0;
            for (const item of textsToChunk) {
              const chunks = chunk(item.text, {
                strategy: useStrategy,
                size: chunkSize,
                overlap,
                minSize,
              });

              // chunk_index / total_chunks are counted per item, so they restart
              // for every record of a JSON/JSONL file.
              for (let ci = 0; ci < chunks.length; ci++) {
                allChunks.push({
                  text: chunks[ci],
                  metadata: {
                    ...item.metadata,
                    ...buildMetadata(filePath, basePath, ci, chunks.length),
                  },
                });
              }
              fileChunkCount += chunks.length;
            }

            fileStats.push({
              file: relPath,
              inputChars: textsToChunk.reduce((sum, t) => sum + t.text.length, 0),
              chunks: fileChunkCount,
            });

            if (showProgress) {
              note(` ${ui.green('✓')} ${relPath} → ${fileChunkCount} chunks`);
            }
          } catch (err) {
            // A failing file is recorded and reported, but does not abort the run.
            fileStats.push({ file: relPath, error: err.message, chunks: 0 });
            if (!opts.quiet) {
              console.error(` ${ui.red('✗')} ${relPath}: ${err.message}`);
            }
          }
        }

        // Output
        if (opts.json) {
          const output = {
            totalChunks: allChunks.length,
            totalTokens: allChunks.reduce((sum, c) => sum + estimateTokens(c.text), 0),
            strategy,
            chunkSize,
            overlap,
            files: fileStats,
            chunks: allChunks,
          };
          const jsonStr = JSON.stringify(output, null, 2);
          if (opts.output) {
            fs.writeFileSync(opts.output, jsonStr + '\n');
          } else {
            console.log(jsonStr);
          }
        } else {
          // JSONL output; with no chunks the payload is empty rather than a lone blank line.
          const jsonlStr = allChunks.length > 0
            ? allChunks.map(c => JSON.stringify(c)).join('\n') + '\n'
            : '';

          if (opts.output) {
            fs.writeFileSync(opts.output, jsonlStr);
            if (!opts.quiet) {
              note('');
              note(ui.success(`Wrote ${fmtNum(allChunks.length)} chunks to ${opts.output}`));
            }
          } else {
            if (showProgress) {
              // Visual separator between the progress lines and the data on a terminal.
              note('');
            }
            process.stdout.write(jsonlStr);
          }
        }

        // Stats summary
        if ((opts.stats || showProgress) && !opts.json) {
          const totalChars = fileStats.reduce((sum, f) => sum + (f.inputChars || 0), 0);
          const totalTokens = allChunks.reduce((sum, c) => sum + estimateTokens(c.text), 0);
          const avgChunkSize = allChunks.length > 0
            ? Math.round(allChunks.reduce((sum, c) => sum + c.text.length, 0) / allChunks.length)
            : 0;
          const errors = fileStats.filter(f => f.error).length;

          note('');
          note(ui.bold('Summary'));
          note(ui.label('Files', `${fmtNum(files.length)}${errors ? ` (${errors} failed)` : ''}`));
          note(ui.label('Input', `${fmtNum(totalChars)} chars`));
          note(ui.label('Chunks', fmtNum(allChunks.length)));
          note(ui.label('Avg chunk', `${fmtNum(avgChunkSize)} chars (~${fmtNum(Math.round(avgChunkSize / 4))} tokens)`));
          note(ui.label('Est. tokens', `~${fmtNum(totalTokens)}`));

          // Cost hint
          const pricePerMToken = 0.12; // voyage-4-large default
          const cost = (totalTokens / 1e6) * pricePerMToken;
          if (cost > 0) {
            note(ui.label('Est. cost', ui.dim(`~$${cost < 0.01 ? cost.toFixed(4) : cost.toFixed(2)} with voyage-4-large`)));
          }
        }
      } catch (err) {
        console.error(ui.error(err.message));
        process.exit(1);
      }
    });
}
236
+
237
/**
 * Resolve input to a list of file paths.
 *
 * A file path yields a one-element list holding its absolute path. A directory
 * is handed to scanDirectory together with the --extensions / --ignore lists.
 * Anything else that exists yields an empty list. Glob patterns are not
 * expanded here.
 *
 * Exits the process with status 1 when `input` is missing or does not exist.
 *
 * @param {string} input - File path or directory path
 * @param {object} opts - Parsed CLI options; `extensions` and `ignore` are comma-separated strings
 * @returns {string[]}
 */
function resolveInput(input, opts) {
  if (!input) {
    console.error(ui.error('Please provide a file or directory path.'));
    console.error(ui.dim(' Usage: vai chunk <file-or-directory> [options]'));
    process.exit(1);
  }

  const resolved = path.resolve(input);

  if (!fs.existsSync(resolved)) {
    console.error(ui.error(`Not found: ${input}`));
    process.exit(1);
  }

  const stat = fs.statSync(resolved);

  if (stat.isFile()) {
    return [resolved];
  }

  if (stat.isDirectory()) {
    // Drop empty entries so a trailing or doubled comma (".md,") never reaches
    // the scanner as an empty extension or directory name.
    const splitList = (csv) => (csv ? csv.split(',').map(s => s.trim()).filter(Boolean) : []);

    const scanOpts = {};
    const extensions = splitList(opts.extensions);
    if (extensions.length > 0) {
      scanOpts.extensions = extensions;
    }
    const ignore = splitList(opts.ignore);
    if (ignore.length > 0) {
      scanOpts.ignore = ignore;
    }
    return scanDirectory(resolved, scanOpts);
  }

  return [];
}
276
+
277
+ module.exports = { registerChunk };
@@ -19,7 +19,7 @@ _vai_completions() {
19
19
  prev="\${COMP_WORDS[COMP_CWORD-1]}"
20
20
 
21
21
  # Top-level commands
22
- commands="embed rerank store search index models ping config demo explain similarity ingest estimate completions help"
22
+ commands="embed rerank store search index models ping config demo explain similarity ingest estimate init chunk query pipeline completions help"
23
23
 
24
24
  # Subcommands
25
25
  local index_subs="create list delete"
@@ -106,6 +106,22 @@ _vai_completions() {
106
106
  COMPREPLY=( \$(compgen -W "--docs --queries --doc-tokens --query-tokens --doc-model --query-model --months --json --quiet --help" -- "\$cur") )
107
107
  return 0
108
108
  ;;
109
+ init)
110
+ COMPREPLY=( \$(compgen -W "--yes --force --json --quiet --help" -- "\$cur") )
111
+ return 0
112
+ ;;
113
+ chunk)
114
+ COMPREPLY=( \$(compgen -W "--strategy --chunk-size --overlap --min-size --output --text-field --extensions --ignore --dry-run --stats --json --quiet --help" -- "\$cur") )
115
+ return 0
116
+ ;;
117
+ query)
118
+ COMPREPLY=( \$(compgen -W "--db --collection --index --field --model --dimensions --limit --top-k --rerank --no-rerank --rerank-model --text-field --filter --num-candidates --show-vectors --json --quiet --help" -- "\$cur") )
119
+ return 0
120
+ ;;
121
+ pipeline)
122
+ COMPREPLY=( \$(compgen -W "--db --collection --field --index --model --dimensions --strategy --chunk-size --overlap --batch-size --text-field --extensions --ignore --create-index --dry-run --json --quiet --help" -- "\$cur") )
123
+ return 0
124
+ ;;
109
125
  completions)
110
126
  COMPREPLY=( \$(compgen -W "bash zsh --help" -- "\$cur") )
111
127
  return 0
@@ -177,6 +193,10 @@ _vai() {
177
193
  'similarity:Compute cosine similarity between texts'
178
194
  'ingest:Bulk import documents with progress'
179
195
  'estimate:Estimate embedding costs — symmetric vs asymmetric'
196
+ 'init:Initialize project with .vai.json'
197
+ 'chunk:Chunk documents for embedding'
198
+ 'query:Search + rerank in one shot'
199
+ 'pipeline:Chunk, embed, and store documents'
180
200
  'completions:Generate shell completion scripts'
181
201
  'help:Display help for command'
182
202
  )
@@ -392,6 +412,69 @@ _vai() {
392
412
  '--json[Machine-readable JSON output]' \\
393
413
  '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
394
414
  ;;
415
+ init)
416
+ _arguments \\
417
+ '(-y --yes)'{-y,--yes}'[Accept all defaults]' \\
418
+ '--force[Overwrite existing .vai.json]' \\
419
+ '--json[Output config as JSON]' \\
420
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
421
+ ;;
422
+ chunk)
423
+ _arguments \\
424
+ '1:input:_files' \\
425
+ '(-s --strategy)'{-s,--strategy}'[Chunking strategy]:strategy:(fixed sentence paragraph recursive markdown)' \\
426
+ '(-c --chunk-size)'{-c,--chunk-size}'[Target chunk size]:size:' \\
427
+ '--overlap[Overlap between chunks]:chars:' \\
428
+ '--min-size[Minimum chunk size]:chars:' \\
429
+ '(-o --output)'{-o,--output}'[Output file]:file:_files' \\
430
+ '--text-field[Text field for JSON]:field:' \\
431
+ '--extensions[File extensions]:exts:' \\
432
+ '--ignore[Dirs to skip]:dirs:' \\
433
+ '--dry-run[Preview without processing]' \\
434
+ '--stats[Show statistics]' \\
435
+ '--json[JSON output]' \\
436
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
437
+ ;;
438
+ query)
439
+ _arguments \\
440
+ '1:query text:' \\
441
+ '--db[Database name]:database:' \\
442
+ '--collection[Collection name]:collection:' \\
443
+ '--index[Vector search index]:index:' \\
444
+ '--field[Embedding field]:field:' \\
445
+ '(-m --model)'{-m,--model}'[Embedding model]:model:(\$models)' \\
446
+ '(-d --dimensions)'{-d,--dimensions}'[Output dimensions]:dims:' \\
447
+ '(-l --limit)'{-l,--limit}'[Search candidates]:limit:' \\
448
+ '(-k --top-k)'{-k,--top-k}'[Final results]:k:' \\
449
+ '--rerank[Enable reranking]' \\
450
+ '--no-rerank[Skip reranking]' \\
451
+ '--rerank-model[Reranking model]:model:' \\
452
+ '--text-field[Document text field]:field:' \\
453
+ '--filter[Pre-filter JSON]:json:' \\
454
+ '--json[JSON output]' \\
455
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
456
+ ;;
457
+ pipeline)
458
+ _arguments \\
459
+ '1:input:_files' \\
460
+ '--db[Database name]:database:' \\
461
+ '--collection[Collection name]:collection:' \\
462
+ '--field[Embedding field]:field:' \\
463
+ '--index[Vector search index]:index:' \\
464
+ '(-m --model)'{-m,--model}'[Embedding model]:model:(\$models)' \\
465
+ '(-d --dimensions)'{-d,--dimensions}'[Output dimensions]:dims:' \\
466
+ '(-s --strategy)'{-s,--strategy}'[Chunking strategy]:strategy:(fixed sentence paragraph recursive markdown)' \\
467
+ '(-c --chunk-size)'{-c,--chunk-size}'[Chunk size]:size:' \\
468
+ '--overlap[Chunk overlap]:chars:' \\
469
+ '--batch-size[Texts per API call]:size:' \\
470
+ '--text-field[Text field for JSON]:field:' \\
471
+ '--extensions[File extensions]:exts:' \\
472
+ '--ignore[Dirs to skip]:dirs:' \\
473
+ '--create-index[Auto-create vector index]' \\
474
+ '--dry-run[Preview without executing]' \\
475
+ '--json[JSON output]' \\
476
+ '(-q --quiet)'{-q,--quiet}'[Suppress non-essential output]'
477
+ ;;
395
478
  completions)
396
479
  _arguments \\
397
480
  '1:shell:(bash zsh)'
@@ -0,0 +1,153 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+ const readline = require('readline');
6
+ const { MODEL_CATALOG } = require('../lib/catalog');
7
+ const { STRATEGIES } = require('../lib/chunker');
8
+ const { defaultProjectConfig, saveProject, findProjectFile, PROJECT_FILE } = require('../lib/project');
9
+ const ui = require('../lib/ui');
10
+
11
/**
 * Ask a single question on a readline interface.
 * The default, when given, is shown dimmed in parentheses after the question
 * and is returned when the user submits a blank answer.
 * @param {readline.Interface} rl
 * @param {string} question
 * @param {string} [defaultVal]
 * @returns {Promise<string>} Trimmed answer, else the default, else an empty string
 */
function ask(rl, question, defaultVal) {
  let hint = '';
  if (defaultVal) {
    hint = ` ${ui.dim(`(${defaultVal})`)}`;
  }
  const prompt = ` ${question}${hint}: `;

  return new Promise((resolve) => {
    rl.question(prompt, (answer) => {
      const typed = answer.trim();
      if (typed) {
        resolve(typed);
        return;
      }
      resolve(defaultVal || '');
    });
  });
}
26
+
27
/**
 * Prompt for a choice from a list.
 *
 * Prints the choices as a numbered menu (the default is marked with an arrow),
 * then accepts either a menu position or the literal choice value. Any other
 * answer falls back to the default.
 *
 * @param {readline.Interface} rl
 * @param {string} question
 * @param {string[]} choices
 * @param {string} defaultVal
 * @returns {Promise<string>}
 */
async function askChoice(rl, question, choices, defaultVal) {
  console.log('');
  choices.forEach((choice, i) => {
    const marker = choice === defaultVal ? ui.cyan('→') : ' ';
    console.log(` ${marker} ${i + 1}. ${choice}`);
  });

  const answer = await ask(rl, question, defaultVal);

  // Only an all-digit answer counts as a menu position. A bare parseInt would
  // also accept input such as "2x" or "2.7" and silently pick option 2.
  if (/^\d+$/.test(answer)) {
    const position = Number.parseInt(answer, 10);
    if (position >= 1 && position <= choices.length) {
      return choices[position - 1];
    }
  }

  if (choices.includes(answer)) return answer;
  return defaultVal;
}
48
+
49
/**
 * Ask for an integer, falling back to the default on unusable input.
 * @param {readline.Interface} rl
 * @param {string} question
 * @param {number} fallback - Value offered as the default and used when the answer is invalid
 * @param {number} min - Smallest accepted value
 * @returns {Promise<number>}
 */
async function askInt(rl, question, fallback, min) {
  const parsed = Number.parseInt(await ask(rl, question, String(fallback)), 10);
  return Number.isInteger(parsed) && parsed >= min ? parsed : fallback;
}

/**
 * Register the init command on a Commander program.
 *
 * The action refuses to run when a project file already exists unless --force
 * is given. With --yes or --json it saves the default project config without
 * prompting; otherwise it walks the user through model, MongoDB, dimension and
 * chunking questions and saves the answers.
 *
 * @param {import('commander').Command} program
 */
function registerInit(program) {
  program
    .command('init')
    .description('Initialize a project with .vai.json configuration')
    .option('-y, --yes', 'Accept all defaults (non-interactive)')
    .option('--force', 'Overwrite existing .vai.json')
    .option('--json', 'Output created config as JSON (non-interactive)')
    .option('-q, --quiet', 'Suppress non-essential output')
    .action(async (opts) => {
      // Check for existing config
      const existing = findProjectFile();
      if (existing && !opts.force) {
        const relPath = path.relative(process.cwd(), existing);
        console.error(ui.warn(`Project already initialized: ${relPath}`));
        console.error(ui.dim(' Use --force to overwrite.'));
        process.exit(1);
      }

      const defaults = defaultProjectConfig();
      let rl = null;

      try {
        // Non-interactive mode
        if (opts.yes || opts.json) {
          saveProject(defaults);
          if (opts.json) {
            console.log(JSON.stringify(defaults, null, 2));
          } else if (!opts.quiet) {
            console.log(ui.success(`Created ${PROJECT_FILE}`));
          }
          return;
        }

        // Interactive mode
        console.log('');
        console.log(ui.bold(' 🚀 Initialize Voyage AI Project'));
        console.log(ui.dim(' Creates .vai.json in the current directory.'));
        console.log(ui.dim(' Press Enter to accept defaults.'));
        console.log('');

        rl = readline.createInterface({
          input: process.stdin,
          output: process.stdout,
        });

        // Embedding model: offer only current, released embedding models.
        const embeddingModels = MODEL_CATALOG
          .filter(m => m.type === 'embedding' && !m.legacy && !m.unreleased)
          .map(m => m.name);
        const model = await askChoice(rl, 'Embedding model', embeddingModels, defaults.model);

        // MongoDB settings
        console.log('');
        console.log(ui.bold(' MongoDB Atlas'));
        const db = await ask(rl, 'Database name', defaults.db || 'myapp');
        const collection = await ask(rl, 'Collection name', defaults.collection || 'documents');
        const field = await ask(rl, 'Embedding field', defaults.field);
        const index = await ask(rl, 'Vector index name', defaults.index);

        // Dimensions: suggest 1024 when the model's catalog entry lists it, else 512.
        const modelInfo = MODEL_CATALOG.find(m => m.name === model);
        const defaultDims = modelInfo && modelInfo.dimensions.includes('1024') ? '1024' : '512';
        const dimensions = parseInt(await ask(rl, 'Dimensions', defaultDims), 10) || parseInt(defaultDims, 10);

        // Chunking: non-numeric answers fall back to the defaults, so NaN is
        // never serialized into the config file as null.
        console.log('');
        console.log(ui.bold(' Chunking'));
        const strategy = await askChoice(rl, 'Chunk strategy', STRATEGIES, defaults.chunk.strategy);
        const chunkSize = await askInt(rl, 'Chunk size (chars)', defaults.chunk.size, 1);
        const chunkOverlap = await askInt(rl, 'Chunk overlap (chars)', defaults.chunk.overlap, 0);

        const config = {
          model,
          db,
          collection,
          field,
          inputType: 'document',
          dimensions,
          index,
          chunk: {
            strategy,
            size: chunkSize,
            overlap: chunkOverlap,
          },
        };

        const filePath = saveProject(config);
        console.log('');
        console.log(ui.success(`Created ${path.relative(process.cwd(), filePath)}`));
        console.log('');
        console.log(ui.dim(' Next steps:'));
        console.log(ui.dim(' vai chunk ./docs/ # Chunk your documents'));
        // The pipeline command is registered in the CLI, so it is no longer "coming soon".
        console.log(ui.dim(' vai pipeline ./docs/ # Chunk → embed → store'));
        console.log(ui.dim(' vai search --query "..." # Search your collection'));
        console.log('');
      } catch (err) {
        // Report failures (e.g. an unwritable config file) the same way the
        // chunk command does, instead of surfacing an unhandled rejection.
        if (rl) rl.close();
        console.error(ui.error(err.message));
        process.exit(1);
      } finally {
        if (rl) rl.close();
      }
    });
}
152
+
153
+ module.exports = { registerInit };