voyageai-cli 1.30.1 → 1.30.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +4 -4
  2. package/package.json +1 -1
  3. package/src/cli.js +2 -0
  4. package/src/commands/about.js +3 -3
  5. package/src/commands/code-search.js +751 -0
  6. package/src/commands/doctor.js +1 -1
  7. package/src/commands/index-workspace.js +9 -5
  8. package/src/commands/playground.js +9 -1
  9. package/src/commands/quickstart.js +4 -4
  10. package/src/commands/workflow.js +132 -65
  11. package/src/lib/catalog.js +4 -2
  12. package/src/lib/code-search.js +315 -0
  13. package/src/lib/codegen.js +1 -1
  14. package/src/lib/explanations.js +3 -3
  15. package/src/lib/github.js +226 -0
  16. package/src/lib/template-engine.js +154 -20
  17. package/src/lib/workflow-builder.js +753 -0
  18. package/src/lib/workflow-formatters.js +454 -0
  19. package/src/lib/workflow-input-cache.js +111 -0
  20. package/src/lib/workflow-scaffold.js +1 -1
  21. package/src/lib/workflow.js +91 -1
  22. package/src/mcp/schemas/index.js +130 -0
  23. package/src/mcp/server.js +17 -4
  24. package/src/mcp/tools/authoring.js +662 -0
  25. package/src/mcp/tools/code-search.js +620 -0
  26. package/src/mcp/tools/ingest.js +2 -5
  27. package/src/mcp/tools/retrieval.js +2 -15
  28. package/src/mcp/tools/workspace.js +1 -12
  29. package/src/mcp/utils.js +20 -0
  30. package/src/playground/help/workflow-nodes.js +127 -2
  31. package/src/playground/index.html +1366 -24
  32. package/src/workflows/code-review.json +110 -0
  33. package/src/workflows/cost-analysis.json +5 -0
  34. package/src/workflows/tests/code-review.fresh-index.test.json +83 -0
  35. package/src/workflows/tests/code-review.happy-path.test.json +121 -0
  36. package/src/workflows/tests/code-review.no-question.test.json +70 -0
  37. package/src/workflows/tests/smart-ingest.duplicate-detected.test.json +2 -2
@@ -16,6 +16,7 @@ const {
// Tool identifiers grouped as VAI tools. The five `code_*` names are the
// code-search tools; iteration order follows the listing order below.
const VAI_TOOLS = new Set([
  'query',
  'search',
  'rerank',
  'embed',
  'similarity',
  'ingest',
  'collections',
  'models',
  'explain',
  'estimate',
  'code_index',
  'code_search',
  'code_query',
  'code_find_similar',
  'code_status',
]);

// Tool identifiers grouped as control-flow tools.
const CONTROL_FLOW_TOOLS = new Set([
  'merge',
  'filter',
  'transform',
  'generate',
  'conditional',
  'loop',
  'template',
]);
@@ -98,6 +99,25 @@ function validateWorkflow(definition, { mode = 'strict' } = {}) {
98
99
  }
99
100
  }
100
101
 
102
+ // Validate formatters section (optional)
103
+ if (definition.formatters) {
104
+ if (typeof definition.formatters !== 'object' || Array.isArray(definition.formatters)) {
105
+ addIssue('error', null, 'INVALID_FORMATTERS', '"formatters" must be a plain object');
106
+ } else {
107
+ const f = definition.formatters;
108
+ const validFormats = ['json', 'table', 'markdown', 'text', 'csv'];
109
+ if (f.default && !validFormats.includes(f.default)) {
110
+ addIssue('warning', null, 'INVALID_FORMATTER_DEFAULT', `formatters.default "${f.default}" is not a recognized format (${validFormats.join(', ')})`);
111
+ }
112
+ if (f.columns && !Array.isArray(f.columns)) {
113
+ addIssue('error', null, 'INVALID_FORMATTER_COLUMNS', 'formatters.columns must be an array of strings');
114
+ }
115
+ if (f.title && typeof f.title !== 'string') {
116
+ addIssue('error', null, 'INVALID_FORMATTER_TITLE', 'formatters.title must be a string');
117
+ }
118
+ }
119
+ }
120
+
101
121
  // Step-level validation
102
122
  const stepIds = new Set();
103
123
  const duplicateIds = new Set();
@@ -1576,6 +1596,55 @@ async function executeGenerate(inputs) {
1576
1596
  };
1577
1597
  }
1578
1598
 
1599
+ // ════════════════════════════════════════════════════════════════════
1600
+ // Code Search Tool Executors
1601
+ // ════════════════════════════════════════════════════════════════════
1602
+
/**
 * Workflow executor for `code_index` steps: index a local directory or
 * GitHub repo.
 *
 * Forwards the inputs to `handleCodeIndex` from the MCP code-search tools
 * module and unwraps the `structuredContent` field of the awaited result.
 *
 * @param {object} inputs - Step inputs, passed to the handler unchanged.
 * @returns {Promise<*>} The `structuredContent` property of the handler result.
 */
async function executeCodeIndex(inputs) {
  // The handler module is required at call time, not at module load.
  const { handleCodeIndex } = require('../mcp/tools/code-search');
  const { structuredContent } = await handleCodeIndex(inputs);
  return structuredContent;
}
1611
+
/**
 * Workflow executor for `code_search` steps: semantic search across an
 * indexed codebase.
 *
 * Forwards the inputs to `handleCodeSearch` from the MCP code-search tools
 * module and unwraps the `structuredContent` field of the awaited result.
 *
 * @param {object} inputs - Step inputs, passed to the handler unchanged.
 * @returns {Promise<*>} The `structuredContent` property of the handler result.
 */
async function executeCodeSearch(inputs) {
  // The handler module is required at call time, not at module load.
  const { handleCodeSearch } = require('../mcp/tools/code-search');
  const { structuredContent } = await handleCodeSearch(inputs);
  return structuredContent;
}
1620
+
/**
 * Workflow executor for `code_query` steps: RAG query against indexed code.
 *
 * Forwards the inputs to `handleCodeQuery` from the MCP code-search tools
 * module and unwraps the `structuredContent` field of the awaited result.
 *
 * @param {object} inputs - Step inputs, passed to the handler unchanged.
 * @returns {Promise<*>} The `structuredContent` property of the handler result.
 */
async function executeCodeQuery(inputs) {
  // The handler module is required at call time, not at module load.
  const { handleCodeQuery } = require('../mcp/tools/code-search');
  const { structuredContent } = await handleCodeQuery(inputs);
  return structuredContent;
}
1629
+
/**
 * Workflow executor for `code_find_similar` steps: find code similar to a
 * snippet.
 *
 * Forwards the inputs to `handleCodeFindSimilar` from the MCP code-search
 * tools module and unwraps the `structuredContent` field of the awaited result.
 *
 * @param {object} inputs - Step inputs, passed to the handler unchanged.
 * @returns {Promise<*>} The `structuredContent` property of the handler result.
 */
async function executeCodeFindSimilar(inputs) {
  // The handler module is required at call time, not at module load.
  const { handleCodeFindSimilar } = require('../mcp/tools/code-search');
  const { structuredContent } = await handleCodeFindSimilar(inputs);
  return structuredContent;
}
1638
+
/**
 * Workflow executor for `code_status` steps: check index health.
 *
 * Forwards the inputs to `handleCodeStatus` from the MCP code-search tools
 * module and unwraps the `structuredContent` field of the awaited result.
 *
 * @param {object} inputs - Step inputs, passed to the handler unchanged.
 * @returns {Promise<*>} The `structuredContent` property of the handler result.
 */
async function executeCodeStatus(inputs) {
  // The handler module is required at call time, not at module load.
  const { handleCodeStatus } = require('../mcp/tools/code-search');
  const { structuredContent } = await handleCodeStatus(inputs);
  return structuredContent;
}
1647
+
1579
1648
  // ════════════════════════════════════════════════════════════════════
1580
1649
  // Step Dispatcher
1581
1650
  // ════════════════════════════════════════════════════════════════════
@@ -1635,6 +1704,18 @@ async function executeStep(step, resolvedInputs, defaults, context) {
1635
1704
  case 'estimate':
1636
1705
  return executeEstimate(resolvedInputs);
1637
1706
 
1707
+ // Code search tools
1708
+ case 'code_index':
1709
+ return executeCodeIndex(resolvedInputs);
1710
+ case 'code_search':
1711
+ return executeCodeSearch(resolvedInputs);
1712
+ case 'code_query':
1713
+ return executeCodeQuery(resolvedInputs);
1714
+ case 'code_find_similar':
1715
+ return executeCodeFindSimilar(resolvedInputs);
1716
+ case 'code_status':
1717
+ return executeCodeStatus(resolvedInputs);
1718
+
1638
1719
  default:
1639
1720
  throw new Error(`Unknown tool: "${step.tool}"`);
1640
1721
  }
@@ -1867,6 +1948,7 @@ async function executeWorkflow(definition, opts = {}) {
1867
1948
  steps: stepResults,
1868
1949
  totalTimeMs: Date.now() - startTime,
1869
1950
  layers,
1951
+ formatters: definition.formatters || null,
1870
1952
  };
1871
1953
  }
1872
1954
 
@@ -1894,9 +1976,10 @@ function coerceInput(value, type) {
1894
1976
  * prompt users for missing inputs before execution.
1895
1977
  *
1896
1978
  * @param {object} definition - Workflow definition with an `inputs` property
1979
+ * @param {object} [cachedInputs] - Previously cached input values (from last run)
1897
1980
  * @returns {import('./wizard').Step[]}
1898
1981
  */
1899
- function buildInputSteps(definition) {
1982
+ function buildInputSteps(definition, cachedInputs = {}) {
1900
1983
  if (!definition.inputs) return [];
1901
1984
  return Object.entries(definition.inputs).map(([key, spec]) => ({
1902
1985
  id: key,
@@ -1905,6 +1988,10 @@ function buildInputSteps(definition) {
1905
1988
  required: !!spec.required,
1906
1989
  placeholder: spec.type === 'number' ? 'number' : (spec.type || 'string'),
1907
1990
  defaultValue: spec.default !== undefined ? String(spec.default) : undefined,
1991
+ getDefault: () => {
1992
+ if (key in cachedInputs) return String(cachedInputs[key]);
1993
+ return spec.default !== undefined ? String(spec.default) : undefined;
1994
+ },
1908
1995
  validate: (val) => {
1909
1996
  if (spec.type === 'number' && val && isNaN(Number(val))) {
1910
1997
  return 'Must be a number';
@@ -2063,6 +2150,9 @@ module.exports = {
2063
2150
  executeIngest,
2064
2151
  executeAggregate,
2065
2152
 
2153
+ // Dependency graph
2154
+ buildDependencyGraph,
2155
+
2066
2156
  // Main execution
2067
2157
  executeStep,
2068
2158
  executeWorkflow,
@@ -124,6 +124,129 @@ const explainCodeSchema = {
124
124
  model: z.string().optional().describe('Voyage AI embedding model'),
125
125
  };
126
126
 
/**
 * Input schema for the vai_code_index tool.
 *
 * A plain object mapping each accepted field to its zod validator. Only
 * `source` is mandatory; `db`, `collection` and `model` are optional, and
 * every other field carries a default.
 */
const codeIndexSchema = {
  source: z
    .string()
    .min(1)
    .describe('Local directory path or GitHub repo URL (e.g., "/path/to/project" or "https://github.com/org/repo")'),
  db: z
    .string()
    .optional()
    .describe('MongoDB database name. Default: "vai_code_search"'),
  collection: z
    .string()
    .optional()
    .describe('Collection name. Auto-derived from project name if omitted.'),
  model: z
    .string()
    .optional()
    .describe('Embedding model. Default: auto-detected (voyage-code-3 for code, voyage-4-large for docs)'),
  branch: z
    .string()
    .default('main')
    .describe('Git branch for remote repos'),

  // Numeric limits: each is an integer with an inclusive range and a default.
  maxFiles: z.number().int().min(1).max(10000).default(5000).describe('Maximum files to index'),
  maxFileSize: z.number().int().min(1000).max(1000000).default(100000).describe('Maximum file size in bytes'),
  chunkSize: z.number().int().min(100).max(4000).default(512).describe('Target chunk size in characters'),
  chunkOverlap: z.number().int().min(0).max(500).default(50).describe('Overlap between chunks in characters'),
  batchSize: z.number().int().min(1).max(50).default(20).describe('Files per embedding batch'),

  refresh: z
    .boolean()
    .default(false)
    .describe('Incremental refresh: only re-index changed files'),
  contentType: z
    .enum(['code', 'docs', 'config', 'all'])
    .default('code')
    .describe('Type of content to index'),
};
155
+
/**
 * Input schema for the vai_code_search tool.
 *
 * A plain object mapping each accepted field to its zod validator. `query`
 * is the only mandatory field; `limit`, `rerank` and `rerankModel` have
 * defaults and the remaining fields are optional.
 */
const codeSearchSchema = {
  query: z
    .string()
    .min(1)
    .max(5000)
    .describe('Natural language search query (e.g., "where do we handle auth timeouts")'),
  db: z.string().optional().describe('MongoDB database name'),
  collection: z.string().optional().describe('Collection with indexed code'),
  limit: z
    .number()
    .int()
    .min(1)
    .max(50)
    .default(10)
    .describe('Maximum number of results'),
  language: z
    .string()
    .optional()
    .describe('Filter by programming language (e.g., "js", "py", "go")'),
  category: z
    .enum(['code', 'docs', 'config'])
    .optional()
    .describe('Filter by content category'),
  rerank: z
    .boolean()
    .default(true)
    .describe('Rerank results with Voyage AI reranker for better relevance'),
  rerankModel: z
    .enum(['rerank-2.5', 'rerank-2.5-lite'])
    .default('rerank-2.5')
    .describe('Reranking model'),
  model: z
    .string()
    .optional()
    .describe('Embedding model for query. Default: voyage-code-3'),
  filter: z
    .record(z.string(), z.unknown())
    .optional()
    .describe('Additional MongoDB filter on metadata fields'),
};
178
+
/**
 * Input schema for the vai_code_query tool.
 *
 * A plain object mapping each accepted field to its zod validator. `query`
 * is mandatory, `limit` defaults to 5, and the remaining fields are optional.
 */
const codeQuerySchema = {
  query: z
    .string()
    .min(1)
    .max(5000)
    .describe('Question about the codebase (e.g., "how does the auth middleware work")'),
  db: z.string().optional().describe('MongoDB database name'),
  collection: z.string().optional().describe('Collection with indexed code'),
  limit: z
    .number()
    .int()
    .min(1)
    .max(20)
    .default(5)
    .describe('Maximum results (fewer, higher quality)'),
  language: z.string().optional().describe('Filter by programming language'),
  model: z.string().optional().describe('Embedding model. Default: voyage-code-3'),
  filter: z
    .record(z.string(), z.unknown())
    .optional()
    .describe('Additional MongoDB filter'),
};
195
+
/**
 * Input schema for the vai_code_find_similar tool.
 *
 * A plain object mapping each accepted field to its zod validator. `code`
 * is mandatory; `limit` and `threshold` have defaults and the remaining
 * fields are optional.
 */
const codeFindSimilarSchema = {
  code: z
    .string()
    .min(1)
    .max(10000)
    .describe('Code snippet to find similar implementations for'),
  db: z.string().optional().describe('MongoDB database name'),
  collection: z.string().optional().describe('Collection with indexed code'),
  limit: z
    .number()
    .int()
    .min(1)
    .max(50)
    .default(10)
    .describe('Maximum results'),
  language: z.string().optional().describe('Filter by programming language'),
  model: z.string().optional().describe('Embedding model. Default: voyage-code-3'),
  // Not an integer: any number in the inclusive range 0..1 is accepted.
  threshold: z
    .number()
    .min(0)
    .max(1)
    .default(0.5)
    .describe('Minimum similarity score (0-1)'),
  filter: z
    .record(z.string(), z.unknown())
    .optional()
    .describe('Additional MongoDB filter'),
};
214
+
/**
 * Input schema for the vai_code_status tool.
 *
 * Both fields are optional strings; no field is mandatory.
 */
const codeStatusSchema = {
  db: z
    .string()
    .optional()
    .describe('MongoDB database name'),
  collection: z
    .string()
    .optional()
    .describe('Collection to check'),
};
220
+
/**
 * Input schema for the vai_generate_workflow tool.
 *
 * `description` is mandatory (1 to 500 characters); `category` and `tools`
 * are optional.
 */
const generateWorkflowSchema = {
  description: z
    .string()
    .min(1)
    .max(500)
    .describe('Natural language description of the workflow to generate'),
  category: z
    .enum(['retrieval', 'analysis', 'ingestion', 'domain-specific', 'utility', 'integration'])
    .optional()
    .describe('Workflow category'),
  tools: z
    .array(z.string())
    .optional()
    .describe('Explicit list of tools to include (e.g., ["query", "rerank", "generate"]). If omitted, tools are inferred from the description.'),
};
229
+
/**
 * Input schema for the vai_validate_workflow tool.
 *
 * The single `workflow` field describes the workflow JSON definition. Only
 * `steps` is mandatory, and each step needs a string `id` and `tool`.
 *
 * `formatters` is declared explicitly because `z.object` drops keys it does
 * not declare. Without the entry, a definition's `formatters` section would
 * be stripped by this schema and could not be checked by the workflow
 * validator's formatters rules.
 * NOTE(review): this assumes the tool passes the parsed `workflow` object on
 * to `validateWorkflow` - confirm in the authoring tool.
 */
const validateWorkflowSchema = {
  workflow: z.object({
    name: z.string().optional(),
    description: z.string().optional(),
    version: z.string().optional(),
    inputs: z.record(z.string(), z.unknown()).optional(),
    defaults: z.record(z.string(), z.unknown()).optional(),
    steps: z.array(z.object({
      id: z.string(),
      tool: z.string(),
      name: z.string().optional(),
      inputs: z.record(z.string(), z.unknown()).optional(),
      condition: z.string().optional(),
      forEach: z.string().optional(),
    })),
    output: z.record(z.string(), z.unknown()).optional(),
    // Optional output-formatting hints (default / columns / title).
    formatters: z.record(z.string(), z.unknown()).optional(),
  }).describe('The workflow JSON definition to validate'),
};
249
+
127
250
  module.exports = {
128
251
  querySchema,
129
252
  searchSchema,
@@ -139,4 +262,11 @@ module.exports = {
139
262
  indexWorkspaceSchema,
140
263
  searchCodeSchema,
141
264
  explainCodeSchema,
265
+ codeIndexSchema,
266
+ codeSearchSchema,
267
+ codeQuerySchema,
268
+ codeFindSimilarSchema,
269
+ codeStatusSchema,
270
+ generateWorkflowSchema,
271
+ validateWorkflowSchema,
142
272
  };
package/src/mcp/server.js CHANGED
@@ -9,6 +9,8 @@ const { registerManagementTools } = require('./tools/management');
9
9
  const { registerUtilityTools } = require('./tools/utility');
10
10
  const { registerIngestTool } = require('./tools/ingest');
11
11
  const { registerWorkspaceTools } = require('./tools/workspace');
12
+ const { registerCodeSearchTools } = require('./tools/code-search');
13
+ const { registerAuthoringTools } = require('./tools/authoring');
12
14
 
13
15
  const VERSION = require('../../package.json').version;
14
16
 
@@ -29,6 +31,8 @@ function createServer() {
29
31
  registerUtilityTools(server, schemas);
30
32
  registerIngestTool(server, schemas);
31
33
  registerWorkspaceTools(server, schemas);
34
+ registerCodeSearchTools(server, schemas);
35
+ registerAuthoringTools(server, schemas);
32
36
 
33
37
  return server;
34
38
  }
@@ -68,7 +72,7 @@ async function runHttpServer({ port = 3100, host = '127.0.0.1', sse = false } =
68
72
  const allKeys = envKey ? [...serverKeys, envKey] : serverKeys;
69
73
  const requireAuth = allKeys.length > 0;
70
74
 
71
- /** Bearer token authentication middleware */
75
+ /** Bearer token authentication middleware (timing-safe comparison) */
72
76
  function authenticateRequest(req, res, next) {
73
77
  if (!requireAuth) return next();
74
78
  const authHeader = req.headers.authorization;
@@ -76,7 +80,13 @@ async function runHttpServer({ port = 3100, host = '127.0.0.1', sse = false } =
76
80
  return res.status(401).json({ error: 'Missing or invalid Authorization header' });
77
81
  }
78
82
  const token = authHeader.slice(7);
79
- if (!allKeys.includes(token)) {
83
+ const tokenBuf = Buffer.from(token);
84
+ const match = allKeys.some(key => {
85
+ const keyBuf = Buffer.from(key);
86
+ if (keyBuf.length !== tokenBuf.length) return false;
87
+ return crypto.timingSafeEqual(keyBuf, tokenBuf);
88
+ });
89
+ if (!match) {
80
90
  return res.status(401).json({ error: 'Invalid API key' });
81
91
  }
82
92
  next();
@@ -95,7 +105,6 @@ async function runHttpServer({ port = 3100, host = '127.0.0.1', sse = false } =
95
105
 
96
106
  // Check Voyage AI connectivity
97
107
  try {
98
- const { getConfigValue } = require('../lib/config');
99
108
  const hasKey = !!(process.env.VOYAGE_API_KEY || getConfigValue('apiKey'));
100
109
  health.voyageAi = hasKey ? 'configured' : 'not configured';
101
110
  } catch {
@@ -104,7 +113,6 @@ async function runHttpServer({ port = 3100, host = '127.0.0.1', sse = false } =
104
113
 
105
114
  // Check MongoDB connectivity
106
115
  try {
107
- const { getConfigValue } = require('../lib/config');
108
116
  const hasUri = !!(process.env.MONGODB_URI || getConfigValue('mongodbUri'));
109
117
  health.mongodb = hasUri ? 'configured' : 'not configured';
110
118
  } catch {
@@ -175,3 +183,8 @@ function generateKey() {
175
183
  }
176
184
 
177
185
  module.exports = { createServer, runStdioServer, runHttpServer, generateKey };
186
+
// Entry-point guard: when this file is the script Node was started with
// (e.g. `node src/mcp/server.js`) rather than a module loaded through
// require(), start the stdio server.
if (module === require.main) {
  runStdioServer();
}