voyageai-cli 1.28.0 → 1.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +82 -8
  2. package/package.json +2 -1
  3. package/src/commands/app.js +15 -0
  4. package/src/commands/benchmark.js +22 -8
  5. package/src/commands/chat.js +18 -0
  6. package/src/commands/chunk.js +10 -0
  7. package/src/commands/demo.js +4 -0
  8. package/src/commands/embed.js +13 -0
  9. package/src/commands/estimate.js +3 -0
  10. package/src/commands/eval.js +6 -0
  11. package/src/commands/explain.js +2 -0
  12. package/src/commands/generate.js +2 -0
  13. package/src/commands/ingest.js +4 -0
  14. package/src/commands/init.js +2 -0
  15. package/src/commands/mcp-server.js +2 -0
  16. package/src/commands/models.js +2 -0
  17. package/src/commands/ping.js +7 -0
  18. package/src/commands/pipeline.js +15 -0
  19. package/src/commands/playground.js +685 -8
  20. package/src/commands/query.js +16 -0
  21. package/src/commands/rerank.js +12 -0
  22. package/src/commands/scaffold.js +2 -0
  23. package/src/commands/search.js +11 -0
  24. package/src/commands/similarity.js +9 -0
  25. package/src/commands/store.js +4 -0
  26. package/src/commands/workflow.js +702 -13
  27. package/src/lib/capability-report.js +134 -0
  28. package/src/lib/chat.js +32 -1
  29. package/src/lib/config.js +2 -0
  30. package/src/lib/cost-display.js +107 -0
  31. package/src/lib/explanations.js +94 -0
  32. package/src/lib/llm.js +125 -18
  33. package/src/lib/npm-utils.js +265 -0
  34. package/src/lib/quality-audit.js +71 -0
  35. package/src/lib/security/blocked-domains.json +17 -0
  36. package/src/lib/security-audit.js +198 -0
  37. package/src/lib/telemetry.js +23 -1
  38. package/src/lib/workflow-registry.js +416 -0
  39. package/src/lib/workflow-scaffold.js +380 -0
  40. package/src/lib/workflow-test-runner.js +208 -0
  41. package/src/lib/workflow.js +559 -7
  42. package/src/playground/announcements.md +80 -0
  43. package/src/playground/assets/announcements/appstore.jpg +0 -0
  44. package/src/playground/assets/announcements/circuits.jpg +0 -0
  45. package/src/playground/assets/announcements/csvingest.jpg +0 -0
  46. package/src/playground/assets/announcements/green-wave.jpg +0 -0
  47. package/src/playground/help/workflow-nodes.js +472 -0
  48. package/src/playground/icons/V.png +0 -0
  49. package/src/playground/index.html +3634 -226
  50. package/src/workflows/consistency-check.json +4 -0
  51. package/src/workflows/cost-analysis.json +4 -0
  52. package/src/workflows/enrich-and-ingest.json +56 -0
  53. package/src/workflows/intelligent-ingest.json +66 -0
  54. package/src/workflows/kb-health-report.json +45 -0
  55. package/src/workflows/multi-collection-search.json +4 -0
  56. package/src/workflows/research-and-summarize.json +4 -0
  57. package/src/workflows/search-with-fallback.json +66 -0
  58. package/src/workflows/smart-ingest.json +4 -0
@@ -18,9 +18,26 @@ const VAI_TOOLS = new Set([
18
18
  'ingest', 'collections', 'models', 'explain', 'estimate',
19
19
  ]);
20
20
 
21
- const CONTROL_FLOW_TOOLS = new Set(['merge', 'filter', 'transform', 'generate']);
21
+ const CONTROL_FLOW_TOOLS = new Set(['merge', 'filter', 'transform', 'generate', 'conditional', 'loop', 'template']);
22
22
 
23
- const ALL_TOOLS = new Set([...VAI_TOOLS, ...CONTROL_FLOW_TOOLS]);
23
+ const PROCESSING_TOOLS = new Set(['chunk', 'aggregate']);
24
+
25
+ const INTEGRATION_TOOLS = new Set(['http']);
26
+
27
+ const ALL_TOOLS = new Set([...VAI_TOOLS, ...CONTROL_FLOW_TOOLS, ...PROCESSING_TOOLS, ...INTEGRATION_TOOLS]);
28
+
29
+ // ════════════════════════════════════════════════════════════════════
30
+ // Schema Limits (Phase 1 Enhanced Validation)
31
+ // ════════════════════════════════════════════════════════════════════
32
+
33
+ const SCHEMA_LIMITS = {
34
+ maxSteps: 50,
35
+ maxInputs: 20,
36
+ maxTemplateDepth: 5,
37
+ maxNameLength: 64,
38
+ maxDescriptionLength: 500,
39
+ maxStepNameLength: 100,
40
+ };
24
41
 
25
42
  // ════════════════════════════════════════════════════════════════════
26
43
  // Validation
@@ -52,8 +69,8 @@ function validateWorkflow(definition) {
52
69
  // Validate inputs schema
53
70
  if (definition.inputs) {
54
71
  for (const [key, schema] of Object.entries(definition.inputs)) {
55
- if (schema.type && !['string', 'number', 'boolean'].includes(schema.type)) {
56
- errors.push(`Input "${key}" has invalid type "${schema.type}" (must be string, number, or boolean)`);
72
+ if (schema.type && !['string', 'number', 'boolean', 'array'].includes(schema.type)) {
73
+ errors.push(`Input "${key}" has invalid type "${schema.type}" (must be string, number, boolean, or array)`);
57
74
  }
58
75
  }
59
76
  }
@@ -94,10 +111,22 @@ function validateWorkflow(definition) {
94
111
  // Check template references point to known step IDs or reserved prefixes
95
112
  // "item" and "index" are injected by forEach at runtime
96
113
  const forEachVars = step.forEach ? new Set(['item', 'index']) : new Set();
114
+ // For loop nodes, the "as" variable and inline step refs are scoped
115
+ const loopVars = new Set();
116
+ if (step.tool === 'loop' && step.inputs) {
117
+ if (step.inputs.as) loopVars.add(step.inputs.as);
118
+ loopVars.add('item');
119
+ loopVars.add('index');
120
+ }
97
121
  if (step.inputs) {
98
- const deps = extractDependencies(step.inputs);
122
+ // For loop nodes, only check dependencies on top-level inputs (items, as, maxIterations)
123
+ // not on the inline step's inputs which may reference the loop variable
124
+ const inputsToCheck = step.tool === 'loop'
125
+ ? { items: step.inputs.items }
126
+ : step.inputs;
127
+ const deps = extractDependencies(inputsToCheck);
99
128
  for (const dep of deps) {
100
- if (!forEachVars.has(dep) && !stepIds.has(dep) && !definition.steps.some(s => s.id === dep)) {
129
+ if (!forEachVars.has(dep) && !loopVars.has(dep) && !stepIds.has(dep) && !definition.steps.some(s => s.id === dep)) {
101
130
  errors.push(`${stepPrefix}: references unknown step "${dep}"`);
102
131
  }
103
132
  }
@@ -119,6 +148,44 @@ function validateWorkflow(definition) {
119
148
  errors.push(`Duplicate step id: "${id}"`);
120
149
  }
121
150
 
151
+ // Validate conditional branch references
152
+ for (const step of definition.steps) {
153
+ if (step.tool === 'conditional' && step.inputs) {
154
+ const branches = ['then', 'else'];
155
+ for (const branch of branches) {
156
+ const refs = step.inputs[branch];
157
+ if (refs && Array.isArray(refs)) {
158
+ for (const ref of refs) {
159
+ if (!stepIds.has(ref)) {
160
+ errors.push(`Step "${step.id}": conditional ${branch} references unknown step "${ref}"`);
161
+ }
162
+ }
163
+ }
164
+ }
165
+ if (!step.inputs.condition) {
166
+ errors.push(`Step "${step.id}": conditional must have a "condition" input`);
167
+ }
168
+ if (!step.inputs.then || !Array.isArray(step.inputs.then)) {
169
+ errors.push(`Step "${step.id}": conditional must have a "then" array`);
170
+ }
171
+ }
172
+
173
+ // Validate loop inline step
174
+ if (step.tool === 'loop' && step.inputs) {
175
+ if (!step.inputs.items) {
176
+ errors.push(`Step "${step.id}": loop must have an "items" input`);
177
+ }
178
+ if (!step.inputs.as || typeof step.inputs.as !== 'string') {
179
+ errors.push(`Step "${step.id}": loop must have a string "as" input`);
180
+ }
181
+ if (!step.inputs.step || typeof step.inputs.step !== 'object') {
182
+ errors.push(`Step "${step.id}": loop must have a "step" object`);
183
+ } else if (step.inputs.step.tool && !ALL_TOOLS.has(step.inputs.step.tool)) {
184
+ errors.push(`Step "${step.id}": loop sub-step has unknown tool "${step.inputs.step.tool}"`);
185
+ }
186
+ }
187
+ }
188
+
122
189
  // Check for circular dependencies
123
190
  const cycleErrors = detectCycles(definition.steps);
124
191
  errors.push(...cycleErrors);
@@ -126,6 +193,74 @@ function validateWorkflow(definition) {
126
193
  return errors;
127
194
  }
128
195
 
/**
 * Enhanced schema validation for publishable workflows.
 *
 * Runs validateWorkflow() first, then appends additional quality-gate errors:
 * step/input count limits, name and description length checks, a semver
 * check on `version`, per-input descriptions, per-step names, and the
 * presence of a non-empty `output` section.
 *
 * @param {object} definition - Parsed workflow JSON
 * @returns {string[]} errors - empty when every check passes
 */
function validateSchemaEnhanced(definition) {
  const errors = validateWorkflow(definition);

  if (!definition || typeof definition !== 'object') return errors;

  // MAJOR.MINOR.PATCH with optional -prerelease and +build suffixes.
  // Anchored at both ends so trailing garbage ("1.2.3foo") is rejected.
  const semverPattern = /^\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?(?:\+[0-9A-Za-z.-]+)?$/;

  const { name, description, version, steps } = definition;

  // Step count limit
  if (Array.isArray(steps) && steps.length > SCHEMA_LIMITS.maxSteps) {
    errors.push(`Too many steps (${steps.length}/${SCHEMA_LIMITS.maxSteps})`);
  }

  // Input count limit
  if (definition.inputs && Object.keys(definition.inputs).length > SCHEMA_LIMITS.maxInputs) {
    errors.push(`Too many inputs (${Object.keys(definition.inputs).length}/${SCHEMA_LIMITS.maxInputs})`);
  }

  // Name length
  if (typeof name === 'string' && name.length > SCHEMA_LIMITS.maxNameLength) {
    errors.push(`Workflow name too long (${name.length}/${SCHEMA_LIMITS.maxNameLength})`);
  }

  // Description required (min 10 chars) for publishable workflows.
  // A non-string description has no meaningful length, so it counts as missing.
  const hasDescription = typeof description === 'string';
  if (!hasDescription || description.length < 10) {
    errors.push('Description must be at least 10 characters');
  }

  // Description length limit
  if (hasDescription && description.length > SCHEMA_LIMITS.maxDescriptionLength) {
    errors.push(`Description too long (${description.length}/${SCHEMA_LIMITS.maxDescriptionLength})`);
  }

  // Version is optional, but when present it must be valid semver
  if (version && !(typeof version === 'string' && semverPattern.test(version))) {
    errors.push('Version must be valid semver (e.g. 1.0.0)');
  }

  // Every input should have a description (a null/primitive spec has none)
  for (const [key, spec] of Object.entries(definition.inputs || {})) {
    if (!spec || !spec.description) {
      errors.push(`Input "${key}" missing description`);
    }
  }

  // Every step should have a human-readable name
  if (Array.isArray(steps)) {
    for (const step of steps) {
      // Tolerate malformed (null / primitive) entries instead of throwing
      const stepId = step ? step.id : undefined;
      const stepName = step ? step.name : undefined;
      if (!stepName) {
        errors.push(`Step "${stepId}" missing "name" field`);
      } else if (typeof stepName === 'string' && stepName.length > SCHEMA_LIMITS.maxStepNameLength) {
        errors.push(`Step "${stepId}" name too long (${stepName.length}/${SCHEMA_LIMITS.maxStepNameLength})`);
      }
    }
  }

  // Output section should exist
  if (!definition.output || Object.keys(definition.output).length === 0) {
    errors.push('Workflow should define an "output" section');
  }

  return errors;
}
263
+
129
264
  /**
130
265
  * Detect circular dependencies in steps using DFS.
131
266
  * @param {Array} steps
@@ -197,6 +332,18 @@ function detectCycles(steps) {
197
332
  function buildDependencyGraph(steps) {
198
333
  const graph = new Map();
199
334
 
335
+ // First pass: build index of conditional branches
336
+ // Steps referenced in then/else of a conditional depend on that conditional
337
+ const conditionalDeps = new Map(); // stepId -> conditionalStepId
338
+ for (const step of steps) {
339
+ if (step.tool === 'conditional' && step.inputs) {
340
+ const branches = [...(step.inputs.then || []), ...(step.inputs.else || [])];
341
+ for (const ref of branches) {
342
+ conditionalDeps.set(ref, step.id);
343
+ }
344
+ }
345
+ }
346
+
200
347
  for (const step of steps) {
201
348
  const deps = extractDependencies(step.inputs || {});
202
349
  if (step.condition) {
@@ -207,6 +354,10 @@ function buildDependencyGraph(steps) {
207
354
  const forDeps = extractDependencies(step.forEach);
208
355
  for (const d of forDeps) deps.add(d);
209
356
  }
357
+ // If this step is referenced by a conditional, it depends on that conditional
358
+ if (conditionalDeps.has(step.id)) {
359
+ deps.add(conditionalDeps.get(step.id));
360
+ }
210
361
  graph.set(step.id, deps);
211
362
  }
212
363
 
@@ -554,6 +705,309 @@ function executeTransform(inputs) {
554
705
  return { results, resultCount: results.length };
555
706
  }
556
707
 
/**
 * Normalise a conditional branch input to an array of step ids.
 * Falsy values become an empty list; a single non-array value is wrapped.
 *
 * @param {*} branch - raw `then` / `else` input
 * @returns {Array} list of step ids
 */
function toBranchList(branch) {
  if (!branch) return [];
  return Array.isArray(branch) ? branch : [branch];
}

/**
 * Execute a conditional step: evaluate the condition and report which
 * branch was taken.
 *
 * NOTE: This function only evaluates and reports. Per the original note on
 * this block, actually skipping the steps of the branch not taken is the
 * job of the main execution loop.
 *
 * `then` / `else` are always returned as arrays, so a caller iterating
 * `skippedSteps` never walks the characters of a single step-id string.
 *
 * @param {object} inputs - { condition: string|boolean|*, then: string[], else?: string[] }
 * @param {object} context - workflow context, passed to evaluateCondition for string conditions
 * @returns {{ conditionResult: boolean, branchTaken: string, enabledSteps: string[], skippedSteps: string[] }}
 * @throws {Error} when `condition` is missing (undefined, null, '' or NaN)
 */
function executeConditional(inputs, context) {
  const { condition } = inputs;
  const thenSteps = toBranchList(inputs.then);
  const elseSteps = toBranchList(inputs.else);

  // `false` and `0` are legitimate (falsy) conditions, not missing ones
  if (!condition && condition !== false && condition !== 0) {
    throw new Error('conditional: "condition" input is required');
  }

  // Condition may already be resolved by the template engine to a boolean
  let result;
  if (typeof condition === 'boolean') {
    result = condition;
  } else if (typeof condition === 'string') {
    result = evaluateCondition(condition, context);
  } else {
    result = Boolean(condition);
  }

  return {
    conditionResult: result,
    branchTaken: result ? 'then' : 'else',
    enabledSteps: result ? thenSteps : elseSteps,
    skippedSteps: result ? elseSteps : thenSteps,
  };
}
749
+
/**
 * Execute a template step: return the composed text and its length.
 *
 * The `text` input is stringified as-is; this function performs no template
 * substitution of its own and does not report which steps were referenced.
 *
 * @param {object} inputs - { text: * }
 * @returns {{ text: string, charCount: number }}
 * @throws {Error} when `text` is undefined or null
 */
function executeTemplate(inputs) {
  const { text } = inputs;

  // Only null/undefined count as missing; '' and 0 are valid template text
  if (text == null) {
    throw new Error('template: "text" input is required');
  }

  const composed = String(text);
  return {
    text: composed,
    charCount: composed.length,
  };
}
771
+
/**
 * Execute a loop step: iterate over an array, executing a sub-step per item.
 *
 * For each item a scoped copy of the context is built in which the item is
 * available under the `as` name and the zero-based position under
 * `_loopIndex`. The sub-step's inputs are resolved against that scoped
 * context and dispatched through executeStep. A failing iteration is
 * recorded in `errors` and does not stop the loop.
 *
 * @param {object} inputs - { items, as, step, maxIterations? }
 *   `maxIterations` defaults to 100 when undefined or null.
 * @param {object} defaults - workflow defaults, forwarded to executeStep
 * @param {object} context - workflow context
 * @returns {Promise<{ iterations: number, results: any[], errors: any[] }>}
 *   `iterations` counts the iterations that succeeded (results.length).
 *   When `items` is longer than `maxIterations`, a truncation notice is
 *   appended to `errors`.
 * @throws {Error} when `items` is not an array, `as` is not a non-empty
 *   string, `step` is not an object, or `maxIterations` is not a
 *   non-negative number
 */
async function executeLoop(inputs, defaults, context) {
  const { items, as, step: subStepDef } = inputs;

  if (!Array.isArray(items)) {
    throw new Error('loop: "items" must resolve to an array');
  }
  if (!as || typeof as !== 'string') {
    throw new Error('loop: "as" must be a string variable name');
  }
  if (!subStepDef || typeof subStepDef !== 'object') {
    throw new Error('loop: "step" must be a step definition object');
  }

  // A destructuring default only covers `undefined`; an explicit null (common
  // in JSON definitions) would otherwise coerce to 0 and skip every item.
  const maxIterations = inputs.maxIterations == null ? 100 : Number(inputs.maxIterations);
  if (Number.isNaN(maxIterations) || maxIterations < 0) {
    throw new Error('loop: "maxIterations" must be a non-negative number');
  }

  const results = [];
  const errors = [];

  const limit = Math.min(items.length, maxIterations);

  for (let i = 0; i < limit; i++) {
    const item = items[i];
    // Build scoped context with loop variable
    const scopedContext = { ...context, [as]: item, _loopIndex: i };

    try {
      // Resolve the sub-step inputs in the scoped context
      const resolvedInputs = resolveTemplate(subStepDef.inputs || {}, scopedContext);
      // Create a temporary step object for the dispatcher
      const tempStep = { id: `_loop_${i}`, tool: subStepDef.tool, inputs: subStepDef.inputs };
      const output = await executeStep(tempStep, resolvedInputs, defaults, scopedContext);
      results.push(output);
    } catch (err) {
      // The loop always continues and collects errors. Non-Error throwables
      // are stringified so the record never carries `undefined`.
      const message = err && err.message !== undefined ? err.message : String(err);
      errors.push({ index: i, error: message });
    }
  }

  if (items.length > maxIterations) {
    errors.push({ index: maxIterations, error: `Loop truncated at maxIterations (${maxIterations})` });
  }

  return {
    iterations: results.length,
    results,
    errors,
  };
}
827
+
/**
 * Execute a chunk step: split text with the chunker module and describe
 * each resulting piece.
 *
 * Every piece becomes `{ index, content, charCount, [source], metadata }`,
 * where `metadata.strategy` echoes the strategy used. For the "markdown"
 * strategy, the first markdown heading found in the piece (if any) is
 * recorded as `metadata.heading`.
 *
 * @param {object} inputs - { text, strategy?, size?, overlap?, source? }
 *   Defaults: strategy "recursive", size 512, overlap 50.
 * @returns {{ chunks: object[], totalChunks: number, strategy: string, avgChunkSize: number }}
 * @throws {Error} when `text` is missing (an empty string is accepted)
 */
function executeChunk(inputs) {
  const chunker = require('./chunker');

  const { text, source } = inputs;
  const { strategy = 'recursive', size = 512, overlap = 50 } = inputs;

  if (text !== '' && !text) {
    throw new Error('chunk: "text" input is required');
  }

  const pieces = chunker.chunk(text, { strategy, size, overlap });
  const wantsHeading = strategy === 'markdown';

  let totalChars = 0;
  const chunks = pieces.map((content, index) => {
    const entry = { index, content, charCount: content.length };
    if (source) {
      entry.source = source;
    }
    entry.metadata = { strategy };
    if (wantsHeading) {
      // First line that looks like "# Heading" inside this piece
      const found = content.match(/^#+\s+(.+)/m);
      if (found) {
        entry.metadata.heading = found[1];
      }
    }
    totalChars += entry.charCount;
    return entry;
  });

  const totalChunks = chunks.length;
  let avgChunkSize = 0;
  if (totalChunks > 0) {
    avgChunkSize = Math.round(totalChars / totalChunks);
  }

  return { chunks, totalChunks, strategy, avgChunkSize };
}
870
+
/**
 * Read a fetch Response body as UTF-8 text, buffering at most `maxBytes`
 * bytes. Reading stops (and the stream is cancelled) as soon as the cap is
 * exceeded, so an oversized response is never held in memory in full.
 *
 * @param {Response} response - fetch response whose body has not been consumed
 * @param {number} maxBytes - maximum number of body bytes to keep
 * @returns {Promise<{ text: string, truncated: boolean }>}
 */
async function readCappedText(response, maxBytes) {
  const stream = response.body;

  // Responses without a readable stream (empty body, simple test doubles)
  // fall back to buffering the text and cutting it afterwards.
  if (!stream || typeof stream.getReader !== 'function') {
    const whole = await response.text();
    return whole.length > maxBytes
      ? { text: whole.slice(0, maxBytes), truncated: true }
      : { text: whole, truncated: false };
  }

  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let text = '';
  let received = 0;
  let truncated = false;

  while (!truncated) {
    const { done, value } = await reader.read();
    if (done) break;

    const room = maxBytes - received;
    if (value.byteLength > room) {
      text += decoder.decode(value.subarray(0, room), { stream: true });
      truncated = true;
      await reader.cancel();
    } else {
      received += value.byteLength;
      text += decoder.decode(value, { stream: true });
    }
  }

  // Flush any bytes the streaming decoder is still holding
  text += decoder.decode();
  return { text, truncated };
}

/**
 * Execute an HTTP request step.
 *
 * Behaviour visible in this block:
 *  - If the project config defines an `allowedHosts` array, the URL's
 *    hostname must be listed in it. If the project config cannot be loaded,
 *    all hosts are allowed.
 *    NOTE(review): the allowlist is applied to the initial URL only; with
 *    `followRedirects: true` a redirect target is not re-checked here.
 *  - A body is only sent for POST, PUT and PATCH. Object bodies are sent as
 *    JSON, with a JSON Content-Type added unless the caller supplied one
 *    under any header casing.
 *  - At most 5MB of the response body is read; a longer body is cut and a
 *    `warning` field is added to the result.
 *  - With `responseType` "json" (the default unless `extract` is "text"),
 *    the body is parsed as JSON and falls back to raw text when parsing
 *    fails.
 *  - `durationMs` covers the time until `fetch` resolves, not the body read.
 *
 * @param {object} inputs - { url, method?, headers?, body?, timeout?, responseType?, followRedirects?, extract? }
 * @returns {Promise<{ status: number, statusText: string, headers: object, body: any, durationMs: number, warning?: string }>}
 * @throws {Error} when `url` is missing or not a string, or its host is not in allowedHosts
 */
async function executeHttp(inputs) {
  const {
    url,
    method = 'GET',
    headers = {},
    body,
    timeout = 30000,
    followRedirects = false,
  } = inputs;
  const responseType = inputs.responseType || (inputs.extract === 'text' ? 'text' : 'json');

  if (!url || typeof url !== 'string') {
    throw new Error('http: "url" input is required');
  }

  // URL allowlisting check. Only loading the project config is best-effort;
  // the allowlist decision itself is made outside the try block so it can
  // never be swallowed along with a config-loading error.
  let allowedHosts = null;
  try {
    const { loadProject } = require('./project');
    const { config: proj } = loadProject();
    if (proj && Array.isArray(proj.allowedHosts)) {
      allowedHosts = proj.allowedHosts;
    }
  } catch {
    // If project config can't be loaded, allow all hosts
  }
  if (allowedHosts) {
    const { hostname } = new URL(url);
    if (!allowedHosts.includes(hostname)) {
      throw new Error(`http: host "${hostname}" is not in allowedHosts. Allowed: ${allowedHosts.join(', ')}`);
    }
  }

  const startTime = Date.now();

  // Build fetch options
  const fetchOpts = {
    method: String(method).toUpperCase(),
    headers: { ...headers },
    signal: AbortSignal.timeout(timeout),
    redirect: followRedirects ? 'follow' : 'manual',
  };

  if (body && ['POST', 'PUT', 'PATCH'].includes(fetchOpts.method)) {
    if (typeof body === 'object') {
      fetchOpts.body = JSON.stringify(body);
      // Header names are case-insensitive; respect any caller-supplied casing
      const hasContentType = Object.keys(fetchOpts.headers)
        .some((headerName) => headerName.toLowerCase() === 'content-type');
      if (!hasContentType) {
        fetchOpts.headers['Content-Type'] = 'application/json';
      }
    } else {
      fetchOpts.body = String(body);
    }
  }

  const response = await fetch(url, fetchOpts);
  const durationMs = Date.now() - startTime;

  // Response size limit: 5MB, enforced while reading
  const MAX_RESPONSE_SIZE = 5 * 1024 * 1024;
  const { text: rawBody, truncated } = await readCappedText(response, MAX_RESPONSE_SIZE);

  // Parse body
  let parsedBody;
  if (responseType === 'json') {
    try {
      parsedBody = JSON.parse(rawBody);
    } catch {
      parsedBody = rawBody; // Fall back to text
    }
  } else {
    parsedBody = rawBody;
  }

  // Strip HTML if extract: "text"
  if (inputs.extract === 'text' && typeof parsedBody === 'string') {
    const { stripHtml } = require('./readers');
    parsedBody = stripHtml(parsedBody);
  }

  // Collect response headers
  const respHeaders = {};
  response.headers.forEach((value, key) => {
    respHeaders[key] = value;
  });

  return {
    status: response.status,
    statusText: response.statusText,
    headers: respHeaders,
    body: parsedBody,
    durationMs,
    ...(truncated && { warning: 'Response truncated at 5MB' }),
  };
}
964
+
/**
 * Execute a MongoDB aggregation pipeline step.
 *
 * The database and collection are taken from the step inputs, then the
 * workflow defaults, then the project config. Pipelines are limited to 20
 * stages, and `$out` / `$merge` stages are rejected unless `allowWrites`
 * is truthy. The Mongo client is always closed once the aggregation has
 * finished or failed.
 *
 * @param {object} inputs - { db?, collection?, pipeline, allowWrites? }
 * @param {object} defaults - workflow defaults (may supply db / collection)
 * @returns {Promise<{ results: any[], count: number, durationMs: number }>}
 * @throws {Error} when db or collection cannot be determined, `pipeline` is
 *   not an array of stage objects, the pipeline has more than 20 stages, or
 *   a write stage is present without `allowWrites`
 */
async function executeAggregate(inputs, defaults) {
  const { getMongoCollection } = require('./mongo');
  const { loadProject } = require('./project');
  const { config: proj } = loadProject();

  // Either source of fallbacks may be absent; treat that as "no fallback"
  // instead of failing with a TypeError on property access.
  const projectConfig = proj || {};
  const stepDefaults = defaults || {};

  const db = inputs.db || stepDefaults.db || projectConfig.db;
  const collection = inputs.collection || stepDefaults.collection || projectConfig.collection;
  const pipeline = inputs.pipeline;
  const allowWrites = inputs.allowWrites || false;

  if (!db) throw new Error('aggregate: database not specified');
  if (!collection) throw new Error('aggregate: collection not specified');
  if (!Array.isArray(pipeline)) throw new Error('aggregate: "pipeline" must be an array');
  if (pipeline.length > 20) throw new Error('aggregate: pipeline limited to 20 stages');

  pipeline.forEach((stage, position) => {
    if (!stage || typeof stage !== 'object' || Array.isArray(stage)) {
      throw new Error(`aggregate: pipeline stage ${position} must be an object`);
    }
  });

  // Block write stages unless explicitly allowed. Every key of a stage is
  // inspected so a write operator cannot hide behind another first key.
  if (!allowWrites) {
    for (const stage of pipeline) {
      const writeKey = Object.keys(stage).find((key) => key === '$out' || key === '$merge');
      if (writeKey) {
        throw new Error(`aggregate: "${writeKey}" stage is not allowed without allowWrites: true`);
      }
    }
  }

  const startTime = Date.now();
  const { client, collection: col } = await getMongoCollection(db, collection);
  try {
    const results = await col.aggregate(pipeline).toArray();
    return {
      results,
      count: results.length,
      durationMs: Date.now() - startTime,
    };
  } finally {
    await client.close();
  }
}
1010
+
557
1011
  // ════════════════════════════════════════════════════════════════════
558
1012
  // VAI Tool Executors
559
1013
  // ════════════════════════════════════════════════════════════════════
@@ -754,11 +1208,40 @@ async function executeIngest(inputs, defaults) {
754
1208
  const { client, collection: col } = await getMongoCollection(db, collection);
755
1209
  try {
756
1210
  const result = await col.insertMany(docs);
1211
+
1212
+ // Auto-create vector search index if it doesn't exist
1213
+ // Note: Atlas vector search indexes take a few seconds to become ready after creation.
1214
+ // The search step may need a brief delay on first run.
1215
+ let indexCreated = false;
1216
+ try {
1217
+ const indexes = await col.listSearchIndexes().toArray();
1218
+ const hasVectorIndex = indexes.some(idx => idx.name === 'vector_index');
1219
+ if (!hasVectorIndex) {
1220
+ await col.createSearchIndex({
1221
+ name: 'vector_index',
1222
+ type: 'vectorSearch',
1223
+ definition: {
1224
+ fields: [{
1225
+ type: 'vector',
1226
+ path: 'embedding',
1227
+ numDimensions: embRes.data[0].embedding.length,
1228
+ similarity: 'cosine'
1229
+ }]
1230
+ }
1231
+ });
1232
+ indexCreated = true;
1233
+ }
1234
+ } catch (indexErr) {
1235
+ // Ignore errors: index may already exist, or createSearchIndex may not be
1236
+ // available on non-Atlas deployments
1237
+ }
1238
+
757
1239
  return {
758
1240
  insertedCount: result.insertedCount,
759
1241
  chunks: chunks.length,
760
1242
  source,
761
1243
  model: embRes.model,
1244
+ indexCreated,
762
1245
  };
763
1246
  } finally {
764
1247
  await client.close();
@@ -938,6 +1421,18 @@ async function executeStep(step, resolvedInputs, defaults, context) {
938
1421
  return executeTransform(resolvedInputs);
939
1422
  case 'generate':
940
1423
  return executeGenerate(resolvedInputs);
1424
+ case 'conditional':
1425
+ return executeConditional(resolvedInputs, context);
1426
+ case 'template':
1427
+ return executeTemplate(resolvedInputs);
1428
+ case 'loop':
1429
+ return executeLoop(resolvedInputs, defaults, context);
1430
+ case 'chunk':
1431
+ return executeChunk(resolvedInputs);
1432
+ case 'http':
1433
+ return executeHttp(resolvedInputs);
1434
+ case 'aggregate':
1435
+ return executeAggregate(resolvedInputs, defaults);
941
1436
 
942
1437
  // VAI tools
943
1438
  case 'query':
@@ -1056,12 +1551,27 @@ async function executeWorkflow(definition, opts = {}) {
1056
1551
 
1057
1552
  // Execute layer by layer
1058
1553
  const stepResults = [];
1554
+ const skippedByConditional = new Set(); // Steps skipped by conditional branches
1059
1555
 
1060
1556
  for (const layer of layers) {
1061
1557
  const layerPromises = layer.map(async (stepId) => {
1062
1558
  const step = stepMap.get(stepId);
1063
1559
  const stepStart = Date.now();
1064
1560
 
1561
+ // Check if this step was skipped by a conditional branch
1562
+ if (skippedByConditional.has(stepId)) {
1563
+ if (opts.onStepSkip) opts.onStepSkip(stepId, 'conditional branch not taken');
1564
+ context[stepId] = { output: null, skipped: true };
1565
+ stepResults.push({
1566
+ id: stepId,
1567
+ tool: step.tool,
1568
+ skipped: true,
1569
+ reason: 'conditional branch not taken',
1570
+ durationMs: Date.now() - stepStart,
1571
+ });
1572
+ return;
1573
+ }
1574
+
1065
1575
  // Evaluate condition
1066
1576
  if (step.condition) {
1067
1577
  const conditionMet = evaluateCondition(step.condition, context);
@@ -1083,7 +1593,31 @@ async function executeWorkflow(definition, opts = {}) {
1083
1593
  try {
1084
1594
  let output;
1085
1595
 
1086
- if (step.forEach) {
1596
+ // Mock executor injection: if _mockExecutors has a mock for this tool, use it
1597
+ const mockExecutor = opts._mockExecutors && opts._mockExecutors[step.tool];
1598
+ if (mockExecutor && step.tool !== 'conditional') {
1599
+ const resolvedInputs = resolveTemplate(step.inputs || {}, context);
1600
+ output = await mockExecutor(resolvedInputs, step, context);
1601
+ } else if (step.tool === 'conditional') {
1602
+ // Special handling: resolve then/else but pass raw condition to evaluator
1603
+ const rawCondition = step.inputs.condition;
1604
+ const resolvedInputs = {
1605
+ condition: rawCondition, // Keep raw for condition evaluator
1606
+ then: step.inputs.then || [],
1607
+ else: step.inputs.else || [],
1608
+ };
1609
+ output = await executeStep(step, resolvedInputs, defaults, context);
1610
+ } else if (step.tool === 'loop') {
1611
+ // Special handling: resolve items but pass raw step def for per-iteration resolution
1612
+ const resolvedItems = resolveTemplate(step.inputs.items, context);
1613
+ const resolvedInputs = {
1614
+ items: resolvedItems,
1615
+ as: step.inputs.as,
1616
+ step: step.inputs.step, // Raw — resolved per iteration inside executeLoop
1617
+ maxIterations: step.inputs.maxIterations,
1618
+ };
1619
+ output = await executeStep(step, resolvedInputs, defaults, context);
1620
+ } else if (step.forEach) {
1087
1621
  // Iterate over an array
1088
1622
  const iterArray = resolveTemplate(step.forEach, context);
1089
1623
  if (!Array.isArray(iterArray)) {
@@ -1104,6 +1638,13 @@ async function executeWorkflow(definition, opts = {}) {
1104
1638
  output = await executeStep(step, resolvedInputs, defaults, context);
1105
1639
  }
1106
1640
 
1641
+ // If this was a conditional node, mark skipped branch steps
1642
+ if (step.tool === 'conditional' && output.skippedSteps) {
1643
+ for (const skippedId of output.skippedSteps) {
1644
+ skippedByConditional.add(skippedId);
1645
+ }
1646
+ }
1647
+
1107
1648
  const durationMs = Date.now() - stepStart;
1108
1649
  context[stepId] = { output };
1109
1650
 
@@ -1320,7 +1861,9 @@ function loadWorkflow(nameOrPath) {
1320
1861
  module.exports = {
1321
1862
  // Validation
1322
1863
  validateWorkflow,
1864
+ validateSchemaEnhanced,
1323
1865
  detectCycles,
1866
+ SCHEMA_LIMITS,
1324
1867
 
1325
1868
  // Dependency resolution
1326
1869
  buildDependencyGraph,
@@ -1333,6 +1876,13 @@ module.exports = {
1333
1876
  executeMerge,
1334
1877
  executeFilter,
1335
1878
  executeTransform,
1879
+ executeConditional,
1880
+ executeTemplate,
1881
+ executeLoop,
1882
+ executeChunk,
1883
+ executeHttp,
1884
+ executeIngest,
1885
+ executeAggregate,
1336
1886
 
1337
1887
  // Main execution
1338
1888
  executeStep,
@@ -1351,5 +1901,7 @@ module.exports = {
1351
1901
  // Constants
1352
1902
  VAI_TOOLS,
1353
1903
  CONTROL_FLOW_TOOLS,
1904
+ PROCESSING_TOOLS,
1905
+ INTEGRATION_TOOLS,
1354
1906
  ALL_TOOLS,
1355
1907
  };