voyageai-cli 1.28.0 → 1.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -8
- package/package.json +2 -1
- package/src/commands/app.js +15 -0
- package/src/commands/benchmark.js +22 -8
- package/src/commands/chat.js +18 -0
- package/src/commands/chunk.js +10 -0
- package/src/commands/demo.js +4 -0
- package/src/commands/embed.js +13 -0
- package/src/commands/estimate.js +3 -0
- package/src/commands/eval.js +6 -0
- package/src/commands/explain.js +2 -0
- package/src/commands/generate.js +2 -0
- package/src/commands/ingest.js +4 -0
- package/src/commands/init.js +2 -0
- package/src/commands/mcp-server.js +2 -0
- package/src/commands/models.js +2 -0
- package/src/commands/ping.js +7 -0
- package/src/commands/pipeline.js +15 -0
- package/src/commands/playground.js +685 -8
- package/src/commands/query.js +16 -0
- package/src/commands/rerank.js +12 -0
- package/src/commands/scaffold.js +2 -0
- package/src/commands/search.js +11 -0
- package/src/commands/similarity.js +9 -0
- package/src/commands/store.js +4 -0
- package/src/commands/workflow.js +702 -13
- package/src/lib/capability-report.js +134 -0
- package/src/lib/chat.js +32 -1
- package/src/lib/config.js +2 -0
- package/src/lib/cost-display.js +107 -0
- package/src/lib/explanations.js +94 -0
- package/src/lib/llm.js +125 -18
- package/src/lib/npm-utils.js +265 -0
- package/src/lib/quality-audit.js +71 -0
- package/src/lib/security/blocked-domains.json +17 -0
- package/src/lib/security-audit.js +198 -0
- package/src/lib/telemetry.js +23 -1
- package/src/lib/workflow-registry.js +416 -0
- package/src/lib/workflow-scaffold.js +380 -0
- package/src/lib/workflow-test-runner.js +208 -0
- package/src/lib/workflow.js +559 -7
- package/src/playground/announcements.md +80 -0
- package/src/playground/assets/announcements/appstore.jpg +0 -0
- package/src/playground/assets/announcements/circuits.jpg +0 -0
- package/src/playground/assets/announcements/csvingest.jpg +0 -0
- package/src/playground/assets/announcements/green-wave.jpg +0 -0
- package/src/playground/help/workflow-nodes.js +472 -0
- package/src/playground/icons/V.png +0 -0
- package/src/playground/index.html +3634 -226
- package/src/workflows/consistency-check.json +4 -0
- package/src/workflows/cost-analysis.json +4 -0
- package/src/workflows/enrich-and-ingest.json +56 -0
- package/src/workflows/intelligent-ingest.json +66 -0
- package/src/workflows/kb-health-report.json +45 -0
- package/src/workflows/multi-collection-search.json +4 -0
- package/src/workflows/research-and-summarize.json +4 -0
- package/src/workflows/search-with-fallback.json +66 -0
- package/src/workflows/smart-ingest.json +4 -0
package/src/lib/workflow.js
CHANGED
|
@@ -18,9 +18,26 @@ const VAI_TOOLS = new Set([
|
|
|
18
18
|
'ingest', 'collections', 'models', 'explain', 'estimate',
|
|
19
19
|
]);
|
|
20
20
|
|
|
21
|
-
const CONTROL_FLOW_TOOLS = new Set(['merge', 'filter', 'transform', 'generate']);
|
|
21
|
+
const CONTROL_FLOW_TOOLS = new Set(['merge', 'filter', 'transform', 'generate', 'conditional', 'loop', 'template']);
|
|
22
22
|
|
|
23
|
-
const
|
|
23
|
+
const PROCESSING_TOOLS = new Set(['chunk', 'aggregate']);
|
|
24
|
+
|
|
25
|
+
const INTEGRATION_TOOLS = new Set(['http']);
|
|
26
|
+
|
|
27
|
+
const ALL_TOOLS = new Set([...VAI_TOOLS, ...CONTROL_FLOW_TOOLS, ...PROCESSING_TOOLS, ...INTEGRATION_TOOLS]);
|
|
28
|
+
|
|
29
|
+
// ════════════════════════════════════════════════════════════════════
// Schema Limits (Phase 1 Enhanced Validation)
// ════════════════════════════════════════════════════════════════════

/**
 * Upper bounds used by the enhanced (publishable-workflow) schema validation.
 * Each value is an inclusive maximum: a definition fails only when it exceeds it.
 */
const SCHEMA_LIMITS = {
  // Maximum number of entries in `definition.steps`.
  maxSteps: 50,
  // Maximum number of keys in `definition.inputs`.
  maxInputs: 20,
  // Maximum template nesting depth.
  // NOTE(review): no check against this value is visible in this section — confirm where it is enforced.
  maxTemplateDepth: 5,
  // Maximum length of `definition.name`.
  maxNameLength: 64,
  // Maximum length of `definition.description`.
  maxDescriptionLength: 500,
  // Maximum length of each step's `name`.
  maxStepNameLength: 100,
};
|
|
24
41
|
|
|
25
42
|
// ════════════════════════════════════════════════════════════════════
|
|
26
43
|
// Validation
|
|
@@ -52,8 +69,8 @@ function validateWorkflow(definition) {
|
|
|
52
69
|
// Validate inputs schema
|
|
53
70
|
if (definition.inputs) {
|
|
54
71
|
for (const [key, schema] of Object.entries(definition.inputs)) {
|
|
55
|
-
if (schema.type && !['string', 'number', 'boolean'].includes(schema.type)) {
|
|
56
|
-
errors.push(`Input "${key}" has invalid type "${schema.type}" (must be string, number, or
|
|
72
|
+
if (schema.type && !['string', 'number', 'boolean', 'array'].includes(schema.type)) {
|
|
73
|
+
errors.push(`Input "${key}" has invalid type "${schema.type}" (must be string, number, boolean, or array)`);
|
|
57
74
|
}
|
|
58
75
|
}
|
|
59
76
|
}
|
|
@@ -94,10 +111,22 @@ function validateWorkflow(definition) {
|
|
|
94
111
|
// Check template references point to known step IDs or reserved prefixes
|
|
95
112
|
// "item" and "index" are injected by forEach at runtime
|
|
96
113
|
const forEachVars = step.forEach ? new Set(['item', 'index']) : new Set();
|
|
114
|
+
// For loop nodes, the "as" variable and inline step refs are scoped
|
|
115
|
+
const loopVars = new Set();
|
|
116
|
+
if (step.tool === 'loop' && step.inputs) {
|
|
117
|
+
if (step.inputs.as) loopVars.add(step.inputs.as);
|
|
118
|
+
loopVars.add('item');
|
|
119
|
+
loopVars.add('index');
|
|
120
|
+
}
|
|
97
121
|
if (step.inputs) {
|
|
98
|
-
|
|
122
|
+
// For loop nodes, only check dependencies on top-level inputs (items, as, maxIterations)
|
|
123
|
+
// not on the inline step's inputs which may reference the loop variable
|
|
124
|
+
const inputsToCheck = step.tool === 'loop'
|
|
125
|
+
? { items: step.inputs.items }
|
|
126
|
+
: step.inputs;
|
|
127
|
+
const deps = extractDependencies(inputsToCheck);
|
|
99
128
|
for (const dep of deps) {
|
|
100
|
-
if (!forEachVars.has(dep) && !stepIds.has(dep) && !definition.steps.some(s => s.id === dep)) {
|
|
129
|
+
if (!forEachVars.has(dep) && !loopVars.has(dep) && !stepIds.has(dep) && !definition.steps.some(s => s.id === dep)) {
|
|
101
130
|
errors.push(`${stepPrefix}: references unknown step "${dep}"`);
|
|
102
131
|
}
|
|
103
132
|
}
|
|
@@ -119,6 +148,44 @@ function validateWorkflow(definition) {
|
|
|
119
148
|
errors.push(`Duplicate step id: "${id}"`);
|
|
120
149
|
}
|
|
121
150
|
|
|
151
|
+
// Validate conditional branch references
|
|
152
|
+
for (const step of definition.steps) {
|
|
153
|
+
if (step.tool === 'conditional' && step.inputs) {
|
|
154
|
+
const branches = ['then', 'else'];
|
|
155
|
+
for (const branch of branches) {
|
|
156
|
+
const refs = step.inputs[branch];
|
|
157
|
+
if (refs && Array.isArray(refs)) {
|
|
158
|
+
for (const ref of refs) {
|
|
159
|
+
if (!stepIds.has(ref)) {
|
|
160
|
+
errors.push(`Step "${step.id}": conditional ${branch} references unknown step "${ref}"`);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
if (!step.inputs.condition) {
|
|
166
|
+
errors.push(`Step "${step.id}": conditional must have a "condition" input`);
|
|
167
|
+
}
|
|
168
|
+
if (!step.inputs.then || !Array.isArray(step.inputs.then)) {
|
|
169
|
+
errors.push(`Step "${step.id}": conditional must have a "then" array`);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Validate loop inline step
|
|
174
|
+
if (step.tool === 'loop' && step.inputs) {
|
|
175
|
+
if (!step.inputs.items) {
|
|
176
|
+
errors.push(`Step "${step.id}": loop must have an "items" input`);
|
|
177
|
+
}
|
|
178
|
+
if (!step.inputs.as || typeof step.inputs.as !== 'string') {
|
|
179
|
+
errors.push(`Step "${step.id}": loop must have a string "as" input`);
|
|
180
|
+
}
|
|
181
|
+
if (!step.inputs.step || typeof step.inputs.step !== 'object') {
|
|
182
|
+
errors.push(`Step "${step.id}": loop must have a "step" object`);
|
|
183
|
+
} else if (step.inputs.step.tool && !ALL_TOOLS.has(step.inputs.step.tool)) {
|
|
184
|
+
errors.push(`Step "${step.id}": loop sub-step has unknown tool "${step.inputs.step.tool}"`);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
122
189
|
// Check for circular dependencies
|
|
123
190
|
const cycleErrors = detectCycles(definition.steps);
|
|
124
191
|
errors.push(...cycleErrors);
|
|
@@ -126,6 +193,74 @@ function validateWorkflow(definition) {
|
|
|
126
193
|
return errors;
|
|
127
194
|
}
|
|
128
195
|
|
|
196
|
+
/**
 * Enhanced schema validation for publishable workflows.
 *
 * Starts from the error list produced by validateWorkflow() and appends the
 * additional quality gates required before a workflow can be published:
 * size limits (SCHEMA_LIMITS), a meaningful description, a well-formed
 * semantic version, documented inputs, named steps, and an output section.
 *
 * @param {object} definition - Parsed workflow JSON
 * @returns {string[]} errors - empty when the definition passes every check
 */
function validateSchemaEnhanced(definition) {
  const errors = validateWorkflow(definition);

  // Nothing further can be inspected on a non-object definition.
  if (!definition || typeof definition !== 'object') return errors;

  // Semantic Versioning 2.0.0 grammar, anchored at BOTH ends so trailing
  // garbage ("1.2.3garbage", "1.0.0.1") is rejected. Pre-release and build
  // metadata suffixes ("1.0.0-beta.1+build.5") are accepted.
  const semverPattern = /^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(?:\+[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*)?$/;

  // Step count limit
  if (Array.isArray(definition.steps) && definition.steps.length > SCHEMA_LIMITS.maxSteps) {
    errors.push(`Too many steps (${definition.steps.length}/${SCHEMA_LIMITS.maxSteps})`);
  }

  // Input count limit
  if (definition.inputs && Object.keys(definition.inputs).length > SCHEMA_LIMITS.maxInputs) {
    errors.push(`Too many inputs (${Object.keys(definition.inputs).length}/${SCHEMA_LIMITS.maxInputs})`);
  }

  // Name length
  if (definition.name && definition.name.length > SCHEMA_LIMITS.maxNameLength) {
    errors.push(`Workflow name too long (${definition.name.length}/${SCHEMA_LIMITS.maxNameLength})`);
  }

  // Description required (min 10 chars) for publishable workflows
  if (!definition.description || definition.description.length < 10) {
    errors.push('Description must be at least 10 characters');
  }

  // Description length limit
  if (definition.description && definition.description.length > SCHEMA_LIMITS.maxDescriptionLength) {
    errors.push(`Description too long (${definition.description.length}/${SCHEMA_LIMITS.maxDescriptionLength})`);
  }

  // Version is optional, but when present it must be valid semver
  if (definition.version && !semverPattern.test(String(definition.version))) {
    errors.push('Version must be valid semver (e.g. 1.0.0)');
  }

  // Every input should have a description. A null/undefined spec is reported
  // as "missing description" instead of crashing on property access.
  for (const [key, spec] of Object.entries(definition.inputs || {})) {
    if (!spec || !spec.description) {
      errors.push(`Input "${key}" missing description`);
    }
  }

  // Every step should have a human-readable name
  if (Array.isArray(definition.steps)) {
    for (const step of definition.steps) {
      if (!step.name) {
        errors.push(`Step "${step.id}" missing "name" field`);
      } else if (step.name.length > SCHEMA_LIMITS.maxStepNameLength) {
        errors.push(`Step "${step.id}" name too long (${step.name.length}/${SCHEMA_LIMITS.maxStepNameLength})`);
      }
    }
  }

  // Output section should exist
  if (!definition.output || Object.keys(definition.output).length === 0) {
    errors.push('Workflow should define an "output" section');
  }

  return errors;
}
|
|
263
|
+
|
|
129
264
|
/**
|
|
130
265
|
* Detect circular dependencies in steps using DFS.
|
|
131
266
|
* @param {Array} steps
|
|
@@ -197,6 +332,18 @@ function detectCycles(steps) {
|
|
|
197
332
|
function buildDependencyGraph(steps) {
|
|
198
333
|
const graph = new Map();
|
|
199
334
|
|
|
335
|
+
// First pass: build index of conditional branches
|
|
336
|
+
// Steps referenced in then/else of a conditional depend on that conditional
|
|
337
|
+
const conditionalDeps = new Map(); // stepId -> conditionalStepId
|
|
338
|
+
for (const step of steps) {
|
|
339
|
+
if (step.tool === 'conditional' && step.inputs) {
|
|
340
|
+
const branches = [...(step.inputs.then || []), ...(step.inputs.else || [])];
|
|
341
|
+
for (const ref of branches) {
|
|
342
|
+
conditionalDeps.set(ref, step.id);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
200
347
|
for (const step of steps) {
|
|
201
348
|
const deps = extractDependencies(step.inputs || {});
|
|
202
349
|
if (step.condition) {
|
|
@@ -207,6 +354,10 @@ function buildDependencyGraph(steps) {
|
|
|
207
354
|
const forDeps = extractDependencies(step.forEach);
|
|
208
355
|
for (const d of forDeps) deps.add(d);
|
|
209
356
|
}
|
|
357
|
+
// If this step is referenced by a conditional, it depends on that conditional
|
|
358
|
+
if (conditionalDeps.has(step.id)) {
|
|
359
|
+
deps.add(conditionalDeps.get(step.id));
|
|
360
|
+
}
|
|
210
361
|
graph.set(step.id, deps);
|
|
211
362
|
}
|
|
212
363
|
|
|
@@ -554,6 +705,309 @@ function executeTransform(inputs) {
|
|
|
554
705
|
return { results, resultCount: results.length };
|
|
555
706
|
}
|
|
556
707
|
|
|
708
|
+
/**
 * Evaluate a conditional step and report which branch applies.
 *
 * This function only decides the branch; it does not skip anything itself.
 * The returned `skippedSteps` list is what a caller can use to suppress the
 * steps of the branch that was not taken.
 *
 * Condition handling:
 *  - boolean: used as-is
 *  - string:  passed to evaluateCondition() together with the context
 *  - other:   coerced with Boolean()
 * Empty/absent conditions (undefined, null, '', NaN) are rejected; `false`
 * and `0` are accepted as legitimate falsy conditions.
 *
 * @param {object} inputs - { condition: string|boolean|any, then: string[], else?: string[] }
 * @param {object} context - workflow context
 * @returns {{ conditionResult: boolean, branchTaken: string, enabledSteps: string[], skippedSteps: string[] }}
 * @throws {Error} when no condition is supplied
 */
function executeConditional(inputs, context) {
  const rawCondition = inputs.condition;
  const whenTrue = inputs.then || [];
  const whenFalse = inputs.else || [];

  const isSupplied = Boolean(rawCondition) || rawCondition === false || rawCondition === 0;
  if (!isSupplied) {
    throw new Error('conditional: "condition" input is required');
  }

  // The template engine may already have resolved the condition to a boolean.
  let outcome;
  switch (typeof rawCondition) {
    case 'boolean':
      outcome = rawCondition;
      break;
    case 'string':
      outcome = evaluateCondition(rawCondition, context);
      break;
    default:
      outcome = Boolean(rawCondition);
  }

  if (outcome) {
    return {
      conditionResult: outcome,
      branchTaken: 'then',
      enabledSteps: whenTrue,
      skippedSteps: whenFalse,
    };
  }

  return {
    conditionResult: outcome,
    branchTaken: 'else',
    enabledSteps: whenFalse,
    skippedSteps: whenTrue,
  };
}
|
|
749
|
+
|
|
750
|
+
/**
 * Execute a template step: return the composed text and its length.
 *
 * The `text` input is stringified as received; this function performs no
 * placeholder substitution of its own.
 * NOTE(review): presumably placeholders are resolved by the caller before
 * this runs — confirm against the dispatcher.
 *
 * @param {object} inputs - { text: any }
 * @returns {{ text: string, charCount: number }}
 * @throws {Error} when `text` is undefined or null
 */
function executeTemplate(inputs) {
  const raw = inputs.text;

  // Only undefined/null count as missing; '' and 0 are valid template bodies.
  if (raw == null) {
    throw new Error('template: "text" input is required');
  }

  const composed = String(raw);
  const result = { text: composed };
  result.charCount = composed.length;
  return result;
}
|
|
771
|
+
|
|
772
|
+
/**
 * Execute a loop step: iterate over an array, executing a sub-step per item.
 *
 * For each item a scoped copy of the context is built in which the current
 * item is available under the name given by `as` and the zero-based position
 * under `_loopIndex`. The sub-step's inputs are resolved against that scoped
 * context and dispatched through executeStep(). Iterations run sequentially.
 *
 * A failing iteration does not abort the loop: its message is recorded in
 * `errors` and the loop moves on. When `items` is longer than
 * `maxIterations`, the extra items are not processed and one truncation
 * entry is appended to `errors`.
 *
 * @param {object} inputs - { items, as, step, maxIterations? }
 *   `maxIterations` defaults to 100 when omitted or null.
 * @param {object} defaults - workflow defaults
 * @param {object} context - workflow context
 * @returns {Promise<{ iterations: number, results: any[], errors: any[] }>}
 *   `iterations` counts the iterations that completed successfully.
 * @throws {Error} when items/as/step are malformed or maxIterations is not a
 *   non-negative number
 */
async function executeLoop(inputs, defaults, context) {
  const { items, as, step: subStepDef } = inputs;

  if (!Array.isArray(items)) {
    throw new Error('loop: "items" must resolve to an array');
  }
  if (!as || typeof as !== 'string') {
    throw new Error('loop: "as" must be a string variable name');
  }
  if (!subStepDef || typeof subStepDef !== 'object') {
    throw new Error('loop: "step" must be a step definition object');
  }

  // A destructuring default only covers `undefined`; a JSON `null` would make
  // Math.min(len, null) === 0 and silently skip every item. Treat null like
  // "not provided", and reject values that cannot act as a limit.
  const maxIterations = inputs.maxIterations == null ? 100 : Number(inputs.maxIterations);
  if (Number.isNaN(maxIterations) || maxIterations < 0) {
    throw new Error('loop: "maxIterations" must be a non-negative number');
  }

  const results = [];
  const errors = [];

  const limit = Math.min(items.length, maxIterations);

  for (let i = 0; i < limit; i++) {
    const item = items[i];
    // Build scoped context with loop variable
    const scopedContext = { ...context, [as]: item, _loopIndex: i };

    try {
      // Resolve the sub-step inputs in the scoped context
      const resolvedInputs = resolveTemplate(subStepDef.inputs || {}, scopedContext);
      // Create a temporary step object for the dispatcher
      const tempStep = { id: `_loop_${i}`, tool: subStepDef.tool, inputs: subStepDef.inputs };
      const output = await executeStep(tempStep, resolvedInputs, defaults, scopedContext);
      results.push(output);
    } catch (err) {
      // The loop always continues and collects per-item errors.
      errors.push({ index: i, error: err.message });
    }
  }

  if (items.length > maxIterations) {
    errors.push({ index: maxIterations, error: `Loop truncated at maxIterations (${maxIterations})` });
  }

  return {
    iterations: results.length,
    results,
    errors,
  };
}
|
|
827
|
+
|
|
828
|
+
/**
|
|
829
|
+
* Execute a chunk step: split text using vai's chunking strategies.
|
|
830
|
+
*
|
|
831
|
+
* @param {object} inputs - { text, strategy?, size?, overlap?, source? }
|
|
832
|
+
* @returns {{ chunks: object[], totalChunks: number, strategy: string, avgChunkSize: number }}
|
|
833
|
+
*/
|
|
834
|
+
function executeChunk(inputs) {
|
|
835
|
+
const { chunk: doChunk } = require('./chunker');
|
|
836
|
+
|
|
837
|
+
const { text, strategy = 'recursive', size = 512, overlap = 50, source } = inputs;
|
|
838
|
+
|
|
839
|
+
if (!text && text !== '') {
|
|
840
|
+
throw new Error('chunk: "text" input is required');
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
const chunkTexts = doChunk(text, { strategy, size, overlap });
|
|
844
|
+
|
|
845
|
+
const chunks = chunkTexts.map((content, index) => {
|
|
846
|
+
const obj = {
|
|
847
|
+
index,
|
|
848
|
+
content,
|
|
849
|
+
charCount: content.length,
|
|
850
|
+
};
|
|
851
|
+
if (source) obj.source = source;
|
|
852
|
+
obj.metadata = { strategy };
|
|
853
|
+
// For markdown strategy, try to extract heading
|
|
854
|
+
if (strategy === 'markdown') {
|
|
855
|
+
const headingMatch = content.match(/^#+\s+(.+)/m);
|
|
856
|
+
if (headingMatch) obj.metadata.heading = headingMatch[1];
|
|
857
|
+
}
|
|
858
|
+
return obj;
|
|
859
|
+
});
|
|
860
|
+
|
|
861
|
+
const totalChars = chunks.reduce((sum, c) => sum + c.charCount, 0);
|
|
862
|
+
|
|
863
|
+
return {
|
|
864
|
+
chunks,
|
|
865
|
+
totalChunks: chunks.length,
|
|
866
|
+
strategy,
|
|
867
|
+
avgChunkSize: chunks.length > 0 ? Math.round(totalChars / chunks.length) : 0,
|
|
868
|
+
};
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
/**
|
|
872
|
+
* Execute an HTTP request step.
|
|
873
|
+
*
|
|
874
|
+
* @param {object} inputs - { url, method?, headers?, body?, timeout?, responseType?, followRedirects? }
|
|
875
|
+
* @returns {Promise<{ status: number, statusText: string, headers: object, body: any, durationMs: number }>}
|
|
876
|
+
*/
|
|
877
|
+
async function executeHttp(inputs) {
|
|
878
|
+
const effectiveResponseType = inputs.responseType || (inputs.extract === 'text' ? 'text' : 'json');
|
|
879
|
+
const { url, method = 'GET', headers = {}, body, timeout = 30000, followRedirects = false } = inputs;
|
|
880
|
+
const responseType = effectiveResponseType;
|
|
881
|
+
|
|
882
|
+
if (!url || typeof url !== 'string') {
|
|
883
|
+
throw new Error('http: "url" input is required');
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
// URL allowlisting check
|
|
887
|
+
try {
|
|
888
|
+
const { loadProject } = require('./project');
|
|
889
|
+
const { config: proj } = loadProject();
|
|
890
|
+
if (proj && proj.allowedHosts && Array.isArray(proj.allowedHosts)) {
|
|
891
|
+
const parsed = new URL(url);
|
|
892
|
+
if (!proj.allowedHosts.includes(parsed.hostname)) {
|
|
893
|
+
throw new Error(`http: host "${parsed.hostname}" is not in allowedHosts. Allowed: ${proj.allowedHosts.join(', ')}`);
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
} catch (e) {
|
|
897
|
+
if (e.message.includes('allowedHosts')) throw e;
|
|
898
|
+
// If project config can't be loaded, allow all hosts
|
|
899
|
+
}
|
|
900
|
+
|
|
901
|
+
const startTime = Date.now();
|
|
902
|
+
|
|
903
|
+
// Build fetch options
|
|
904
|
+
const fetchOpts = {
|
|
905
|
+
method: method.toUpperCase(),
|
|
906
|
+
headers: { ...headers },
|
|
907
|
+
signal: AbortSignal.timeout(timeout),
|
|
908
|
+
redirect: followRedirects ? 'follow' : 'manual',
|
|
909
|
+
};
|
|
910
|
+
|
|
911
|
+
if (body && ['POST', 'PUT', 'PATCH'].includes(fetchOpts.method)) {
|
|
912
|
+
if (typeof body === 'object') {
|
|
913
|
+
fetchOpts.body = JSON.stringify(body);
|
|
914
|
+
if (!fetchOpts.headers['Content-Type'] && !fetchOpts.headers['content-type']) {
|
|
915
|
+
fetchOpts.headers['Content-Type'] = 'application/json';
|
|
916
|
+
}
|
|
917
|
+
} else {
|
|
918
|
+
fetchOpts.body = String(body);
|
|
919
|
+
}
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
const response = await fetch(url, fetchOpts);
|
|
923
|
+
const durationMs = Date.now() - startTime;
|
|
924
|
+
|
|
925
|
+
// Response size limit: 5MB
|
|
926
|
+
const MAX_RESPONSE_SIZE = 5 * 1024 * 1024;
|
|
927
|
+
const responseText = await response.text();
|
|
928
|
+
const truncated = responseText.length > MAX_RESPONSE_SIZE;
|
|
929
|
+
const rawBody = truncated ? responseText.slice(0, MAX_RESPONSE_SIZE) : responseText;
|
|
930
|
+
|
|
931
|
+
// Parse body
|
|
932
|
+
let parsedBody;
|
|
933
|
+
if (responseType === 'json') {
|
|
934
|
+
try {
|
|
935
|
+
parsedBody = JSON.parse(rawBody);
|
|
936
|
+
} catch {
|
|
937
|
+
parsedBody = rawBody; // Fall back to text
|
|
938
|
+
}
|
|
939
|
+
} else {
|
|
940
|
+
parsedBody = rawBody;
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
// Strip HTML if extract: "text"
|
|
944
|
+
if (inputs.extract === 'text' && typeof parsedBody === 'string') {
|
|
945
|
+
const { stripHtml } = require('./readers');
|
|
946
|
+
parsedBody = stripHtml(parsedBody);
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
// Collect response headers
|
|
950
|
+
const respHeaders = {};
|
|
951
|
+
response.headers.forEach((value, key) => {
|
|
952
|
+
respHeaders[key] = value;
|
|
953
|
+
});
|
|
954
|
+
|
|
955
|
+
return {
|
|
956
|
+
status: response.status,
|
|
957
|
+
statusText: response.statusText,
|
|
958
|
+
headers: respHeaders,
|
|
959
|
+
body: parsedBody,
|
|
960
|
+
durationMs,
|
|
961
|
+
...(truncated && { warning: 'Response truncated at 5MB' }),
|
|
962
|
+
};
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
/**
|
|
966
|
+
* Execute a MongoDB aggregation pipeline step.
|
|
967
|
+
*
|
|
968
|
+
* @param {object} inputs - { db?, collection?, pipeline, allowWrites? }
|
|
969
|
+
* @param {object} defaults - workflow defaults
|
|
970
|
+
* @returns {Promise<{ results: any[], count: number, durationMs: number }>}
|
|
971
|
+
*/
|
|
972
|
+
async function executeAggregate(inputs, defaults) {
|
|
973
|
+
const { getMongoCollection } = require('./mongo');
|
|
974
|
+
const { loadProject } = require('./project');
|
|
975
|
+
const { config: proj } = loadProject();
|
|
976
|
+
|
|
977
|
+
const db = inputs.db || defaults.db || proj.db;
|
|
978
|
+
const collection = inputs.collection || defaults.collection || proj.collection;
|
|
979
|
+
const pipeline = inputs.pipeline;
|
|
980
|
+
const allowWrites = inputs.allowWrites || false;
|
|
981
|
+
|
|
982
|
+
if (!db) throw new Error('aggregate: database not specified');
|
|
983
|
+
if (!collection) throw new Error('aggregate: collection not specified');
|
|
984
|
+
if (!Array.isArray(pipeline)) throw new Error('aggregate: "pipeline" must be an array');
|
|
985
|
+
if (pipeline.length > 20) throw new Error('aggregate: pipeline limited to 20 stages');
|
|
986
|
+
|
|
987
|
+
// Block write stages unless explicitly allowed
|
|
988
|
+
if (!allowWrites) {
|
|
989
|
+
for (const stage of pipeline) {
|
|
990
|
+
const stageKey = Object.keys(stage)[0];
|
|
991
|
+
if (stageKey === '$out' || stageKey === '$merge') {
|
|
992
|
+
throw new Error(`aggregate: "${stageKey}" stage is not allowed without allowWrites: true`);
|
|
993
|
+
}
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
|
|
997
|
+
const startTime = Date.now();
|
|
998
|
+
const { client, collection: col } = await getMongoCollection(db, collection);
|
|
999
|
+
try {
|
|
1000
|
+
const results = await col.aggregate(pipeline).toArray();
|
|
1001
|
+
return {
|
|
1002
|
+
results,
|
|
1003
|
+
count: results.length,
|
|
1004
|
+
durationMs: Date.now() - startTime,
|
|
1005
|
+
};
|
|
1006
|
+
} finally {
|
|
1007
|
+
await client.close();
|
|
1008
|
+
}
|
|
1009
|
+
}
|
|
1010
|
+
|
|
557
1011
|
// ════════════════════════════════════════════════════════════════════
|
|
558
1012
|
// VAI Tool Executors
|
|
559
1013
|
// ════════════════════════════════════════════════════════════════════
|
|
@@ -754,11 +1208,40 @@ async function executeIngest(inputs, defaults) {
|
|
|
754
1208
|
const { client, collection: col } = await getMongoCollection(db, collection);
|
|
755
1209
|
try {
|
|
756
1210
|
const result = await col.insertMany(docs);
|
|
1211
|
+
|
|
1212
|
+
// Auto-create vector search index if it doesn't exist
|
|
1213
|
+
// Note: Atlas vector search indexes take a few seconds to become ready after creation.
|
|
1214
|
+
// The search step may need a brief delay on first run.
|
|
1215
|
+
let indexCreated = false;
|
|
1216
|
+
try {
|
|
1217
|
+
const indexes = await col.listSearchIndexes().toArray();
|
|
1218
|
+
const hasVectorIndex = indexes.some(idx => idx.name === 'vector_index');
|
|
1219
|
+
if (!hasVectorIndex) {
|
|
1220
|
+
await col.createSearchIndex({
|
|
1221
|
+
name: 'vector_index',
|
|
1222
|
+
type: 'vectorSearch',
|
|
1223
|
+
definition: {
|
|
1224
|
+
fields: [{
|
|
1225
|
+
type: 'vector',
|
|
1226
|
+
path: 'embedding',
|
|
1227
|
+
numDimensions: embRes.data[0].embedding.length,
|
|
1228
|
+
similarity: 'cosine'
|
|
1229
|
+
}]
|
|
1230
|
+
}
|
|
1231
|
+
});
|
|
1232
|
+
indexCreated = true;
|
|
1233
|
+
}
|
|
1234
|
+
} catch (indexErr) {
|
|
1235
|
+
// Ignore errors: index may already exist, or createSearchIndex may not be
|
|
1236
|
+
// available on non-Atlas deployments
|
|
1237
|
+
}
|
|
1238
|
+
|
|
757
1239
|
return {
|
|
758
1240
|
insertedCount: result.insertedCount,
|
|
759
1241
|
chunks: chunks.length,
|
|
760
1242
|
source,
|
|
761
1243
|
model: embRes.model,
|
|
1244
|
+
indexCreated,
|
|
762
1245
|
};
|
|
763
1246
|
} finally {
|
|
764
1247
|
await client.close();
|
|
@@ -938,6 +1421,18 @@ async function executeStep(step, resolvedInputs, defaults, context) {
|
|
|
938
1421
|
return executeTransform(resolvedInputs);
|
|
939
1422
|
case 'generate':
|
|
940
1423
|
return executeGenerate(resolvedInputs);
|
|
1424
|
+
case 'conditional':
|
|
1425
|
+
return executeConditional(resolvedInputs, context);
|
|
1426
|
+
case 'template':
|
|
1427
|
+
return executeTemplate(resolvedInputs);
|
|
1428
|
+
case 'loop':
|
|
1429
|
+
return executeLoop(resolvedInputs, defaults, context);
|
|
1430
|
+
case 'chunk':
|
|
1431
|
+
return executeChunk(resolvedInputs);
|
|
1432
|
+
case 'http':
|
|
1433
|
+
return executeHttp(resolvedInputs);
|
|
1434
|
+
case 'aggregate':
|
|
1435
|
+
return executeAggregate(resolvedInputs, defaults);
|
|
941
1436
|
|
|
942
1437
|
// VAI tools
|
|
943
1438
|
case 'query':
|
|
@@ -1056,12 +1551,27 @@ async function executeWorkflow(definition, opts = {}) {
|
|
|
1056
1551
|
|
|
1057
1552
|
// Execute layer by layer
|
|
1058
1553
|
const stepResults = [];
|
|
1554
|
+
const skippedByConditional = new Set(); // Steps skipped by conditional branches
|
|
1059
1555
|
|
|
1060
1556
|
for (const layer of layers) {
|
|
1061
1557
|
const layerPromises = layer.map(async (stepId) => {
|
|
1062
1558
|
const step = stepMap.get(stepId);
|
|
1063
1559
|
const stepStart = Date.now();
|
|
1064
1560
|
|
|
1561
|
+
// Check if this step was skipped by a conditional branch
|
|
1562
|
+
if (skippedByConditional.has(stepId)) {
|
|
1563
|
+
if (opts.onStepSkip) opts.onStepSkip(stepId, 'conditional branch not taken');
|
|
1564
|
+
context[stepId] = { output: null, skipped: true };
|
|
1565
|
+
stepResults.push({
|
|
1566
|
+
id: stepId,
|
|
1567
|
+
tool: step.tool,
|
|
1568
|
+
skipped: true,
|
|
1569
|
+
reason: 'conditional branch not taken',
|
|
1570
|
+
durationMs: Date.now() - stepStart,
|
|
1571
|
+
});
|
|
1572
|
+
return;
|
|
1573
|
+
}
|
|
1574
|
+
|
|
1065
1575
|
// Evaluate condition
|
|
1066
1576
|
if (step.condition) {
|
|
1067
1577
|
const conditionMet = evaluateCondition(step.condition, context);
|
|
@@ -1083,7 +1593,31 @@ async function executeWorkflow(definition, opts = {}) {
|
|
|
1083
1593
|
try {
|
|
1084
1594
|
let output;
|
|
1085
1595
|
|
|
1086
|
-
if
|
|
1596
|
+
// Mock executor injection: if _mockExecutors has a mock for this tool, use it
|
|
1597
|
+
const mockExecutor = opts._mockExecutors && opts._mockExecutors[step.tool];
|
|
1598
|
+
if (mockExecutor && step.tool !== 'conditional') {
|
|
1599
|
+
const resolvedInputs = resolveTemplate(step.inputs || {}, context);
|
|
1600
|
+
output = await mockExecutor(resolvedInputs, step, context);
|
|
1601
|
+
} else if (step.tool === 'conditional') {
|
|
1602
|
+
// Special handling: resolve then/else but pass raw condition to evaluator
|
|
1603
|
+
const rawCondition = step.inputs.condition;
|
|
1604
|
+
const resolvedInputs = {
|
|
1605
|
+
condition: rawCondition, // Keep raw for condition evaluator
|
|
1606
|
+
then: step.inputs.then || [],
|
|
1607
|
+
else: step.inputs.else || [],
|
|
1608
|
+
};
|
|
1609
|
+
output = await executeStep(step, resolvedInputs, defaults, context);
|
|
1610
|
+
} else if (step.tool === 'loop') {
|
|
1611
|
+
// Special handling: resolve items but pass raw step def for per-iteration resolution
|
|
1612
|
+
const resolvedItems = resolveTemplate(step.inputs.items, context);
|
|
1613
|
+
const resolvedInputs = {
|
|
1614
|
+
items: resolvedItems,
|
|
1615
|
+
as: step.inputs.as,
|
|
1616
|
+
step: step.inputs.step, // Raw — resolved per iteration inside executeLoop
|
|
1617
|
+
maxIterations: step.inputs.maxIterations,
|
|
1618
|
+
};
|
|
1619
|
+
output = await executeStep(step, resolvedInputs, defaults, context);
|
|
1620
|
+
} else if (step.forEach) {
|
|
1087
1621
|
// Iterate over an array
|
|
1088
1622
|
const iterArray = resolveTemplate(step.forEach, context);
|
|
1089
1623
|
if (!Array.isArray(iterArray)) {
|
|
@@ -1104,6 +1638,13 @@ async function executeWorkflow(definition, opts = {}) {
|
|
|
1104
1638
|
output = await executeStep(step, resolvedInputs, defaults, context);
|
|
1105
1639
|
}
|
|
1106
1640
|
|
|
1641
|
+
// If this was a conditional node, mark skipped branch steps
|
|
1642
|
+
if (step.tool === 'conditional' && output.skippedSteps) {
|
|
1643
|
+
for (const skippedId of output.skippedSteps) {
|
|
1644
|
+
skippedByConditional.add(skippedId);
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
|
|
1107
1648
|
const durationMs = Date.now() - stepStart;
|
|
1108
1649
|
context[stepId] = { output };
|
|
1109
1650
|
|
|
@@ -1320,7 +1861,9 @@ function loadWorkflow(nameOrPath) {
|
|
|
1320
1861
|
module.exports = {
|
|
1321
1862
|
// Validation
|
|
1322
1863
|
validateWorkflow,
|
|
1864
|
+
validateSchemaEnhanced,
|
|
1323
1865
|
detectCycles,
|
|
1866
|
+
SCHEMA_LIMITS,
|
|
1324
1867
|
|
|
1325
1868
|
// Dependency resolution
|
|
1326
1869
|
buildDependencyGraph,
|
|
@@ -1333,6 +1876,13 @@ module.exports = {
|
|
|
1333
1876
|
executeMerge,
|
|
1334
1877
|
executeFilter,
|
|
1335
1878
|
executeTransform,
|
|
1879
|
+
executeConditional,
|
|
1880
|
+
executeTemplate,
|
|
1881
|
+
executeLoop,
|
|
1882
|
+
executeChunk,
|
|
1883
|
+
executeHttp,
|
|
1884
|
+
executeIngest,
|
|
1885
|
+
executeAggregate,
|
|
1336
1886
|
|
|
1337
1887
|
// Main execution
|
|
1338
1888
|
executeStep,
|
|
@@ -1351,5 +1901,7 @@ module.exports = {
|
|
|
1351
1901
|
// Constants
|
|
1352
1902
|
VAI_TOOLS,
|
|
1353
1903
|
CONTROL_FLOW_TOOLS,
|
|
1904
|
+
PROCESSING_TOOLS,
|
|
1905
|
+
INTEGRATION_TOOLS,
|
|
1354
1906
|
ALL_TOOLS,
|
|
1355
1907
|
};
|