@probelabs/probe 0.6.0-rc231 → 0.6.0-rc233

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/bin/binaries/probe-v0.6.0-rc233-aarch64-apple-darwin.tar.gz +0 -0
  2. package/bin/binaries/probe-v0.6.0-rc233-aarch64-unknown-linux-musl.tar.gz +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc233-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc233-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc233-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.d.ts +2 -0
  7. package/build/agent/ProbeAgent.js +105 -12
  8. package/build/agent/dsl/agent-test.mjs +341 -0
  9. package/build/agent/dsl/analyze-test.mjs +237 -0
  10. package/build/agent/dsl/diag-test.mjs +78 -0
  11. package/build/agent/dsl/environment.js +387 -0
  12. package/build/agent/dsl/manual-test.mjs +662 -0
  13. package/build/agent/dsl/output-buffer-test.mjs +124 -0
  14. package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
  15. package/build/agent/dsl/pipeline-test.mjs +223 -0
  16. package/build/agent/dsl/runtime.js +206 -0
  17. package/build/agent/dsl/sandbox-experiment.mjs +309 -0
  18. package/build/agent/dsl/transformer.js +156 -0
  19. package/build/agent/dsl/trigger-test.mjs +159 -0
  20. package/build/agent/dsl/validator.js +183 -0
  21. package/build/agent/index.js +18776 -7675
  22. package/build/agent/probeTool.js +9 -0
  23. package/build/agent/tools.js +9 -1
  24. package/build/delegate.js +12 -6
  25. package/build/index.js +5 -0
  26. package/build/tools/common.js +7 -0
  27. package/build/tools/executePlan.js +761 -0
  28. package/build/tools/index.js +4 -0
  29. package/cjs/agent/ProbeAgent.cjs +12891 -1797
  30. package/cjs/index.cjs +12395 -1292
  31. package/package.json +5 -1
  32. package/src/agent/ProbeAgent.d.ts +2 -0
  33. package/src/agent/ProbeAgent.js +105 -12
  34. package/src/agent/dsl/agent-test.mjs +341 -0
  35. package/src/agent/dsl/analyze-test.mjs +237 -0
  36. package/src/agent/dsl/diag-test.mjs +78 -0
  37. package/src/agent/dsl/environment.js +387 -0
  38. package/src/agent/dsl/manual-test.mjs +662 -0
  39. package/src/agent/dsl/output-buffer-test.mjs +124 -0
  40. package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
  41. package/src/agent/dsl/pipeline-test.mjs +223 -0
  42. package/src/agent/dsl/runtime.js +206 -0
  43. package/src/agent/dsl/sandbox-experiment.mjs +309 -0
  44. package/src/agent/dsl/transformer.js +156 -0
  45. package/src/agent/dsl/trigger-test.mjs +159 -0
  46. package/src/agent/dsl/validator.js +183 -0
  47. package/src/agent/index.js +8 -0
  48. package/src/agent/probeTool.js +9 -0
  49. package/src/agent/tools.js +9 -1
  50. package/src/delegate.js +12 -6
  51. package/src/index.js +5 -0
  52. package/src/tools/common.js +7 -0
  53. package/src/tools/executePlan.js +761 -0
  54. package/src/tools/index.js +4 -0
  55. package/bin/binaries/probe-v0.6.0-rc231-aarch64-apple-darwin.tar.gz +0 -0
  56. package/bin/binaries/probe-v0.6.0-rc231-aarch64-unknown-linux-musl.tar.gz +0 -0
  57. package/bin/binaries/probe-v0.6.0-rc231-x86_64-apple-darwin.tar.gz +0 -0
  58. package/bin/binaries/probe-v0.6.0-rc231-x86_64-pc-windows-msvc.zip +0 -0
  59. package/bin/binaries/probe-v0.6.0-rc231-x86_64-unknown-linux-musl.tar.gz +0 -0
@@ -0,0 +1,124 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Quick E2E test of the output buffer feature.
4
+ */
5
+
6
+ import { createDSLRuntime } from './runtime.js';
7
+
// Buffer object shared with the runtime; every test below resets `items` first.
const outputBuffer = { items: [] };

// Stub search tool: echoes the query followed by three fixed lines.
const fakeSearch = async (params) => 'Result for: ' + params.query + '\nLine 1\nLine 2\nLine 3';

// Stub LLM: echoes at most the first 50 characters of the stringified data.
const fakeLlm = async (_instruction, payload) => 'LLM processed: ' + String(payload).substring(0, 50);

const runtime = createDSLRuntime({
  toolImplementations: {
    search: { execute: fakeSearch },
  },
  llmCall: fakeLlm,
  outputBuffer,
});

// Running tallies, updated by check().
let passed = 0;
let failed = 0;
19
+
/**
 * Print the outcome of one named check and update the pass/fail tallies.
 *
 * @param {string} name - Label printed after the pass/fail mark.
 * @param {*} condition - Truthy counts as a pass, falsy as a failure.
 */
function check(name, condition) {
  const mark = condition ? ' ✓ ' : ' ✗ ';
  console.log(mark + name);
  if (condition) {
    passed += 1;
  } else {
    failed += 1;
  }
}
29
+
// NOTE: buffer entries are read with optional chaining throughout, so that a
// regression which leaves the buffer empty is reported as failed checks in the
// summary instead of aborting the script with a TypeError.

// Test 1: output() writes to buffer, return value separate
console.log('\nTest 1: output() + return');
outputBuffer.items = [];
const r1 = await runtime.execute(`
const data = search("test query");
output("## Full Results");
output(data);
return "Summary: found results";
`, 'test 1');

check('status is success', r1.status === 'success');
check('return value correct', r1.result === 'Summary: found results');
check('buffer has 2 items', outputBuffer.items.length === 2);
check('buffer[0] is header', outputBuffer.items[0] === '## Full Results');
check('buffer[1] has search data', outputBuffer.items[1]?.includes('Result for: test query'));
check('logs include [output]', r1.logs.some((l) => l.startsWith('[output]')));

// Test 2: output() with JSON object
console.log('\nTest 2: output() with JSON');
outputBuffer.items = [];
const r2 = await runtime.execute(`
output({ customers: ["Acme", "BigCo"], count: 2 });
return "Found 2 customers";
`, 'test 2');

check('status is success', r2.status === 'success');
check('return is summary', r2.result === 'Found 2 customers');
check('buffer has 1 item', outputBuffer.items.length === 1);
// JSON.parse throws on a missing or malformed entry; treat that as a failed check.
let parsed = null;
try {
  parsed = JSON.parse(outputBuffer.items[0]);
} catch {
  parsed = null;
}
check('parsed JSON correct', parsed?.count === 2 && parsed?.customers?.[0] === 'Acme');

// Test 3: output() persists across calls (accumulates)
console.log('\nTest 3: Accumulation across calls');
outputBuffer.items = [];
await runtime.execute(`output("first call")`, 'call 1');
await runtime.execute(`output("second call")`, 'call 2');
check('buffer has 2 items from 2 calls', outputBuffer.items.length === 2);
check('items correct', outputBuffer.items[0] === 'first call' && outputBuffer.items[1] === 'second call');

// Test 4: output() ignores null/undefined
console.log('\nTest 4: Ignores null/undefined');
outputBuffer.items = [];
const r4 = await runtime.execute(`
output(null);
output(undefined);
output("real content");
return "done";
`, 'test 4');
// The execution result was previously assigned but never inspected.
check('status is success', r4.status === 'success');
check('buffer has only 1 item', outputBuffer.items.length === 1);
check('only real content', outputBuffer.items[0] === 'real content');

// Test 5: Large table simulation
console.log('\nTest 5: Large table');
outputBuffer.items = [];
const r5 = await runtime.execute(`
var rows = [];
for (var i = 0; i < 100; i++) {
rows.push("| Customer " + i + " | Tech | Active |");
}
var header = "| Customer | Industry | Status |\\n| --- | --- | --- |\\n";
var table = header;
for (const row of rows) {
table = table + row + "\\n";
}
output(table);
return "Generated table with 100 customers";
`, 'test 5');

check('status is success', r5.status === 'success');
check('return is summary', r5.result === 'Generated table with 100 customers');
check('buffer has table', outputBuffer.items[0]?.includes('Customer 99'));
check('table is large', (outputBuffer.items[0]?.length ?? 0) > 2000);

// Test 6: No outputBuffer = no output() function
console.log('\nTest 6: No outputBuffer');
const runtimeNoBuffer = createDSLRuntime({
  toolImplementations: {
    search: { execute: async (p) => 'ok' },
  },
  llmCall: async () => 'ok',
});

const r6 = await runtimeNoBuffer.execute(`
if (typeof output === "undefined") {
return "output not available";
}
return "output available";
`, 'test 6');
check('output not available without buffer', r6.result === 'output not available');

// Summary: exit code is non-zero when any check failed.
console.log('\n' + '═'.repeat(50));
console.log(` Output Buffer E2E: ${passed} passed, ${failed} failed`);
console.log('═'.repeat(50));
process.exit(failed > 0 ? 1 : 0);
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Direct DSL runtime test against customer-insights repo.
4
+ * Bypasses ProbeAgent — runs scripts directly against the runtime.
5
+ */
6
+
7
+ import { createDSLRuntime } from './runtime.js';
8
+ import { search } from '../../search.js';
9
+ import { extract } from '../../extract.js';
10
+ import { createGoogleGenerativeAI } from '@ai-sdk/google';
11
+ import { generateText } from 'ai';
12
+ import { config } from 'dotenv';
13
+ import { resolve, dirname } from 'path';
14
+ import { fileURLToPath } from 'url';
15
+
// Load the .env file found four directory levels above this file.
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../../../..');
const envFile = resolve(projectRoot, '.env');
config({ path: envFile });

// Either variable name is accepted; without a key the script stops immediately.
const { GOOGLE_GENERATIVE_AI_API_KEY, GOOGLE_API_KEY } = process.env;
const apiKey = GOOGLE_GENERATIVE_AI_API_KEY || GOOGLE_API_KEY;
if (!apiKey) {
  console.error('No API key');
  process.exit(1);
}

const google = createGoogleGenerativeAI({ apiKey });
24
+
/**
 * Send one instruction/data pair to the Gemini model and return its text reply.
 *
 * @param {string} instruction - Passed to the model as the system prompt.
 * @param {*} data - Payload for the prompt. Strings are used as-is, other
 *   values are pretty-printed as JSON, and null/undefined count as empty.
 * @param {{temperature?: number, maxTokens?: number}} [options] - Optional
 *   sampling overrides (defaults: temperature 0.3, maxTokens 4000).
 * @returns {Promise<string>} The `text` field of the generation result.
 */
async function llmCall(instruction, data, options = {}) {
  let dataStr = '';
  if (data != null) {
    dataStr = typeof data === 'string' ? data : JSON.stringify(data, null, 2);
  }
  // An empty payload is sent as the literal '(empty)'; the prompt is capped
  // at 100000 characters.
  const prompt = (dataStr || '(empty)').substring(0, 100000);
  const result = await generateText({
    model: google('gemini-2.5-flash'),
    system: instruction,
    prompt,
    // `??` rather than `||`: an explicit temperature of 0 is a valid request
    // and must not be replaced by the 0.3 default.
    temperature: options.temperature ?? 0.3,
    maxTokens: options.maxTokens || 4000,
  });
  return result.text;
}
37
+
const TARGET = '/tmp/customer-insights';

/**
 * Build a tool entry whose execute() never throws: any error raised while
 * invoking the tool is returned to the script as `errorPrefix + message`.
 *
 * @param {string} errorPrefix - Text placed before the error message.
 * @param {(params: Object) => Promise<*>} invoke - Performs the actual call.
 * @returns {{execute: (params: Object) => Promise<*>}}
 */
function asTool(errorPrefix, invoke) {
  return {
    execute: async (params) => {
      try {
        return await invoke(params);
      } catch (e) {
        return errorPrefix + e.message;
      }
    },
  };
}

const runtime = createDSLRuntime({
  toolImplementations: {
    search: asTool('Search error: ', (params) =>
      search({ query: params.query, path: TARGET, maxTokens: 20000, timeout: 60 })),
    extract: asTool('Extract error: ', (params) =>
      extract({ targets: params.targets, cwd: TARGET })),
    // listFiles is implemented as a files-only search over the target repo.
    listFiles: asTool('listFiles error: ', (params) =>
      search({ query: params.pattern || 'customer', path: TARGET, filesOnly: true, maxTokens: 10000, timeout: 60 })),
  },
  llmCall,
  mapConcurrency: 3,
  timeoutMs: 300000,
  maxLoopIterations: 5000,
});
63
+
// DSL program under test: search → chunk → per-chunk LLM extraction →
// JSON parsing → de-duplication → markdown table → LLM summary.
const PIPELINE_SCRIPT = `
// Step 1: Broad search for customer data
const results = search("customer onboarding playbook");
log("Search returned " + String(results).length + " chars");

// Step 2: Split into chunks and extract customer info using LLM
const chunks = chunk(results);
log("Split into " + chunks.length + " chunks");

const classified = map(chunks, (c) => LLM(
"Extract customer names and their industry from this text. " +
"Return a JSON array: [{customer: string, industry: string, notes: string}]. " +
"Return ONLY valid JSON array, no other text.",
c
));

// Step 3: Accumulate parsed results
var allCustomers = [];
for (const batch of classified) {
try {
var text = String(batch).trim();
var jsonStart = text.indexOf("[");
var jsonEnd = text.lastIndexOf("]");
if (jsonStart >= 0 && jsonEnd > jsonStart) {
text = text.substring(jsonStart, jsonEnd + 1);
}
var parsed = JSON.parse(text);
if (Array.isArray(parsed)) {
for (const item of parsed) { allCustomers.push(item); }
}
} catch (e) {
log("Parse error, skipping chunk");
}
}

log("Total customers extracted: " + allCustomers.length);

// Step 4: Deduplicate
var seen = {};
var uniqueCustomers = [];
for (const c of allCustomers) {
var key = String(c.customer || "").trim().toLowerCase();
if (key.length > 0 && !seen[key]) {
seen[key] = true;
uniqueCustomers.push(c);
}
}

log("Unique customers: " + uniqueCustomers.length);

// Step 5: Build markdown table
var table = "| Customer | Industry | Notes |\\n|---|---|---|\\n";
for (const c of uniqueCustomers) {
table = table + "| " + (c.customer || "Unknown") + " | " + (c.industry || "Unknown") + " | " + (c.notes || "-") + " |\\n";
}

// Step 6: Small LLM summary
const summary = LLM(
"Based on this customer table, write a brief 2-3 sentence summary of the customer base — what industries are represented, any patterns.",
table
);

return table + "\\n" + summary;
`;

const banner = '═'.repeat(70);
console.log(banner);
console.log(' Direct DSL Pipeline Test — customer-insights repo');
console.log(banner);

// Run the script once and time it.
const start = Date.now();
const result = await runtime.execute(PIPELINE_SCRIPT, 'Customer classification pipeline');
const elapsed = Math.round((Date.now() - start) / 1000);

const rule = '─'.repeat(70);
console.log('\n' + rule);
console.log(`Status: ${result.status} (${elapsed}s)`);
console.log(`Logs: ${result.logs.join(' | ')}`);

// Print either the error or the produced report; the exit code mirrors the status.
const runFailed = result.status === 'error';
if (runFailed) {
  console.log(`Error: ${result.error}`);
} else {
  console.log(rule);
  console.log(result.result);
}

process.exit(runFailed ? 1 : 0);
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Data pipeline end-to-end test using ProbeAgent with enableExecutePlan.
4
+ *
5
+ * Tests against the TykTechnologies/customer-insights repo (/tmp/customer-insights)
6
+ * to verify the full data pipeline flow:
7
+ * 1. Agent picks execute_plan for comprehensive/inventory questions
8
+ * 2. LLM generates DSL scripts with search → chunk → LLM classify → accumulate
9
+ * 3. Session store persists data across multi-step execution
10
+ * 4. Returns structured results (tables, JSON, reports)
11
+ *
12
+ * Usage:
13
+ * node npm/src/agent/dsl/pipeline-test.mjs
14
+ *
15
+ * Requires:
16
+ * - GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY in .env
17
+ * - /tmp/customer-insights repo cloned
18
+ */
19
+
20
+ import { ProbeAgent } from '../ProbeAgent.js';
21
+ import { config } from 'dotenv';
22
+ import { resolve, dirname } from 'path';
23
+ import { fileURLToPath } from 'url';
24
+ import { existsSync } from 'fs';
25
+
/**
 * Print each message to stderr and terminate the process with exit code 1.
 *
 * @param {...string} messages - Lines to print, in order.
 */
function bail(...messages) {
  for (const message of messages) {
    console.error(message);
  }
  process.exit(1);
}

// Load the .env file found four directory levels above this file.
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../../../..');
const envFile = resolve(projectRoot, '.env');

config({ path: envFile });

// Preflight 1: a Google API key must be present under either variable name.
const apiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY;
if (!apiKey) {
  bail('ERROR: No Google API key found. Set GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY');
}

// Preflight 2: the repository the agent is pointed at must exist on disk.
const TARGET_REPO = '/tmp/customer-insights';
if (!existsSync(TARGET_REPO)) {
  bail(
    'ERROR: customer-insights repo not found at ' + TARGET_REPO,
    'Clone it: git clone <repo-url> /tmp/customer-insights',
  );
}
43
+
// ── Test definitions ──

/**
 * Build a checker that passes only when execute_plan was among the tool
 * calls and the result has at least `minLength` characters.
 *
 * @param {number} minLength - Minimum acceptable result length.
 * @param {boolean} [listToolsOnMiss=false] - When execute_plan was not used,
 *   append the tools that were used to the failure message.
 * @returns {(result: string, toolCalls: string[]) => (true|string)} Returns
 *   `true` on success, otherwise a failure description.
 */
function makeCheck(minLength, listToolsOnMiss = false) {
  return (result, toolCalls) => {
    if (!toolCalls.includes('execute_plan')) {
      return listToolsOnMiss
        ? 'Did not trigger execute_plan — used: ' + toolCalls.join(', ')
        : 'Did not trigger execute_plan';
    }
    if (!result || result.length < minLength) {
      return 'Result too short: ' + (result?.length || 0);
    }
    return true;
  };
}

const tests = [
  {
    name: 'Customer classification — categorize all customers by industry/type',
    query: 'Analyze ALL customer files in this repository. For every customer, classify them by industry (finance, tech, healthcare, government, etc.) and determine their use case type (API management, security, integration, etc.). Produce a comprehensive markdown table with columns: Customer, Industry, Use Case Type, and a brief note. Give me complete inventory.',
    maxIterations: 50,
    timeoutMs: 300000,
    check: makeCheck(200, true),
  },
  {
    name: 'Sentiment & pain points extraction — data pipeline pattern',
    query: 'Go through every customer document in this repo. For each customer, extract their main pain points and sentiment (positive, neutral, negative) about Tyk. Produce a structured report with: 1) A summary table of sentiment distribution, 2) Top 5 most common pain points with customer counts, 3) Customers with negative sentiment and why. Be comprehensive — cover ALL customers.',
    maxIterations: 50,
    timeoutMs: 300000,
    check: makeCheck(200),
  },
  {
    name: 'Feature adoption matrix — multi-search data pipeline',
    query: 'Create a complete feature adoption matrix for this customer base. Search for mentions of: API gateway, dashboard, developer portal, analytics, rate limiting, authentication, policies, and GraphQL. For each feature, list which customers use it. Return a markdown table where rows are features and columns show customer count + list of customer names.',
    maxIterations: 50,
    timeoutMs: 300000,
    check: makeCheck(100),
  },
];
85
+
// ── Test runner ──
let testNum = 0;
let passed = 0;
let failed = 0;

/**
 * Run one pipeline test: create a ProbeAgent against TARGET_REPO, ask the
 * test's query under a timeout, print a report, and update the
 * passed/failed tallies according to `test.check`.
 *
 * @param {Object} test - One entry of the `tests` array.
 * @param {string} test.name - Title shown in the report.
 * @param {string} test.query - Question sent to the agent.
 * @param {number} [test.maxIterations=50] - Iteration cap given to the agent.
 * @param {number} [test.timeoutMs=180000] - Wall-clock limit for the answer.
 * @param {(result: string, toolCalls: string[]) => (true|string)} test.check -
 *   Returns `true` on success or a failure description.
 * @returns {Promise<void>}
 */
async function runPipelineTest(test) {
  testNum++;
  console.log(`\n${'═'.repeat(70)}`);
  console.log(`▶ Test ${testNum}/${tests.length}: ${test.name}`);
  console.log(` Query: "${test.query.substring(0, 120)}..."`);
  console.log('─'.repeat(70));

  // Names of the tools the agent started, in call order.
  const toolCalls = [];

  const agent = new ProbeAgent({
    path: TARGET_REPO,
    provider: 'google',
    model: 'gemini-2.5-flash',
    enableExecutePlan: true,
    maxIterations: test.maxIterations || 50,
  });

  // Listen for tool call events
  agent.events.on('toolCall', (event) => {
    if (event.status === 'started') {
      toolCalls.push(event.name);
      const desc = event.description ? ` — ${event.description.substring(0, 80)}` : '';
      console.log(` [tool:start] ${event.name}${desc}`);
    }
    if (event.status === 'completed') {
      const preview = event.resultPreview || '';
      console.log(` [tool:done] ${event.name} (${String(preview).length} chars preview)`);
    }
    if (event.status === 'error') {
      // The error may be a string or an Error object; normalise it before
      // truncating so this logging handler can never throw.
      const errorText = String(event.error?.message ?? event.error ?? '');
      console.log(` [tool:error] ${event.name}: ${errorText.substring(0, 100)}`);
    }
  });

  await agent.initialize();

  const start = Date.now();
  let result;
  let timeoutHandle;
  try {
    result = await Promise.race([
      agent.answer(test.query),
      new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error('Test timeout')), test.timeoutMs || 180000);
      }),
    ]);
  } catch (e) {
    const elapsed = Math.round((Date.now() - start) / 1000);
    console.log(`\n [warn] Agent finished with: ${e.message?.substring(0, 150)} (${elapsed}s)`);
    // Still check what we got — the error message stands in for the result.
    result = e.message;
  } finally {
    // Do not leave the timeout timer pending once the race has settled.
    clearTimeout(timeoutHandle);
  }

  const elapsed = Math.round((Date.now() - start) / 1000);

  console.log('─'.repeat(70));
  console.log(` Duration: ${elapsed}s`);
  console.log(` Tool calls: [${toolCalls.join(', ')}]`);
  console.log(` execute_plan used: ${toolCalls.includes('execute_plan') ? 'YES' : 'NO'}`);

  const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
  console.log(` Result length: ${resultStr?.length || 0} chars`);

  // Show result preview: first 25 lines, each cut to 100 characters.
  if (resultStr) {
    const previewLimit = 25;
    const allLines = resultStr.split('\n');
    console.log('─'.repeat(70));
    console.log(' Result preview:');
    for (const line of allLines.slice(0, previewLimit)) {
      console.log(' │ ' + line.substring(0, 100));
    }
    if (allLines.length > previewLimit) {
      console.log(' │ ... (' + (allLines.length - previewLimit) + ' more lines)');
    }
  }

  // Run check
  const checkResult = test.check(resultStr, toolCalls);
  if (checkResult === true) {
    console.log(`\n ✓ PASSED (${elapsed}s)`);
    passed++;
  } else {
    console.log(`\n ✗ FAILED — ${checkResult} (${elapsed}s)`);
    failed++;
  }

  // Token usage (best effort: reporting must not affect the test outcome)
  try {
    const usage = agent.getTokenUsage();
    if (usage) {
      console.log(` Tokens: input=${usage.inputTokens || 0} output=${usage.outputTokens || 0} total=${usage.totalTokens || 0}`);
    }
  } catch (e) {
    // ignore
  }

  try {
    await agent.close();
  } catch (e) {
    // ignore cleanup errors
  }
}
192
+
// ── Main ──
/**
 * Entry point: print the banner, run either the single test selected by the
 * first CLI argument (1-based) or every test in order, print the totals, and
 * exit with code 1 when any test failed.
 *
 * An argument that is not a valid test number is rejected with an error
 * instead of silently running the whole (slow, API-consuming) suite.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('═'.repeat(70));
  console.log(' Data Pipeline E2E Tests — ProbeAgent + execute_plan');
  console.log(' Target: TykTechnologies/customer-insights');
  console.log(' Config: enableExecutePlan=true, provider=google, model=gemini-2.5-flash');
  console.log('═'.repeat(70));

  // Allow running a specific test by number
  const requested = process.argv[2];
  let selected = tests;

  if (requested) {
    const testIndex = Number.parseInt(requested, 10) - 1;
    const isValid = Number.isInteger(testIndex) && testIndex >= 0 && testIndex < tests.length;
    if (!isValid) {
      console.error(`ERROR: Invalid test number "${requested}". Expected a value from 1 to ${tests.length}.`);
      process.exit(1);
    }
    console.log(`\nRunning test ${testIndex + 1} only: "${tests[testIndex].name}"`);
    selected = [tests[testIndex]];
  }

  // Tests run strictly one after another.
  for (const test of selected) {
    await runPipelineTest(test);
  }

  console.log(`\n${'═'.repeat(70)}`);
  console.log(` Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log('═'.repeat(70));

  process.exit(failed > 0 ? 1 : 0);
}

main().catch((e) => {
  console.error('Fatal error:', e);
  process.exit(1);
});
@@ -0,0 +1,206 @@
1
+ /**
2
+ * DSL Runtime - SandboxJS execution engine.
3
+ *
4
+ * Orchestrates the full pipeline:
5
+ * 1. Validate (AST whitelist)
6
+ * 2. Transform (inject await, wrap in async IIFE)
7
+ * 3. Execute in SandboxJS with tool globals + timeout
8
+ *
9
+ * Returns the result or a structured error.
10
+ */
11
+
12
+ import SandboxModule from '@nyariv/sandboxjs';
13
+ import { validateDSL } from './validator.js';
14
+ import { transformDSL } from './transformer.js';
15
+ import { generateSandboxGlobals, getAsyncFunctionNames } from './environment.js';
16
+
17
+ const Sandbox = SandboxModule.default || SandboxModule;
18
+
/**
 * Produce a printable message for anything that can be thrown or used as a
 * promise rejection reason (Error instances, strings, null, ...).
 *
 * @param {*} err - The caught value.
 * @returns {string} `err.message` when it is a string, otherwise `String(err)`.
 */
function describeError(err) {
  if (err != null && typeof err.message === 'string') {
    return err.message;
  }
  return String(err);
}

/**
 * Create a DSL runtime instance.
 *
 * @param {Object} options
 * @param {Object} options.toolImplementations - Native tool execute functions
 * @param {Object} [options.mcpBridge] - MCP bridge for calling MCP tools
 * @param {Object} [options.mcpTools={}] - MCP tool metadata
 * @param {Function} options.llmCall - Function for LLM() calls: (instruction, data, options?) => Promise<any>
 * @param {number} [options.mapConcurrency=3] - Concurrency limit for map()
 * @param {number} [options.timeoutMs=120000] - Execution timeout in milliseconds (default 2 min)
 * @param {number} [options.maxLoopIterations=5000] - Max iterations for while/for loops
 * @param {Object} [options.tracer=null] - SimpleAppTracer instance for OTEL telemetry
 * @param {Object} [options.sessionStore={}] - Forwarded unchanged to the sandbox
 *   globals generator (presumably state shared across execute() calls — confirm
 *   against environment.js)
 * @param {Object} [options.outputBuffer=null] - Forwarded unchanged to the sandbox
 *   globals generator; when omitted no buffer is provided
 * @returns {Object} Runtime with execute() method
 */
export function createDSLRuntime(options) {
  const {
    toolImplementations = {},
    mcpBridge = null,
    mcpTools = {},
    llmCall,
    mapConcurrency = 3,
    timeoutMs = 120000,
    maxLoopIterations = 5000,
    tracer = null,
    sessionStore = {},
    outputBuffer = null,
  } = options;

  // Generate the globals and async function names, passing tracer for per-call tracing
  const toolGlobals = generateSandboxGlobals({
    toolImplementations,
    mcpBridge,
    mcpTools,
    llmCall,
    mapConcurrency,
    tracer,
    sessionStore,
    outputBuffer,
  });

  const asyncFunctionNames = getAsyncFunctionNames(mcpTools);

  /**
   * Execute DSL code.
   *
   * Never rejects for script problems: validation, transform and execution
   * failures are all reported through the returned `status`/`error` fields.
   *
   * @param {string} code - The LLM-generated DSL code (sync-looking)
   * @param {string} [description] - Human-readable description (accepted for
   *   callers; not currently used inside this function)
   * @returns {Promise<{ status: 'success'|'error', result?: any, error?: string, logs: string[] }>}
   */
  async function execute(code, description) {
    const logs = [];
    const startTime = Date.now();

    // Step 1: Validate
    tracer?.addEvent?.('dsl.phase.validate_start', {
      'dsl.code_length': code.length,
    });

    const validation = validateDSL(code);
    if (!validation.valid) {
      tracer?.addEvent?.('dsl.phase.validate_failed', {
        'dsl.error_count': validation.errors.length,
        'dsl.errors': validation.errors.join('; ').substring(0, 500),
      });
      return {
        status: 'error',
        error: `Validation failed:\n${validation.errors.join('\n')}`,
        logs,
      };
    }

    tracer?.addEvent?.('dsl.phase.validate_complete');

    // Step 2: Transform (inject await, wrap in async IIFE)
    let transformedCode;
    try {
      tracer?.addEvent?.('dsl.phase.transform_start');
      transformedCode = transformDSL(code, asyncFunctionNames);
      tracer?.addEvent?.('dsl.phase.transform_complete', {
        'dsl.transformed_length': transformedCode.length,
      });
    } catch (e) {
      const message = describeError(e);
      tracer?.addEvent?.('dsl.phase.transform_failed', {
        'dsl.error': message,
      });
      return {
        status: 'error',
        error: `Transform failed: ${message}`,
        logs,
      };
    }

    // Step 3: Execute in SandboxJS with timeout
    tracer?.addEvent?.('dsl.phase.execute_start', {
      'dsl.timeout_ms': timeoutMs,
      'dsl.max_loop_iterations': maxLoopIterations,
    });

    try {
      // Set up log collector.
      // NOTE(review): `toolGlobals` is shared by every execute() call on this
      // runtime, so overlapping executions would overwrite each other's
      // `_logs` / `__checkLoop` — confirm callers never run them concurrently.
      toolGlobals._logs = logs;

      // Loop iteration counter for infinite loop protection
      let loopIterations = 0;
      toolGlobals.__checkLoop = () => {
        loopIterations++;
        if (loopIterations > maxLoopIterations) {
          throw new Error(`Loop exceeded maximum of ${maxLoopIterations} iterations. Use break to exit loops earlier or process fewer items.`);
        }
      };

      const sandbox = new Sandbox({
        globals: {
          ...Sandbox.SAFE_GLOBALS,
          ...toolGlobals,
          // Override: remove dangerous globals that SAFE_GLOBALS might include
          Function: undefined,
          eval: undefined,
        },
        prototypeWhitelist: Sandbox.SAFE_PROTOTYPES,
      });

      const exec = sandbox.compileAsync(transformedCode);

      // Catch unhandled rejections from SandboxJS async error propagation.
      // NOTE: this is a process-wide hook, so while it is installed it also
      // captures rejections that did not originate from this script.
      let escapedError = null;
      const rejectionHandler = (reason) => {
        escapedError = reason;
      };
      process.on('unhandledRejection', rejectionHandler);

      // Race execution against timeout. Starting the run happens INSIDE the
      // try so that a synchronous throw still reaches the finally block and
      // the listener above is always removed.
      let timeoutHandle;
      let result;
      try {
        const executionPromise = exec().run();
        const timeoutPromise = new Promise((_, reject) => {
          timeoutHandle = setTimeout(() => {
            reject(new Error(`Execution timed out after ${Math.round(timeoutMs / 1000)}s. Script took too long — reduce the amount of work (fewer items, smaller data) or increase timeout.`));
          }, timeoutMs);
        });
        result = await Promise.race([executionPromise, timeoutPromise]);
      } finally {
        clearTimeout(timeoutHandle);
        // Delay handler removal — SandboxJS can throw async errors after execution completes
        setTimeout(() => {
          process.removeListener('unhandledRejection', rejectionHandler);
        }, 500);
      }

      // Check for escaped async errors
      if (escapedError) {
        throw escapedError;
      }

      const elapsed = Date.now() - startTime;
      logs.push(`[runtime] Completed in ${elapsed}ms`);

      tracer?.addEvent?.('dsl.phase.execute_complete', {
        'dsl.duration_ms': elapsed,
        'dsl.loop_iterations': loopIterations,
      });

      return {
        status: 'success',
        result,
        logs,
      };
    } catch (e) {
      const elapsed = Date.now() - startTime;
      logs.push(`[runtime] Failed after ${elapsed}ms`);

      // The caught value may be a non-Error (string, null, rejection reason),
      // so never read `.message` from it directly.
      const message = describeError(e);

      tracer?.addEvent?.('dsl.phase.execute_failed', {
        'dsl.duration_ms': elapsed,
        'dsl.error': message.substring(0, 500),
      });

      return {
        status: 'error',
        error: `Execution failed: ${message}`,
        logs,
      };
    }
  }

  return { execute };
}