@probelabs/probe 0.6.0-rc231 → 0.6.0-rc233
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.d.ts +2 -0
- package/build/agent/ProbeAgent.js +105 -12
- package/build/agent/dsl/agent-test.mjs +341 -0
- package/build/agent/dsl/analyze-test.mjs +237 -0
- package/build/agent/dsl/diag-test.mjs +78 -0
- package/build/agent/dsl/environment.js +387 -0
- package/build/agent/dsl/manual-test.mjs +662 -0
- package/build/agent/dsl/output-buffer-test.mjs +124 -0
- package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/build/agent/dsl/pipeline-test.mjs +223 -0
- package/build/agent/dsl/runtime.js +206 -0
- package/build/agent/dsl/sandbox-experiment.mjs +309 -0
- package/build/agent/dsl/transformer.js +156 -0
- package/build/agent/dsl/trigger-test.mjs +159 -0
- package/build/agent/dsl/validator.js +183 -0
- package/build/agent/index.js +18776 -7675
- package/build/agent/probeTool.js +9 -0
- package/build/agent/tools.js +9 -1
- package/build/delegate.js +12 -6
- package/build/index.js +5 -0
- package/build/tools/common.js +7 -0
- package/build/tools/executePlan.js +761 -0
- package/build/tools/index.js +4 -0
- package/cjs/agent/ProbeAgent.cjs +12891 -1797
- package/cjs/index.cjs +12395 -1292
- package/package.json +5 -1
- package/src/agent/ProbeAgent.d.ts +2 -0
- package/src/agent/ProbeAgent.js +105 -12
- package/src/agent/dsl/agent-test.mjs +341 -0
- package/src/agent/dsl/analyze-test.mjs +237 -0
- package/src/agent/dsl/diag-test.mjs +78 -0
- package/src/agent/dsl/environment.js +387 -0
- package/src/agent/dsl/manual-test.mjs +662 -0
- package/src/agent/dsl/output-buffer-test.mjs +124 -0
- package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/src/agent/dsl/pipeline-test.mjs +223 -0
- package/src/agent/dsl/runtime.js +206 -0
- package/src/agent/dsl/sandbox-experiment.mjs +309 -0
- package/src/agent/dsl/transformer.js +156 -0
- package/src/agent/dsl/trigger-test.mjs +159 -0
- package/src/agent/dsl/validator.js +183 -0
- package/src/agent/index.js +8 -0
- package/src/agent/probeTool.js +9 -0
- package/src/agent/tools.js +9 -1
- package/src/delegate.js +12 -6
- package/src/index.js +5 -0
- package/src/tools/common.js +7 -0
- package/src/tools/executePlan.js +761 -0
- package/src/tools/index.js +4 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-unknown-linux-musl.tar.gz +0 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
#!/usr/bin/env node
/**
 * Quick E2E test of the output buffer feature.
 *
 * Exercises the DSL `output()` global: values passed to output() are appended
 * to the shared `outputBuffer.items` array, while `return` values from the
 * script travel separately via `result.result`. Six scenarios are covered;
 * the script exits non-zero if any check fails.
 */

import { createDSLRuntime } from './runtime.js';

// Shared buffer handed to the runtime; scripts append to items via output().
const outputBuffer = { items: [] };
const runtime = createDSLRuntime({
  toolImplementations: {
    // Stub search tool: deterministic multi-line payload for the checks below.
    search: { execute: async (p) => 'Result for: ' + p.query + '\nLine 1\nLine 2\nLine 3' },
  },
  llmCall: async (inst, data) => 'LLM processed: ' + String(data).substring(0, 50),
  outputBuffer,
});

let passed = 0;
let failed = 0;

// Minimal assertion helper: prints pass/fail and tallies the counters.
function check(name, condition) {
  if (condition) {
    console.log(' ✓ ' + name);
    passed++;
  } else {
    console.log(' ✗ ' + name);
    failed++;
  }
}

// Test 1: output() writes to buffer, return value separate
console.log('\nTest 1: output() + return');
outputBuffer.items = [];
const r1 = await runtime.execute(`
const data = search("test query");
output("## Full Results");
output(data);
return "Summary: found results";
`, 'test 1');

check('status is success', r1.status === 'success');
check('return value correct', r1.result === 'Summary: found results');
check('buffer has 2 items', outputBuffer.items.length === 2);
check('buffer[0] is header', outputBuffer.items[0] === '## Full Results');
check('buffer[1] has search data', outputBuffer.items[1].includes('Result for: test query'));
check('logs include [output]', r1.logs.some(l => l.startsWith('[output]')));

// Test 2: output() with JSON object
console.log('\nTest 2: output() with JSON');
outputBuffer.items = [];
const r2 = await runtime.execute(`
output({ customers: ["Acme", "BigCo"], count: 2 });
return "Found 2 customers";
`, 'test 2');

check('status is success', r2.status === 'success');
check('return is summary', r2.result === 'Found 2 customers');
check('buffer has 1 item', outputBuffer.items.length === 1);
// Objects are expected to land in the buffer as JSON strings.
const parsed = JSON.parse(outputBuffer.items[0]);
check('parsed JSON correct', parsed.count === 2 && parsed.customers[0] === 'Acme');

// Test 3: output() persists across calls (accumulates)
console.log('\nTest 3: Accumulation across calls');
outputBuffer.items = [];
await runtime.execute(`output("first call")`, 'call 1');
await runtime.execute(`output("second call")`, 'call 2');
check('buffer has 2 items from 2 calls', outputBuffer.items.length === 2);
check('items correct', outputBuffer.items[0] === 'first call' && outputBuffer.items[1] === 'second call');

// Test 4: output() ignores null/undefined
console.log('\nTest 4: Ignores null/undefined');
outputBuffer.items = [];
// NOTE(review): r4 is unused — the scenario is verified through the buffer
// checks below; consider dropping the binding.
const r4 = await runtime.execute(`
output(null);
output(undefined);
output("real content");
return "done";
`, 'test 4');
check('buffer has only 1 item', outputBuffer.items.length === 1);
check('only real content', outputBuffer.items[0] === 'real content');

// Test 5: Large table simulation
console.log('\nTest 5: Large table');
outputBuffer.items = [];
const r5 = await runtime.execute(`
var rows = [];
for (var i = 0; i < 100; i++) {
rows.push("| Customer " + i + " | Tech | Active |");
}
var header = "| Customer | Industry | Status |\\n| --- | --- | --- |\\n";
var table = header;
for (const row of rows) {
table = table + row + "\\n";
}
output(table);
return "Generated table with 100 customers";
`, 'test 5');

check('status is success', r5.status === 'success');
check('return is summary', r5.result === 'Generated table with 100 customers');
check('buffer has table', outputBuffer.items[0].includes('Customer 99'));
check('table is large', outputBuffer.items[0].length > 2000);

// Test 6: No outputBuffer = no output() function
console.log('\nTest 6: No outputBuffer');
const runtimeNoBuffer = createDSLRuntime({
  toolImplementations: {
    search: { execute: async (p) => 'ok' },
  },
  llmCall: async () => 'ok',
});

const r6 = await runtimeNoBuffer.execute(`
if (typeof output === "undefined") {
return "output not available";
}
return "output available";
`, 'test 6');
check('output not available without buffer', r6.result === 'output not available');

// Summary
console.log('\n' + '═'.repeat(50));
console.log(` Output Buffer E2E: ${passed} passed, ${failed} failed`);
console.log('═'.repeat(50));
process.exit(failed > 0 ? 1 : 0);
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
#!/usr/bin/env node
/**
 * Direct DSL runtime test against customer-insights repo.
 * Bypasses ProbeAgent — runs scripts directly against the runtime.
 *
 * Requires a Google API key in .env at the project root and the target repo
 * checked out at /tmp/customer-insights (see TARGET below).
 */

import { createDSLRuntime } from './runtime.js';
import { search } from '../../search.js';
import { extract } from '../../extract.js';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { generateText } from 'ai';
import { config } from 'dotenv';
import { resolve, dirname } from 'path';
import { fileURLToPath } from 'url';

// ESM has no __dirname; derive it so .env can be located relative to this file.
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../../../..');
config({ path: resolve(projectRoot, '.env') });

const apiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY;
if (!apiKey) { console.error('No API key'); process.exit(1); }

const google = createGoogleGenerativeAI({ apiKey });
|
24
|
+
|
|
25
|
+
/**
 * Bridge for DSL `LLM()` calls: sends `instruction` as the system prompt and
 * the stringified `data` (truncated to 100k chars) as the user prompt.
 *
 * @param {string} instruction - System prompt for the model.
 * @param {*} data - Payload; objects are JSON-stringified, null/undefined become ''.
 * @param {{temperature?: number, maxTokens?: number}} [options] - Generation overrides.
 * @returns {Promise<string>} The model's text response.
 */
async function llmCall(instruction, data, options = {}) {
  const dataStr = data == null ? '' : (typeof data === 'string' ? data : JSON.stringify(data, null, 2));
  // '(empty)' placeholder keeps the prompt non-empty for providers that reject it.
  const prompt = (dataStr || '(empty)').substring(0, 100000);
  const result = await generateText({
    model: google('gemini-2.5-flash'),
    system: instruction,
    prompt,
    // Use ?? so an explicit 0 (valid temperature / token budget) is honored;
    // the previous `||` silently replaced 0 with the defaults.
    temperature: options.temperature ?? 0.3,
    maxTokens: options.maxTokens ?? 4000,
  });
  return result.text;
}
|
|
37
|
+
|
|
38
|
+
// Repo under test; all tool calls below operate on this checkout.
const TARGET = '/tmp/customer-insights';

const runtime = createDSLRuntime({
  toolImplementations: {
    // Each tool catches its own errors and returns a message string so a
    // failing tool call degrades gracefully instead of aborting the script.
    search: { execute: async (params) => {
      try {
        return await search({ query: params.query, path: TARGET, maxTokens: 20000, timeout: 60 });
      } catch(e) { return 'Search error: ' + e.message; }
    }},
    extract: { execute: async (params) => {
      try {
        return await extract({ targets: params.targets, cwd: TARGET });
      } catch(e) { return 'Extract error: ' + e.message; }
    }},
    listFiles: { execute: async (params) => {
      try {
        return await search({ query: params.pattern || 'customer', path: TARGET, filesOnly: true, maxTokens: 10000, timeout: 60 });
      } catch(e) { return 'listFiles error: ' + e.message; }
    }},
  },
  llmCall,
  mapConcurrency: 3,
  timeoutMs: 300000,
  maxLoopIterations: 5000,
});

console.log('═'.repeat(70));
console.log(' Direct DSL Pipeline Test — customer-insights repo');
console.log('═'.repeat(70));

// Run the full pipeline: search → chunk → LLM classify → parse/accumulate →
// dedupe → markdown table → LLM summary. The template below is DSL source
// executed inside the sandbox, not host JavaScript.
const start = Date.now();
const result = await runtime.execute(`
// Step 1: Broad search for customer data
const results = search("customer onboarding playbook");
log("Search returned " + String(results).length + " chars");

// Step 2: Split into chunks and extract customer info using LLM
const chunks = chunk(results);
log("Split into " + chunks.length + " chunks");

const classified = map(chunks, (c) => LLM(
"Extract customer names and their industry from this text. " +
"Return a JSON array: [{customer: string, industry: string, notes: string}]. " +
"Return ONLY valid JSON array, no other text.",
c
));

// Step 3: Accumulate parsed results
var allCustomers = [];
for (const batch of classified) {
try {
var text = String(batch).trim();
var jsonStart = text.indexOf("[");
var jsonEnd = text.lastIndexOf("]");
if (jsonStart >= 0 && jsonEnd > jsonStart) {
text = text.substring(jsonStart, jsonEnd + 1);
}
var parsed = JSON.parse(text);
if (Array.isArray(parsed)) {
for (const item of parsed) { allCustomers.push(item); }
}
} catch (e) {
log("Parse error, skipping chunk");
}
}

log("Total customers extracted: " + allCustomers.length);

// Step 4: Deduplicate
var seen = {};
var uniqueCustomers = [];
for (const c of allCustomers) {
var key = String(c.customer || "").trim().toLowerCase();
if (key.length > 0 && !seen[key]) {
seen[key] = true;
uniqueCustomers.push(c);
}
}

log("Unique customers: " + uniqueCustomers.length);

// Step 5: Build markdown table
var table = "| Customer | Industry | Notes |\\n|---|---|---|\\n";
for (const c of uniqueCustomers) {
table = table + "| " + (c.customer || "Unknown") + " | " + (c.industry || "Unknown") + " | " + (c.notes || "-") + " |\\n";
}

// Step 6: Small LLM summary
const summary = LLM(
"Based on this customer table, write a brief 2-3 sentence summary of the customer base — what industries are represented, any patterns.",
table
);

return table + "\\n" + summary;
`, 'Customer classification pipeline');

const elapsed = Math.round((Date.now() - start) / 1000);

console.log('\n' + '─'.repeat(70));
console.log(`Status: ${result.status} (${elapsed}s)`);
console.log(`Logs: ${result.logs.join(' | ')}`);

if (result.status === 'error') {
  console.log(`Error: ${result.error}`);
} else {
  console.log('─'.repeat(70));
  console.log(result.result);
}

process.exit(result.status === 'error' ? 1 : 0);
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
#!/usr/bin/env node
/**
 * Data pipeline end-to-end test using ProbeAgent with enableExecutePlan.
 *
 * Tests against the TykTechnologies/customer-insights repo (/tmp/customer-insights)
 * to verify the full data pipeline flow:
 * 1. Agent picks execute_plan for comprehensive/inventory questions
 * 2. LLM generates DSL scripts with search → chunk → LLM classify → accumulate
 * 3. Session store persists data across multi-step execution
 * 4. Returns structured results (tables, JSON, reports)
 *
 * Usage:
 *   node npm/src/agent/dsl/pipeline-test.mjs
 *
 * Requires:
 * - GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY in .env
 * - /tmp/customer-insights repo cloned
 */

import { ProbeAgent } from '../ProbeAgent.js';
import { config } from 'dotenv';
import { resolve, dirname } from 'path';
import { fileURLToPath } from 'url';
import { existsSync } from 'fs';

// ESM has no __dirname; derive it so .env can be located relative to this file.
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../../../..');

config({ path: resolve(projectRoot, '.env') });

// Fail fast with actionable messages when prerequisites are missing.
const apiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY;
if (!apiKey) {
  console.error('ERROR: No Google API key found. Set GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY');
  process.exit(1);
}

const TARGET_REPO = '/tmp/customer-insights';
if (!existsSync(TARGET_REPO)) {
  console.error('ERROR: customer-insights repo not found at ' + TARGET_REPO);
  console.error('Clone it: git clone <repo-url> /tmp/customer-insights');
  process.exit(1);
}
|
|
43
|
+
|
|
44
|
+
// ── Test definitions ──
// Each case: a natural-language query sent to the agent, iteration/timeout
// budgets, and a check(result, toolCalls) that returns true on pass or a
// human-readable failure reason string.
const tests = [
  {
    name: 'Customer classification — categorize all customers by industry/type',
    query: 'Analyze ALL customer files in this repository. For every customer, classify them by industry (finance, tech, healthcare, government, etc.) and determine their use case type (API management, security, integration, etc.). Produce a comprehensive markdown table with columns: Customer, Industry, Use Case Type, and a brief note. Give me complete inventory.',
    maxIterations: 50,
    timeoutMs: 300000,
    check: (result, toolCalls) => {
      // Should have triggered execute_plan
      const usedExecutePlan = toolCalls.some(t => t === 'execute_plan');
      if (!usedExecutePlan) return 'Did not trigger execute_plan — used: ' + toolCalls.join(', ');
      // Result should be substantial
      if (!result || result.length < 200) return 'Result too short: ' + (result?.length || 0);
      return true;
    },
  },
  {
    name: 'Sentiment & pain points extraction — data pipeline pattern',
    query: 'Go through every customer document in this repo. For each customer, extract their main pain points and sentiment (positive, neutral, negative) about Tyk. Produce a structured report with: 1) A summary table of sentiment distribution, 2) Top 5 most common pain points with customer counts, 3) Customers with negative sentiment and why. Be comprehensive — cover ALL customers.',
    maxIterations: 50,
    timeoutMs: 300000,
    check: (result, toolCalls) => {
      const usedExecutePlan = toolCalls.some(t => t === 'execute_plan');
      if (!usedExecutePlan) return 'Did not trigger execute_plan';
      if (!result || result.length < 200) return 'Result too short: ' + (result?.length || 0);
      return true;
    },
  },
  {
    name: 'Feature adoption matrix — multi-search data pipeline',
    query: 'Create a complete feature adoption matrix for this customer base. Search for mentions of: API gateway, dashboard, developer portal, analytics, rate limiting, authentication, policies, and GraphQL. For each feature, list which customers use it. Return a markdown table where rows are features and columns show customer count + list of customer names.',
    maxIterations: 50,
    timeoutMs: 300000,
    check: (result, toolCalls) => {
      const usedExecutePlan = toolCalls.some(t => t === 'execute_plan');
      if (!usedExecutePlan) return 'Did not trigger execute_plan';
      if (!result || result.length < 100) return 'Result too short: ' + (result?.length || 0);
      return true;
    },
  },
];

// ── Test runner ──
// Module-level tallies shared by runPipelineTest() and main().
let testNum = 0;
let passed = 0;
let failed = 0;
|
|
90
|
+
|
|
91
|
+
/**
 * Run a single pipeline test case end to end against a fresh ProbeAgent.
 *
 * Creates the agent, records tool-call events, races the agent's answer
 * against the test's timeout, prints a result preview and token usage, and
 * updates the module-level passed/failed/testNum tallies.
 *
 * @param {{name: string, query: string, maxIterations?: number, timeoutMs?: number,
 *          check: (result: string, toolCalls: string[]) => true|string}} test - Test case to run.
 * @returns {Promise<void>}
 */
async function runPipelineTest(test) {
  testNum++;
  console.log(`\n${'═'.repeat(70)}`);
  console.log(`▶ Test ${testNum}/${tests.length}: ${test.name}`);
  console.log(` Query: "${test.query.substring(0, 120)}..."`);
  console.log('─'.repeat(70));

  // Names of tools the agent invoked, in order (checked by test.check).
  // (Removed unused `toolDetails` accumulator.)
  const toolCalls = [];

  const agent = new ProbeAgent({
    path: TARGET_REPO,
    provider: 'google',
    model: 'gemini-2.5-flash',
    enableExecutePlan: true,
    maxIterations: test.maxIterations || 50,
  });

  // Listen for tool call events so we can assert on which tools ran.
  agent.events.on('toolCall', (event) => {
    if (event.status === 'started') {
      toolCalls.push(event.name);
      const desc = event.description ? ` — ${event.description.substring(0, 80)}` : '';
      console.log(` [tool:start] ${event.name}${desc}`);
    }
    if (event.status === 'completed') {
      const preview = event.resultPreview || '';
      console.log(` [tool:done] ${event.name} (${String(preview).length} chars preview)`);
    }
    if (event.status === 'error') {
      console.log(` [tool:error] ${event.name}: ${event.error?.substring(0, 100)}`);
    }
  });

  await agent.initialize();

  const start = Date.now();
  let result;
  try {
    // Race the agent against the per-test timeout so a hung run can't stall
    // the whole suite.
    result = await Promise.race([
      agent.answer(test.query),
      new Promise((_, reject) =>
        setTimeout(() => reject(new Error('Test timeout')), test.timeoutMs || 180000)
      ),
    ]);
  } catch (e) {
    const elapsed = Math.round((Date.now() - start) / 1000);
    console.log(`\n [warn] Agent finished with: ${e.message?.substring(0, 150)} (${elapsed}s)`);
    // Still check what we got — agent may have partial result
    result = e.message;
  }

  const elapsed = Math.round((Date.now() - start) / 1000);

  console.log('─'.repeat(70));
  console.log(` Duration: ${elapsed}s`);
  console.log(` Tool calls: [${toolCalls.join(', ')}]`);
  console.log(` execute_plan used: ${toolCalls.includes('execute_plan') ? 'YES' : 'NO'}`);

  const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
  console.log(` Result length: ${resultStr?.length || 0} chars`);

  // Show result preview (split once instead of re-splitting per use).
  if (resultStr) {
    console.log('─'.repeat(70));
    console.log(' Result preview:');
    const allLines = resultStr.split('\n');
    for (const line of allLines.slice(0, 25)) {
      console.log(' │ ' + line.substring(0, 100));
    }
    if (allLines.length > 25) {
      console.log(' │ ... (' + (allLines.length - 25) + ' more lines)');
    }
  }

  // Run check: true means pass, anything else is the failure reason.
  const checkResult = test.check(resultStr, toolCalls);
  if (checkResult === true) {
    console.log(`\n ✓ PASSED (${elapsed}s)`);
    passed++;
  } else {
    console.log(`\n ✗ FAILED — ${checkResult} (${elapsed}s)`);
    failed++;
  }

  // Token usage (best-effort; some agent versions may not expose it).
  try {
    const usage = agent.getTokenUsage();
    if (usage) {
      console.log(` Tokens: input=${usage.inputTokens || 0} output=${usage.outputTokens || 0} total=${usage.totalTokens || 0}`);
    }
  } catch (e) {
    // ignore
  }

  try {
    await agent.close();
  } catch (e) {
    // ignore cleanup errors
  }
}
|
|
192
|
+
|
|
193
|
+
// ── Main ──
/**
 * Entry point: prints the suite banner, runs either one test (selected by a
 * 1-based index in argv[2]) or all tests sequentially, then exits with a
 * status reflecting the tally.
 */
async function main() {
  console.log('═'.repeat(70));
  console.log(' Data Pipeline E2E Tests — ProbeAgent + execute_plan');
  console.log(' Target: TykTechnologies/customer-insights');
  console.log(' Config: enableExecutePlan=true, provider=google, model=gemini-2.5-flash');
  console.log('═'.repeat(70));

  // Allow running a specific test by number (1-based on the CLI).
  const testIndex = process.argv[2] ? parseInt(process.argv[2], 10) - 1 : null;

  if (testIndex !== null && testIndex >= 0 && testIndex < tests.length) {
    console.log(`\nRunning test ${testIndex + 1} only: "${tests[testIndex].name}"`);
    await runPipelineTest(tests[testIndex]);
  } else {
    // Sequential on purpose — each test spins up its own agent and LLM calls.
    for (const test of tests) {
      await runPipelineTest(test);
    }
  }

  console.log(`\n${'═'.repeat(70)}`);
  console.log(` Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log('═'.repeat(70));

  process.exit(failed > 0 ? 1 : 0);
}

main().catch(e => {
  console.error('Fatal error:', e);
  process.exit(1);
});
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
 * DSL Runtime - SandboxJS execution engine.
 *
 * Orchestrates the full pipeline:
 * 1. Validate (AST whitelist)
 * 2. Transform (inject await, wrap in async IIFE)
 * 3. Execute in SandboxJS with tool globals + timeout
 *
 * Returns the result or a structured error.
 */

import SandboxModule from '@nyariv/sandboxjs';
import { validateDSL } from './validator.js';
import { transformDSL } from './transformer.js';
import { generateSandboxGlobals, getAsyncFunctionNames } from './environment.js';

// Interop shim: sandboxjs may surface as an ESM default export or a plain
// CJS module object depending on how it was loaded.
const Sandbox = SandboxModule.default || SandboxModule;
|
|
18
|
+
|
|
19
|
+
/**
 * Create a DSL runtime instance.
 *
 * @param {Object} options
 * @param {Object} options.toolImplementations - Native tool execute functions
 * @param {Object} [options.mcpBridge] - MCP bridge for calling MCP tools
 * @param {Object} [options.mcpTools={}] - MCP tool metadata
 * @param {Function} options.llmCall - Function for LLM() calls: (instruction, data, options?) => Promise<any>
 * @param {number} [options.mapConcurrency=3] - Concurrency limit for map()
 * @param {number} [options.timeoutMs=120000] - Execution timeout in milliseconds (default 2 min)
 * @param {number} [options.maxLoopIterations=5000] - Max iterations for while/for loops
 * @param {Object} [options.tracer=null] - SimpleAppTracer instance for OTEL telemetry
 * @param {Object} [options.sessionStore={}] - Forwarded to generateSandboxGlobals; presumably a
 *   key/value store shared across executions — confirm semantics in environment.js
 * @param {Object} [options.outputBuffer=null] - Forwarded to generateSandboxGlobals; when set,
 *   scripts gain an output() global that appends to it (see output-buffer tests)
 * @returns {Object} Runtime with execute() method
 */
export function createDSLRuntime(options) {
  const {
    toolImplementations = {},
    mcpBridge = null,
    mcpTools = {},
    llmCall,
    mapConcurrency = 3,
    timeoutMs = 120000,
    maxLoopIterations = 5000,
    tracer = null,
    sessionStore = {},
    outputBuffer = null,
  } = options;

  // Generate the globals and async function names, passing tracer for per-call tracing
  const toolGlobals = generateSandboxGlobals({
    toolImplementations,
    mcpBridge,
    mcpTools,
    llmCall,
    mapConcurrency,
    tracer,
    sessionStore,
    outputBuffer,
  });

  const asyncFunctionNames = getAsyncFunctionNames(mcpTools);

  /**
   * Execute DSL code.
   *
   * @param {string} code - The LLM-generated DSL code (sync-looking)
   * @param {string} [description] - Human-readable description for logging
   * @returns {Promise<{ status: 'success'|'error', result?: any, error?: string, logs: string[] }>}
   */
  async function execute(code, description) {
    // NOTE(review): `description` is not referenced in this body — confirm
    // whether it should be included in logs or tracer events.
    const logs = [];
    const startTime = Date.now();

    // Step 1: Validate
    tracer?.addEvent?.('dsl.phase.validate_start', {
      'dsl.code_length': code.length,
    });

    const validation = validateDSL(code);
    if (!validation.valid) {
      tracer?.addEvent?.('dsl.phase.validate_failed', {
        'dsl.error_count': validation.errors.length,
        'dsl.errors': validation.errors.join('; ').substring(0, 500),
      });
      return {
        status: 'error',
        error: `Validation failed:\n${validation.errors.join('\n')}`,
        logs,
      };
    }

    tracer?.addEvent?.('dsl.phase.validate_complete');

    // Step 2: Transform (inject await, wrap in async IIFE)
    let transformedCode;
    try {
      tracer?.addEvent?.('dsl.phase.transform_start');
      transformedCode = transformDSL(code, asyncFunctionNames);
      tracer?.addEvent?.('dsl.phase.transform_complete', {
        'dsl.transformed_length': transformedCode.length,
      });
    } catch (e) {
      tracer?.addEvent?.('dsl.phase.transform_failed', {
        'dsl.error': e.message,
      });
      return {
        status: 'error',
        error: `Transform failed: ${e.message}`,
        logs,
      };
    }

    // Step 3: Execute in SandboxJS with timeout
    tracer?.addEvent?.('dsl.phase.execute_start', {
      'dsl.timeout_ms': timeoutMs,
      'dsl.max_loop_iterations': maxLoopIterations,
    });

    try {
      // Set up log collector (globals object is shared with the sandbox env).
      toolGlobals._logs = logs;

      // Loop iteration counter for infinite loop protection. The transformer
      // presumably injects __checkLoop() calls into loop bodies — confirm in
      // transformer.js.
      let loopIterations = 0;
      toolGlobals.__checkLoop = () => {
        loopIterations++;
        if (loopIterations > maxLoopIterations) {
          throw new Error(`Loop exceeded maximum of ${maxLoopIterations} iterations. Use break to exit loops earlier or process fewer items.`);
        }
      };

      const sandbox = new Sandbox({
        globals: {
          ...Sandbox.SAFE_GLOBALS,
          ...toolGlobals,
          // Override: remove dangerous globals that SAFE_GLOBALS might include
          Function: undefined,
          eval: undefined,
        },
        prototypeWhitelist: Sandbox.SAFE_PROTOTYPES,
      });

      const exec = sandbox.compileAsync(transformedCode);

      // Catch unhandled rejections from SandboxJS async error propagation.
      // NOTE(review): this listener is process-wide for the duration of the
      // run (plus the 500ms grace period below), so unrelated rejections
      // elsewhere in the process would also be captured and surfaced as a
      // script failure. Concurrent execute() calls share this caveat.
      let escapedError = null;
      const rejectionHandler = (reason) => {
        escapedError = reason;
      };
      process.on('unhandledRejection', rejectionHandler);

      // Race execution against timeout
      let timeoutHandle;
      const executionPromise = exec().run();
      const timeoutPromise = new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => {
          reject(new Error(`Execution timed out after ${Math.round(timeoutMs / 1000)}s. Script took too long — reduce the amount of work (fewer items, smaller data) or increase timeout.`));
        }, timeoutMs);
      });

      let result;
      try {
        result = await Promise.race([executionPromise, timeoutPromise]);
      } finally {
        clearTimeout(timeoutHandle);
        // Delay handler removal — SandboxJS can throw async errors after execution completes
        setTimeout(() => {
          process.removeListener('unhandledRejection', rejectionHandler);
        }, 500);
      }

      // Check for escaped async errors (set by rejectionHandler above).
      if (escapedError) {
        throw escapedError;
      }

      const elapsed = Date.now() - startTime;
      logs.push(`[runtime] Completed in ${elapsed}ms`);

      tracer?.addEvent?.('dsl.phase.execute_complete', {
        'dsl.duration_ms': elapsed,
        'dsl.loop_iterations': loopIterations,
      });

      return {
        status: 'success',
        result,
        logs,
      };
    } catch (e) {
      const elapsed = Date.now() - startTime;
      logs.push(`[runtime] Failed after ${elapsed}ms`);

      tracer?.addEvent?.('dsl.phase.execute_failed', {
        'dsl.duration_ms': elapsed,
        'dsl.error': e.message?.substring(0, 500),
      });

      return {
        status: 'error',
        error: `Execution failed: ${e.message}`,
        logs,
      };
    }
  }

  return { execute };
}
|