@probelabs/probe 0.6.0-rc232 → 0.6.0-rc234
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc234-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc234-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc234-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc234-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc234-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.d.ts +2 -0
- package/build/agent/ProbeAgent.js +66 -7
- package/build/agent/dsl/agent-test.mjs +341 -0
- package/build/agent/dsl/analyze-test.mjs +237 -0
- package/build/agent/dsl/diag-test.mjs +78 -0
- package/build/agent/dsl/environment.js +387 -0
- package/build/agent/dsl/manual-test.mjs +662 -0
- package/build/agent/dsl/output-buffer-test.mjs +124 -0
- package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/build/agent/dsl/pipeline-test.mjs +223 -0
- package/build/agent/dsl/runtime.js +206 -0
- package/build/agent/dsl/sandbox-experiment.mjs +309 -0
- package/build/agent/dsl/transformer.js +156 -0
- package/build/agent/dsl/trigger-test.mjs +159 -0
- package/build/agent/dsl/validator.js +183 -0
- package/build/agent/index.js +18179 -7664
- package/build/agent/probeTool.js +9 -0
- package/build/agent/schemaUtils.js +74 -1
- package/build/agent/tasks/taskTool.js +6 -1
- package/build/agent/tools.js +9 -1
- package/build/index.js +5 -0
- package/build/tools/common.js +7 -0
- package/build/tools/executePlan.js +761 -0
- package/build/tools/index.js +4 -0
- package/cjs/agent/ProbeAgent.cjs +12146 -1638
- package/cjs/index.cjs +11800 -1283
- package/package.json +5 -1
- package/src/agent/ProbeAgent.d.ts +2 -0
- package/src/agent/ProbeAgent.js +66 -7
- package/src/agent/dsl/agent-test.mjs +341 -0
- package/src/agent/dsl/analyze-test.mjs +237 -0
- package/src/agent/dsl/diag-test.mjs +78 -0
- package/src/agent/dsl/environment.js +387 -0
- package/src/agent/dsl/manual-test.mjs +662 -0
- package/src/agent/dsl/output-buffer-test.mjs +124 -0
- package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/src/agent/dsl/pipeline-test.mjs +223 -0
- package/src/agent/dsl/runtime.js +206 -0
- package/src/agent/dsl/sandbox-experiment.mjs +309 -0
- package/src/agent/dsl/transformer.js +156 -0
- package/src/agent/dsl/trigger-test.mjs +159 -0
- package/src/agent/dsl/validator.js +183 -0
- package/src/agent/index.js +8 -0
- package/src/agent/probeTool.js +9 -0
- package/src/agent/schemaUtils.js +74 -1
- package/src/agent/tasks/taskTool.js +6 -1
- package/src/agent/tools.js +9 -1
- package/src/index.js +5 -0
- package/src/tools/common.js +7 -0
- package/src/tools/executePlan.js +761 -0
- package/src/tools/index.js +4 -0
- package/bin/binaries/probe-v0.6.0-rc232-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc232-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc232-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc232-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc232-x86_64-unknown-linux-musl.tar.gz +0 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Quick experiment to verify SandboxJS capabilities for our DSL runtime.
|
|
3
|
+
*
|
|
4
|
+
* Tests:
|
|
5
|
+
* 1. compileAsync() with host async functions as globals
|
|
6
|
+
* 2. Error propagation from sandbox to host
|
|
7
|
+
* 3. Tick limits (listed as a goal; no test in this file exercises them yet)
|
|
8
|
+
* 4. Sandbox.audit() for introspection
|
|
9
|
+
* 5. map() concurrency pattern
|
|
10
|
+
* 6. Nested async calls (callback inside map that calls async)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import SandboxModule from '@nyariv/sandboxjs';
|
|
14
|
+
const Sandbox = SandboxModule.default || SandboxModule;
|
|
15
|
+
|
|
16
|
+
/**
 * Run one named experiment and report its outcome on stdout.
 *
 * Prints `PASS: <name>` (plus the JSON-serialised return value when the
 * callback returns something other than `undefined`) or
 * `FAIL: <name> → <reason>`. Failures are reported rather than rethrown so
 * the experiments that follow still run.
 *
 * @param {string} name - Label printed next to the outcome.
 * @param {() => any} fn - Experiment body, sync or async. Throwing or
 *   rejecting marks the experiment as failed.
 * @returns {Promise<void>}
 */
async function test(name, fn) {
  try {
    const result = await fn();
    console.log(`PASS: ${name}`, result !== undefined ? `→ ${JSON.stringify(result)}` : '');
  } catch (e) {
    // Sandboxed code and host callbacks may throw anything (strings, null,
    // plain signal objects), so do not assume an Error carrying `.message`.
    const reason = e?.message ?? String(e);
    console.log(`FAIL: ${name} → ${reason}`);
  }
}
|
|
24
|
+
|
|
25
|
+
// Test 1: a host-provided async function is callable and awaitable from sandboxed code
await test('Host async function as global', async () => {
  const wanted = 'result for: test query';
  const fetchData = async (query) => ({ results: [`result for: ${query}`] });

  const sandbox = new Sandbox({
    globals: { ...Sandbox.SAFE_GLOBALS, fetchData },
  });
  const program = sandbox.compileAsync(`
    const data = await fetchData("test query");
    return data.results[0];
  `);

  const value = await program().run();
  if (value !== wanted) {
    throw new Error(`Expected 'result for: test query', got '${value}'`);
  }
  return value;
});
|
|
43
|
+
|
|
44
|
+
// Test 2: Multiple sequential async calls
// Verifies both that the host function is invoked in source order and that
// each awaited value reaches the sandboxed code.
await test('Multiple sequential async calls', async () => {
  const callLog = [];
  const s = new Sandbox({
    globals: {
      ...Sandbox.SAFE_GLOBALS,
      step: async (n) => {
        callLog.push(n);
        return `done-${n}`;
      },
    },
  });
  const exec = s.compileAsync(`
    const a = await step(1);
    const b = await step(2);
    const c = await step(3);
    return a + "," + b + "," + c;
  `);
  const result = await exec().run();
  if (callLog.join(',') !== '1,2,3') throw new Error(`Wrong call order: ${callLog}`);
  // The original only checked call order; an unresolved Promise leaking into
  // the concatenation would have gone unnoticed.
  const expected = 'done-1,done-2,done-3';
  if (result !== expected) throw new Error(`Expected '${expected}', got '${result}'`);
  return result;
});
|
|
66
|
+
|
|
67
|
+
// Test 3: a rejection inside a host async global surfaces to the host caller
await test('Error propagation from async global', async () => {
  const failingTool = async () => {
    throw new Error('Tool failed!');
  };
  const sandbox = new Sandbox({
    globals: { ...Sandbox.SAFE_GLOBALS, failingTool },
  });
  const program = sandbox.compileAsync(`
    const result = await failingTool();
    return result;
  `);

  // Capture whatever comes out of the run; a run that completes normally is
  // represented by the same sentinel error the check below rejects.
  let thrown;
  try {
    await program().run();
    thrown = new Error('Should have thrown');
  } catch (err) {
    thrown = err;
  }

  if (!thrown.message.includes('Tool failed')) {
    throw new Error(`Wrong error: ${thrown.message}`);
  }
  return 'Error correctly propagated';
});
|
|
89
|
+
|
|
90
|
+
// Test 4: inspect what Sandbox.audit() reports for code that touches unknown globals
await test('Sandbox.audit() reports accessed globals', async () => {
  const snippet = `
    const x = myFunc("test");
    const y = otherFunc(x);
    return y;
  `;
  // Exploratory: the audit report is printed as-is by the harness, not asserted on.
  return Sandbox.audit(snippet);
});
|
|
99
|
+
|
|
100
|
+
// Test 5: what sandboxed code sees when it calls an async global WITHOUT await
// (this is what an LLM would write before the transform adds the awaits)
await test('Async global called WITHOUT await', async () => {
  const fetchData = async (query) => ({ results: [`result for: ${query}`] });
  const sandbox = new Sandbox({
    globals: { ...Sandbox.SAFE_GLOBALS, fetchData },
  });

  // No await in the program: the expectation is a Promise object rather than a resolved value.
  const program = sandbox.compileAsync(`
    const data = fetchData("test");
    return data;
  `);
  const outcome = await program().run();

  // Thenable check: tells apart a still-pending Promise from an actual value.
  const isPromise = outcome && typeof outcome.then === 'function';
  let value;
  if (isPromise) {
    value = 'Promise (unresolved)';
  } else {
    value = outcome;
  }
  return { isPromise, type: typeof outcome, value };
});
|
|
120
|
+
|
|
121
|
+
// Test 6: a non-Error object thrown by a host global (pause-like signal) reaches the host
await test('Custom throw propagation', async () => {
  class PauseSignal {
    constructor(value) {
      this.value = value;
      this.isPause = true;
    }
  }

  const pause = (value) => {
    throw new PauseSignal(value);
  };
  const sandbox = new Sandbox({
    globals: { ...Sandbox.SAFE_GLOBALS, pause },
  });
  const program = sandbox.compileAsync(`
    const x = 42;
    pause({ result: x });
    return "should not reach here";
  `);

  // A run that finishes normally is reported through the same path as any
  // other non-pause error.
  let caught;
  try {
    await program().run();
    caught = new Error('Should have thrown');
  } catch (err) {
    caught = err;
  }

  return caught.isPause
    ? { paused: true, value: caught.value }
    : { paused: false, error: caught.message };
});
|
|
147
|
+
|
|
148
|
+
// Test 7: awaiting a host function on every iteration of a for...of loop
await test('for...of with async calls', async () => {
  const doubled = [2, 4, 6, 8, 10];
  const sandbox = new Sandbox({
    globals: {
      ...Sandbox.SAFE_GLOBALS,
      // Deliberately named 'process' to see whether that name conflicts.
      process: async (item) => item * 2,
    },
  });
  const program = sandbox.compileAsync(`
    const items = [1, 2, 3, 4, 5];
    const results = [];
    for (const item of items) {
      results.push(await process(item));
    }
    return results;
  `);

  const actual = await program().run();
  const actualJson = JSON.stringify(actual);
  if (actualJson !== JSON.stringify(doubled)) {
    throw new Error(`Got ${actualJson}`);
  }
  return actual;
});
|
|
169
|
+
|
|
170
|
+
// Test 8: variables handed to the compiled program as a scope object are visible inside it
await test('Scope variables accessible in sandbox', async () => {
  const transform = async (x) => x.toUpperCase();
  const sandbox = new Sandbox({
    globals: { ...Sandbox.SAFE_GLOBALS, transform },
  });
  const program = sandbox.compileAsync(`
    const result = await transform(inputData);
    return result;
  `);

  const scope = { inputData: 'hello world' };
  const shouted = await program(scope).run();
  if (shouted !== 'HELLO WORLD') {
    throw new Error(`Got '${shouted}'`);
  }
  return shouted;
});
|
|
186
|
+
|
|
187
|
+
// Test 9: an async arrow function defined inside the sandbox can await a host function
await test('Arrow function with async call inside', async () => {
  const processItem = async (item) => item * 10;
  const sandbox = new Sandbox({
    globals: { ...Sandbox.SAFE_GLOBALS, processItem },
  });
  const program = sandbox.compileAsync(`
    const items = [1, 2, 3];
    const fn = async (item) => {
      const result = await processItem(item);
      return result;
    };
    const results = [];
    for (const item of items) {
      results.push(await fn(item));
    }
    return results;
  `);

  const actual = await program().run();
  const actualJson = JSON.stringify(actual);
  if (actualJson !== '[10,20,30]') {
    throw new Error(`Got ${actualJson}`);
  }
  return actual;
});
|
|
211
|
+
|
|
212
|
+
// Test 10: map() as a custom global with concurrency
// The host-side map() keeps at most CONCURRENCY_LIMIT callbacks in flight; the
// test now also asserts that bound instead of only reporting the observed peak.
await test('Custom map() with concurrency control', async () => {
  const CONCURRENCY_LIMIT = 3;
  let concurrent = 0;
  let maxConcurrent = 0;

  const s = new Sandbox({
    globals: {
      ...Sandbox.SAFE_GLOBALS,
      processItem: async (item) => {
        concurrent++;
        maxConcurrent = Math.max(maxConcurrent, concurrent);
        await new Promise(r => setTimeout(r, 50)); // simulate work
        concurrent--;
        return item * 2;
      },
      map: async (items, fn) => {
        const results = [];
        const executing = new Set();
        for (const item of items) {
          // Each in-flight promise removes itself from the set once it fulfils.
          const p = fn(item).then(result => {
            executing.delete(p);
            return result;
          });
          executing.add(p);
          results.push(p);
          // At capacity: wait for any in-flight call to finish before starting another.
          if (executing.size >= CONCURRENCY_LIMIT) {
            await Promise.race(executing);
          }
        }
        return Promise.all(results);
      },
    },
  });
  const exec = s.compileAsync(`
    const items = [1, 2, 3, 4, 5, 6, 7, 8];
    const results = await map(items, async (item) => {
      return await processItem(item);
    });
    return results;
  `);
  const result = await exec().run();
  const expected = [2, 4, 6, 8, 10, 12, 14, 16];
  if (JSON.stringify(result) !== JSON.stringify(expected)) throw new Error(`Got ${JSON.stringify(result)}`);
  // Without this check the test passed even if the limiter did nothing.
  if (maxConcurrent > CONCURRENCY_LIMIT) {
    throw new Error(`Concurrency limit exceeded: ${maxConcurrent} > ${CONCURRENCY_LIMIT}`);
  }
  return { result, maxConcurrent };
});
|
|
258
|
+
|
|
259
|
+
// Test 11: map() with a callback that has neither `async` nor `await`
// (the shape an LLM would naturally write)
await test('map() where LLM writes sync-looking callback', async () => {
  // The callback may hand back a promise even though it is not declared async,
  // so the host-side map awaits every call before collecting its value.
  const map = async (items, fn) => {
    const collected = [];
    for (const item of items) {
      collected.push(await fn(item));
    }
    return collected;
  };
  const sandbox = new Sandbox({
    globals: {
      ...Sandbox.SAFE_GLOBALS,
      processItem: async (item) => item * 2,
      map,
    },
  });

  // The program below is what the LLM writes: no async, no await in the callback.
  const program = sandbox.compileAsync(`
    const items = [1, 2, 3];
    const results = await map(items, (item) => {
      return processItem(item);
    });
    return results;
  `);

  const actual = await program().run();
  const actualJson = JSON.stringify(actual);
  if (actualJson !== '[2,4,6]') {
    throw new Error(`Got ${actualJson}`);
  }
  return actual;
});
|
|
288
|
+
|
|
289
|
+
// Test 12: Verify blocked globals are truly inaccessible
// The sandboxed program reports its own verdict as a string. That verdict is
// now turned into a real failure; previously a "FAIL: ..." string was still
// logged by the harness as PASS.
await test('Blocked globals not accessible', async () => {
  const s = new Sandbox({
    globals: {
      ...Sandbox.SAFE_GLOBALS,
      // Deliberately NOT including: require, process, setTimeout, fetch
    },
  });
  const exec = s.compileAsync(`
    try {
      const x = setTimeout;
      return "FAIL: setTimeout accessible";
    } catch(e) {
      return "PASS: setTimeout blocked";
    }
  `);
  const result = await exec().run();
  if (typeof result !== 'string' || !result.startsWith('PASS')) {
    throw new Error(`Sandbox reported: ${result}`);
  }
  return result;
});

console.log('\n--- Experiment complete ---');
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST Transformer - Auto-injects await before async tool calls.
|
|
3
|
+
*
|
|
4
|
+
* The LLM writes synchronous-looking code. This transformer:
|
|
5
|
+
* 1. Parses the code into an AST
|
|
6
|
+
* 2. Finds all CallExpressions where the callee is a known async tool function
|
|
7
|
+
* 3. Inserts `await` before those calls in the source
|
|
8
|
+
* 4. Marks arrow functions containing async calls as `async`
|
|
9
|
+
* 5. Wraps the whole program in an async IIFE
|
|
10
|
+
*
|
|
11
|
+
* Uses offset-based string insertion (not AST regeneration) to preserve
|
|
12
|
+
* the original code structure as much as possible.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import * as acorn from 'acorn';
|
|
16
|
+
import * as walk from 'acorn-walk';
|
|
17
|
+
|
|
18
|
+
/**
 * Transform DSL code by injecting await and async wrappers.
 *
 * Works by collecting text insertions against the original source offsets
 * (the AST is never regenerated) and applying them in a single pass:
 *
 * - `await ` is placed before every call whose callee is a bare identifier
 *   listed in `asyncFunctionNames`. When the call is immediately used as the
 *   object of a member access, as a callee, as a template tag or as the base
 *   of `**`, it is wrapped as `(await call())` so the await applies to the
 *   call and not to the surrounding expression.
 * - The innermost function containing such a call is marked `async`
 *   (arrow functions, function expressions, function declarations and
 *   shorthand object methods). Functions that are already async are left alone.
 * - A function passed as the second argument to `map(...)` is marked `async`
 *   when it contains an async tool call at any depth.
 * - `__checkLoop();` is injected at the top of every `while`, `do…while`,
 *   `for`, `for…of` and `for…in` body; brace-less bodies are wrapped in a block.
 *   (`__checkLoop` is expected to be supplied by the runtime; it is not
 *   defined in this file.)
 * - The result is wrapped in `return (async () => { … })()`.
 *
 * @param {string} code - The sync-looking DSL code
 * @param {Set<string>} asyncFunctionNames - Names of functions that are async
 *   (tool functions). Any object with a `has` method is used as-is; any other
 *   iterable of names is copied into a Set.
 * @returns {string} Transformed code with await injected, wrapped in async IIFE
 * @throws {Error} `Transform parse error: …` when `code` cannot be parsed; the
 *   parser's error is attached as `cause`.
 */
export function transformDSL(code, asyncFunctionNames) {
  let ast;
  try {
    ast = acorn.parse(code, {
      ecmaVersion: 2022,
      sourceType: 'script',
      allowReturnOutsideFunction: true,
    });
  } catch (e) {
    throw new Error(`Transform parse error: ${e.message}`, { cause: e });
  }

  const asyncNames = typeof asyncFunctionNames?.has === 'function'
    ? asyncFunctionNames
    : new Set(asyncFunctionNames ?? []);

  // When several insertions land on the same source offset, the one with the
  // lower order value is emitted first. This keeps e.g. `{ __checkLoop();`
  // ahead of an `await ` that starts the first statement of a loop body, and a
  // wrapper's closing ` }` ahead of whatever begins the next statement.
  const ORDER = {
    closeParen: 0,
    loopGuard: 1,
    closeBrace: 2,
    openBrace: 3,
    asyncKeyword: 4,
    awaitKeyword: 5,
  };

  const insertions = [];
  const insert = (offset, order, text) => {
    insertions.push({ offset, order, text });
  };

  // ── Pass 1: collect structural facts the later passes need ──
  const functionScopes = [];          // every function-like node
  const asyncAnchors = new Map();     // shorthand method fn → offset where `async ` must go
  const alreadyAwaited = new Set();   // nodes that are the operand of an existing `await`
  const needsParens = new Set();      // nodes where a bare `await x` prefix would bind wrongly

  walk.full(ast, (node) => {
    switch (node.type) {
      case 'ArrowFunctionExpression':
      case 'FunctionExpression':
      case 'FunctionDeclaration':
        functionScopes.push(node);
        break;
      case 'Property':
        // `{ foo() {} }`: the FunctionExpression starts at `(`, so `async `
        // has to be placed in front of the property key instead.
        if (node.method) asyncAnchors.set(node.value, node.start);
        break;
      case 'AwaitExpression':
        alreadyAwaited.add(node.argument);
        break;
      case 'MemberExpression':
        needsParens.add(node.object);
        break;
      case 'CallExpression':
        needsParens.add(node.callee);
        break;
      case 'TaggedTemplateExpression':
        needsParens.add(node.tag);
        break;
      case 'BinaryExpression':
        // `await x ** y` is a syntax error; `(await x) ** y` is not.
        if (node.operator === '**') needsParens.add(node.left);
        break;
      default:
        break;
    }
  });

  // Innermost function whose body contains `node`, or null at top level.
  // Containing functions are strictly nested, so the innermost one is the one
  // whose body starts last.
  const innermostFunctionAround = (node) => {
    let innermost = null;
    for (const fn of functionScopes) {
      const contains = fn.body.start <= node.start && fn.body.end >= node.end;
      if (contains && (innermost === null || fn.body.start > innermost.body.start)) {
        innermost = fn;
      }
    }
    return innermost;
  };

  const isAsyncToolCall = (node) => {
    if (node.type !== 'CallExpression') return false;
    const name = getCalleeName(node);
    return Boolean(name) && asyncNames.has(name);
  };

  const functionsNeedingAsync = new Set();
  const markAsync = (fn) => {
    // Never emit `async async …`.
    if (fn && !fn.async) functionsNeedingAsync.add(fn);
  };

  // ── Pass 2: await every async tool call and mark its owning function ──
  walk.full(ast, (node) => {
    if (!isAsyncToolCall(node)) return;

    if (!alreadyAwaited.has(node)) {
      if (needsParens.has(node)) {
        // `tool().x` must become `(await tool()).x`, not `await tool().x`,
        // which would await the `.x` member of the pending promise.
        insert(node.start, ORDER.awaitKeyword, '(await ');
        insert(node.end, ORDER.closeParen, ')');
      } else {
        insert(node.start, ORDER.awaitKeyword, 'await ');
      }
    }

    // Only the innermost function needs `async`; marking outer functions too
    // would turn their return values into promises for no reason.
    markAsync(innermostFunctionAround(node));
  });

  // ── Pass 3: map(items, callback): callback is async if it reaches a tool ──
  walk.full(ast, (node) => {
    if (node.type !== 'CallExpression') return;
    if (getCalleeName(node) !== 'map' || node.arguments.length < 2) return;

    const callback = node.arguments[1];
    if (callback.type !== 'ArrowFunctionExpression' && callback.type !== 'FunctionExpression') {
      return;
    }

    let hasAsyncCall = false;
    walk.full(callback, (inner) => {
      if (isAsyncToolCall(inner)) hasAsyncCall = true;
    });
    if (hasAsyncCall) markAsync(callback);
  });

  // ── Pass 4: loop guards ──
  const LOOP_TYPES = new Set([
    'WhileStatement',
    'DoWhileStatement',
    'ForStatement',
    'ForOfStatement',
    'ForInStatement',
  ]);
  walk.full(ast, (node) => {
    if (!LOOP_TYPES.has(node.type)) return;
    const body = node.body;
    if (body.type === 'BlockStatement') {
      // Right after the opening brace, including for empty bodies such as
      // `while (true) {}`, which would otherwise spin unguarded.
      insert(body.start + 1, ORDER.loopGuard, ' __checkLoop();');
    } else {
      // Brace-less body (`while (c) x++;`): wrap it so the guard can run.
      insert(body.start, ORDER.openBrace, '{ __checkLoop(); ');
      insert(body.end, ORDER.closeBrace, ' }');
    }
  });

  // ── async markers ──
  for (const fn of functionsNeedingAsync) {
    // Arrow:       `(x) => ...`            → `async (x) => ...`
    // Expression:  `function(x) { ... }`   → `async function(x) { ... }`
    // Declaration: `function f(x) { ... }` → `async function f(x) { ... }`
    // Method:      `{ f(x) { ... } }`      → `{ async f(x) { ... } }`
    const offset = asyncAnchors.has(fn) ? asyncAnchors.get(fn) : fn.start;
    insert(offset, ORDER.asyncKeyword, 'async ');
  }

  // Apply all insertions in one left-to-right pass over the ORIGINAL source,
  // so no offset ever needs adjusting.
  insertions.sort((a, b) => a.offset - b.offset || a.order - b.order);
  let transformed = '';
  let cursor = 0;
  for (const ins of insertions) {
    transformed += code.slice(cursor, ins.offset) + ins.text;
    cursor = ins.offset;
  }
  transformed += code.slice(cursor);

  // Wrap in async IIFE with return so SandboxJS awaits the result
  return `return (async () => {\n${transformed}\n})()`;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Resolve the name a CallExpression is invoking.
 *
 * Only plain identifier callees are resolved: `foo()` → 'foo'. Every other
 * callee shape, including member access such as `obj.foo()`, yields `null`.
 * That is sufficient here because tools are exposed under flat names
 * (e.g. `mcp_github_create_issue`) rather than as dotted members.
 *
 * @param {import('acorn').Node} callExpr - A CallExpression node
 * @returns {string|null} The identifier name, or null when the callee is not a bare identifier
 */
function getCalleeName(callExpr) {
  const { callee } = callExpr;
  return callee.type === 'Identifier' ? callee.name : null;
}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Trigger test: verifies that the agent picks execute_plan for the right queries.
|
|
4
|
+
*
|
|
5
|
+
* Runs the real ProbeAgent with enableExecutePlan=true and observes which tools
|
|
6
|
+
* get called for different types of questions. This tests the tool-selection
|
|
7
|
+
* logic end-to-end — the system prompt, tool descriptions, and LLM decision-making.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node npm/src/agent/dsl/trigger-test.mjs
|
|
11
|
+
*
|
|
12
|
+
* Requires: GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY in .env
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { ProbeAgent } from '../ProbeAgent.js';
|
|
16
|
+
import { config } from 'dotenv';
|
|
17
|
+
import { resolve, dirname } from 'path';
|
|
18
|
+
import { fileURLToPath } from 'url';
|
|
19
|
+
|
|
20
|
+
// Locate the repository root (four levels above this file) and load its .env.
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '..', '..', '..', '..');
const envFile = resolve(projectRoot, '.env');

config({ path: envFile });

// Either variable name is accepted; the run is pointless without a key, so bail out early.
const apiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY;
const hasApiKey = Boolean(apiKey);
if (!hasApiKey) {
  console.error('ERROR: No Google API key found. Set GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY');
  process.exit(1);
}
|
|
31
|
+
|
|
32
|
+
// ── Test definitions ──
// Every case pairs a query with the tool the agent is expected to reach for
// first, plus a short justification that is printed when the expectation fails.
const triggerCase = (expectTool) => (name, query, reason) => ({ name, query, expectTool, reason });
const expectsPlan = triggerCase('execute_plan');
const expectsSearch = triggerCase('search');

const tests = [
  // ── Should trigger execute_plan ──
  expectsPlan(
    'Aggregate question (all patterns)',
    'Find ALL error handling patterns across the entire codebase and give me a comprehensive summary covering every module.',
    'Aggregate question needing full data coverage + "ALL" + "comprehensive" + "every module"',
  ),
  expectsPlan(
    'Multi-topic bulk scan',
    'Search for authentication, authorization, and session management patterns. Analyze each topic across the full codebase and produce a security report.',
    'Multiple topics + full codebase scan + synthesis',
  ),
  expectsPlan(
    'Open-ended discovery',
    'What are all the different testing approaches used in this codebase? Give me a complete inventory.',
    'Open-ended, needs discovery + comprehensive scan',
  ),

  // ── Should NOT trigger execute_plan ──
  expectsSearch(
    'Simple search (specific function)',
    'How does the validateDSL function work?',
    'Specific function lookup, 1-2 tool calls',
  ),
  expectsSearch(
    'Simple search (single concept)',
    'What is the timeout configuration for the DSL runtime?',
    'Narrow question, single concept',
  ),
];

// ── Test runner ──
// Module-level tallies, updated by runTriggerTest and read by main.
let testNum = 0;
let passed = 0;
let failed = 0;
|
|
74
|
+
|
|
75
|
+
/**
 * Run one trigger test: ask the agent the test's query and check which
 * "meaningful" tool (execute_plan, analyze_all, search or query) it calls first.
 *
 * Updates the module-level `testNum`, `passed` and `failed` counters and logs
 * progress to stdout. The agent is always closed, even when initialisation or
 * the evaluation itself throws.
 *
 * @param {{ name: string, query: string, expectTool: string, reason: string }} test
 *   The test case to run.
 * @returns {Promise<void>}
 */
async function runTriggerTest(test) {
  testNum++;
  console.log(`\n${'─'.repeat(70)}`);
  console.log(`▶ Test ${testNum}: ${test.name}`);
  console.log(` Query: "${test.query.substring(0, 100)}${test.query.length > 100 ? '...' : ''}"`);
  console.log(` Expected tool: ${test.expectTool}`);

  // Tools whose selection this test is about; anything else is ignored when
  // deciding which tool was "picked".
  const meaningfulTools = new Set(['execute_plan', 'analyze_all', 'search', 'query']);
  const toolCalls = [];

  const agent = new ProbeAgent({
    path: projectRoot,
    provider: 'google',
    model: 'gemini-2.5-flash',
    enableExecutePlan: true,
    maxIterations: 3, // Only need first few iterations to see what tool gets picked
  });

  // Listen for tool call events
  agent.events.on('toolCall', (event) => {
    if (event.status === 'started') {
      toolCalls.push(event.name);
      console.log(` [tool] ${event.name}`);
    }
  });

  try {
    await agent.initialize();

    const start = Date.now();
    try {
      await agent.answer(test.query);
    } catch (e) {
      // May hit maxIterations limit — that's fine, we just want tool selection.
      // `e` may be nullish or a non-Error value, so read the message defensively.
      const message = e?.message;
      if (!message?.includes('iteration') && !message?.includes('cancelled')) {
        console.log(` [warn] Agent error: ${message?.substring(0, 150)}`);
      }
    }

    const elapsed = Date.now() - start;
    const firstMeaningfulTool = toolCalls.find((t) => meaningfulTools.has(t));

    console.log(` All tool calls: [${toolCalls.join(', ')}]`);
    console.log(` First meaningful tool: ${firstMeaningfulTool || '(none)'}`);

    const toolMatch = firstMeaningfulTool === test.expectTool;

    if (toolMatch) {
      console.log(` ✓ PASSED — picked ${firstMeaningfulTool} as expected (${elapsed}ms)`);
      passed++;
    } else {
      console.log(` ✗ FAILED — expected ${test.expectTool}, got ${firstMeaningfulTool || '(none)'} (${elapsed}ms)`);
      console.log(` Reason it should use ${test.expectTool}: ${test.reason}`);
      failed++;
    }
  } finally {
    // Always release the agent. Previously a rejected initialize() skipped
    // close() entirely.
    try {
      await agent.close();
    } catch (e) {
      // Best-effort cleanup: a failure to close must not mask the test outcome.
    }
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Entry point: print a banner, run every trigger test one after another,
 * print the tally, then exit with status 1 if any test failed and 0 otherwise.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const heavyRule = '═'.repeat(70);

  console.log(heavyRule);
  console.log(' Execute Plan Trigger Tests — Tool Selection Verification');
  console.log(heavyRule);
  console.log(`\nRunning with: enableExecutePlan=true, provider=google, model=gemini-2.5-flash`);
  console.log(`Project root: ${projectRoot}`);

  // Strictly sequential: each test builds its own agent and the counters are shared.
  for (let i = 0; i < tests.length; i += 1) {
    await runTriggerTest(tests[i]);
  }

  console.log(`\n${heavyRule}`);
  console.log(` Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log(heavyRule);

  const exitCode = failed > 0 ? 1 : 0;
  process.exit(exitCode);
}

// Anything that escapes main() is fatal: report it and exit non-zero.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|