@probelabs/probe 0.6.0-rc231 → 0.6.0-rc233
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.d.ts +2 -0
- package/build/agent/ProbeAgent.js +105 -12
- package/build/agent/dsl/agent-test.mjs +341 -0
- package/build/agent/dsl/analyze-test.mjs +237 -0
- package/build/agent/dsl/diag-test.mjs +78 -0
- package/build/agent/dsl/environment.js +387 -0
- package/build/agent/dsl/manual-test.mjs +662 -0
- package/build/agent/dsl/output-buffer-test.mjs +124 -0
- package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/build/agent/dsl/pipeline-test.mjs +223 -0
- package/build/agent/dsl/runtime.js +206 -0
- package/build/agent/dsl/sandbox-experiment.mjs +309 -0
- package/build/agent/dsl/transformer.js +156 -0
- package/build/agent/dsl/trigger-test.mjs +159 -0
- package/build/agent/dsl/validator.js +183 -0
- package/build/agent/index.js +18776 -7675
- package/build/agent/probeTool.js +9 -0
- package/build/agent/tools.js +9 -1
- package/build/delegate.js +12 -6
- package/build/index.js +5 -0
- package/build/tools/common.js +7 -0
- package/build/tools/executePlan.js +761 -0
- package/build/tools/index.js +4 -0
- package/cjs/agent/ProbeAgent.cjs +12891 -1797
- package/cjs/index.cjs +12395 -1292
- package/package.json +5 -1
- package/src/agent/ProbeAgent.d.ts +2 -0
- package/src/agent/ProbeAgent.js +105 -12
- package/src/agent/dsl/agent-test.mjs +341 -0
- package/src/agent/dsl/analyze-test.mjs +237 -0
- package/src/agent/dsl/diag-test.mjs +78 -0
- package/src/agent/dsl/environment.js +387 -0
- package/src/agent/dsl/manual-test.mjs +662 -0
- package/src/agent/dsl/output-buffer-test.mjs +124 -0
- package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/src/agent/dsl/pipeline-test.mjs +223 -0
- package/src/agent/dsl/runtime.js +206 -0
- package/src/agent/dsl/sandbox-experiment.mjs +309 -0
- package/src/agent/dsl/transformer.js +156 -0
- package/src/agent/dsl/trigger-test.mjs +159 -0
- package/src/agent/dsl/validator.js +183 -0
- package/src/agent/index.js +8 -0
- package/src/agent/probeTool.js +9 -0
- package/src/agent/tools.js +9 -1
- package/src/delegate.js +12 -6
- package/src/index.js +5 -0
- package/src/tools/common.js +7 -0
- package/src/tools/executePlan.js +761 -0
- package/src/tools/index.js +4 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-unknown-linux-musl.tar.gz +0 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Real-world test of the analyze_all replacement pattern.
|
|
4
|
+
*
|
|
5
|
+
* Tests against the TykTechnologies/customer-insights repo (582 markdown files, 16MB)
|
|
6
|
+
* to verify the search → chunk → map(LLM) → synthesize pipeline works at scale.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* node npm/src/agent/dsl/analyze-test.mjs
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { createDSLRuntime } from './runtime.js';
|
|
13
|
+
import { search } from '../../search.js';
|
|
14
|
+
import { extract } from '../../extract.js';
|
|
15
|
+
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
16
|
+
import { generateText } from 'ai';
|
|
17
|
+
import { config } from 'dotenv';
|
|
18
|
+
import { resolve, dirname } from 'path';
|
|
19
|
+
import { fileURLToPath } from 'url';
|
|
20
|
+
|
|
21
|
+
// Locate the repository root (four directories above this file) so the shared
// .env file is found regardless of the directory the script is launched from.
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../../../..');

const envFile = resolve(projectRoot, '.env');
config({ path: envFile });

// Either variable may carry the Google key; the first non-empty one wins.
const {
  GOOGLE_GENERATIVE_AI_API_KEY: primaryKey,
  GOOGLE_API_KEY: fallbackKey,
} = process.env;
const apiKey = primaryKey || fallbackKey;

// Nothing below can work without a key, so stop immediately.
if (!apiKey) {
  console.error('ERROR: No Google API key found.');
  process.exit(1);
}

const google = createGoogleGenerativeAI({ apiKey });
|
|
33
|
+
|
|
34
|
+
/**
 * Send one focused request to Gemini and return the generated text.
 *
 * @param {string} instruction - Passed to the model as the system prompt.
 * @param {*} data - Payload for the user prompt. Strings are sent as-is, other
 *   values are pretty-printed as JSON, and empty payloads become "(empty)".
 *   The prompt is capped at 100000 characters.
 * @param {Object} [options]
 * @param {number} [options.temperature=0.3] - Sampling temperature. An explicit
 *   0 is honoured.
 * @param {number} [options.maxTokens=4000] - Output token limit.
 * @returns {Promise<string>} The `text` field of the generateText() result.
 */
async function llmCall(instruction, data, options = {}) {
  let dataStr = '';
  if (typeof data === 'string') {
    dataStr = data;
  } else if (data != null) {
    // JSON.stringify returns undefined for functions/symbols.
    dataStr = JSON.stringify(data, null, 2) ?? '';
  }
  const prompt = (dataStr || '(empty)').substring(0, 100000);

  const result = await generateText({
    model: google('gemini-2.5-flash'),
    system: instruction,
    prompt,
    // `??` rather than `||`: a temperature of 0 is a legitimate request and
    // must not be replaced by the default.
    temperature: options.temperature ?? 0.3,
    // `||` is kept on purpose: a limit of 0 tokens is never useful.
    maxTokens: options.maxTokens || 4000,
  });
  return result.text;
}
|
|
46
|
+
|
|
47
|
+
// Local checkout every tool call is run against.
const TARGET_REPO = '/tmp/customer-insights';

/**
 * Build a tool `execute` function that never rejects: a failure is reported
 * to the DSL as a plain string made of `prefix` plus the error message.
 */
const reportingErrorsAs = (prefix, run) => async (params) => {
  try {
    return await run(params);
  } catch (e) {
    return prefix + e.message;
  }
};

const toolImplementations = {
  search: {
    execute: reportingErrorsAs("Search error: ", (params) =>
      search({
        query: params.query,
        path: params.path || TARGET_REPO,
        cwd: TARGET_REPO,
        maxTokens: 20000,
        timeout: 30,
        exact: params.exact || false,
      })
    ),
  },
  extract: {
    execute: reportingErrorsAs("Extract error: ", (params) =>
      extract({
        targets: params.targets,
        input_content: params.input_content,
        cwd: TARGET_REPO,
      })
    ),
  },
};

// One DSL runtime is shared by all tests in this file.
const runtimeOptions = {
  toolImplementations,
  llmCall,
  mapConcurrency: 3,
  timeoutMs: 120000,
  maxLoopIterations: 5000,
};
const runtime = createDSLRuntime(runtimeOptions);

// ── Tests ──
// Counters updated by runTest() and reported by main().
let testNum = 0;
let passed = 0;
let failed = 0;
|
|
93
|
+
|
|
94
|
+
/**
 * Run one DSL program through the shared runtime, print a report and update
 * the module-level testNum / passed / failed counters.
 *
 * @param {string} name - Label for the test; also passed to runtime.execute().
 * @param {string} code - DSL source to execute.
 * @param {Function} check - Receives the runtime result object; must return
 *   `true` on success, anything else is printed as the failure reason.
 * @returns {Promise<void>}
 */
async function runTest(name, code, check) {
  testNum++;
  console.log(`\n${'─'.repeat(70)}`);
  console.log(`▶ Test ${testNum}: ${name}`);

  // Split once; the line list feeds the count, the preview and the ellipsis.
  const codeLines = code.trim().split('\n');
  console.log(` Code (${codeLines.length} lines):`);
  console.log(codeLines.slice(0, 8).map((l) => ' ' + l.trim()).join('\n'));
  if (codeLines.length > 8) console.log(' ...');

  const start = Date.now();
  try {
    const result = await runtime.execute(code, name);
    const elapsed = Date.now() - start;
    // Tolerate a result without a logs array instead of crashing the report.
    const logs = Array.isArray(result.logs) ? result.logs : [];

    if (result.status === 'error') {
      console.log(` ✗ EXECUTION ERROR (${elapsed}ms)`);
      // String() guards against a non-string error payload.
      console.log(` Error: ${String(result.error).substring(0, 300)}`);
      if (logs.length) console.log(` Logs: ${logs.join(' | ')}`);
      failed++;
      return;
    }

    const userLogs = logs.filter((l) => !String(l).startsWith('[runtime]'));
    if (userLogs.length) {
      console.log(` Logs: ${userLogs.join(' | ')}`);
    }

    const checkResult = check(result);
    if (checkResult !== true) {
      console.log(` ✗ CHECK FAILED (${elapsed}ms) — ${checkResult}`);
      failed++;
      return;
    }

    // Build the printable result BEFORE announcing success. JSON.stringify
    // returns undefined for undefined/functions and throws on cycles; neither
    // may turn a passed test into a "CRASHED" one.
    let fullText;
    if (typeof result.result === 'string') {
      fullText = result.result;
    } else {
      try {
        fullText = JSON.stringify(result.result, null, 2) ?? String(result.result);
      } catch (stringifyError) {
        fullText = String(result.result);
      }
    }

    console.log(` ✓ PASSED (${elapsed}ms)`);
    // The ellipsis is shown only when something was actually cut off.
    console.log(` Result: ${fullText.substring(0, 500)}${fullText.length > 500 ? '...' : ''}`);
    passed++;
  } catch (e) {
    const elapsed = Date.now() - start;
    console.log(` ✗ CRASHED (${elapsed}ms) — ${e?.message ?? e}`);
    failed++;
  }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Entry point: prints the banner, runs every scenario in order (one at a
 * time), prints the totals and exits with status 1 if any scenario failed.
 */
async function main() {
  const banner = '═'.repeat(70);
  console.log(banner);
  console.log(' analyze_all Replacement — Real-World Tests');
  console.log(' Target: TykTechnologies/customer-insights (582 .md files, 16MB)');
  console.log(banner);

  // Each scenario is a [name, DSL code, result check] triple for runTest().
  const scenarios = [
    // Test 1: Core analyze_all pattern — search → chunk → map(LLM) → synthesize
    [
      'analyze_all pattern: "api governance"',
      `
const results = search("api governance");
log("Search returned " + String(results).length + " chars");
const chunks = chunk(results);
log("Split into " + chunks.length + " chunks");
const extracted = map(chunks, (c) => LLM("List every mention of API governance — who uses it, what for, any specific policies or tools mentioned. Be brief and factual.", c));
var combined = "";
for (const e of extracted) { combined = combined + String(e) + "\\n---\\n"; }
return LLM("Synthesize into a comprehensive report about API governance across all customers. Group by: 1) Customers using API governance, 2) Governance tools/approaches, 3) Common patterns. Be thorough.", combined);
`,
      (r) => {
        if (typeof r.result !== 'string') return 'Expected string result';
        if (r.result.length < 100) return 'Result too short: ' + r.result.length;
        return true;
      },
    ],

    // Test 2: Multi-topic search — governance + rate limiting + security
    [
      'Multi-topic: governance, rate limiting, security policies',
      `
const topics = ["api governance", "rate limiting", "security policy"];
const allFindings = [];
for (const topic of topics) {
const results = search(topic);
log(topic + ": " + String(results).length + " chars");
const chunks = chunk(results);
const findings = map(chunks, (c) => LLM("Extract key findings about " + topic + ". Include customer names and specifics. Be brief.", c));
for (const f of findings) { allFindings.push(topic + ": " + String(f)); }
}
var combined = "";
for (const f of allFindings) { combined = combined + f + "\\n---\\n"; }
return LLM("Create a cross-topic analysis: How do customers approach API governance, rate limiting, and security together? What patterns emerge?", combined);
`,
      (r) => {
        if (typeof r.result !== 'string') return 'Expected string result';
        if (r.result.length < 100) return 'Result too short';
        return true;
      },
    ],

    // Test 3: Extract specific data points
    [
      'Extract customer use cases for API management',
      `
const results = search("use case API management");
log("Search: " + String(results).length + " chars");
const chunks = chunk(results);
log("Chunks: " + chunks.length);
const extracted = map(chunks, (c) => LLM("Extract a JSON array of objects with fields: customer (string), use_case (string), outcome (string or null). Only include clearly stated use cases. Return valid JSON array only.", c));
var allUseCases = [];
for (const e of extracted) {
try {
var text = String(e).trim();
var jsonStart = text.indexOf("[");
var jsonEnd = text.lastIndexOf("]");
if (jsonStart >= 0 && jsonEnd > jsonStart) {
text = text.substring(jsonStart, jsonEnd + 1);
}
var parsed = JSON.parse(text);
if (Array.isArray(parsed)) {
for (const item of parsed) { allUseCases.push(item); }
}
} catch (err) {
log("Parse failed for chunk, skipping");
}
}
log("Total use cases found: " + allUseCases.length);
return allUseCases;
`,
      (r) => {
        if (!Array.isArray(r.result)) return 'Expected array result';
        if (r.result.length === 0) return 'No use cases extracted';
        return true;
      },
    ],
  ];

  // Sequential on purpose: runTest() mutates shared counters and prints a
  // per-test report that must not interleave.
  for (const [name, code, check] of scenarios) {
    await runTest(name, code, check);
  }

  // ── Summary ──
  console.log(`\n${banner}`);
  console.log(` Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log(banner);

  const exitCode = failed > 0 ? 1 : 0;
  process.exit(exitCode);
}

// Anything that escapes main() is fatal for the whole run.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Diagnostic test — traces exactly what execute_plan returns through ProbeAgent.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { ProbeAgent } from '../ProbeAgent.js';
|
|
7
|
+
import { config } from 'dotenv';
|
|
8
|
+
import { resolve, dirname } from 'path';
|
|
9
|
+
import { fileURLToPath } from 'url';
|
|
10
|
+
|
|
11
|
+
// Load the repository-level .env (four directories above this file).
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../../../..');

config({ path: resolve(projectRoot, '.env') });

const apiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY;
if (!apiKey) {
  console.error('ERROR: No Google API key found');
  process.exit(1);
}

const agent = new ProbeAgent({
  path: '/tmp/customer-insights',
  provider: 'google',
  model: 'gemini-2.5-flash',
  enableExecutePlan: true,
  maxIterations: 15,
});

// Number of execute_plan invocations seen so far (used to label the output).
let callNum = 0;

// Print the code of every execute_plan call as it starts, plus any tool error.
agent.events.on('toolCall', (event) => {
  if (event.status === 'started' && event.name === 'execute_plan') {
    callNum++;
    const code = String(event.args?.code || '');
    console.log(`\n>>> EXECUTE_PLAN #${callNum} START`);
    console.log(`>>> CODE:\n${code.substring(0, 1200)}`);
    if (code.length > 1200) console.log('>>> ... (truncated)');
  }
  if (event.status === 'error') {
    console.log(`>>> TOOL ERROR: ${event.name}: ${event.error}`);
  }
});

await agent.initialize();

// Monkey-patch to see full results
const planTool = agent.toolImplementations?.execute_plan;
if (typeof planTool?.execute === 'function') {
  const origExecute = planTool.execute;
  planTool.execute = async (...args) => {
    // Forward every argument and keep `this` bound to the tool object, so the
    // patch is transparent even if the implementation relies on either.
    const result = await origExecute.apply(planTool, args);
    try {
      const resultStr = typeof result === 'string'
        ? result
        : (JSON.stringify(result) ?? String(result));
      console.log(`\n>>> EXECUTE_PLAN #${callNum} RETURNED (${resultStr.length} chars):`);
      console.log(`>>> ${resultStr.substring(0, 500)}`);
      if (resultStr.length > 500) console.log(`>>> ... (${resultStr.length - 500} more chars)`);
    } catch (logError) {
      // Diagnostics must never change what the tool returns to the agent.
      console.log(`>>> EXECUTE_PLAN #${callNum} RETURNED (unprintable: ${logError.message})`);
    }
    return result;
  };
} else {
  console.warn('>>> execute_plan tool is not registered; result tracing disabled');
}

const query = 'Analyze ALL customer files in this repository. For every customer, classify them by industry. Produce a markdown table with columns: Customer, Industry, Use Case.';

console.log(`\nQUERY: ${query}\n`);

// Overall deadline for the run. The timer handle is kept so it can be
// cancelled once the race is decided.
let timeoutTimer;
const deadline = new Promise((_, reject) => {
  timeoutTimer = setTimeout(() => reject(new Error('Timeout 600s')), 600000);
});

// Exit status reflects the outcome so wrappers/CI can detect a failed run.
let exitCode = 0;

try {
  const result = await Promise.race([agent.answer(query), deadline]);

  console.log(`\n${'='.repeat(60)}`);
  console.log(`FINAL RESULT (${String(result).length} chars):`);
  console.log(String(result).substring(0, 2000));
  console.log(`${'='.repeat(60)}`);
} catch (e) {
  console.log(`\nFAILED: ${e?.message ?? e}`);
  exitCode = 1;
} finally {
  clearTimeout(timeoutTimer);
}

// Best-effort shutdown: a failure here is reported but does not change the
// exit status of the diagnostic itself.
try {
  await agent.close();
} catch (e) {
  console.warn(`agent.close() failed: ${e?.message ?? e}`);
}
process.exit(exitCode);
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool Environment Generator
|
|
3
|
+
*
|
|
4
|
+
* Reads Zod schemas (native tools) and MCP tool schemas to generate:
|
|
5
|
+
* 1. Sandbox globals object (function bindings that bridge to real tools)
|
|
6
|
+
* 2. Set of async function names (for the AST transformer)
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
searchSchema,
|
|
11
|
+
querySchema,
|
|
12
|
+
extractSchema,
|
|
13
|
+
bashSchema,
|
|
14
|
+
} from '../../tools/common.js';
|
|
15
|
+
|
|
16
|
+
// Native tools that have a Zod schema, keyed by the name they are exposed
// under in the DSL. generateSandboxGlobals() uses the schema both to validate
// parameters and to map positional arguments onto parameter names.
const NATIVE_TOOL_SCHEMAS = {
  search: searchSchema,
  query: querySchema,
  extract: extractSchema,
  bash: bashSchema,
};

// Built-in DSL functions that are always treated as async, independent of
// which MCP tools are configured.
const ALWAYS_ASYNC = new Set([
  // tools
  'search',
  'query',
  'extract',
  'listFiles',
  'searchFiles',
  'bash',
  // DSL built-ins
  'LLM',
  'map',
]);
|
|
29
|
+
|
|
30
|
+
/**
 * Generate the set of async function names from native tools and MCP tools.
 *
 * The result is a fresh Set on every call: the built-in names from
 * ALWAYS_ASYNC plus one entry per MCP tool (every MCP tool is async).
 *
 * @param {Object|null} [mcpTools={}] - MCP tools keyed by name. `null` and
 *   `undefined` are both treated as "no MCP tools".
 * @returns {Set<string>} Names of all async functions available in the DSL
 */
export function getAsyncFunctionNames(mcpTools = {}) {
  // The default parameter only covers `undefined`; `?? {}` also covers an
  // explicit `null`, which would otherwise make Object.keys() throw.
  return new Set([...ALWAYS_ASYNC, ...Object.keys(mcpTools ?? {})]);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Wrap a tool function with OTEL tracing and error-safe return.
 * On error, returns "ERROR: <message>" instead of throwing — SandboxJS
 * has unreliable try/catch for async errors, so tools never throw.
 *
 * Tracing is best-effort: a failure inside a tracer hook (or while
 * serialising arguments/results for a span attribute) is written to the
 * execution log and never changes what the tool returns.
 *
 * @param {string} toolName - Name of the tool for the span
 * @param {Function} fn - The async tool function to wrap
 * @param {Object|null} tracer - SimpleAppTracer instance (or null)
 * @param {Function} logFn - Function to write to execution logs
 * @returns {Function} Wrapped function; resolves to fn's result, or to the
 *   string "ERROR: <message>" when fn throws or rejects
 */
function traceToolCall(toolName, fn, tracer, logFn) {
  const spanName = `dsl.${toolName}`;
  const spanContext = { 'dsl.context': 'execute_plan' };

  // Text form of a value for span attributes. JSON.stringify throws on
  // cycles/BigInt and returns undefined for undefined/functions, so both
  // cases are handled here.
  const toText = (value) => {
    if (typeof value === 'string') return value;
    try {
      return JSON.stringify(value) ?? '';
    } catch (e) {
      return String(value);
    }
  };

  // Run a tracing action without letting its failure reach the tool result.
  const telemetry = (action) => {
    if (!tracer) return;
    try {
      action();
    } catch (e) {
      logFn?.('[' + toolName + '] tracing failed: ' + (e?.message || String(e)));
    }
  };

  return async (...args) => {
    let span;
    telemetry(() => {
      span = tracer.createToolSpan?.(spanName, {
        'dsl.tool': toolName,
        'dsl.params': toText(args).substring(0, 500),
      });
    });

    const startTime = Date.now();
    let result;
    try {
      result = await fn(...args);
    } catch (e) {
      const elapsed = Date.now() - startTime;
      // Non-Error throwables (strings, null, ...) have no usable .message.
      const message = e?.message || String(e);

      telemetry(() => {
        span?.setAttributes?.({
          'dsl.tool.duration_ms': elapsed,
          'dsl.tool.success': false,
          'dsl.tool.error': message.substring(0, 500),
        });
        span?.setStatus?.('ERROR');
        span?.addEvent?.('exception', {
          'exception.message': message,
        });
        span?.end?.();

        tracer.recordToolResult?.(spanName, message, false, elapsed, spanContext);
      });

      const msg = 'ERROR: ' + message;
      logFn?.('[' + toolName + '] ' + msg);
      return msg;
    }

    // Success bookkeeping sits outside the try above so that a serialisation
    // or tracer failure cannot turn a successful call into an ERROR string.
    const elapsed = Date.now() - startTime;
    telemetry(() => {
      span?.setAttributes?.({
        'dsl.tool.duration_ms': elapsed,
        'dsl.tool.result_length': toText(result).length,
        'dsl.tool.success': true,
      });
      span?.setStatus?.('OK');
      span?.end?.();

      tracer.recordToolResult?.(spanName, result, true, elapsed, spanContext);
    });

    return result;
  };
}
|
|
119
|
+
|
|
120
|
+
/**
 * Generate sandbox globals that bridge DSL function calls to real tool implementations.
 *
 * Besides the tool bridges (native tools, listFiles/searchFiles, MCP tools and
 * LLM()), the returned object carries the DSL helpers map(), chunk(), log(),
 * range(), flatten(), unique(), batch(), parseJSON(), groupBy(), the session
 * store accessors (storeSet/storeGet/storeAppend/storeKeys/storeGetAll) and,
 * when an output buffer is supplied, output().
 *
 * log() and the tool wrappers write to `globals._logs` when that array is
 * present on the returned object.
 *
 * @param {Object} options
 * @param {Object} options.toolImplementations - Native tool execute functions keyed by name
 * @param {Object} [options.mcpBridge] - MCP bridge with callTool method
 * @param {Object} [options.mcpTools={}] - MCP tools metadata keyed by name
 * @param {Function} options.llmCall - Function to make focused LLM calls: (instruction, data, options?) => Promise<any>
 * @param {number} [options.mapConcurrency=3] - Max concurrent operations in map()
 * @param {Object} [options.tracer=null] - SimpleAppTracer for OTEL tracing
 * @param {Object} [options.sessionStore={}] - Plain object backing the store*() helpers
 * @param {Object} [options.outputBuffer=null] - Object with an `items` array that output() appends to
 * @returns {Object} Globals object to pass to SandboxJS
 */
export function generateSandboxGlobals(options) {
  const {
    toolImplementations = {},
    mcpBridge = null,
    mcpTools = {},
    llmCall,
    mapConcurrency = 3,
    tracer = null,
    sessionStore = {},
    outputBuffer = null,
  } = options;

  const globals = {};

  // Log function — writes to the execution logs array (set by runtime before each execute())
  const logFn = (msg) => { if (globals._logs) globals._logs.push(String(msg)); };

  // Own-property helpers. Keys come from sandboxed code, so names such as
  // "constructor" or "__proto__" must be treated as ordinary data keys and
  // never resolve to (or overwrite) anything inherited from Object.prototype.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const setOwn = (obj, key, value) => {
    Object.defineProperty(obj, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  };

  // Bridge native tools
  for (const [name, schema] of Object.entries(NATIVE_TOOL_SCHEMAS)) {
    if (!toolImplementations[name]) continue;

    const rawFn = async (...args) => {
      // Support both (params) and (arg1, arg2) calling conventions
      let params;
      if (args.length === 1 && typeof args[0] === 'object' && args[0] !== null && !Array.isArray(args[0])) {
        params = args[0];
      } else {
        // Map positional args to schema keys
        const keys = Object.keys(schema.shape);
        params = {};
        args.forEach((arg, i) => {
          if (i < keys.length) params[keys[i]] = arg;
        });
      }

      const validated = schema.safeParse(params);
      if (!validated.success) {
        throw new Error(`Invalid parameters for ${name}: ${validated.error.message}`);
      }
      return toolImplementations[name].execute(validated.data);
    };

    globals[name] = traceToolCall(name, rawFn, tracer, logFn);
  }

  // Bridge listFiles and searchFiles (no Zod schema, simpler interface)
  if (toolImplementations.listFiles) {
    const rawListFiles = async (pattern) => {
      return toolImplementations.listFiles.execute({ pattern });
    };
    globals.listFiles = traceToolCall('listFiles', rawListFiles, tracer, logFn);
  }
  if (toolImplementations.searchFiles) {
    const rawSearchFiles = async (query) => {
      return toolImplementations.searchFiles.execute({ query });
    };
    globals.searchFiles = traceToolCall('searchFiles', rawSearchFiles, tracer, logFn);
  }

  // Bridge MCP tools
  if (mcpBridge) {
    for (const name of Object.keys(mcpTools)) {
      const rawMcpFn = async (params = {}) => {
        return mcpBridge.callTool(name, params);
      };
      globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
    }
  }

  // LLM() built-in — delegate already has its own OTEL, but we add a DSL-level span
  if (llmCall) {
    const rawLLM = async (instruction, data, opts = {}) => {
      return llmCall(instruction, data, opts);
    };
    globals.LLM = traceToolCall('LLM', rawLLM, tracer, logFn);
  }

  // map() with concurrency control.
  // A fixed pool of workers pulls items by index, so results keep input order
  // and at most `limit` callbacks are in flight. Every worker promise is
  // handed to Promise.all, so a rejection is always observed (no unhandled
  // rejections), and no new item is started once one callback has failed.
  const rawMap = async (items, fn) => {
    if (!Array.isArray(items)) {
      throw new Error('map() first argument must be an array');
    }
    if (typeof fn !== 'function') {
      throw new Error('map() second argument must be a function');
    }

    // Anything that is not a number >= 1 degrades to sequential execution.
    const limit = mapConcurrency >= 1 ? Math.floor(mapConcurrency) : 1;
    const results = new Array(items.length);
    let nextIndex = 0;
    let aborted = false;

    const worker = async () => {
      while (!aborted && nextIndex < items.length) {
        const index = nextIndex++;
        try {
          results[index] = await fn(items[index]);
        } catch (e) {
          aborted = true;
          throw e;
        }
      }
    };

    const workerCount = Math.min(limit, items.length);
    await Promise.all(Array.from({ length: workerCount }, worker));
    return results;
  };
  globals.map = traceToolCall('map', rawMap, tracer, logFn);

  // chunk() - split data into token-sized chunks
  globals.chunk = (data, tokens = 20000) => {
    const CHARS_PER_TOKEN = 4;
    // A zero, negative or non-numeric budget would give a chunk size of 0 and
    // make the character-splitting loop below spin forever; use the default.
    const requested = Number(tokens);
    const tokenBudget = Number.isFinite(requested) && requested > 0 ? requested : 20000;
    const chunkSizeChars = Math.max(1, Math.floor(tokenBudget * CHARS_PER_TOKEN));
    // JSON.stringify returns undefined for undefined/functions; treat as empty.
    const text = typeof data === 'string' ? data : (JSON.stringify(data) ?? '');

    // Split by file blocks (``` markers) to avoid breaking mid-block
    const fileBlocks = text.split(/(?=^```)/m);
    const chunks = [];
    let current = '';

    for (const block of fileBlocks) {
      const blockSize = block.length;

      // If a single block exceeds chunk size and we have accumulated content, flush first
      if (blockSize > chunkSizeChars && current.length > 0) {
        chunks.push(current.trim());
        current = '';
      }

      // If a single block exceeds chunk size, split it by character boundary
      if (blockSize > chunkSizeChars) {
        for (let i = 0; i < blockSize; i += chunkSizeChars) {
          const slice = block.slice(i, i + chunkSizeChars);
          if (slice.trim().length > 0) {
            chunks.push(slice.trim());
          }
        }
        continue;
      }

      // If adding this block exceeds chunk size, flush
      if (current.length + blockSize > chunkSizeChars && current.length > 0) {
        chunks.push(current.trim());
        current = '';
      }

      current += block;
    }

    if (current.trim().length > 0) {
      chunks.push(current.trim());
    }

    return chunks;
  };

  // Utility functions (pure, no async)
  globals.log = (message) => {
    // Collected by the runtime for the execution log
    logFn(message);
  };

  globals.range = (start, end) => {
    const result = [];
    for (let i = start; i < end; i++) result.push(i);
    return result;
  };

  globals.flatten = (arr) => {
    if (!Array.isArray(arr)) return arr;
    return arr.flat(1);
  };

  // De-duplicates by JSON representation, keeping the first occurrence.
  globals.unique = (arr) => {
    if (!Array.isArray(arr)) return arr;
    const seen = new Set();
    return arr.filter(item => {
      const key = JSON.stringify(item);
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  };

  globals.batch = (arr, size) => {
    if (!Array.isArray(arr)) return [arr];
    if (!size || size < 1) size = 10;
    const batches = [];
    for (let i = 0; i < arr.length; i += size) {
      batches.push(arr.slice(i, i + size));
    }
    return batches;
  };

  // parseJSON — safely parse JSON from LLM responses that may be wrapped in markdown fences
  // Returns null on parse failure instead of throwing (SandboxJS try/catch is unreliable)
  globals.parseJSON = (text) => {
    try {
      let s = String(text || '').trim();
      // Strip markdown code fences (```json ... ``` or ``` ... ```)
      s = s.replace(/^```(?:json|javascript|js)?\s*\n?/i, '').replace(/\n?```\s*$/i, '').trim();
      // Try to find JSON array or object within the text
      const arrayStart = s.indexOf('[');
      const objectStart = s.indexOf('{');
      if (arrayStart >= 0 && (objectStart < 0 || arrayStart < objectStart)) {
        const end = s.lastIndexOf(']');
        if (end > arrayStart) s = s.substring(arrayStart, end + 1);
      } else if (objectStart >= 0) {
        const end = s.lastIndexOf('}');
        if (end > objectStart) s = s.substring(objectStart, end + 1);
      }
      return JSON.parse(s);
    } catch (e) {
      logFn('[parseJSON] ERROR: ' + e.message);
      return null;
    }
  };

  // groupBy — `key` is either a property name or a function of the item.
  // Group names are stringified. Only own properties of the result are
  // consulted, so a group called "constructor" or "__proto__" works like any
  // other instead of colliding with Object.prototype.
  globals.groupBy = (arr, key) => {
    if (!Array.isArray(arr)) return {};
    const groups = {};
    for (const item of arr) {
      const k = typeof key === 'function' ? key(item) : item?.[key];
      const groupKey = String(k);
      if (!hasOwn(groups, groupKey)) setOwn(groups, groupKey, []);
      groups[groupKey].push(item);
    }
    return groups;
  };

  // Session-scoped store — persists across execute_plan calls within the same agent session
  globals.storeSet = (key, value) => {
    if (typeof key !== 'string') throw new Error('storeSet: key must be a string');
    // defineProperty instead of assignment: `store['__proto__'] = x` would
    // replace the store's prototype rather than store a value.
    setOwn(sessionStore, key, value);
  };

  globals.storeGet = (key) => {
    if (typeof key !== 'string') throw new Error('storeGet: key must be a string');
    // Own properties only — never hand inherited members such as
    // `constructor` to sandboxed code.
    return hasOwn(sessionStore, key) ? sessionStore[key] : undefined;
  };

  globals.storeAppend = (key, item) => {
    if (typeof key !== 'string') throw new Error('storeAppend: key must be a string');
    const existing = hasOwn(sessionStore, key) ? sessionStore[key] : undefined;
    if (Array.isArray(existing)) {
      existing.push(item);
    } else {
      // A missing or non-array value is replaced by a new one-element list.
      setOwn(sessionStore, key, [item]);
    }
  };

  globals.storeKeys = () => Object.keys(sessionStore);

  globals.storeGetAll = () => ({ ...sessionStore });

  // output() — write content directly to user's response, bypassing LLM rewriting
  if (outputBuffer) {
    globals.output = (content) => {
      if (content === undefined || content === null) return;
      const str = typeof content === 'string' ? content : JSON.stringify(content, null, 2);
      outputBuffer.items.push(str);
      if (globals._logs) globals._logs.push('[output] ' + str.length + ' chars written to output buffer');
    };
  }

  return globals;
}
|