@probelabs/probe 0.6.0-rc231 → 0.6.0-rc233
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.d.ts +2 -0
- package/build/agent/ProbeAgent.js +105 -12
- package/build/agent/dsl/agent-test.mjs +341 -0
- package/build/agent/dsl/analyze-test.mjs +237 -0
- package/build/agent/dsl/diag-test.mjs +78 -0
- package/build/agent/dsl/environment.js +387 -0
- package/build/agent/dsl/manual-test.mjs +662 -0
- package/build/agent/dsl/output-buffer-test.mjs +124 -0
- package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/build/agent/dsl/pipeline-test.mjs +223 -0
- package/build/agent/dsl/runtime.js +206 -0
- package/build/agent/dsl/sandbox-experiment.mjs +309 -0
- package/build/agent/dsl/transformer.js +156 -0
- package/build/agent/dsl/trigger-test.mjs +159 -0
- package/build/agent/dsl/validator.js +183 -0
- package/build/agent/index.js +18776 -7675
- package/build/agent/probeTool.js +9 -0
- package/build/agent/tools.js +9 -1
- package/build/delegate.js +12 -6
- package/build/index.js +5 -0
- package/build/tools/common.js +7 -0
- package/build/tools/executePlan.js +761 -0
- package/build/tools/index.js +4 -0
- package/cjs/agent/ProbeAgent.cjs +12891 -1797
- package/cjs/index.cjs +12395 -1292
- package/package.json +5 -1
- package/src/agent/ProbeAgent.d.ts +2 -0
- package/src/agent/ProbeAgent.js +105 -12
- package/src/agent/dsl/agent-test.mjs +341 -0
- package/src/agent/dsl/analyze-test.mjs +237 -0
- package/src/agent/dsl/diag-test.mjs +78 -0
- package/src/agent/dsl/environment.js +387 -0
- package/src/agent/dsl/manual-test.mjs +662 -0
- package/src/agent/dsl/output-buffer-test.mjs +124 -0
- package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/src/agent/dsl/pipeline-test.mjs +223 -0
- package/src/agent/dsl/runtime.js +206 -0
- package/src/agent/dsl/sandbox-experiment.mjs +309 -0
- package/src/agent/dsl/transformer.js +156 -0
- package/src/agent/dsl/trigger-test.mjs +159 -0
- package/src/agent/dsl/validator.js +183 -0
- package/src/agent/index.js +8 -0
- package/src/agent/probeTool.js +9 -0
- package/src/agent/tools.js +9 -1
- package/src/delegate.js +12 -6
- package/src/index.js +5 -0
- package/src/tools/common.js +7 -0
- package/src/tools/executePlan.js +761 -0
- package/src/tools/index.js +4 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-unknown-linux-musl.tar.gz +0 -0
|
@@ -0,0 +1,662 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Manual test script for the DSL runtime with real tools.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* node npm/src/agent/dsl/manual-test.mjs
|
|
7
|
+
*
|
|
8
|
+
* Requires: GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY in .env or env
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { createDSLRuntime } from './runtime.js';
|
|
12
|
+
import { search } from '../../search.js';
|
|
13
|
+
import { extract } from '../../extract.js';
|
|
14
|
+
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
15
|
+
import { generateText } from 'ai';
|
|
16
|
+
import { config } from 'dotenv';
|
|
17
|
+
import { resolve, dirname } from 'path';
|
|
18
|
+
import { fileURLToPath } from 'url';
|
|
19
|
+
|
|
20
|
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const scriptFile = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptFile);
// projectRoot is four directory levels above this script's directory.
const projectRoot = resolve(__dirname, '..', '..', '..', '..');

// Load .env from project root
const dotenvFile = resolve(projectRoot, '.env');
config({ path: dotenvFile });

// GOOGLE_GENERATIVE_AI_API_KEY takes precedence; GOOGLE_API_KEY is the fallback.
const { GOOGLE_GENERATIVE_AI_API_KEY, GOOGLE_API_KEY } = process.env;
const apiKey = GOOGLE_GENERATIVE_AI_API_KEY || GOOGLE_API_KEY;
if (apiKey) {
  console.log('API key found, initializing...\n');
} else {
  // Nothing below can run without a key, so stop with a non-zero exit code.
  console.error('ERROR: No Google API key found. Set GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY');
  process.exit(1);
}

// Create Google provider
const google = createGoogleGenerativeAI({ apiKey });
|
|
36
|
+
|
|
37
|
+
// Create real LLM call function
|
|
38
|
+
/**
 * Sends one request to the Gemini model through `generateText` and returns the
 * generated text.
 *
 * @param {string} instruction - Passed to the model as the system prompt.
 * @param {*} data - Prompt payload. Strings are sent as-is; any other value is
 *   pretty-printed as JSON (or stringified when JSON has no representation for it).
 * @param {{temperature?: number, maxTokens?: number}} [options] - Optional overrides;
 *   temperature defaults to 0.3 and maxTokens to 4000.
 * @returns {Promise<string>} The `text` field of the `generateText` result.
 */
async function llmCall(instruction, data, options = {}) {
  // JSON.stringify yields undefined (not a string) for undefined, functions and
  // symbols; fall back to String() so the substring() call below cannot throw.
  const prompt = typeof data === 'string'
    ? data
    : (JSON.stringify(data, null, 2) ?? String(data));
  const result = await generateText({
    model: google('gemini-2.5-flash'),
    system: instruction,
    // Only the first 100000 characters of the payload are sent.
    prompt: prompt.substring(0, 100000),
    // `??` instead of `||` so that an explicit temperature of 0 is honoured.
    temperature: options.temperature ?? 0.3,
    maxTokens: options.maxTokens || 4000,
  });
  return result.text;
}
|
|
49
|
+
|
|
50
|
+
// The cwd for search operations
// The tool implementations below pass this as `cwd` to every search/extract
// call and use it as the search path when no explicit path is given.
const cwd = projectRoot;
|
|
52
|
+
|
|
53
|
+
// Create real tool implementations
|
|
54
|
+
/**
 * Wraps a tool body so that a thrown error is returned as the string
 * "<label> error: <message>" instead of propagating to the caller.
 */
const withErrorText = (label, body) => async (params) => {
  try {
    return await body(params);
  } catch (err) {
    return `${label} error: ${err.message}`;
  }
};

/**
 * Tool table handed to the DSL runtime. Each entry exposes an async
 * `execute(params)` that resolves to the underlying tool's result or,
 * on failure, to an error string.
 */
const toolImplementations = {
  search: {
    execute: withErrorText('Search', (params) => search({
      query: params.query,
      path: params.path || cwd,
      cwd,
      maxTokens: 20000,
      timeout: 30,
      exact: params.exact || false,
    })),
  },
  extract: {
    execute: withErrorText('Extract', (params) => extract({
      targets: params.targets,
      input_content: params.input_content,
      cwd,
    })),
  },
  // listFiles is served by `search` with `filesOnly: true`; the pattern defaults to '*'.
  listFiles: {
    execute: withErrorText('listFiles', (params) => search({
      query: params.pattern || '*',
      path: cwd,
      cwd,
      filesOnly: true,
      maxTokens: 10000,
    })),
  },
};
|
|
100
|
+
|
|
101
|
+
// Create the DSL runtime
// Wires the real tool implementations and the Gemini-backed llmCall into the
// runtime that every test below executes its DSL program against.
const runtime = createDSLRuntime({
  toolImplementations,
  llmCall,
  // NOTE(review): presumably the cap on concurrently running map() callbacks — confirm in runtime.js.
  mapConcurrency: 3,
});
|
|
107
|
+
|
|
108
|
+
// ── Test helpers ──
|
|
109
|
+
// Running totals, updated by runTest and reported in the final summary.
let testNum = 0;
let passed = 0;
let failed = 0;

/**
 * Renders any value as a string of at most `limit` characters without throwing.
 * Strings are used as-is; other values are pretty-printed as JSON, falling back
 * to String() when JSON.stringify returns undefined or throws.
 */
function previewValue(value, limit) {
  if (typeof value === 'string') return value.substring(0, limit);
  let text;
  try {
    text = JSON.stringify(value, null, 2);
  } catch {
    text = undefined;
  }
  return (text ?? String(value)).substring(0, limit);
}

/**
 * Executes one DSL program through the runtime, prints a report and updates
 * the pass/fail counters.
 *
 * @param {string} name - Human-readable test name; also passed to `runtime.execute`.
 * @param {string} code - DSL source to execute.
 * @param {(result: object) => (true|undefined|string|false)} check - Receives the
 *   runtime result; returning `true` or `undefined` marks the test as passed, any
 *   other return value is printed as the failure reason.
 * @returns {Promise<void>}
 */
async function runTest(name, code, check) {
  testNum++;
  const label = `Test ${testNum}: ${name}`;
  console.log(`\n${'─'.repeat(70)}`);
  console.log(`▶ ${label}`);
  // Collapse the program onto one line; add an ellipsis only when it was truncated.
  const flatCode = code.trim().split('\n').map((l) => l.trim()).filter(Boolean).join(' ');
  console.log(` Code: ${flatCode.substring(0, 140)}${flatCode.length > 140 ? '...' : ''}`);

  const start = Date.now();
  try {
    const result = await runtime.execute(code, name);
    const elapsed = Date.now() - start;
    const logs = Array.isArray(result?.logs) ? result.logs : [];

    const checkResult = check(result);
    if (checkResult === true || checkResult === undefined) {
      console.log(` ✓ PASSED (${elapsed}ms)`);
      if (result.status === 'error') {
        console.log(` (Expected error: ${previewValue(result.error, 120)})`);
      } else {
        // previewValue never throws, so an undefined result can no longer turn
        // a passing test into a CRASHED one.
        const preview = previewValue(result.result, 300);
        console.log(` Result preview: ${preview}${preview.length >= 300 ? '...' : ''}`);
      }
      // Hide the runtime's own bookkeeping lines; show only what the program logged.
      const userLogs = logs.filter((l) => typeof l !== 'string' || !l.startsWith('[runtime]'));
      if (userLogs.length) {
        console.log(` Logs: ${userLogs.join(' | ')}`);
      }
      passed++;
    } else {
      console.log(` ✗ FAILED (${elapsed}ms) — ${checkResult}`);
      if (logs.length) {
        console.log(` Logs: ${logs.join(' | ')}`);
      }
      failed++;
    }
  } catch (e) {
    // Reached when runtime.execute rejects or the check callback itself throws.
    console.log(` ✗ CRASHED — ${e.message}`);
    console.log(` Stack: ${e.stack?.split('\n').slice(0, 3).join(' ')}`);
    failed++;
  }
}
|
|
154
|
+
|
|
155
|
+
// ── Tests ──
|
|
156
|
+
/**
 * Runs every manual DSL scenario in order through `runTest`, prints a
 * pass/fail summary and exits the process with status 1 if any scenario
 * failed (0 otherwise).
 *
 * Every check callback except the eval-rejection test first reports the
 * runtime's own error text when `r.status === 'error'`, so a failed execution
 * is never shown as a misleading type mismatch or a crash inside the check.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('═'.repeat(70));
  console.log(' DSL Runtime — Complex Manual Tests');
  console.log('═'.repeat(70));

  // ────────────────────────────────────────────────
  // SECTION 1: Basic sanity
  // ────────────────────────────────────────────────

  await runTest(
    'Pure computation',
    'const x = [1,2,3,4,5]; return x.filter(n => n > 2).length;',
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      return r.result === 3 || `Expected 3, got ${r.result}`;
    }
  );

  await runTest(
    'Validation: rejects eval()',
    'eval("console.log(1)");',
    // The only scenario where an error status is the expected outcome.
    (r) => r.status === 'error' ? true : `Expected error, got success`
  );

  // ────────────────────────────────────────────────
  // SECTION 2: While loops & pagination simulation
  // ────────────────────────────────────────────────

  await runTest(
    'While loop: accumulate until condition',
    `
      const pages = [];
      let page = 0;
      while (page < 5) {
        pages.push({ page: page, items: range(page * 10, page * 10 + 10) });
        page = page + 1;
      }
      log("Collected " + pages.length + " pages");
      return pages.length;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      return r.result === 5 || `Expected 5, got ${r.result}`;
    }
  );

  await runTest(
    'While loop with break: simulated pagination',
    `
      const allItems = [];
      let page = 1;
      while (true) {
        // Simulate a paginated API that returns 3 pages of data
        const pageData = range((page - 1) * 5, page * 5);
        const hasMore = page < 3;
        for (const item of pageData) {
          allItems.push(item);
        }
        log("Page " + page + ": " + pageData.length + " items, hasMore=" + hasMore);
        if (!hasMore) break;
        page = page + 1;
      }
      return allItems;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 15) return `Expected 15 items, got ${r.result.length}`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 3: Try/catch error handling
  // ────────────────────────────────────────────────

  await runTest(
    'Try/catch: graceful error recovery',
    `
      const results = [];
      const queries = ["validateDSL", "thisQueryWillProbablyReturnNothing12345xyz"];
      for (const q of queries) {
        try {
          const r = search(q);
          results.push({ query: q, found: true, length: r.length });
        } catch (e) {
          results.push({ query: q, found: false, error: "failed" });
        }
      }
      return results;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 2) return `Expected 2 results, got ${r.result.length}`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 4: Multi-search & data aggregation
  // ────────────────────────────────────────────────

  await runTest(
    'Multi-search: combine results from multiple queries',
    `
      const queries = ["error handling", "validation", "timeout"];
      const searchResults = map(queries, (q) => {
        const r = search(q);
        return { query: q, resultLength: r.length };
      });
      log("Searched " + searchResults.length + " queries");
      const totalChars = searchResults.reduce((sum, r) => sum + r.resultLength, 0);
      log("Total result chars: " + totalChars);
      return { queries: searchResults, totalChars: totalChars };
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!r.result.queries) return `Expected queries array`;
      if (r.result.queries.length !== 3) return `Expected 3 query results`;
      if (r.result.totalChars < 100) return `Expected substantial results`;
      return true;
    }
  );

  await runTest(
    'Search + extract: find code then extract specific files',
    `
      const searchResult = search("transformDSL");
      // Extract the transformer file specifically
      const code = extract({ targets: "npm/src/agent/dsl/transformer.js" });
      const summary = LLM(
        "How many functions are exported from this file? List their names. Be very concise.",
        code
      );
      return summary;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (typeof r.result !== 'string') return `Expected string, got ${typeof r.result}`;
      if (r.result.length < 10) return `Summary too short: ${r.result}`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 5: Complex data transformation
  // ────────────────────────────────────────────────

  await runTest(
    'Complex data pipeline: group, transform, aggregate',
    `
      // Simulate analyzing a batch of items with different categories
      const items = [];
      for (let i = 0; i < 20; i = i + 1) {
        const categories = ["bug", "feature", "docs", "refactor"];
        const priorities = ["high", "medium", "low"];
        items.push({
          id: i,
          category: categories[i % 4],
          priority: priorities[i % 3],
          title: "Item " + i
        });
      }

      // Group by category
      const byCategory = groupBy(items, "category");

      // Count per category
      const categoryNames = ["bug", "feature", "docs", "refactor"];
      const counts = [];
      for (const cat of categoryNames) {
        const count = byCategory[cat] ? byCategory[cat].length : 0;
        const highCount = byCategory[cat]
          ? byCategory[cat].filter((item) => item.priority === "high").length
          : 0;
        counts.push({ category: cat, total: count, high: highCount });
        log(cat + ": " + count + " total, " + highCount + " high priority");
      }

      return { counts: counts, totalItems: items.length };
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!r.result) return `Result is falsy: ${JSON.stringify(r)}`;
      // Debug: show what we got
      if (r.result.totalItems !== 20) return `Expected 20 total items, got type=${typeof r.result} value=${JSON.stringify(r.result).substring(0, 300)}`;
      if (!Array.isArray(r.result.counts)) return `Expected counts array, got ${JSON.stringify(r.result).substring(0, 300)}`;
      const bugs = r.result.counts.find((c) => c.category === 'bug');
      if (!bugs || bugs.total !== 5) return `Expected 5 bugs`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 6: Nested map() and LLM chaining
  // ────────────────────────────────────────────────

  await runTest(
    'Nested processing: search multiple topics, classify each result',
    `
      const topics = ["error handling", "caching"];

      // For each topic: search, then have LLM extract key patterns
      const analysis = map(topics, (topic) => {
        const results = search(topic);
        const patterns = LLM(
          "From this code, extract exactly 3 key patterns related to '" + topic + "'. " +
          "Return a brief bullet list, one pattern per line.",
          results
        );
        return { topic: topic, patterns: patterns };
      });

      log("Analyzed " + analysis.length + " topics");
      return analysis;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 2) return `Expected 2 topics analyzed`;
      // patterns is a string from LLM, not parsed
      if (typeof r.result[0].topic !== 'string') return `Missing topic`;
      if (typeof r.result[0].patterns !== 'string') return `Expected patterns to be string, got ${typeof r.result[0].patterns}`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 7: Real-world scenario — code review pipeline
  // ────────────────────────────────────────────────

  await runTest(
    'Code review pipeline: find, chunk, analyze, synthesize',
    `
      // Step 1: Search for the validator module
      const code = search("validateDSL ALLOWED_NODE_TYPES BLOCKED_IDENTIFIERS");

      // Step 2: Chunk if needed
      const codeChunks = chunk(code, 8000);
      log("Code split into " + codeChunks.length + " chunks");

      // Step 3: Analyze each chunk for issues
      const reviews = map(codeChunks, (c) => LLM(
        "You are a senior code reviewer. Analyze this code for potential issues: " +
        "security concerns, edge cases, performance problems. " +
        "Return a JSON object with: { issues: [{ severity: 'high'|'medium'|'low', description: string }] }. " +
        "Return ONLY JSON.",
        c
      ));

      // Step 4: Synthesize
      const synthesis = LLM(
        "Combine these code review findings into a prioritized summary. " +
        "Group by severity (high, medium, low). Be concise — max 5 bullet points total.",
        reviews.join("\\n---\\n")
      );

      return synthesis;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 50) return `Review too short`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 8: Real-world — dependency analysis
  // ────────────────────────────────────────────────

  await runTest(
    'Dependency analysis: find imports across multiple files',
    `
      // Search for all imports in the DSL module files
      const files = ["validator.js", "transformer.js", "environment.js", "runtime.js"];
      const imports = map(files, (file) => {
        const code = extract({ targets: "npm/src/agent/dsl/" + file });
        const analysis = LLM(
          "List all import statements from this file. Return a JSON object: " +
          "{ file: string, imports: [{ from: string, names: string[] }] }. Return ONLY JSON.",
          code
        );
        return analysis;
      });

      log("Analyzed " + imports.length + " files");

      // Have LLM create a dependency graph summary
      const summary = LLM(
        "Given these import analyses for DSL module files, create a brief dependency summary: " +
        "which files depend on what external packages and internal modules. " +
        "Format as a simple list. Be concise.",
        imports.join("\\n")
      );

      return summary;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 30) return `Summary too short`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 9: Stress test — many parallel LLM calls
  // ────────────────────────────────────────────────

  await runTest(
    'Stress: 10 parallel LLM calls via map()',
    `
      const items = range(1, 11);
      const results = map(items, (n) => {
        const answer = LLM(
          "Return ONLY a single number: the square of " + n + ". Nothing else, just the number.",
          "Calculate " + n + " * " + n
        );
        return { n: n, squared: String(answer).trim() };
      });
      log("Completed " + results.length + " parallel LLM calls");
      return results;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 10) return `Expected 10 results, got ${r.result.length}`;
      const first = r.result[0];
      if (first.n === undefined || first.squared === undefined) return `Missing fields: ${JSON.stringify(first)}`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 10: Complex conditional logic
  // ────────────────────────────────────────────────

  await runTest(
    'Conditional routing: different processing based on search results',
    `
      const queries = ["BLOCKED_IDENTIFIERS", "nonexistent_symbol_xyz_12345"];
      const results = [];

      for (const q of queries) {
        const searchResult = search(q);

        if (searchResult.length > 500) {
          // Rich results — summarize
          const summary = LLM("Summarize this code in one sentence.", searchResult);
          results.push({ query: q, status: "found", summary });
        } else if (searchResult.length > 100) {
          // Some results — note them
          results.push({ query: q, status: "partial", chars: searchResult.length });
        } else {
          // No meaningful results
          results.push({ query: q, status: "not_found" });
        }
        log(q + " -> " + results[results.length - 1].status);
      }

      return results;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!Array.isArray(r.result)) return `Expected array`;
      if (r.result.length !== 2) return `Expected 2 results`;
      if (r.result[0].status !== 'found') return `First query should be 'found'`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 11: While + search iteration (paginated search simulation)
  // ────────────────────────────────────────────────

  await runTest(
    'Iterative deepening: search, then search within results',
    `
      // First broad search
      const broad = search("sandbox");
      const broadSummary = LLM(
        "From these search results, identify the 2 most important function names " +
        "related to sandboxing. Return ONLY the function names separated by comma.",
        broad
      );
      log("Broad search found key functions: " + broadSummary);

      // Now search specifically for each function
      const parts = broadSummary.split(",");
      const functions = [];
      for (const p of parts) {
        const trimmed = p.trim();
        if (trimmed.length > 0) functions.push(trimmed);
      }
      log("Will search for " + functions.length + " functions");

      const details = map(functions.slice(0, 2), (fn) => {
        const detail = search(fn);
        const analysis = LLM(
          "Explain what the function '" + fn + "' does in 1-2 sentences based on this code.",
          detail
        );
        return { name: fn, description: analysis };
      });

      return details;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}: ${JSON.stringify(r.result).substring(0, 200)}`;
      if (r.result.length < 1) return `Expected at least 1 function analyzed`;
      if (!r.result[0].description) return `Missing description`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 12: Full analyze_all replacement pattern
  // ────────────────────────────────────────────────

  await runTest(
    'analyze_all replacement: comprehensive codebase question',
    `
      // Question: "What testing patterns are used in the DSL module?"

      // Phase 1: Search for test-related code
      const testResults = search("test DSL validator transformer runtime");

      // Phase 2: Chunk and extract patterns
      const chunks = chunk(testResults, 6000);
      log("Processing " + chunks.length + " test chunks");

      const patterns = map(chunks, (c) => LLM(
        "Extract testing patterns from this code. For each pattern found, note: " +
        "1) Pattern name (e.g., 'mock functions', 'assertion style', 'test structure') " +
        "2) Brief description " +
        "Return as a bullet list. Be concise.",
        c
      ));

      // Phase 3: Synthesize
      const answer = LLM(
        "You are answering the question: 'What testing patterns are used in the DSL module?' " +
        "Based on the analysis below, provide a comprehensive but concise answer. " +
        "Organize by pattern type. Use bullet points. Max 10 bullet points.",
        patterns.join("\\n---\\n")
      );

      return answer;
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 100) return `Answer too short`;
      return true;
    }
  );

  // ────────────────────────────────────────────────
  // SECTION 13: Discovery-first pattern
  // ────────────────────────────────────────────────

  await runTest(
    'Discovery-first: explore repo then plan search strategy',
    `
      // Phase 1: Discover repo structure
      const files = listFiles("**/*");
      const sample = search("error handling");
      log("Files length: " + String(files).length + ", sample length: " + String(sample).length);

      // Phase 2: Ask LLM to determine optimal search strategy
      const plan = LLM(
        "Based on this repository structure and sample search results, determine the best search strategy " +
        "to answer: 'What are all the validation approaches in this codebase?' " +
        "Return a JSON object with: keywords (array of 2-3 search queries that will find relevant data), " +
        "extractionFocus (what to extract from each result), " +
        "and aggregation (summarize or list_unique). " +
        "IMPORTANT: Only suggest keywords likely to match actual content you see. Return ONLY valid JSON.",
        "Repository files:\\n" + String(files).substring(0, 3000) + "\\nSample results:\\n" + String(sample).substring(0, 3000)
      );
      const strategy = JSON.parse(String(plan));
      log("Strategy keywords: " + strategy.keywords.length + ", focus: " + strategy.extractionFocus);

      // Phase 3: Execute with discovered strategy
      const allFindings = [];
      for (const kw of strategy.keywords) {
        const results = search(kw);
        if (String(results).length > 500) {
          const chunks = chunk(results);
          const findings = map(chunks, (c) => LLM(strategy.extractionFocus, c));
          for (const f of findings) { allFindings.push(String(f)); }
          log("Keyword '" + kw + "': " + chunks.length + " chunks processed");
        } else {
          log("Keyword '" + kw + "': skipped (too few results)");
        }
      }
      var combined = "";
      for (const f of allFindings) { combined = combined + f + "\\n---\\n"; }
      return LLM("Synthesize all findings about validation approaches into a comprehensive answer.", combined);
    `,
    (r) => {
      if (r.status === 'error') return `Execution error: ${r.error}`;
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 100) return `Answer too short: ${r.result.length} chars`;
      return true;
    }
  );

  // ── Summary ──
  console.log(`\n${'═'.repeat(70)}`);
  console.log(` Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log('═'.repeat(70));

  // Non-zero exit status when any scenario failed, so callers can detect failure.
  process.exit(failed > 0 ? 1 : 0);
}
|
|
658
|
+
|
|
659
|
+
// Entry point: run the suite; any rejection that escapes main() is reported
// and turned into a non-zero exit status.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});
|