@probelabs/probe 0.6.0-rc232 → 0.6.0-rc233

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/bin/binaries/probe-v0.6.0-rc233-aarch64-apple-darwin.tar.gz +0 -0
  2. package/bin/binaries/probe-v0.6.0-rc233-aarch64-unknown-linux-musl.tar.gz +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc233-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc233-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc233-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.d.ts +2 -0
  7. package/build/agent/ProbeAgent.js +51 -6
  8. package/build/agent/dsl/agent-test.mjs +341 -0
  9. package/build/agent/dsl/analyze-test.mjs +237 -0
  10. package/build/agent/dsl/diag-test.mjs +78 -0
  11. package/build/agent/dsl/environment.js +387 -0
  12. package/build/agent/dsl/manual-test.mjs +662 -0
  13. package/build/agent/dsl/output-buffer-test.mjs +124 -0
  14. package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
  15. package/build/agent/dsl/pipeline-test.mjs +223 -0
  16. package/build/agent/dsl/runtime.js +206 -0
  17. package/build/agent/dsl/sandbox-experiment.mjs +309 -0
  18. package/build/agent/dsl/transformer.js +156 -0
  19. package/build/agent/dsl/trigger-test.mjs +159 -0
  20. package/build/agent/dsl/validator.js +183 -0
  21. package/build/agent/index.js +18724 -7666
  22. package/build/agent/probeTool.js +9 -0
  23. package/build/agent/tools.js +9 -1
  24. package/build/index.js +5 -0
  25. package/build/tools/common.js +7 -0
  26. package/build/tools/executePlan.js +761 -0
  27. package/build/tools/index.js +4 -0
  28. package/cjs/agent/ProbeAgent.cjs +12687 -1636
  29. package/cjs/index.cjs +12341 -1281
  30. package/package.json +5 -1
  31. package/src/agent/ProbeAgent.d.ts +2 -0
  32. package/src/agent/ProbeAgent.js +51 -6
  33. package/src/agent/dsl/agent-test.mjs +341 -0
  34. package/src/agent/dsl/analyze-test.mjs +237 -0
  35. package/src/agent/dsl/diag-test.mjs +78 -0
  36. package/src/agent/dsl/environment.js +387 -0
  37. package/src/agent/dsl/manual-test.mjs +662 -0
  38. package/src/agent/dsl/output-buffer-test.mjs +124 -0
  39. package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
  40. package/src/agent/dsl/pipeline-test.mjs +223 -0
  41. package/src/agent/dsl/runtime.js +206 -0
  42. package/src/agent/dsl/sandbox-experiment.mjs +309 -0
  43. package/src/agent/dsl/transformer.js +156 -0
  44. package/src/agent/dsl/trigger-test.mjs +159 -0
  45. package/src/agent/dsl/validator.js +183 -0
  46. package/src/agent/index.js +8 -0
  47. package/src/agent/probeTool.js +9 -0
  48. package/src/agent/tools.js +9 -1
  49. package/src/index.js +5 -0
  50. package/src/tools/common.js +7 -0
  51. package/src/tools/executePlan.js +761 -0
  52. package/src/tools/index.js +4 -0
  53. package/bin/binaries/probe-v0.6.0-rc232-aarch64-apple-darwin.tar.gz +0 -0
  54. package/bin/binaries/probe-v0.6.0-rc232-aarch64-unknown-linux-musl.tar.gz +0 -0
  55. package/bin/binaries/probe-v0.6.0-rc232-x86_64-apple-darwin.tar.gz +0 -0
  56. package/bin/binaries/probe-v0.6.0-rc232-x86_64-pc-windows-msvc.zip +0 -0
  57. package/bin/binaries/probe-v0.6.0-rc232-x86_64-unknown-linux-musl.tar.gz +0 -0
@@ -0,0 +1,662 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Manual test script for the DSL runtime with real tools.
4
+ *
5
+ * Usage:
6
+ * node npm/src/agent/dsl/manual-test.mjs
7
+ *
8
+ * Requires: GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY in .env or env
9
+ */
10
+
11
+ import { createDSLRuntime } from './runtime.js';
12
+ import { search } from '../../search.js';
13
+ import { extract } from '../../extract.js';
14
+ import { createGoogleGenerativeAI } from '@ai-sdk/google';
15
+ import { generateText } from 'ai';
16
+ import { config } from 'dotenv';
17
+ import { resolve, dirname } from 'path';
18
+ import { fileURLToPath } from 'url';
19
+
20
// Directory containing this script; the project root is four levels above it.
const scriptPath = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptPath);
const projectRoot = resolve(__dirname, '../../../..');

// Load .env from project root
const envFile = resolve(projectRoot, '.env');
config({ path: envFile });

// GOOGLE_GENERATIVE_AI_API_KEY wins when both variables are set.
const {
  GOOGLE_GENERATIVE_AI_API_KEY: primaryKey,
  GOOGLE_API_KEY: fallbackKey,
} = process.env;
const apiKey = primaryKey || fallbackKey;

// Without a key no LLM call can succeed, so stop before running anything.
if (!apiKey) {
  console.error('ERROR: No Google API key found. Set GOOGLE_API_KEY or GOOGLE_GENERATIVE_AI_API_KEY');
  process.exit(1);
}

console.log('API key found, initializing...\n');

// Create Google provider
const google = createGoogleGenerativeAI({ apiKey });
36
+
37
+ // Create real LLM call function
38
/**
 * Send one instruction/data pair to the Gemini model and return the reply text.
 *
 * @param {string} instruction - Passed to the model as the system prompt.
 * @param {*} data - User-prompt payload. Strings are sent as-is; any other
 *   value is JSON-encoded with two-space indentation.
 * @param {{temperature?: number, maxTokens?: number}} [options] - Optional overrides.
 * @returns {Promise<string>} The `text` field of the generateText() result.
 */
async function llmCall(instruction, data, options = {}) {
  // JSON.stringify() yields undefined (not a string) for undefined, functions
  // and symbols; fall back to String() so the truncation below cannot throw.
  const prompt = typeof data === 'string'
    ? data
    : (JSON.stringify(data, null, 2) ?? String(data));

  const result = await generateText({
    model: google('gemini-2.5-flash'),
    system: instruction,
    // Cap the prompt at 100000 characters.
    prompt: prompt.substring(0, 100000),
    // `??` rather than `||`: an explicit temperature of 0 must be honoured.
    temperature: options.temperature ?? 0.3,
    // `||` is kept here on purpose: a zero token budget is never useful.
    maxTokens: options.maxTokens || 4000,
  });
  return result.text;
}
49
+
50
// The cwd for search operations
const cwd = projectRoot;

/**
 * Wrap an async tool body so that anything it throws is handed back to the
 * caller as a "<prefix>: <message>" string instead of propagating.
 */
const withErrorText = (prefix, body) => async (params) => {
  try {
    return await body(params);
  } catch (e) {
    return `${prefix}: ${e.message}`;
  }
};

// Create real tool implementations
// Each entry exposes an async execute(params) that calls search()/extract().
const toolImplementations = {
  search: {
    execute: withErrorText('Search error', (params) => {
      const request = {
        query: params.query,
        path: params.path || cwd,
        cwd,
        maxTokens: 20000,
        timeout: 30,
        exact: params.exact || false,
      };
      return search(request);
    }),
  },
  extract: {
    execute: withErrorText('Extract error', (params) => {
      const request = {
        targets: params.targets,
        input_content: params.input_content,
        cwd,
      };
      return extract(request);
    }),
  },
  listFiles: {
    // Implemented as a files-only search over the project root.
    execute: withErrorText('listFiles error', (params) => {
      const request = {
        query: params.pattern || '*',
        path: cwd,
        cwd,
        filesOnly: true,
        maxTokens: 10000,
      };
      return search(request);
    }),
  },
};
100
+
101
// Create the DSL runtime
// It receives the real tool implementations and the real LLM call function.
const runtimeOptions = {
  toolImplementations,
  llmCall,
  mapConcurrency: 3,
};
const runtime = createDSLRuntime(runtimeOptions);

// ── Test helpers ──
// Tallies updated by runTest() and printed in main()'s summary.
let [testNum, passed, failed] = [0, 0, 0];
112
+
113
/**
 * Render a value as preview text no longer than `limit` characters, adding
 * "..." only when something was actually cut off.
 * Values that JSON.stringify() cannot encode fall back to String().
 */
function previewText(value, limit) {
  let text;
  if (typeof value === 'string') {
    text = value;
  } else {
    try {
      // JSON.stringify() returns undefined for undefined/functions/symbols.
      text = JSON.stringify(value, null, 2) ?? String(value);
    } catch {
      // Circular structures and BigInt values make JSON.stringify() throw.
      text = String(value);
    }
  }
  return text.length > limit ? `${text.substring(0, limit)}...` : text;
}

/**
 * Run one DSL snippet through the shared runtime, print a report, and update
 * the module-level tallies (testNum, passed, failed).
 *
 * @param {string} name - Test title; also passed to runtime.execute() as its second argument.
 * @param {string} code - DSL source to execute.
 * @param {(result: object) => (true|undefined|string)} check - Returns true or
 *   undefined when the result is acceptable; any other return value is printed
 *   as the failure reason.
 * @returns {Promise<void>} Never rejects: anything thrown by the runtime or by
 *   `check` is reported as CRASHED and counted as a failure.
 */
async function runTest(name, code, check) {
  testNum++;
  const label = `Test ${testNum}: ${name}`;
  console.log(`\n${'─'.repeat(70)}`);
  console.log(`▶ ${label}`);
  // Collapse the snippet onto one line so the preview stays compact.
  const flattened = code.trim().split('\n').map((l) => l.trim()).filter(Boolean).join(' ');
  console.log(` Code: ${previewText(flattened, 140)}`);

  const start = Date.now();
  try {
    const result = await runtime.execute(code, name);
    const elapsed = Date.now() - start;

    const checkResult = check(result);
    // Normalise logs once so the reporting below cannot throw on odd entries.
    const logs = Array.isArray(result?.logs) ? result.logs.map(String) : [];

    if (checkResult === true || checkResult === undefined) {
      console.log(` ✓ PASSED (${elapsed}ms)`);
      // Reporting must never turn a passing test into a crash, so every
      // preview goes through previewText() instead of calling .substring()
      // on values that may be undefined or non-strings.
      if (result?.status === 'error') {
        console.log(` (Expected error: ${previewText(result.error, 120)})`);
      } else {
        console.log(` Result preview: ${previewText(result?.result, 300)}`);
      }
      // Runtime-internal lines are noise on success; show only user logs.
      const userLogs = logs.filter((l) => !l.startsWith('[runtime]'));
      if (userLogs.length) {
        console.log(` Logs: ${userLogs.join(' | ')}`);
      }
      passed++;
    } else {
      console.log(` ✗ FAILED (${elapsed}ms) — ${checkResult}`);
      // On failure show every log line, runtime-internal ones included.
      if (logs.length) {
        console.log(` Logs: ${logs.join(' | ')}`);
      }
      failed++;
    }
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    console.log(` ✗ CRASHED — ${message}`);
    console.log(` Stack: ${e?.stack?.split('\n').slice(0, 3).join(' ')}`);
    failed++;
  }
}
154
+
155
+ // ── Tests ──
156
/**
 * Run every manual DSL scenario in order, print a pass/fail summary, and exit
 * the process with code 1 when any scenario failed (0 otherwise).
 *
 * Each scenario is a call to runTest(name, dslCode, check). Apart from the
 * eval-rejection scenario, every check is wrapped in expectSuccess() so that
 * a runtime failure is reported with its own error message instead of
 * crashing the check on a missing result.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Guard for checks that require a successful run: surface the runtime's
  // error text first, and only then inspect r.result.
  const expectSuccess = (check) => (r) =>
    (r.status === 'error' ? `Execution error: ${r.error}` : check(r));

  console.log('═'.repeat(70));
  console.log(' DSL Runtime — Complex Manual Tests');
  console.log('═'.repeat(70));

  // ────────────────────────────────────────────────
  // SECTION 1: Basic sanity
  // ────────────────────────────────────────────────

  await runTest(
    'Pure computation',
    'const x = [1,2,3,4,5]; return x.filter(n => n > 2).length;',
    expectSuccess((r) => r.result === 3 || `Expected 3, got ${r.result}`)
  );

  // The one scenario that must fail inside the runtime, so it is not wrapped.
  await runTest(
    'Validation: rejects eval()',
    'eval("console.log(1)");',
    (r) => r.status === 'error' ? true : `Expected error, got success`
  );

  // ────────────────────────────────────────────────
  // SECTION 2: While loops & pagination simulation
  // ────────────────────────────────────────────────

  await runTest(
    'While loop: accumulate until condition',
    `
      const pages = [];
      let page = 0;
      while (page < 5) {
        pages.push({ page: page, items: range(page * 10, page * 10 + 10) });
        page = page + 1;
      }
      log("Collected " + pages.length + " pages");
      return pages.length;
    `,
    expectSuccess((r) => r.result === 5 || `Expected 5, got ${r.result}`)
  );

  await runTest(
    'While loop with break: simulated pagination',
    `
      const allItems = [];
      let page = 1;
      while (true) {
        // Simulate a paginated API that returns 3 pages of data
        const pageData = range((page - 1) * 5, page * 5);
        const hasMore = page < 3;
        for (const item of pageData) {
          allItems.push(item);
        }
        log("Page " + page + ": " + pageData.length + " items, hasMore=" + hasMore);
        if (!hasMore) break;
        page = page + 1;
      }
      return allItems;
    `,
    expectSuccess((r) => {
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 15) return `Expected 15 items, got ${r.result.length}`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 3: Try/catch error handling
  // ────────────────────────────────────────────────

  await runTest(
    'Try/catch: graceful error recovery',
    `
      const results = [];
      const queries = ["validateDSL", "thisQueryWillProbablyReturnNothing12345xyz"];
      for (const q of queries) {
        try {
          const r = search(q);
          results.push({ query: q, found: true, length: r.length });
        } catch (e) {
          results.push({ query: q, found: false, error: "failed" });
        }
      }
      return results;
    `,
    expectSuccess((r) => {
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 2) return `Expected 2 results, got ${r.result.length}`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 4: Multi-search & data aggregation
  // ────────────────────────────────────────────────

  await runTest(
    'Multi-search: combine results from multiple queries',
    `
      const queries = ["error handling", "validation", "timeout"];
      const searchResults = map(queries, (q) => {
        const r = search(q);
        return { query: q, resultLength: r.length };
      });
      log("Searched " + searchResults.length + " queries");
      const totalChars = searchResults.reduce((sum, r) => sum + r.resultLength, 0);
      log("Total result chars: " + totalChars);
      return { queries: searchResults, totalChars: totalChars };
    `,
    expectSuccess((r) => {
      if (!r.result || !r.result.queries) return `Expected queries array`;
      if (r.result.queries.length !== 3) return `Expected 3 query results`;
      if (r.result.totalChars < 100) return `Expected substantial results`;
      return true;
    })
  );

  await runTest(
    'Search + extract: find code then extract specific files',
    `
      const searchResult = search("transformDSL");
      // Extract the transformer file specifically
      const code = extract({ targets: "npm/src/agent/dsl/transformer.js" });
      const summary = LLM(
        "How many functions are exported from this file? List their names. Be very concise.",
        code
      );
      return summary;
    `,
    expectSuccess((r) => {
      if (typeof r.result !== 'string') return `Expected string, got ${typeof r.result}`;
      if (r.result.length < 10) return `Summary too short: ${r.result}`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 5: Complex data transformation
  // ────────────────────────────────────────────────

  await runTest(
    'Complex data pipeline: group, transform, aggregate',
    `
      // Simulate analyzing a batch of items with different categories
      const items = [];
      for (let i = 0; i < 20; i = i + 1) {
        const categories = ["bug", "feature", "docs", "refactor"];
        const priorities = ["high", "medium", "low"];
        items.push({
          id: i,
          category: categories[i % 4],
          priority: priorities[i % 3],
          title: "Item " + i
        });
      }

      // Group by category
      const byCategory = groupBy(items, "category");

      // Count per category
      const categoryNames = ["bug", "feature", "docs", "refactor"];
      const counts = [];
      for (const cat of categoryNames) {
        const count = byCategory[cat] ? byCategory[cat].length : 0;
        const highCount = byCategory[cat]
          ? byCategory[cat].filter((item) => item.priority === "high").length
          : 0;
        counts.push({ category: cat, total: count, high: highCount });
        log(cat + ": " + count + " total, " + highCount + " high priority");
      }

      return { counts: counts, totalItems: items.length };
    `,
    expectSuccess((r) => {
      if (!r.result) return `Result is falsy: ${JSON.stringify(r)}`;
      // Debug: show what we got
      if (r.result.totalItems !== 20) return `Expected 20 total items, got type=${typeof r.result} value=${JSON.stringify(r.result).substring(0, 300)}`;
      if (!Array.isArray(r.result.counts)) return `Expected counts array, got ${JSON.stringify(r.result).substring(0, 300)}`;
      const bugs = r.result.counts.find((c) => c.category === 'bug');
      if (!bugs || bugs.total !== 5) return `Expected 5 bugs`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 6: Nested map() and LLM chaining
  // ────────────────────────────────────────────────

  await runTest(
    'Nested processing: search multiple topics, classify each result',
    `
      const topics = ["error handling", "caching"];

      // For each topic: search, then have LLM extract key patterns
      const analysis = map(topics, (topic) => {
        const results = search(topic);
        const patterns = LLM(
          "From this code, extract exactly 3 key patterns related to '" + topic + "'. " +
          "Return a brief bullet list, one pattern per line.",
          results
        );
        return { topic: topic, patterns: patterns };
      });

      log("Analyzed " + analysis.length + " topics");
      return analysis;
    `,
    expectSuccess((r) => {
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 2) return `Expected 2 topics analyzed`;
      // patterns is a string from LLM, not parsed
      if (typeof r.result[0].topic !== 'string') return `Missing topic`;
      if (typeof r.result[0].patterns !== 'string') return `Expected patterns to be string, got ${typeof r.result[0].patterns}`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 7: Real-world scenario — code review pipeline
  // ────────────────────────────────────────────────

  await runTest(
    'Code review pipeline: find, chunk, analyze, synthesize',
    `
      // Step 1: Search for the validator module
      const code = search("validateDSL ALLOWED_NODE_TYPES BLOCKED_IDENTIFIERS");

      // Step 2: Chunk if needed
      const codeChunks = chunk(code, 8000);
      log("Code split into " + codeChunks.length + " chunks");

      // Step 3: Analyze each chunk for issues
      const reviews = map(codeChunks, (c) => LLM(
        "You are a senior code reviewer. Analyze this code for potential issues: " +
        "security concerns, edge cases, performance problems. " +
        "Return a JSON object with: { issues: [{ severity: 'high'|'medium'|'low', description: string }] }. " +
        "Return ONLY JSON.",
        c
      ));

      // Step 4: Synthesize
      const synthesis = LLM(
        "Combine these code review findings into a prioritized summary. " +
        "Group by severity (high, medium, low). Be concise — max 5 bullet points total.",
        reviews.join("\\n---\\n")
      );

      return synthesis;
    `,
    expectSuccess((r) => {
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 50) return `Review too short`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 8: Real-world — dependency analysis
  // ────────────────────────────────────────────────

  await runTest(
    'Dependency analysis: find imports across multiple files',
    `
      // Search for all imports in the DSL module files
      const files = ["validator.js", "transformer.js", "environment.js", "runtime.js"];
      const imports = map(files, (file) => {
        const code = extract({ targets: "npm/src/agent/dsl/" + file });
        const analysis = LLM(
          "List all import statements from this file. Return a JSON object: " +
          "{ file: string, imports: [{ from: string, names: string[] }] }. Return ONLY JSON.",
          code
        );
        return analysis;
      });

      log("Analyzed " + imports.length + " files");

      // Have LLM create a dependency graph summary
      const summary = LLM(
        "Given these import analyses for DSL module files, create a brief dependency summary: " +
        "which files depend on what external packages and internal modules. " +
        "Format as a simple list. Be concise.",
        imports.join("\\n")
      );

      return summary;
    `,
    expectSuccess((r) => {
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 30) return `Summary too short`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 9: Stress test — many parallel LLM calls
  // ────────────────────────────────────────────────

  await runTest(
    'Stress: 10 parallel LLM calls via map()',
    `
      const items = range(1, 11);
      const results = map(items, (n) => {
        const answer = LLM(
          "Return ONLY a single number: the square of " + n + ". Nothing else, just the number.",
          "Calculate " + n + " * " + n
        );
        return { n: n, squared: String(answer).trim() };
      });
      log("Completed " + results.length + " parallel LLM calls");
      return results;
    `,
    expectSuccess((r) => {
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}`;
      if (r.result.length !== 10) return `Expected 10 results, got ${r.result.length}`;
      const first = r.result[0];
      if (first.n === undefined || first.squared === undefined) return `Missing fields: ${JSON.stringify(first)}`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 10: Complex conditional logic
  // ────────────────────────────────────────────────

  await runTest(
    'Conditional routing: different processing based on search results',
    `
      const queries = ["BLOCKED_IDENTIFIERS", "nonexistent_symbol_xyz_12345"];
      const results = [];

      for (const q of queries) {
        const searchResult = search(q);

        if (searchResult.length > 500) {
          // Rich results — summarize
          const summary = LLM("Summarize this code in one sentence.", searchResult);
          results.push({ query: q, status: "found", summary });
        } else if (searchResult.length > 100) {
          // Some results — note them
          results.push({ query: q, status: "partial", chars: searchResult.length });
        } else {
          // No meaningful results
          results.push({ query: q, status: "not_found" });
        }
        log(q + " -> " + results[results.length - 1].status);
      }

      return results;
    `,
    expectSuccess((r) => {
      if (!Array.isArray(r.result)) return `Expected array`;
      if (r.result.length !== 2) return `Expected 2 results`;
      if (r.result[0].status !== 'found') return `First query should be 'found'`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 11: Iterative deepening (broad search, then targeted searches)
  // ────────────────────────────────────────────────

  await runTest(
    'Iterative deepening: search, then search within results',
    `
      // First broad search
      const broad = search("sandbox");
      const broadSummary = LLM(
        "From these search results, identify the 2 most important function names " +
        "related to sandboxing. Return ONLY the function names separated by comma.",
        broad
      );
      log("Broad search found key functions: " + broadSummary);

      // Now search specifically for each function
      const parts = broadSummary.split(",");
      const functions = [];
      for (const p of parts) {
        const trimmed = p.trim();
        if (trimmed.length > 0) functions.push(trimmed);
      }
      log("Will search for " + functions.length + " functions");

      const details = map(functions.slice(0, 2), (fn) => {
        const detail = search(fn);
        const analysis = LLM(
          "Explain what the function '" + fn + "' does in 1-2 sentences based on this code.",
          detail
        );
        return { name: fn, description: analysis };
      });

      return details;
    `,
    expectSuccess((r) => {
      if (!Array.isArray(r.result)) return `Expected array, got ${typeof r.result}: ${String(JSON.stringify(r.result)).substring(0, 200)}`;
      if (r.result.length < 1) return `Expected at least 1 function analyzed`;
      if (!r.result[0].description) return `Missing description`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 12: Full analyze_all replacement pattern
  // ────────────────────────────────────────────────

  await runTest(
    'analyze_all replacement: comprehensive codebase question',
    `
      // Question: "What testing patterns are used in the DSL module?"

      // Phase 1: Search for test-related code
      const testResults = search("test DSL validator transformer runtime");

      // Phase 2: Chunk and extract patterns
      const chunks = chunk(testResults, 6000);
      log("Processing " + chunks.length + " test chunks");

      const patterns = map(chunks, (c) => LLM(
        "Extract testing patterns from this code. For each pattern found, note: " +
        "1) Pattern name (e.g., 'mock functions', 'assertion style', 'test structure') " +
        "2) Brief description " +
        "Return as a bullet list. Be concise.",
        c
      ));

      // Phase 3: Synthesize
      const answer = LLM(
        "You are answering the question: 'What testing patterns are used in the DSL module?' " +
        "Based on the analysis below, provide a comprehensive but concise answer. " +
        "Organize by pattern type. Use bullet points. Max 10 bullet points.",
        patterns.join("\\n---\\n")
      );

      return answer;
    `,
    expectSuccess((r) => {
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 100) return `Answer too short`;
      return true;
    })
  );

  // ────────────────────────────────────────────────
  // SECTION 13: Discovery-first pattern
  // ────────────────────────────────────────────────

  await runTest(
    'Discovery-first: explore repo then plan search strategy',
    `
      // Phase 1: Discover repo structure
      const files = listFiles("**/*");
      const sample = search("error handling");
      log("Files length: " + String(files).length + ", sample length: " + String(sample).length);

      // Phase 2: Ask LLM to determine optimal search strategy
      const plan = LLM(
        "Based on this repository structure and sample search results, determine the best search strategy " +
        "to answer: 'What are all the validation approaches in this codebase?' " +
        "Return a JSON object with: keywords (array of 2-3 search queries that will find relevant data), " +
        "extractionFocus (what to extract from each result), " +
        "and aggregation (summarize or list_unique). " +
        "IMPORTANT: Only suggest keywords likely to match actual content you see. Return ONLY valid JSON.",
        "Repository files:\\n" + String(files).substring(0, 3000) + "\\nSample results:\\n" + String(sample).substring(0, 3000)
      );
      const strategy = JSON.parse(String(plan));
      log("Strategy keywords: " + strategy.keywords.length + ", focus: " + strategy.extractionFocus);

      // Phase 3: Execute with discovered strategy
      const allFindings = [];
      for (const kw of strategy.keywords) {
        const results = search(kw);
        if (String(results).length > 500) {
          const chunks = chunk(results);
          const findings = map(chunks, (c) => LLM(strategy.extractionFocus, c));
          for (const f of findings) { allFindings.push(String(f)); }
          log("Keyword '" + kw + "': " + chunks.length + " chunks processed");
        } else {
          log("Keyword '" + kw + "': skipped (too few results)");
        }
      }
      var combined = "";
      for (const f of allFindings) { combined = combined + f + "\\n---\\n"; }
      return LLM("Synthesize all findings about validation approaches into a comprehensive answer.", combined);
    `,
    expectSuccess((r) => {
      if (typeof r.result !== 'string') return `Expected string`;
      if (r.result.length < 100) return `Answer too short: ${r.result.length} chars`;
      return true;
    })
  );

  // ── Summary ──
  console.log(`\n${'═'.repeat(70)}`);
  console.log(` Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log('═'.repeat(70));

  // A non-zero exit code lets callers detect a failed run.
  process.exit(failed > 0 ? 1 : 0);
}
658
+
659
// Start the suite. A rejection that escapes main() is logged and the process
// exits with code 1.
const reportFatal = (err) => {
  console.error('Fatal error:', err);
  process.exit(1);
};

main().catch(reportFatal);