@probelabs/probe 0.6.0-rc232 → 0.6.0-rc234

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/bin/binaries/probe-v0.6.0-rc234-aarch64-apple-darwin.tar.gz +0 -0
  2. package/bin/binaries/probe-v0.6.0-rc234-aarch64-unknown-linux-musl.tar.gz +0 -0
  3. package/bin/binaries/probe-v0.6.0-rc234-x86_64-apple-darwin.tar.gz +0 -0
  4. package/bin/binaries/probe-v0.6.0-rc234-x86_64-pc-windows-msvc.zip +0 -0
  5. package/bin/binaries/probe-v0.6.0-rc234-x86_64-unknown-linux-musl.tar.gz +0 -0
  6. package/build/agent/ProbeAgent.d.ts +2 -0
  7. package/build/agent/ProbeAgent.js +66 -7
  8. package/build/agent/dsl/agent-test.mjs +341 -0
  9. package/build/agent/dsl/analyze-test.mjs +237 -0
  10. package/build/agent/dsl/diag-test.mjs +78 -0
  11. package/build/agent/dsl/environment.js +387 -0
  12. package/build/agent/dsl/manual-test.mjs +662 -0
  13. package/build/agent/dsl/output-buffer-test.mjs +124 -0
  14. package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
  15. package/build/agent/dsl/pipeline-test.mjs +223 -0
  16. package/build/agent/dsl/runtime.js +206 -0
  17. package/build/agent/dsl/sandbox-experiment.mjs +309 -0
  18. package/build/agent/dsl/transformer.js +156 -0
  19. package/build/agent/dsl/trigger-test.mjs +159 -0
  20. package/build/agent/dsl/validator.js +183 -0
  21. package/build/agent/index.js +18179 -7664
  22. package/build/agent/probeTool.js +9 -0
  23. package/build/agent/schemaUtils.js +74 -1
  24. package/build/agent/tasks/taskTool.js +6 -1
  25. package/build/agent/tools.js +9 -1
  26. package/build/index.js +5 -0
  27. package/build/tools/common.js +7 -0
  28. package/build/tools/executePlan.js +761 -0
  29. package/build/tools/index.js +4 -0
  30. package/cjs/agent/ProbeAgent.cjs +12146 -1638
  31. package/cjs/index.cjs +11800 -1283
  32. package/package.json +5 -1
  33. package/src/agent/ProbeAgent.d.ts +2 -0
  34. package/src/agent/ProbeAgent.js +66 -7
  35. package/src/agent/dsl/agent-test.mjs +341 -0
  36. package/src/agent/dsl/analyze-test.mjs +237 -0
  37. package/src/agent/dsl/diag-test.mjs +78 -0
  38. package/src/agent/dsl/environment.js +387 -0
  39. package/src/agent/dsl/manual-test.mjs +662 -0
  40. package/src/agent/dsl/output-buffer-test.mjs +124 -0
  41. package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
  42. package/src/agent/dsl/pipeline-test.mjs +223 -0
  43. package/src/agent/dsl/runtime.js +206 -0
  44. package/src/agent/dsl/sandbox-experiment.mjs +309 -0
  45. package/src/agent/dsl/transformer.js +156 -0
  46. package/src/agent/dsl/trigger-test.mjs +159 -0
  47. package/src/agent/dsl/validator.js +183 -0
  48. package/src/agent/index.js +8 -0
  49. package/src/agent/probeTool.js +9 -0
  50. package/src/agent/schemaUtils.js +74 -1
  51. package/src/agent/tasks/taskTool.js +6 -1
  52. package/src/agent/tools.js +9 -1
  53. package/src/index.js +5 -0
  54. package/src/tools/common.js +7 -0
  55. package/src/tools/executePlan.js +761 -0
  56. package/src/tools/index.js +4 -0
  57. package/bin/binaries/probe-v0.6.0-rc232-aarch64-apple-darwin.tar.gz +0 -0
  58. package/bin/binaries/probe-v0.6.0-rc232-aarch64-unknown-linux-musl.tar.gz +0 -0
  59. package/bin/binaries/probe-v0.6.0-rc232-x86_64-apple-darwin.tar.gz +0 -0
  60. package/bin/binaries/probe-v0.6.0-rc232-x86_64-pc-windows-msvc.zip +0 -0
  61. package/bin/binaries/probe-v0.6.0-rc232-x86_64-unknown-linux-musl.tar.gz +0 -0
@@ -41,6 +41,8 @@ export interface ProbeAgentOptions {
41
41
  enableDelegate?: boolean;
42
42
  /** Architecture context filename to embed from repo root (defaults to AGENTS.md with CLAUDE.md fallback; ARCHITECTURE.md is always included when present) */
43
43
  architectureFileName?: string;
44
+ /** Enable the execute_plan DSL orchestration tool */
45
+ enableExecutePlan?: boolean;
44
46
  /** Enable bash tool for command execution */
45
47
  enableBash?: boolean;
46
48
  /** Bash tool configuration (allow/deny patterns) */
@@ -48,6 +48,7 @@ import {
48
48
  extractToolDefinition,
49
49
  delegateToolDefinition,
50
50
  analyzeAllToolDefinition,
51
+ getExecutePlanToolDefinition,
51
52
  bashToolDefinition,
52
53
  listFilesToolDefinition,
53
54
  searchFilesToolDefinition,
@@ -80,7 +81,8 @@ import {
80
81
  generateSchemaInstructions,
81
82
  isJsonSchemaDefinition,
82
83
  createSchemaDefinitionCorrectionPrompt,
83
- validateAndFixMermaidResponse
84
+ validateAndFixMermaidResponse,
85
+ tryAutoWrapForSimpleSchema
84
86
  } from './schemaUtils.js';
85
87
  import { removeThinkingTags } from './xmlParsingUtils.js';
86
88
  import { predefinedPrompts } from './shared/prompts.js';
@@ -176,6 +178,7 @@ export class ProbeAgent {
176
178
  * @param {string} [options.promptType] - Predefined prompt type (code-explorer, code-searcher, architect, code-review, support)
177
179
  * @param {boolean} [options.allowEdit=false] - Allow the use of the 'implement' tool
178
180
  * @param {boolean} [options.enableDelegate=false] - Enable the delegate tool for task distribution to subagents
181
+ * @param {boolean} [options.enableExecutePlan=false] - Enable the execute_plan DSL orchestration tool
179
182
  * @param {string} [options.architectureFileName] - Architecture context filename to embed from repo root (defaults to AGENTS.md with CLAUDE.md fallback; ARCHITECTURE.md is always included when present)
180
183
  * @param {string} [options.path] - Search directory path
181
184
  * @param {string} [options.cwd] - Working directory for resolving relative paths (independent of allowedFolders)
@@ -225,6 +228,7 @@ export class ProbeAgent {
225
228
  this.promptType = options.promptType || 'code-explorer';
226
229
  this.allowEdit = !!options.allowEdit;
227
230
  this.enableDelegate = !!options.enableDelegate;
231
+ this.enableExecutePlan = !!options.enableExecutePlan;
228
232
  this.debug = options.debug || process.env.DEBUG === '1';
229
233
  this.cancelled = false;
230
234
  this.tracer = options.tracer || null;
@@ -809,6 +813,10 @@ export class ProbeAgent {
809
813
  initializeTools() {
810
814
  const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
811
815
 
816
+ // Output buffer for DSL output() function — shared mutable object,
817
+ // reset at the start of each answer() call
818
+ this._outputBuffer = { items: [] };
819
+
812
820
  const configOptions = {
813
821
  sessionId: this.sessionId,
814
822
  debug: this.debug,
@@ -820,6 +828,7 @@ export class ProbeAgent {
820
828
  searchDelegate: this.searchDelegate,
821
829
  allowEdit: this.allowEdit,
822
830
  enableDelegate: this.enableDelegate,
831
+ enableExecutePlan: this.enableExecutePlan,
823
832
  enableBash: this.enableBash,
824
833
  bashConfig: this.bashConfig,
825
834
  tracer: this.tracer,
@@ -828,6 +837,7 @@ export class ProbeAgent {
828
837
  provider: this.clientApiProvider,
829
838
  model: this.clientApiModel,
830
839
  delegationManager: this.delegationManager, // Per-instance delegation limits
840
+ outputBuffer: this._outputBuffer,
831
841
  concurrencyLimiter: this.concurrencyLimiter, // Global AI concurrency limiter
832
842
  isToolAllowed
833
843
  };
@@ -853,7 +863,10 @@ export class ProbeAgent {
853
863
  if (this.enableDelegate && wrappedTools.delegateToolInstance && isToolAllowed('delegate')) {
854
864
  this.toolImplementations.delegate = wrappedTools.delegateToolInstance;
855
865
  }
856
- if (wrappedTools.analyzeAllToolInstance && isToolAllowed('analyze_all')) {
866
+ if (this.enableExecutePlan && wrappedTools.executePlanToolInstance && isToolAllowed('execute_plan')) {
867
+ this.toolImplementations.execute_plan = wrappedTools.executePlanToolInstance;
868
+ } else if (wrappedTools.analyzeAllToolInstance && isToolAllowed('analyze_all')) {
869
+ // analyze_all is fallback when execute_plan is not enabled
857
870
  this.toolImplementations.analyze_all = wrappedTools.analyzeAllToolInstance;
858
871
  }
859
872
 
@@ -2554,8 +2567,18 @@ ${extractGuidance}
2554
2567
  toolDefinitions += `${delegateToolDefinition}\n`;
2555
2568
  }
2556
2569
 
2557
- // Analyze All tool for bulk data processing
2558
- if (isToolAllowed('analyze_all')) {
2570
+ // Execute Plan tool for DSL-based orchestration (requires enableExecutePlan flag, supersedes analyze_all)
2571
+ if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
2572
+ // Build available function list based on what tools are registered
2573
+ const dslFunctions = ['LLM', 'map', 'chunk', 'batch', 'log', 'range', 'flatten', 'unique', 'groupBy', 'parseJSON', 'storeSet', 'storeGet', 'storeAppend', 'storeKeys', 'storeGetAll', 'output'];
2574
+ if (isToolAllowed('search')) dslFunctions.unshift('search');
2575
+ if (isToolAllowed('query')) dslFunctions.unshift('query');
2576
+ if (isToolAllowed('extract')) dslFunctions.unshift('extract');
2577
+ if (isToolAllowed('listFiles')) dslFunctions.push('listFiles');
2578
+ if (this.enableBash && isToolAllowed('bash')) dslFunctions.push('bash');
2579
+ toolDefinitions += `${getExecutePlanToolDefinition(dslFunctions)}\n`;
2580
+ } else if (isToolAllowed('analyze_all')) {
2581
+ // Fallback: only register analyze_all if execute_plan is not available
2559
2582
  toolDefinitions += `${analyzeAllToolDefinition}\n`;
2560
2583
  }
2561
2584
 
@@ -2631,7 +2654,9 @@ The configuration is loaded from src/config.js lines 15-25 which contains the da
2631
2654
  if (this.enableDelegate && isToolAllowed('delegate')) {
2632
2655
  availableToolsList += '- delegate: Delegate big distinct tasks to specialized probe subagents.\n';
2633
2656
  }
2634
- if (isToolAllowed('analyze_all')) {
2657
+ if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
2658
+ availableToolsList += '- execute_plan: Execute a DSL program to orchestrate tool calls. ALWAYS use this for: questions containing "all"/"every"/"comprehensive"/"complete inventory", multi-topic analysis, open-ended discovery questions, or any task requiring full codebase coverage.\n';
2659
+ } else if (isToolAllowed('analyze_all')) {
2635
2660
  availableToolsList += '- analyze_all: Process ALL data matching a query using map-reduce (for aggregate questions needing 100% coverage).\n';
2636
2661
  }
2637
2662
  if (this.enableBash && isToolAllowed('bash')) {
@@ -2861,6 +2886,11 @@ Follow these instructions carefully:
2861
2886
  // Track initial history length for storage
2862
2887
  const oldHistoryLength = this.history.length;
2863
2888
 
2889
+ // Reset output buffer for this answer() call
2890
+ if (this._outputBuffer) {
2891
+ this._outputBuffer.items = [];
2892
+ }
2893
+
2864
2894
  // START CHECKPOINT: Initialize task management for this request
2865
2895
  if (this.enableTasks) {
2866
2896
  try {
@@ -3368,8 +3398,10 @@ Follow these instructions carefully:
3368
3398
  if (this.enableDelegate && this.allowedTools.isEnabled('delegate')) {
3369
3399
  validTools.push('delegate');
3370
3400
  }
3371
- // Analyze All tool (for bulk data processing with map-reduce)
3372
- if (this.allowedTools.isEnabled('analyze_all')) {
3401
+ // Execute Plan tool (requires enableExecutePlan flag, supersedes analyze_all)
3402
+ if (this.enableExecutePlan && this.allowedTools.isEnabled('execute_plan')) {
3403
+ validTools.push('execute_plan');
3404
+ } else if (this.allowedTools.isEnabled('analyze_all')) {
3373
3405
  validTools.push('analyze_all');
3374
3406
  }
3375
3407
  // Task tool (require both enableTasks flag AND allowedTools permission)
@@ -4470,6 +4502,19 @@ Convert your previous response content into actual JSON data that follows this s
4470
4502
  retryCount = 1; // Start at 1 since we already did one correction
4471
4503
  }
4472
4504
 
4505
+ // Before entering correction loop, try auto-wrapping for simple schemas
4506
+ // This avoids re-invoking AI for schemas like {text: string} where we can just wrap programmatically
4507
+ if (!validation.isValid) {
4508
+ const autoWrapped = tryAutoWrapForSimpleSchema(finalResult, options.schema, { debug: this.debug });
4509
+ if (autoWrapped) {
4510
+ if (this.debug) {
4511
+ console.log(`[DEBUG] JSON validation: Auto-wrapped plain text for simple schema`);
4512
+ }
4513
+ finalResult = autoWrapped;
4514
+ validation = validateJsonResponse(finalResult, { debug: this.debug });
4515
+ }
4516
+ }
4517
+
4473
4518
  while (!validation.isValid && retryCount < maxRetries) {
4474
4519
  if (this.debug) {
4475
4520
  console.log(`[DEBUG] JSON validation: attempt_completion validation failed (attempt ${retryCount + 1}/${maxRetries}):`, validation.error);
@@ -4594,6 +4639,19 @@ Convert your previous response content into actual JSON data that follows this s
4594
4639
  }
4595
4640
  }
4596
4641
 
4642
+ // Append DSL output buffer directly to response (bypasses LLM rewriting)
4643
+ if (this._outputBuffer && this._outputBuffer.items.length > 0 && !options._schemaFormatted) {
4644
+ const outputContent = this._outputBuffer.items.join('\n\n');
4645
+ finalResult = (finalResult || '') + '\n\n' + outputContent;
4646
+ if (options.onStream) {
4647
+ options.onStream('\n\n' + outputContent);
4648
+ }
4649
+ if (this.debug) {
4650
+ console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result`);
4651
+ }
4652
+ this._outputBuffer.items = [];
4653
+ }
4654
+
4597
4655
  return finalResult;
4598
4656
 
4599
4657
  } catch (error) {
@@ -4756,6 +4814,7 @@ Convert your previous response content into actual JSON data that follows this s
4756
4814
  promptType: this.promptType,
4757
4815
  allowEdit: this.allowEdit,
4758
4816
  enableDelegate: this.enableDelegate,
4817
+ enableExecutePlan: this.enableExecutePlan,
4759
4818
  architectureFileName: this.architectureFileName,
4760
4819
  // Pass allowedFolders which will recompute workspaceRoot correctly
4761
4820
  allowedFolders: [...this.allowedFolders],
@@ -0,0 +1,341 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Agent-realistic test: the LLM writes DSL scripts itself.
4
+ *
5
+ * This simulates the real production flow:
6
+ * 1. We give the LLM a task + the tool definition (system prompt)
7
+ * 2. The LLM generates the DSL script
8
+ * 3. The runtime validates, transforms, and executes it
9
+ * 4. The result comes back
10
+ *
11
+ * Usage:
12
+ * node npm/src/agent/dsl/agent-test.mjs
13
+ */
14
+
15
+ import { createDSLRuntime } from './runtime.js';
16
+ import { getExecutePlanToolDefinition } from '../../tools/executePlan.js';
17
+ import { search } from '../../search.js';
18
+ import { extract } from '../../extract.js';
19
+ import { createGoogleGenerativeAI } from '@ai-sdk/google';
20
+ import { generateText } from 'ai';
21
+ import { config } from 'dotenv';
22
+ import { resolve, dirname } from 'path';
23
+ import { fileURLToPath } from 'url';
24
+
// Resolve this script's directory, then climb four levels to reach the project root.
const __dirname = dirname(fileURLToPath(import.meta.url));
const projectRoot = resolve(__dirname, '../../../..');

// Load environment variables from the .env file that sits at the project root.
const envFile = resolve(projectRoot, '.env');
config({ path: envFile });

// The key may be supplied under either variable name; the first non-empty one is used.
const { GOOGLE_GENERATIVE_AI_API_KEY, GOOGLE_API_KEY } = process.env;
const apiKey = GOOGLE_GENERATIVE_AI_API_KEY || GOOGLE_API_KEY;
if (!apiKey) {
  // Nothing below can run without a key, so stop with a non-zero exit code.
  console.error('ERROR: No Google API key found.');
  process.exit(1);
}

// Provider factory used by every model call in this script.
const google = createGoogleGenerativeAI({ apiKey });
+
/**
 * Run a single LLM completion; this is the `llmCall` hook handed to the DSL runtime.
 *
 * @param {string} instruction - Sent to the model as the system prompt.
 * @param {*} data - Payload for the user prompt. `null`/`undefined` become an empty
 *   string, strings are used as-is, anything else is pretty-printed as JSON.
 * @param {{temperature?: number, maxTokens?: number}} [options] - Sampling overrides.
 * @returns {Promise<string>} The text produced by the model.
 */
async function llmCall(instruction, data, options = {}) {
  let dataStr;
  if (data == null) {
    dataStr = '';
  } else if (typeof data === 'string') {
    dataStr = data;
  } else {
    dataStr = JSON.stringify(data, null, 2);
  }

  // An empty payload is replaced by a placeholder; the prompt is capped at 100000 characters.
  const prompt = (dataStr || '(empty)').substring(0, 100000);

  const result = await generateText({
    model: google('gemini-2.5-flash'),
    system: instruction,
    prompt,
    // `??` instead of `||`: an explicit temperature of 0 must not be replaced by the default.
    temperature: options.temperature ?? 0.3,
    // `||` is kept here on purpose so that a maxTokens of 0 still falls back to the default.
    maxTokens: options.maxTokens || 4000,
  });
  return result.text;
}
50
+
/**
 * Play the "agent" role: ask the model to produce a DSL script for a task.
 *
 * @param {string} systemPrompt - System prompt passed to the model.
 * @param {string} userTask - The task description, sent as the user prompt.
 * @returns {Promise<string>} The raw text returned by the model.
 */
async function agentGenerate(systemPrompt, userTask) {
  const request = {
    model: google('gemini-2.5-flash'),
    system: systemPrompt,
    prompt: userTask,
    temperature: 0.3,
    maxTokens: 4000,
  };
  const { text } = await generateText(request);
  return text;
}
62
+
// All tool calls operate relative to the project root.
const cwd = projectRoot;

/**
 * Wrap a tool body so that a thrown error is returned to the caller as a
 * "<label> error: <message>" string instead of propagating.
 */
const guarded = (label, run) => async (params) => {
  try {
    return await run(params);
  } catch (e) {
    return label + ' error: ' + e.message;
  }
};

// Tool table handed to the DSL runtime; each entry exposes an async execute(params).
const toolImplementations = {
  search: {
    execute: guarded('Search', (params) =>
      search({
        query: params.query,
        path: params.path || cwd,
        cwd,
        maxTokens: 20000,
        timeout: 30,
        exact: params.exact || false,
      })
    ),
  },
  extract: {
    execute: guarded('Extract', (params) =>
      extract({
        targets: params.targets,
        input_content: params.input_content,
        cwd,
      })
    ),
  },
  listFiles: {
    // Implemented on top of search() with filesOnly set.
    execute: guarded('listFiles', (params) =>
      search({
        query: params.pattern || '*',
        path: cwd,
        cwd,
        filesOnly: true,
        maxTokens: 10000,
      })
    ),
  },
};

// Runtime instance shared by every test in this script.
const runtime = createDSLRuntime({
  toolImplementations,
  llmCall,
  mapConcurrency: 3,
  timeoutMs: 60000, // 60s timeout per execution
  maxLoopIterations: 5000, // loop guard
});
119
+
/**
 * Strip markdown fences and XML tags that LLMs sometimes wrap code in.
 *
 * Removes, in order:
 *   1. any line-leading ``` fence, together with a language tag of any name
 *      when that tag is the only thing on the fence line (```ts, ```jsx, ...);
 *      an inline `javascript`/`js` tag followed by code on the same line is
 *      still removed as well;
 *   2. any ``` that ends a line;
 *   3. <execute_plan>, </execute_plan>, <code> and </code> tags.
 *
 * @param {*} code - Raw model output; falsy values are treated as an empty string.
 * @returns {string} The unwrapped code with surrounding whitespace trimmed.
 */
function stripCodeWrapping(code) {
  return String(code || '')
    // The lookahead only accepts a generic tag when nothing but whitespace follows it
    // on the line, so a one-line "```var x = 1;```" keeps its first identifier.
    .replace(/^```(?:[\w+#.-]+(?=[ \t]*\r?$)|javascript|js)?[ \t]*\r?\n?/gm, '')
    .replace(/```$/gm, '')
    .replace(/<\/?(?:execute_plan|code)>/g, '')
    .trim();
}
129
+
// DSL functions advertised to the agent; the resulting tool definition is
// embedded in the agent's system prompt below.
const advertisedDslFunctions = [
  'search', 'extract', 'LLM', 'map', 'chunk', 'listFiles',
  'log', 'range', 'flatten', 'unique', 'groupBy',
];
const toolDef = getExecutePlanToolDefinition(advertisedDslFunctions);

// System prompt for the script-writing agent: tool definition plus hard rules.
const SYSTEM_PROMPT = `You are a coding assistant with access to the execute_plan tool.

${toolDef}

When the user asks a question that requires searching a codebase, batch processing, or handling large data,
write a DSL script to handle it. Return ONLY the JavaScript code — no markdown fences, no explanation,
no \`\`\` blocks. Just the raw code that goes into the execute_plan tool.

CRITICAL RULES:
- Do NOT use async/await — the runtime handles it.
- Do NOT use template literals (backticks) — use string concatenation with +.
- Do NOT use shorthand properties like { key } — use { key: key }.
- search() returns a STRING, not an array. Use chunk() to split it into an array.
- map(items, fn) requires an ARRAY as first argument. Do NOT pass a string to map().
- Do NOT use .map(), .forEach(), .filter(), .join() array methods. Use for..of loops or the global map() function.
- To join an array, use a for..of loop: var s = ""; for (const item of arr) { s = s + item + "\\n"; }
- Do NOT define helper functions that call tools. Write all logic inline or use for..of loops.
- Use String(value) to safely convert to string before calling .trim() or .split().
- Do NOT use regex literals (/pattern/) — use String methods like indexOf, includes, startsWith instead.
- ONLY call functions listed in the tool definition. Do NOT invent or guess function names.
- ALWAYS write executable DSL code, never answer in plain text.
- Always return a value at the end.`;

// ── Test runner ──
// Module-level tallies, updated by runAgentTest and reported by main.
let testNum = 0,
  passed = 0,
  failed = 0;

// Number of self-healing attempts allowed after the first failed execution.
const MAX_RETRIES = 2;
162
+
/**
 * Run one agent-realistic scenario end to end and record the outcome in the
 * module-level `passed` / `failed` counters.
 *
 * Flow: the agent model writes a DSL script for `taskDescription`; the script
 * is executed by the runtime; on failure the error (plus logs) is sent back to
 * the model for a corrected script, up to MAX_RETRIES times; finally `check`
 * judges the successful result.
 *
 * @param {string} taskDescription - Natural-language task given to the agent.
 * @param {(result: object) => (true|undefined|string)} check - Receives the
 *   runtime result; `true` or `undefined` means pass, any other return value is
 *   printed as the failure reason.
 * @returns {Promise<void>} Never rejects: any exception is logged and counted as a failure.
 */
async function runAgentTest(taskDescription, check) {
  testNum++;
  console.log(`\n${'─'.repeat(70)}`);
  console.log(`▶ Test ${testNum}: ${taskDescription}`);

  const start = Date.now();

  // Print a header with the line count, the first `maxLines` lines, and an ellipsis if truncated.
  const showCode = (label, code, maxLines) => {
    const lines = code.split('\n');
    console.log(` ${label} (${lines.length} lines):`);
    console.log(lines.slice(0, maxLines).map(l => ' ' + l).join('\n'));
    if (lines.length > maxLines) console.log(' ...');
  };

  // The runtime result may come back without a logs array; treat that as "no logs".
  const logsOf = (r) => (Array.isArray(r.logs) ? r.logs : []);

  // Render any value for display without throwing (JSON.stringify can throw or return undefined).
  const toText = (value) => {
    if (typeof value === 'string') return value;
    try {
      return String(JSON.stringify(value, null, 2));
    } catch (_) {
      return String(value);
    }
  };

  try {
    // Step 1: Agent generates the DSL script
    console.log(' [1/4] Agent generating DSL script...');
    const generatedCode = await agentGenerate(SYSTEM_PROMPT, taskDescription);
    let currentCode = stripCodeWrapping(generatedCode);
    showCode('Generated', currentCode, 6);

    // Step 2: Execute with self-healing retries
    let result;
    let attempt = 0;

    while (attempt <= MAX_RETRIES) {
      console.log(` [2/4] Executing DSL script${attempt > 0 ? ' (retry ' + attempt + ')' : ''}...`);
      result = await runtime.execute(currentCode, taskDescription);

      if (result.status === 'success') break;

      // Execution failed — try self-healing
      const failureLogs = logsOf(result);
      const logOutput = failureLogs.length > 0 ? '\nLogs: ' + failureLogs.join(' | ') : '';
      // String() so a non-string error value cannot break the substring call below.
      const errorMsg = String(result.error) + logOutput;
      console.log(` [!] Execution failed: ${errorMsg.substring(0, 150)}`);

      if (attempt >= MAX_RETRIES) break;

      console.log(` [3/4] Self-healing — asking LLM to fix (attempt ${attempt + 1})...`);
      const fixPrompt = `The following DSL script failed with an error. Fix the script and return ONLY the corrected JavaScript code — no markdown, no explanation, no backtick fences.

ORIGINAL SCRIPT:
${currentCode}

ERROR:
${errorMsg}

RULES REMINDER:
- search(), listFiles(), extract() all return STRINGS, not arrays.
- Use chunk(stringData) to split a string into an array of chunks.
- map(items, fn) requires an ARRAY as first argument. Do NOT pass strings to map().
- Do NOT use .map(), .forEach(), .filter(), .join() — use for..of loops instead.
- Do NOT define helper functions that call tools — write logic inline.
- Do NOT use async/await, template literals, or shorthand properties.
- Do NOT use regex literals (/pattern/) — use String methods like indexOf, includes, startsWith instead.
- String concatenation with +, not template literals.`;

      // The whole fix request travels as the system instruction; the data payload is empty.
      const fixedCode = await llmCall(fixPrompt, '', { maxTokens: 4000, temperature: 0.2 });
      currentCode = stripCodeWrapping(fixedCode);

      if (!currentCode) {
        // Nothing to execute; `result` still holds the last failure and is reported below.
        console.log(' [!] Self-heal returned empty code');
        break;
      }

      showCode('Fixed code', currentCode, 4);

      attempt++;
    }

    const elapsed = Date.now() - start;
    console.log(` [4/4] Checking result... (${elapsed}ms)`);

    if (result.status === 'error') {
      console.log(` ✗ EXECUTION ERROR after ${attempt} retries (${elapsed}ms)`);
      console.log(` Error: ${String(result.error).substring(0, 200)}`);
      const errorLogs = logsOf(result);
      if (errorLogs.length) console.log(` Logs: ${errorLogs.join(' | ')}`);
      failed++;
      return;
    }

    const checkResult = check(result);
    if (checkResult === true || checkResult === undefined) {
      const healNote = attempt > 0 ? ` (self-healed after ${attempt} ${attempt === 1 ? 'retry' : 'retries'})` : '';
      console.log(` ✓ PASSED${healNote} (${elapsed}ms)`);
      // toText never throws, so a result that cannot be rendered no longer turns a pass into a crash.
      const resultPreview = toText(result.result).substring(0, 300);
      console.log(` Result: ${resultPreview}${resultPreview.length >= 300 ? '...' : ''}`);
      // Hide the runtime's own bookkeeping lines; only script-emitted logs are shown.
      const userLogs = logsOf(result).filter(l => !String(l).startsWith('[runtime]'));
      if (userLogs.length) {
        console.log(` Logs: ${userLogs.join(' | ')}`);
      }
      passed++;
    } else {
      console.log(` ✗ CHECK FAILED (${elapsed}ms) — ${checkResult}`);
      failed++;
    }
  } catch (e) {
    console.log(` ✗ CRASHED — ${e.message}`);
    failed++;
  }
}
264
+
// ── Agent tests ──
/**
 * Run every agent scenario in order, print the summary, and exit with
 * status 1 if any scenario failed (0 otherwise).
 */
async function main() {
  const rule = '═'.repeat(70);
  console.log(rule);
  console.log(' Agent-Realistic DSL Tests — LLM writes its own scripts');
  console.log(rule);

  // Passes whenever the runtime produced any truthy result.
  const anyResult = (r) => {
    if (!r.result) return 'No result';
    return true;
  };

  // Passes when the result, rendered as text, is at least 50 characters long.
  const atLeast50Chars = (tooShortMessage) => (r) => {
    if (!r.result) return 'No result';
    const s = typeof r.result === 'string' ? r.result : JSON.stringify(r.result);
    if (s.length < 50) return tooShortMessage;
    return true;
  };

  const scenarios = [
    {
      // Test 1: Simple search + summarize
      task: 'Search this codebase for how error handling is done and give me a brief summary.',
      check: (r) => {
        if (typeof r.result !== 'string') return 'Expected string result';
        if (r.result.length < 50) return 'Summary too short';
        return true;
      },
    },
    {
      // Test 2: Find and count patterns
      task: 'Write a DSL script to search this codebase for tool definitions (search, extract, query, etc.). Count how many unique tools are defined and return an object with the count and an array of tool names.',
      check: anyResult,
    },
    {
      // Test 3: Multi-file analysis
      task: 'Look at the files in npm/src/agent/dsl/ directory — search for each one, and for each file give me a one-sentence description of what it does. Return as a list.',
      check: atLeast50Chars('Result too short'),
    },
    {
      // Test 4: Code quality check
      task: 'Search for all TODO and FIXME comments in this codebase. Group them by urgency (TODO vs FIXME) and summarize what needs attention.',
      check: anyResult,
    },
    {
      // Test 5: Complex analysis requiring chunking
      task: 'Analyze the test coverage of this project. Search for test files, see what modules they test, and identify any modules that might be missing tests. Give me a brief report.',
      check: atLeast50Chars('Report too short'),
    },
    {
      // Test 6: Data extraction + classification
      task: 'Find all the Zod schemas defined in this codebase (search for "z.object"). For each schema, extract its name and list its fields. Return a structured summary.',
      check: anyResult,
    },
  ];

  // Scenarios run strictly one after another so their console output does not interleave.
  for (const { task, check } of scenarios) {
    await runAgentTest(task, check);
  }

  // ── Summary ──
  console.log(`\n${rule}`);
  console.log(` Agent-Realistic Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log(rule);

  process.exit(failed > 0 ? 1 : 0);
}
337
+
// Script entry point: anything that escapes main() is reported and ends the process with status 1.
main().catch((fatal) => {
  console.error('Fatal error:', fatal);
  process.exit(1);
});