@probelabs/probe 0.6.0-rc231 → 0.6.0-rc233
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc233-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.d.ts +2 -0
- package/build/agent/ProbeAgent.js +105 -12
- package/build/agent/dsl/agent-test.mjs +341 -0
- package/build/agent/dsl/analyze-test.mjs +237 -0
- package/build/agent/dsl/diag-test.mjs +78 -0
- package/build/agent/dsl/environment.js +387 -0
- package/build/agent/dsl/manual-test.mjs +662 -0
- package/build/agent/dsl/output-buffer-test.mjs +124 -0
- package/build/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/build/agent/dsl/pipeline-test.mjs +223 -0
- package/build/agent/dsl/runtime.js +206 -0
- package/build/agent/dsl/sandbox-experiment.mjs +309 -0
- package/build/agent/dsl/transformer.js +156 -0
- package/build/agent/dsl/trigger-test.mjs +159 -0
- package/build/agent/dsl/validator.js +183 -0
- package/build/agent/index.js +18776 -7675
- package/build/agent/probeTool.js +9 -0
- package/build/agent/tools.js +9 -1
- package/build/delegate.js +12 -6
- package/build/index.js +5 -0
- package/build/tools/common.js +7 -0
- package/build/tools/executePlan.js +761 -0
- package/build/tools/index.js +4 -0
- package/cjs/agent/ProbeAgent.cjs +12891 -1797
- package/cjs/index.cjs +12395 -1292
- package/package.json +5 -1
- package/src/agent/ProbeAgent.d.ts +2 -0
- package/src/agent/ProbeAgent.js +105 -12
- package/src/agent/dsl/agent-test.mjs +341 -0
- package/src/agent/dsl/analyze-test.mjs +237 -0
- package/src/agent/dsl/diag-test.mjs +78 -0
- package/src/agent/dsl/environment.js +387 -0
- package/src/agent/dsl/manual-test.mjs +662 -0
- package/src/agent/dsl/output-buffer-test.mjs +124 -0
- package/src/agent/dsl/pipeline-direct-test.mjs +147 -0
- package/src/agent/dsl/pipeline-test.mjs +223 -0
- package/src/agent/dsl/runtime.js +206 -0
- package/src/agent/dsl/sandbox-experiment.mjs +309 -0
- package/src/agent/dsl/transformer.js +156 -0
- package/src/agent/dsl/trigger-test.mjs +159 -0
- package/src/agent/dsl/validator.js +183 -0
- package/src/agent/index.js +8 -0
- package/src/agent/probeTool.js +9 -0
- package/src/agent/tools.js +9 -1
- package/src/delegate.js +12 -6
- package/src/index.js +5 -0
- package/src/tools/common.js +7 -0
- package/src/tools/executePlan.js +761 -0
- package/src/tools/index.js +4 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc231-x86_64-unknown-linux-musl.tar.gz +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -41,6 +41,8 @@ export interface ProbeAgentOptions {
|
|
|
41
41
|
enableDelegate?: boolean;
|
|
42
42
|
/** Architecture context filename to embed from repo root (defaults to AGENTS.md with CLAUDE.md fallback; ARCHITECTURE.md is always included when present) */
|
|
43
43
|
architectureFileName?: string;
|
|
44
|
+
/** Enable the execute_plan DSL orchestration tool */
|
|
45
|
+
enableExecutePlan?: boolean;
|
|
44
46
|
/** Enable bash tool for command execution */
|
|
45
47
|
enableBash?: boolean;
|
|
46
48
|
/** Bash tool configuration (allow/deny patterns) */
|
|
@@ -48,6 +48,7 @@ import {
|
|
|
48
48
|
extractToolDefinition,
|
|
49
49
|
delegateToolDefinition,
|
|
50
50
|
analyzeAllToolDefinition,
|
|
51
|
+
getExecutePlanToolDefinition,
|
|
51
52
|
bashToolDefinition,
|
|
52
53
|
listFilesToolDefinition,
|
|
53
54
|
searchFilesToolDefinition,
|
|
@@ -176,6 +177,7 @@ export class ProbeAgent {
|
|
|
176
177
|
* @param {string} [options.promptType] - Predefined prompt type (code-explorer, code-searcher, architect, code-review, support)
|
|
177
178
|
* @param {boolean} [options.allowEdit=false] - Allow the use of the 'implement' tool
|
|
178
179
|
* @param {boolean} [options.enableDelegate=false] - Enable the delegate tool for task distribution to subagents
|
|
180
|
+
* @param {boolean} [options.enableExecutePlan=false] - Enable the execute_plan DSL orchestration tool
|
|
179
181
|
* @param {string} [options.architectureFileName] - Architecture context filename to embed from repo root (defaults to AGENTS.md with CLAUDE.md fallback; ARCHITECTURE.md is always included when present)
|
|
180
182
|
* @param {string} [options.path] - Search directory path
|
|
181
183
|
* @param {string} [options.cwd] - Working directory for resolving relative paths (independent of allowedFolders)
|
|
@@ -225,6 +227,7 @@ export class ProbeAgent {
|
|
|
225
227
|
this.promptType = options.promptType || 'code-explorer';
|
|
226
228
|
this.allowEdit = !!options.allowEdit;
|
|
227
229
|
this.enableDelegate = !!options.enableDelegate;
|
|
230
|
+
this.enableExecutePlan = !!options.enableExecutePlan;
|
|
228
231
|
this.debug = options.debug || process.env.DEBUG === '1';
|
|
229
232
|
this.cancelled = false;
|
|
230
233
|
this.tracer = options.tracer || null;
|
|
@@ -357,6 +360,10 @@ export class ProbeAgent {
|
|
|
357
360
|
// Each ProbeAgent instance has its own limits, not shared globally
|
|
358
361
|
this.delegationManager = new DelegationManager();
|
|
359
362
|
|
|
363
|
+
// Optional global concurrency limiter shared across all ProbeAgent instances.
|
|
364
|
+
// When set, every AI API call acquires a slot before calling the provider.
|
|
365
|
+
this.concurrencyLimiter = options.concurrencyLimiter || null;
|
|
366
|
+
|
|
360
367
|
// Request timeout configuration (default 2 minutes)
|
|
361
368
|
// Validates env var to prevent NaN or unreasonable values
|
|
362
369
|
this.requestTimeout = options.requestTimeout ?? (() => {
|
|
@@ -805,6 +812,10 @@ export class ProbeAgent {
|
|
|
805
812
|
initializeTools() {
|
|
806
813
|
const isToolAllowed = (toolName) => this.allowedTools.isEnabled(toolName);
|
|
807
814
|
|
|
815
|
+
// Output buffer for DSL output() function — shared mutable object,
|
|
816
|
+
// reset at the start of each answer() call
|
|
817
|
+
this._outputBuffer = { items: [] };
|
|
818
|
+
|
|
808
819
|
const configOptions = {
|
|
809
820
|
sessionId: this.sessionId,
|
|
810
821
|
debug: this.debug,
|
|
@@ -816,6 +827,7 @@ export class ProbeAgent {
|
|
|
816
827
|
searchDelegate: this.searchDelegate,
|
|
817
828
|
allowEdit: this.allowEdit,
|
|
818
829
|
enableDelegate: this.enableDelegate,
|
|
830
|
+
enableExecutePlan: this.enableExecutePlan,
|
|
819
831
|
enableBash: this.enableBash,
|
|
820
832
|
bashConfig: this.bashConfig,
|
|
821
833
|
tracer: this.tracer,
|
|
@@ -824,6 +836,8 @@ export class ProbeAgent {
|
|
|
824
836
|
provider: this.clientApiProvider,
|
|
825
837
|
model: this.clientApiModel,
|
|
826
838
|
delegationManager: this.delegationManager, // Per-instance delegation limits
|
|
839
|
+
outputBuffer: this._outputBuffer,
|
|
840
|
+
concurrencyLimiter: this.concurrencyLimiter, // Global AI concurrency limiter
|
|
827
841
|
isToolAllowed
|
|
828
842
|
};
|
|
829
843
|
|
|
@@ -848,7 +862,10 @@ export class ProbeAgent {
|
|
|
848
862
|
if (this.enableDelegate && wrappedTools.delegateToolInstance && isToolAllowed('delegate')) {
|
|
849
863
|
this.toolImplementations.delegate = wrappedTools.delegateToolInstance;
|
|
850
864
|
}
|
|
851
|
-
if (wrappedTools.
|
|
865
|
+
if (this.enableExecutePlan && wrappedTools.executePlanToolInstance && isToolAllowed('execute_plan')) {
|
|
866
|
+
this.toolImplementations.execute_plan = wrappedTools.executePlanToolInstance;
|
|
867
|
+
} else if (wrappedTools.analyzeAllToolInstance && isToolAllowed('analyze_all')) {
|
|
868
|
+
// analyze_all is fallback when execute_plan is not enabled
|
|
852
869
|
this.toolImplementations.analyze_all = wrappedTools.analyzeAllToolInstance;
|
|
853
870
|
}
|
|
854
871
|
|
|
@@ -1363,6 +1380,16 @@ export class ProbeAgent {
|
|
|
1363
1380
|
* @private
|
|
1364
1381
|
*/
|
|
1365
1382
|
async streamTextWithRetryAndFallback(options) {
|
|
1383
|
+
// Acquire global concurrency slot if limiter is configured
|
|
1384
|
+
const limiter = this.concurrencyLimiter;
|
|
1385
|
+
if (limiter) {
|
|
1386
|
+
await limiter.acquire(null);
|
|
1387
|
+
if (this.debug) {
|
|
1388
|
+
const stats = limiter.getStats();
|
|
1389
|
+
console.log(`[DEBUG] Acquired global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1390
|
+
}
|
|
1391
|
+
}
|
|
1392
|
+
|
|
1366
1393
|
// Create AbortController for overall operation timeout
|
|
1367
1394
|
const controller = new AbortController();
|
|
1368
1395
|
const timeoutState = { timeoutId: null };
|
|
@@ -1382,12 +1409,10 @@ export class ProbeAgent {
|
|
|
1382
1409
|
const useClaudeCode = this.clientApiProvider === 'claude-code' || process.env.USE_CLAUDE_CODE === 'true';
|
|
1383
1410
|
const useCodex = this.clientApiProvider === 'codex' || process.env.USE_CODEX === 'true';
|
|
1384
1411
|
|
|
1412
|
+
let result;
|
|
1385
1413
|
if (useClaudeCode || useCodex) {
|
|
1386
1414
|
try {
|
|
1387
|
-
|
|
1388
|
-
if (result) {
|
|
1389
|
-
return result;
|
|
1390
|
-
}
|
|
1415
|
+
result = await this._tryEngineStreamPath(options, controller, timeoutState);
|
|
1391
1416
|
} catch (error) {
|
|
1392
1417
|
if (this.debug) {
|
|
1393
1418
|
const engineType = useClaudeCode ? 'Claude Code' : 'Codex';
|
|
@@ -1397,8 +1422,43 @@ export class ProbeAgent {
|
|
|
1397
1422
|
}
|
|
1398
1423
|
}
|
|
1399
1424
|
|
|
1400
|
-
|
|
1401
|
-
|
|
1425
|
+
if (!result) {
|
|
1426
|
+
// Use Vercel AI SDK with retry/fallback
|
|
1427
|
+
result = await this._executeWithVercelProvider(options, controller);
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
// Wrap textStream so limiter slot is held until stream completes
|
|
1431
|
+
if (limiter && result.textStream) {
|
|
1432
|
+
const originalStream = result.textStream;
|
|
1433
|
+
const debug = this.debug;
|
|
1434
|
+
result.textStream = (async function* () {
|
|
1435
|
+
try {
|
|
1436
|
+
for await (const chunk of originalStream) {
|
|
1437
|
+
yield chunk;
|
|
1438
|
+
}
|
|
1439
|
+
} finally {
|
|
1440
|
+
limiter.release(null);
|
|
1441
|
+
if (debug) {
|
|
1442
|
+
const stats = limiter.getStats();
|
|
1443
|
+
console.log(`[DEBUG] Released global AI concurrency slot (${stats.globalActive}/${stats.maxConcurrent}, queue: ${stats.queueSize})`);
|
|
1444
|
+
}
|
|
1445
|
+
}
|
|
1446
|
+
})();
|
|
1447
|
+
} else if (limiter) {
|
|
1448
|
+
// No textStream (shouldn't happen, but release just in case)
|
|
1449
|
+
limiter.release(null);
|
|
1450
|
+
}
|
|
1451
|
+
|
|
1452
|
+
return result;
|
|
1453
|
+
} catch (error) {
|
|
1454
|
+
// Release on error if limiter was acquired
|
|
1455
|
+
if (limiter) {
|
|
1456
|
+
limiter.release(null);
|
|
1457
|
+
if (this.debug) {
|
|
1458
|
+
console.log(`[DEBUG] Released global AI concurrency slot on error`);
|
|
1459
|
+
}
|
|
1460
|
+
}
|
|
1461
|
+
throw error;
|
|
1402
1462
|
} finally {
|
|
1403
1463
|
// Clean up timeout (for non-engine paths; engine paths clean up in the generator)
|
|
1404
1464
|
if (timeoutState.timeoutId) {
|
|
@@ -2506,8 +2566,18 @@ ${extractGuidance}
|
|
|
2506
2566
|
toolDefinitions += `${delegateToolDefinition}\n`;
|
|
2507
2567
|
}
|
|
2508
2568
|
|
|
2509
|
-
//
|
|
2510
|
-
if (isToolAllowed('
|
|
2569
|
+
// Execute Plan tool for DSL-based orchestration (requires enableExecutePlan flag, supersedes analyze_all)
|
|
2570
|
+
if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
|
|
2571
|
+
// Build available function list based on what tools are registered
|
|
2572
|
+
const dslFunctions = ['LLM', 'map', 'chunk', 'batch', 'log', 'range', 'flatten', 'unique', 'groupBy', 'parseJSON', 'storeSet', 'storeGet', 'storeAppend', 'storeKeys', 'storeGetAll', 'output'];
|
|
2573
|
+
if (isToolAllowed('search')) dslFunctions.unshift('search');
|
|
2574
|
+
if (isToolAllowed('query')) dslFunctions.unshift('query');
|
|
2575
|
+
if (isToolAllowed('extract')) dslFunctions.unshift('extract');
|
|
2576
|
+
if (isToolAllowed('listFiles')) dslFunctions.push('listFiles');
|
|
2577
|
+
if (this.enableBash && isToolAllowed('bash')) dslFunctions.push('bash');
|
|
2578
|
+
toolDefinitions += `${getExecutePlanToolDefinition(dslFunctions)}\n`;
|
|
2579
|
+
} else if (isToolAllowed('analyze_all')) {
|
|
2580
|
+
// Fallback: only register analyze_all if execute_plan is not available
|
|
2511
2581
|
toolDefinitions += `${analyzeAllToolDefinition}\n`;
|
|
2512
2582
|
}
|
|
2513
2583
|
|
|
@@ -2583,7 +2653,9 @@ The configuration is loaded from src/config.js lines 15-25 which contains the da
|
|
|
2583
2653
|
if (this.enableDelegate && isToolAllowed('delegate')) {
|
|
2584
2654
|
availableToolsList += '- delegate: Delegate big distinct tasks to specialized probe subagents.\n';
|
|
2585
2655
|
}
|
|
2586
|
-
if (isToolAllowed('
|
|
2656
|
+
if (this.enableExecutePlan && isToolAllowed('execute_plan')) {
|
|
2657
|
+
availableToolsList += '- execute_plan: Execute a DSL program to orchestrate tool calls. ALWAYS use this for: questions containing "all"/"every"/"comprehensive"/"complete inventory", multi-topic analysis, open-ended discovery questions, or any task requiring full codebase coverage.\n';
|
|
2658
|
+
} else if (isToolAllowed('analyze_all')) {
|
|
2587
2659
|
availableToolsList += '- analyze_all: Process ALL data matching a query using map-reduce (for aggregate questions needing 100% coverage).\n';
|
|
2588
2660
|
}
|
|
2589
2661
|
if (this.enableBash && isToolAllowed('bash')) {
|
|
@@ -2813,6 +2885,11 @@ Follow these instructions carefully:
|
|
|
2813
2885
|
// Track initial history length for storage
|
|
2814
2886
|
const oldHistoryLength = this.history.length;
|
|
2815
2887
|
|
|
2888
|
+
// Reset output buffer for this answer() call
|
|
2889
|
+
if (this._outputBuffer) {
|
|
2890
|
+
this._outputBuffer.items = [];
|
|
2891
|
+
}
|
|
2892
|
+
|
|
2816
2893
|
// START CHECKPOINT: Initialize task management for this request
|
|
2817
2894
|
if (this.enableTasks) {
|
|
2818
2895
|
try {
|
|
@@ -3320,8 +3397,10 @@ Follow these instructions carefully:
|
|
|
3320
3397
|
if (this.enableDelegate && this.allowedTools.isEnabled('delegate')) {
|
|
3321
3398
|
validTools.push('delegate');
|
|
3322
3399
|
}
|
|
3323
|
-
//
|
|
3324
|
-
if (this.allowedTools.isEnabled('
|
|
3400
|
+
// Execute Plan tool (requires enableExecutePlan flag, supersedes analyze_all)
|
|
3401
|
+
if (this.enableExecutePlan && this.allowedTools.isEnabled('execute_plan')) {
|
|
3402
|
+
validTools.push('execute_plan');
|
|
3403
|
+
} else if (this.allowedTools.isEnabled('analyze_all')) {
|
|
3325
3404
|
validTools.push('analyze_all');
|
|
3326
3405
|
}
|
|
3327
3406
|
// Task tool (require both enableTasks flag AND allowedTools permission)
|
|
@@ -4546,6 +4625,19 @@ Convert your previous response content into actual JSON data that follows this s
|
|
|
4546
4625
|
}
|
|
4547
4626
|
}
|
|
4548
4627
|
|
|
4628
|
+
// Append DSL output buffer directly to response (bypasses LLM rewriting)
|
|
4629
|
+
if (this._outputBuffer && this._outputBuffer.items.length > 0 && !options._schemaFormatted) {
|
|
4630
|
+
const outputContent = this._outputBuffer.items.join('\n\n');
|
|
4631
|
+
finalResult = (finalResult || '') + '\n\n' + outputContent;
|
|
4632
|
+
if (options.onStream) {
|
|
4633
|
+
options.onStream('\n\n' + outputContent);
|
|
4634
|
+
}
|
|
4635
|
+
if (this.debug) {
|
|
4636
|
+
console.log(`[DEBUG] Appended ${this._outputBuffer.items.length} output buffer items (${outputContent.length} chars) to final result`);
|
|
4637
|
+
}
|
|
4638
|
+
this._outputBuffer.items = [];
|
|
4639
|
+
}
|
|
4640
|
+
|
|
4549
4641
|
return finalResult;
|
|
4550
4642
|
|
|
4551
4643
|
} catch (error) {
|
|
@@ -4708,6 +4800,7 @@ Convert your previous response content into actual JSON data that follows this s
|
|
|
4708
4800
|
promptType: this.promptType,
|
|
4709
4801
|
allowEdit: this.allowEdit,
|
|
4710
4802
|
enableDelegate: this.enableDelegate,
|
|
4803
|
+
enableExecutePlan: this.enableExecutePlan,
|
|
4711
4804
|
architectureFileName: this.architectureFileName,
|
|
4712
4805
|
// Pass allowedFolders which will recompute workspaceRoot correctly
|
|
4713
4806
|
allowedFolders: [...this.allowedFolders],
|
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Agent-realistic test: the LLM writes DSL scripts itself.
|
|
4
|
+
*
|
|
5
|
+
* This simulates the real production flow:
|
|
6
|
+
* 1. We give the LLM a task + the tool definition (system prompt)
|
|
7
|
+
* 2. The LLM generates the DSL script
|
|
8
|
+
* 3. The runtime validates, transforms, and executes it
|
|
9
|
+
* 4. The result comes back
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* node npm/src/agent/dsl/agent-test.mjs
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { createDSLRuntime } from './runtime.js';
|
|
16
|
+
import { getExecutePlanToolDefinition } from '../../tools/executePlan.js';
|
|
17
|
+
import { search } from '../../search.js';
|
|
18
|
+
import { extract } from '../../extract.js';
|
|
19
|
+
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
20
|
+
import { generateText } from 'ai';
|
|
21
|
+
import { config } from 'dotenv';
|
|
22
|
+
import { resolve, dirname } from 'path';
|
|
23
|
+
import { fileURLToPath } from 'url';
|
|
24
|
+
|
|
25
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
26
|
+
const projectRoot = resolve(__dirname, '../../../..');
|
|
27
|
+
|
|
28
|
+
config({ path: resolve(projectRoot, '.env') });
|
|
29
|
+
|
|
30
|
+
const apiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY;
|
|
31
|
+
if (!apiKey) {
|
|
32
|
+
console.error('ERROR: No Google API key found.');
|
|
33
|
+
process.exit(1);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
const google = createGoogleGenerativeAI({ apiKey });
|
|
37
|
+
|
|
38
|
+
/**
 * Send one instruction/data pair to Gemini and return the generated text.
 *
 * @param {string} instruction - Passed to the model as the system prompt.
 * @param {*} data - Payload for the user prompt. Strings are sent as-is, other
 *   non-nullish values are pretty-printed as JSON, and null/undefined/empty
 *   input is replaced by the literal '(empty)'.
 * @param {{temperature?: number, maxTokens?: number}} [options] - Sampling
 *   overrides; defaults are temperature 0.3 and 4000 max tokens.
 * @returns {Promise<string>} The `text` field of the generateText() result.
 */
async function llmCall(instruction, data, options = {}) {
  const dataStr = data == null ? '' : (typeof data === 'string' ? data : JSON.stringify(data, null, 2));
  // Hard cap on prompt size: anything past 100000 characters is dropped.
  const prompt = (dataStr || '(empty)').substring(0, 100000);
  const result = await generateText({
    model: google('gemini-2.5-flash'),
    system: instruction,
    prompt,
    // `??` (not `||`) so an explicit temperature of 0 is honoured instead of
    // being silently replaced by the default.
    temperature: options.temperature ?? 0.3,
    // A token budget of 0 is not usable, so any falsy value falls back to the default.
    maxTokens: options.maxTokens || 4000,
  });
  return result.text;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Play the "agent" role: ask Gemini to produce a DSL script for a task.
 *
 * @param {string} systemPrompt - System prompt handed to the model.
 * @param {string} userTask - Task description sent as the user prompt.
 * @returns {Promise<string>} The `text` field of the generateText() result.
 */
async function agentGenerate(systemPrompt, userTask) {
  const request = {
    model: google('gemini-2.5-flash'),
    system: systemPrompt,
    prompt: userTask,
    temperature: 0.3,
    maxTokens: 4000,
  };
  const response = await generateText(request);
  return response.text;
}
|
|
62
|
+
|
|
63
|
+
const cwd = projectRoot;
|
|
64
|
+
|
|
65
|
+
// Tool adapters handed to the DSL runtime. Every tool exposes a single async
// `execute(params)` that resolves with the underlying call's result, or with
// "<prefix><error message>" when that call (or reading `params`) throws.
const toolImplementations = (() => {
  // Wrap `invoke` so failures are reported as a string instead of rejecting.
  const guarded = (errorPrefix, invoke) => ({
    execute: async (params) => {
      try {
        return await invoke(params);
      } catch (failure) {
        return errorPrefix + failure.message;
      }
    },
  });

  return {
    search: guarded("Search error: ", (params) =>
      search({
        query: params.query,
        path: params.path || cwd,
        cwd,
        maxTokens: 20000,
        timeout: 30,
        exact: params.exact || false,
      })
    ),
    extract: guarded("Extract error: ", (params) =>
      extract({
        targets: params.targets,
        input_content: params.input_content,
        cwd,
      })
    ),
    // listFiles is served by search() in files-only mode, always rooted at cwd.
    listFiles: guarded("listFiles error: ", (params) =>
      search({
        query: params.pattern || '*',
        path: cwd,
        cwd,
        filesOnly: true,
        maxTokens: 10000,
      })
    ),
  };
})();
|
|
111
|
+
|
|
112
|
+
const runtime = createDSLRuntime({
|
|
113
|
+
toolImplementations,
|
|
114
|
+
llmCall,
|
|
115
|
+
mapConcurrency: 3,
|
|
116
|
+
timeoutMs: 60000, // 60s timeout per execution
|
|
117
|
+
maxLoopIterations: 5000, // loop guard
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
/**
 * Remove the wrappers LLMs tend to put around generated code: Markdown fences
 * (``` at the start of a line, optionally tagged js/javascript, or ``` at the
 * end of a line) and <execute_plan> / <code> XML tags.
 *
 * @param {*} code - Raw model output; falsy values are treated as ''.
 * @returns {string} The unwrapped code with surrounding whitespace trimmed.
 */
function stripCodeWrapping(code) {
  const lineStartFence = /^```(?:javascript|js)?\n?/gm;
  const lineEndFence = /```$/gm;
  const xmlWrapperTag = /<\/?(?:execute_plan|code)>/g;

  // Apply the three removals in order, then trim what is left.
  return [lineStartFence, lineEndFence, xmlWrapperTag]
    .reduce((text, pattern) => text.replace(pattern, ''), String(code || ''))
    .trim();
}
|
|
129
|
+
|
|
130
|
+
// The tool definition that goes into the agent's system prompt
|
|
131
|
+
const toolDef = getExecutePlanToolDefinition(['search', 'extract', 'LLM', 'map', 'chunk', 'listFiles', 'log', 'range', 'flatten', 'unique', 'groupBy']);
|
|
132
|
+
|
|
133
|
+
const SYSTEM_PROMPT = `You are a coding assistant with access to the execute_plan tool.
|
|
134
|
+
|
|
135
|
+
${toolDef}
|
|
136
|
+
|
|
137
|
+
When the user asks a question that requires searching a codebase, batch processing, or handling large data,
|
|
138
|
+
write a DSL script to handle it. Return ONLY the JavaScript code — no markdown fences, no explanation,
|
|
139
|
+
no \`\`\` blocks. Just the raw code that goes into the execute_plan tool.
|
|
140
|
+
|
|
141
|
+
CRITICAL RULES:
|
|
142
|
+
- Do NOT use async/await — the runtime handles it.
|
|
143
|
+
- Do NOT use template literals (backticks) — use string concatenation with +.
|
|
144
|
+
- Do NOT use shorthand properties like { key } — use { key: key }.
|
|
145
|
+
- search() returns a STRING, not an array. Use chunk() to split it into an array.
|
|
146
|
+
- map(items, fn) requires an ARRAY as first argument. Do NOT pass a string to map().
|
|
147
|
+
- Do NOT use .map(), .forEach(), .filter(), .join() array methods. Use for..of loops or the global map() function.
|
|
148
|
+
- To join an array, use a for..of loop: var s = ""; for (const item of arr) { s = s + item + "\\n"; }
|
|
149
|
+
- Do NOT define helper functions that call tools. Write all logic inline or use for..of loops.
|
|
150
|
+
- Use String(value) to safely convert to string before calling .trim() or .split().
|
|
151
|
+
- Do NOT use regex literals (/pattern/) — use String methods like indexOf, includes, startsWith instead.
|
|
152
|
+
- ONLY call functions listed in the tool definition. Do NOT invent or guess function names.
|
|
153
|
+
- ALWAYS write executable DSL code, never answer in plain text.
|
|
154
|
+
- Always return a value at the end.`;
|
|
155
|
+
|
|
156
|
+
// ── Test runner ──
|
|
157
|
+
let testNum = 0;
|
|
158
|
+
let passed = 0;
|
|
159
|
+
let failed = 0;
|
|
160
|
+
|
|
161
|
+
// Number of self-healing retries allowed after the first failed execution,
// so runAgentTest executes a script at most MAX_RETRIES + 1 times.
const MAX_RETRIES = 2;
|
|
162
|
+
|
|
163
|
+
/**
 * Run one agent-realistic scenario end to end and record the outcome in the
 * module-level testNum / passed / failed counters.
 *
 * Flow: the LLM writes a DSL script for `taskDescription`; the script is
 * executed by `runtime`; on failure the LLM is asked to repair it (up to
 * MAX_RETRIES times); the final result is handed to `check`.
 *
 * @param {string} taskDescription - Natural-language task given to the agent.
 * @param {(result: object) => (true|undefined|*)} check - Receives the runtime
 *   result. Returning true or undefined passes the test; any other return
 *   value is printed as the failure reason.
 * @returns {Promise<void>} Never rejects: exceptions are logged as CRASHED and
 *   counted as a failure.
 */
async function runAgentTest(taskDescription, check) {
  // The runtime result is treated defensively: `logs` may be absent and
  // `error` / `result` may be any type, so normalise before formatting.
  const logsOf = (res) => (Array.isArray(res.logs) ? res.logs : []);
  const previewOf = (value) => {
    if (typeof value === 'string') return value.substring(0, 300);
    try {
      // JSON.stringify returns undefined for undefined/functions and throws on
      // cycles or BigInt; neither may turn a passing test into a crash.
      return String(JSON.stringify(value, null, 2)).substring(0, 300);
    } catch {
      return String(value).substring(0, 300);
    }
  };

  testNum++;
  console.log(`\n${'─'.repeat(70)}`);
  console.log(`▶ Test ${testNum}: ${taskDescription}`);

  const start = Date.now();

  try {
    // Step 1: Agent generates the DSL script
    console.log(' [1/4] Agent generating DSL script...');
    const generatedCode = await agentGenerate(SYSTEM_PROMPT, taskDescription);
    let currentCode = stripCodeWrapping(generatedCode);
    const generatedLines = currentCode.split('\n');
    console.log(` Generated (${generatedLines.length} lines):`);
    console.log(generatedLines.slice(0, 6).map((l) => ' ' + l).join('\n'));
    if (generatedLines.length > 6) console.log(' ...');

    // Step 2: Execute with self-healing retries
    let result;
    let attempt = 0;

    while (attempt <= MAX_RETRIES) {
      console.log(` [2/4] Executing DSL script${attempt > 0 ? ' (retry ' + attempt + ')' : ''}...`);
      result = await runtime.execute(currentCode, taskDescription);

      if (result.status === 'success') break;

      // Execution failed — try self-healing
      const failureLogs = logsOf(result);
      const logOutput = failureLogs.length > 0 ? '\nLogs: ' + failureLogs.join(' | ') : '';
      const errorMsg = String(result.error) + logOutput;
      console.log(` [!] Execution failed: ${errorMsg.substring(0, 150)}`);

      if (attempt >= MAX_RETRIES) break;

      console.log(` [3/4] Self-healing — asking LLM to fix (attempt ${attempt + 1})...`);
      const fixPrompt = `The following DSL script failed with an error. Fix the script and return ONLY the corrected JavaScript code — no markdown, no explanation, no backtick fences.

ORIGINAL SCRIPT:
${currentCode}

ERROR:
${errorMsg}

RULES REMINDER:
- search(), listFiles(), extract() all return STRINGS, not arrays.
- Use chunk(stringData) to split a string into an array of chunks.
- map(items, fn) requires an ARRAY as first argument. Do NOT pass strings to map().
- Do NOT use .map(), .forEach(), .filter(), .join() — use for..of loops instead.
- Do NOT define helper functions that call tools — write logic inline.
- Do NOT use async/await, template literals, or shorthand properties.
- Do NOT use regex literals (/pattern/) — use String methods like indexOf, includes, startsWith instead.
- String concatenation with +, not template literals.`;

      const fixedCode = await llmCall(fixPrompt, '', { maxTokens: 4000, temperature: 0.2 });
      currentCode = stripCodeWrapping(fixedCode);

      if (!currentCode) {
        // Nothing to run: leave `result` as the last failure and report it below.
        console.log(' [!] Self-heal returned empty code');
        break;
      }

      const fixedLines = currentCode.split('\n');
      console.log(` Fixed code (${fixedLines.length} lines):`);
      console.log(fixedLines.slice(0, 4).map((l) => ' ' + l).join('\n'));
      if (fixedLines.length > 4) console.log(' ...');

      attempt++;
    }

    const elapsed = Date.now() - start;
    console.log(` [4/4] Checking result... (${elapsed}ms)`);

    if (result.status === 'error') {
      const finalLogs = logsOf(result);
      console.log(` ✗ EXECUTION ERROR after ${attempt} retries (${elapsed}ms)`);
      console.log(` Error: ${String(result.error).substring(0, 200)}`);
      if (finalLogs.length) console.log(` Logs: ${finalLogs.join(' | ')}`);
      failed++;
      return;
    }

    const checkResult = check(result);
    if (checkResult === true || checkResult === undefined) {
      const healNote = attempt > 0 ? ` (self-healed after ${attempt} ${attempt === 1 ? 'retry' : 'retries'})` : '';
      console.log(` ✓ PASSED${healNote} (${elapsed}ms)`);
      const resultPreview = previewOf(result.result);
      console.log(` Result: ${resultPreview}${resultPreview.length >= 300 ? '...' : ''}`);
      // Hide the runtime's own bookkeeping entries; show only script-emitted logs.
      const userLogs = logsOf(result).filter((l) => !String(l).startsWith('[runtime]'));
      if (userLogs.length) {
        console.log(` Logs: ${userLogs.join(' | ')}`);
      }
      passed++;
    } else {
      console.log(` ✗ CHECK FAILED (${elapsed}ms) — ${checkResult}`);
      failed++;
    }
  } catch (e) {
    console.log(` ✗ CRASHED — ${e?.message ?? e}`);
    failed++;
  }
}
|
|
264
|
+
|
|
265
|
+
// ── Agent tests ──
|
|
266
|
+
/**
 * Run every agent-realistic scenario in order, print a summary, and exit the
 * process with status 1 if any scenario failed (0 otherwise).
 *
 * @returns {Promise<void>}
 */
async function main() {
  const banner = '═'.repeat(70);

  // Reusable checks. Each returns true on success or a failure reason string.
  const requireResult = (r) => {
    if (!r.result) return 'No result';
    return true;
  };
  const requireLongResult = (tooShortReason) => (r) => {
    if (!r.result) return 'No result';
    const s = typeof r.result === 'string' ? r.result : JSON.stringify(r.result);
    if (s.length < 50) return tooShortReason;
    return true;
  };

  const scenarios = [
    // Test 1: Simple search + summarize
    {
      task: 'Search this codebase for how error handling is done and give me a brief summary.',
      check: (r) => {
        if (typeof r.result !== 'string') return 'Expected string result';
        if (r.result.length < 50) return 'Summary too short';
        return true;
      },
    },
    // Test 2: Find and count patterns
    {
      task: 'Write a DSL script to search this codebase for tool definitions (search, extract, query, etc.). Count how many unique tools are defined and return an object with the count and an array of tool names.',
      check: requireResult,
    },
    // Test 3: Multi-file analysis
    {
      task: 'Look at the files in npm/src/agent/dsl/ directory — search for each one, and for each file give me a one-sentence description of what it does. Return as a list.',
      check: requireLongResult('Result too short'),
    },
    // Test 4: Code quality check
    {
      task: 'Search for all TODO and FIXME comments in this codebase. Group them by urgency (TODO vs FIXME) and summarize what needs attention.',
      check: requireResult,
    },
    // Test 5: Complex analysis requiring chunking
    {
      task: 'Analyze the test coverage of this project. Search for test files, see what modules they test, and identify any modules that might be missing tests. Give me a brief report.',
      check: requireLongResult('Report too short'),
    },
    // Test 6: Data extraction + classification
    {
      task: 'Find all the Zod schemas defined in this codebase (search for "z.object"). For each schema, extract its name and list its fields. Return a structured summary.',
      check: requireResult,
    },
  ];

  console.log(banner);
  console.log(' Agent-Realistic DSL Tests — LLM writes its own scripts');
  console.log(banner);

  // Scenarios run strictly one after another.
  for (const { task, check } of scenarios) {
    await runAgentTest(task, check);
  }

  // ── Summary ──
  console.log(`\n${banner}`);
  console.log(` Agent-Realistic Results: ${passed} passed, ${failed} failed, ${testNum} total`);
  console.log(banner);

  process.exit(failed > 0 ? 1 : 0);
}
|
|
337
|
+
|
|
338
|
+
main().catch(e => {
|
|
339
|
+
console.error('Fatal error:', e);
|
|
340
|
+
process.exit(1);
|
|
341
|
+
});
|