@zhijiewang/openharness 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/index.js +45 -6
- package/dist/harness/config.d.ts +12 -1
- package/dist/harness/config.js +5 -0
- package/dist/harness/hooks.d.ts +19 -4
- package/dist/harness/hooks.js +82 -23
- package/dist/harness/rules.js +32 -4
- package/dist/harness/submit-handler.js +18 -2
- package/dist/harness/traces.d.ts +58 -0
- package/dist/harness/traces.js +178 -0
- package/dist/main.js +1 -0
- package/dist/query/compress.js +5 -1
- package/dist/query/context-manager.d.ts +56 -0
- package/dist/query/context-manager.js +111 -0
- package/dist/query/index.js +5 -1
- package/dist/query/tools.js +7 -0
- package/dist/sdk/index.d.ts +75 -0
- package/dist/sdk/index.js +135 -0
- package/dist/services/EvaluatorLoop.d.ts +61 -0
- package/dist/services/EvaluatorLoop.js +157 -0
- package/dist/services/MetaHarness.d.ts +61 -0
- package/dist/services/MetaHarness.js +210 -0
- package/dist/tools/AgentTool/index.js +13 -2
- package/dist/tools/DiagnosticsTool/index.d.ts +3 -3
- package/dist/tools/DiagnosticsTool/index.js +37 -8
- package/dist/tools/MonitorTool/index.d.ts +21 -0
- package/dist/tools/MonitorTool/index.js +114 -0
- package/dist/tools/PowerShellTool/index.d.ts +15 -0
- package/dist/tools/PowerShellTool/index.js +32 -0
- package/dist/tools.js +4 -0
- package/dist/types/permissions.js +42 -2
- package/package.json +6 -2
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Meta-Harness — self-optimizing agent harness.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by AutoAgent (which hit #1 on SpreadsheetBench by letting
|
|
5
|
+
* the agent optimize its own harness overnight).
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* 1. Run benchmark → get baseline score
|
|
9
|
+
* 2. Ask LLM to suggest a config change
|
|
10
|
+
* 3. Apply change → re-run benchmark
|
|
11
|
+
* 4. If score improved, keep; otherwise revert
|
|
12
|
+
* 5. Repeat for N iterations
|
|
13
|
+
*
|
|
14
|
+
* What it optimizes:
|
|
15
|
+
* - System prompt (trim, rephrase, add instructions)
|
|
16
|
+
* - Tool selection (which tools are core vs deferred)
|
|
17
|
+
* - Model router configuration
|
|
18
|
+
* - Compression strategy
|
|
19
|
+
* - Permission rules
|
|
20
|
+
*/
|
|
21
|
+
import type { Provider } from '../providers/base.js';
|
|
22
|
+
/** Outcome of a single benchmark command run. */
export type BenchmarkResult = {
    /** Score in the range 0-1 extracted from the command's output. */
    score: number;
    /** Tail of the captured command output, kept for diagnostics. */
    details: string;
    /** Wall-clock time the run took, in milliseconds. */
    durationMs: number;
};
|
|
27
|
+
/** A configuration change that was suggested, benchmarked and kept. */
export type OptimizationChange = {
    /** Human-readable summary of the change. */
    description: string;
    /** Path of the configuration field the change targets. */
    field: string;
    /** Value of the field before the change, when known. */
    oldValue: unknown;
    /** Value suggested for the field. */
    newValue: unknown;
    /** Score gained by the change (new score minus previous best). */
    impact: number;
};
|
|
34
|
+
/** Summary of a complete optimization run. */
export type OptimizationResult = {
    /** Baseline score measured before any change was tried. */
    initialScore: number;
    /** Best score reached by the end of the run. */
    finalScore: number;
    /** Number of iterations that were requested. */
    iterations: number;
    /** Changes that improved the score, in the order they were accepted. */
    changes: OptimizationChange[];
    /** Wall-clock duration of the whole run, in milliseconds. */
    totalDurationMs: number;
};
|
|
41
|
+
/**
 * Run a benchmark command and extract a score from its output.
 *
 * The score is a pass rate in the range 0-1 parsed from the test output;
 * no speed component is included. `durationMs` on the result reports how
 * long the run took.
 *
 * @param command Shell command to execute.
 * @returns The score, the tail of the output and the elapsed time.
 */
export declare function runBenchmark(command: string): Promise<BenchmarkResult>;
|
|
46
|
+
/**
 * Benchmark-driven optimization loop: asks a provider for one configuration
 * change at a time and keeps only the changes that raise the benchmark score.
 */
export declare class MetaHarness {
    /** Provider used to request configuration suggestions. */
    private provider;
    /** Shell command that is benchmarked after each suggestion. */
    private benchmarkCommand;
    /** Optional model identifier forwarded to the provider. */
    private model?;
    constructor(provider: Provider, benchmarkCommand: string, model?: string | undefined);
    /**
     * Run the optimization loop.
     *
     * @param iterations Number of suggestion/benchmark rounds to attempt.
     * @returns Baseline and final scores plus the changes that were kept.
     */
    optimize(iterations: number): Promise<OptimizationResult>;
    private suggestChange;
    private applyChange;
    private revertChange;
}
|
|
59
|
+
/**
 * Format optimization results for display.
 *
 * @param result Summary returned by `MetaHarness.optimize`.
 * @returns A multi-line, human-readable report.
 */
export declare function formatOptimizationResult(result: OptimizationResult): string;
|
|
61
|
+
//# sourceMappingURL=MetaHarness.d.ts.map
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Meta-Harness — self-optimizing agent harness.
|
|
3
|
+
*
|
|
4
|
+
* Inspired by AutoAgent (which hit #1 on SpreadsheetBench by letting
|
|
5
|
+
* the agent optimize its own harness overnight).
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* 1. Run benchmark → get baseline score
|
|
9
|
+
* 2. Ask LLM to suggest a config change
|
|
10
|
+
* 3. Apply change → re-run benchmark
|
|
11
|
+
* 4. If score improved, keep; otherwise revert
|
|
12
|
+
* 5. Repeat for N iterations
|
|
13
|
+
*
|
|
14
|
+
* What it optimizes:
|
|
15
|
+
* - System prompt (trim, rephrase, add instructions)
|
|
16
|
+
* - Tool selection (which tools are core vs deferred)
|
|
17
|
+
* - Model router configuration
|
|
18
|
+
* - Compression strategy
|
|
19
|
+
* - Permission rules
|
|
20
|
+
*/
|
|
21
|
+
import { readOhConfig, invalidateConfigCache } from '../harness/config.js';
|
|
22
|
+
import { copyFileSync, existsSync } from 'node:fs';
|
|
23
|
+
import { join } from 'node:path';
|
|
24
|
+
import { execSync } from 'node:child_process';
|
|
25
|
+
// ── Benchmark Runner ──
|
|
26
|
+
/**
 * Run a benchmark command and extract a score from its output.
 *
 * The command runs synchronously through the shell with a 5 minute timeout.
 * The score is whatever `extractScore` derives from the captured output
 * (a 0-1 pass rate); no speed component is added. When the command exits
 * non-zero or times out, the score parsed from the captured output is halved.
 *
 * @param command Shell command to execute.
 * @returns `{ score, details, durationMs }`, where `details` is the last
 *          500 characters of the captured output.
 */
export async function runBenchmark(command) {
    const start = Date.now();
    try {
        const output = execSync(command, {
            encoding: 'utf-8',
            timeout: 300_000, // 5 minute max
            stdio: ['pipe', 'pipe', 'pipe'],
        });
        return {
            score: extractScore(output),
            details: output.slice(-500),
            durationMs: Date.now() - start,
        };
    }
    catch (err) {
        // With stdio piped, a failed run exposes stdout and stderr as strings.
        // An empty stdout is '' (not null), so `??` would never reach stderr;
        // collect every non-empty stream instead and fall back to the message.
        const captured = [err?.stdout, err?.stderr]
            .filter((part) => typeof part === 'string' && part.length > 0)
            .join('\n');
        const output = captured || String(err?.message ?? '');
        const score = extractScore(output);
        return {
            score: score > 0 ? score * 0.5 : 0, // Penalty for non-zero exit
            details: output.slice(-500),
            durationMs: Date.now() - start,
        };
    }
}
|
|
56
|
+
/**
 * Extract a 0-1 score from test output.
 *
 * Recognised formats, in priority order:
 *  1. "X passed, Y failed" style summaries (case-insensitive)
 *  2. TAP summaries ("# pass N" / "# fail N")
 *  3. A keyword heuristic: 0.3 when the output mentions "error" or "FAIL",
 *     0.8 otherwise.
 *
 * A failure count without any pass count is scored as 0 instead of falling
 * through to the optimistic heuristic. A lone "0 failed" proves nothing
 * about how many tests ran, so it still falls through.
 *
 * @param output Captured benchmark output.
 * @returns A score between 0 and 1.
 */
function extractScore(output) {
    // Turn optional pass/fail matches into a pass rate, or undefined when
    // the matches carry no usable information.
    const passRate = (passMatch, failMatch) => {
        const passed = passMatch ? parseInt(passMatch[1], 10) : 0;
        const failed = failMatch ? parseInt(failMatch[1], 10) : 0;
        if (!passMatch && failed === 0) {
            return undefined;
        }
        const total = passed + failed;
        return total > 0 ? passed / total : 0;
    };
    // "X passed, Y failed" → pass_rate
    const summaryRate = passRate(output.match(/(\d+)\s+pass/i), output.match(/(\d+)\s+fail/i));
    if (summaryRate !== undefined) {
        return summaryRate;
    }
    // "# pass N" (TAP format)
    const tapRate = passRate(output.match(/# pass\s+(\d+)/), output.match(/# fail\s+(\d+)/));
    if (tapRate !== undefined) {
        return tapRate;
    }
    // No recognisable summary: guess from keywords.
    return output.includes('error') || output.includes('FAIL') ? 0.3 : 0.8;
}
|
|
80
|
+
// ── Meta-Harness ──
|
|
81
|
+
/**
 * Benchmark-driven optimization loop.
 *
 * Each iteration asks the provider for one configuration change, applies it,
 * re-runs the benchmark and keeps the change only when the score improved.
 */
export class MetaHarness {
    provider;
    benchmarkCommand;
    model;
    /**
     * @param provider Provider whose `complete` method is used for suggestions.
     * @param benchmarkCommand Shell command that is benchmarked each round.
     * @param model Optional model identifier forwarded to the provider.
     */
    constructor(provider, benchmarkCommand, model) {
        this.provider = provider;
        this.benchmarkCommand = benchmarkCommand;
        this.model = model;
    }
    /**
     * Run the optimization loop.
     *
     * @param iterations Number of suggestion/benchmark rounds to attempt.
     * @returns Baseline and final scores, accepted changes and total duration.
     */
    async optimize(iterations) {
        const totalStart = Date.now();
        const changes = [];
        // Backup current config
        const configPath = join('.oh', 'config.yaml');
        const backupPath = join('.oh', 'config.yaml.backup');
        if (existsSync(configPath)) {
            copyFileSync(configPath, backupPath);
        }
        // Get baseline score
        const baseline = await runBenchmark(this.benchmarkCommand);
        let bestScore = baseline.score;
        for (let i = 0; i < iterations; i++) {
            // Ask LLM to suggest an optimization
            const suggestion = await this.suggestChange(bestScore, changes);
            if (!suggestion)
                continue;
            this.applyChange(suggestion);
            const result = await runBenchmark(this.benchmarkCommand);
            if (result.score > bestScore) {
                // Keep the change
                changes.push({ ...suggestion, impact: result.score - bestScore });
                bestScore = result.score;
                // Checkpoint the accepted state. revertChange restores from the
                // backup, so without this a later rejected suggestion would
                // roll back every change accepted so far, not just its own.
                if (existsSync(configPath)) {
                    copyFileSync(configPath, backupPath);
                }
            }
            else {
                this.revertChange(suggestion);
            }
        }
        return {
            initialScore: baseline.score,
            finalScore: bestScore,
            iterations,
            changes,
            totalDurationMs: Date.now() - totalStart,
        };
    }
    /**
     * Ask the provider for one configuration change.
     *
     * @param currentScore Best score so far, shown to the model.
     * @param previousChanges Changes already accepted, listed in the prompt.
     * @returns The suggested change, or null when the request fails or the
     *          reply contains no parseable JSON object.
     */
    async suggestChange(currentScore, previousChanges) {
        const config = readOhConfig();
        // JSON.stringify(undefined) returns undefined; fall back to an empty
        // object so a missing config cannot make the slice below throw.
        const configStr = JSON.stringify(config ?? {}, null, 2);
        const prevChangesStr = previousChanges.length > 0
            ? `\nPrevious successful changes:\n${previousChanges.map(c => `- ${c.description} (+${c.impact.toFixed(3)})`).join('\n')}`
            : '';
        const prompt = `You are optimizing an AI agent harness configuration. Current score: ${currentScore.toFixed(3)}/1.0.
${prevChangesStr}

Current config:
${configStr.slice(0, 2000)}

Suggest ONE specific configuration change that might improve the benchmark score. Focus on:
- System prompt optimization
- Tool selection (which tools are core)
- Permission rules that speed up automation
- Verification configuration

Respond with JSON: {"description": "what to change", "field": "config.path", "newValue": "the new value"}`;
        try {
            const response = await this.provider.complete([{ role: 'user', content: prompt, uuid: `meta-${Date.now()}`, timestamp: Date.now() }], 'You are a harness optimization engine. Respond ONLY with valid JSON.', undefined, this.model);
            const jsonMatch = response.content.match(/\{[\s\S]*\}/);
            if (!jsonMatch)
                return null;
            const parsed = JSON.parse(jsonMatch[0]);
            // Model output is untrusted: only accept string description/field.
            return {
                description: typeof parsed.description === 'string' ? parsed.description : 'unknown change',
                field: typeof parsed.field === 'string' ? parsed.field : 'unknown',
                oldValue: undefined,
                newValue: parsed.newValue,
            };
        }
        catch {
            // Best effort: a failed request or malformed JSON skips this round.
            return null;
        }
    }
    /**
     * Apply a suggested change.
     *
     * Placeholder: only the config cache is invalidated; the configuration
     * file itself is not modified yet.
     */
    applyChange(change) {
        invalidateConfigCache();
        // For now, log the change. Full config mutation would require
        // a safe config updater that handles nested paths.
        // This is a placeholder — real implementation would use lodash.set or similar.
    }
    /**
     * Undo a rejected change by restoring the config file from the backup
     * (when one exists) and invalidating the config cache.
     */
    revertChange(change) {
        const backupPath = join('.oh', 'config.yaml.backup');
        const configPath = join('.oh', 'config.yaml');
        if (existsSync(backupPath)) {
            copyFileSync(backupPath, configPath);
        }
        invalidateConfigCache();
    }
}
|
|
186
|
+
/**
 * Format optimization results for display.
 *
 * The final score is annotated with the relative improvement over the
 * baseline. A relative figure is undefined for a zero baseline, so in that
 * case a non-zero gain is shown as an absolute delta instead of "+0%".
 *
 * @param result Summary returned by `MetaHarness.optimize`.
 * @returns A multi-line, human-readable report.
 */
export function formatOptimizationResult(result) {
    const improvement = result.finalScore - result.initialScore;
    const sign = improvement >= 0 ? '+' : '';
    let delta;
    if (result.initialScore > 0) {
        delta = `${sign}${(improvement / result.initialScore * 100).toFixed(1)}%`;
    }
    else if (improvement === 0) {
        delta = `${sign}0%`;
    }
    else {
        delta = `${sign}${improvement.toFixed(3)} abs`;
    }
    const lines = [
        `Meta-Harness Optimization Complete`,
        `${'─'.repeat(40)}`,
        `Initial score: ${result.initialScore.toFixed(3)}`,
        `Final score: ${result.finalScore.toFixed(3)} (${delta})`,
        `Iterations: ${result.iterations}`,
        `Duration: ${Math.round(result.totalDurationMs / 1000)}s`,
        '',
    ];
    if (result.changes.length > 0) {
        lines.push('Applied changes:');
        for (const c of result.changes) {
            lines.push(` +${c.impact.toFixed(3)} ${c.description}`);
        }
    }
    else {
        lines.push('No improvements found in this run.');
    }
    return lines.join('\n');
}
|
|
210
|
+
//# sourceMappingURL=MetaHarness.js.map
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
import { createWorktree, removeWorktree, hasWorktreeChanges, isGitRepo } from "../../git/index.js";
|
|
3
|
+
import { emitHook } from "../../harness/hooks.js";
|
|
3
4
|
const inputSchema = z.object({
|
|
4
5
|
prompt: z.string(),
|
|
5
6
|
description: z.string().optional(),
|
|
@@ -80,9 +81,11 @@ export const AgentTool = {
|
|
|
80
81
|
maxTurns: 20,
|
|
81
82
|
abortSignal: context.abortSignal,
|
|
82
83
|
};
|
|
84
|
+
const agentId = Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
|
|
85
|
+
emitHook("subagentStart", { agentId, toolName: input.subagent_type ?? 'general' });
|
|
83
86
|
// Background execution: start agent and return immediately
|
|
84
87
|
if (input.run_in_background) {
|
|
85
|
-
const bgId =
|
|
88
|
+
const bgId = agentId;
|
|
86
89
|
const runAgent = async () => {
|
|
87
90
|
let finalText = "";
|
|
88
91
|
const originalCwd = process.cwd();
|
|
@@ -190,7 +193,15 @@ export const AgentTool = {
|
|
|
190
193
|
}
|
|
191
194
|
}
|
|
192
195
|
}
|
|
193
|
-
|
|
196
|
+
emitHook("subagentStop", { agentId });
|
|
197
|
+
// Context folding: collapse long sub-agent output to summary
|
|
198
|
+
let output = finalText || "(sub-agent completed with no text output)";
|
|
199
|
+
if (output.length > 2000) {
|
|
200
|
+
const { ContextManager } = await import("../../query/context-manager.js");
|
|
201
|
+
const cm = new ContextManager();
|
|
202
|
+
output = cm.foldSubagentResult(agentId, output);
|
|
203
|
+
}
|
|
204
|
+
return { output, isError: false };
|
|
194
205
|
},
|
|
195
206
|
prompt() {
|
|
196
207
|
return `Spawn a sub-agent with its own tool-use loop to handle a delegated task autonomously. The sub-agent runs in an isolated git worktree to prevent file conflicts. Parameters:
|
|
@@ -2,17 +2,17 @@ import { z } from "zod";
|
|
|
2
2
|
import type { Tool } from "../../Tool.js";
|
|
3
3
|
declare const inputSchema: z.ZodObject<{
|
|
4
4
|
file_path: z.ZodString;
|
|
5
|
-
action: z.ZodDefault<z.ZodEnum<["diagnostics", "definition", "references"]>>;
|
|
5
|
+
action: z.ZodDefault<z.ZodEnum<["diagnostics", "definition", "references", "hover"]>>;
|
|
6
6
|
line: z.ZodOptional<z.ZodNumber>;
|
|
7
7
|
character: z.ZodOptional<z.ZodNumber>;
|
|
8
8
|
}, "strip", z.ZodTypeAny, {
|
|
9
|
-
action: "diagnostics" | "definition" | "references";
|
|
10
9
|
file_path: string;
|
|
10
|
+
action: "diagnostics" | "definition" | "references" | "hover";
|
|
11
11
|
line?: number | undefined;
|
|
12
12
|
character?: number | undefined;
|
|
13
13
|
}, {
|
|
14
14
|
file_path: string;
|
|
15
|
-
action?: "diagnostics" | "definition" | "references" | undefined;
|
|
15
|
+
action?: "diagnostics" | "definition" | "references" | "hover" | undefined;
|
|
16
16
|
line?: number | undefined;
|
|
17
17
|
character?: number | undefined;
|
|
18
18
|
}>;
|
|
@@ -2,8 +2,8 @@ import { z } from "zod";
|
|
|
2
2
|
import { LspClient } from "../../lsp/client.js";
|
|
3
3
|
const inputSchema = z.object({
|
|
4
4
|
file_path: z.string().describe("Absolute path to the file to check"),
|
|
5
|
-
action: z.enum(["diagnostics", "definition", "references"]).default("diagnostics")
|
|
6
|
-
.describe("Action: diagnostics (errors/warnings), definition (go-to-def), references (find-refs)"),
|
|
5
|
+
action: z.enum(["diagnostics", "definition", "references", "hover"]).default("diagnostics")
|
|
6
|
+
.describe("Action: diagnostics (errors/warnings), definition (go-to-def), references (find-refs), hover (type info)"),
|
|
7
7
|
line: z.number().optional().describe("Line number (0-indexed) for definition/references"),
|
|
8
8
|
character: z.number().optional().describe("Column number (0-indexed) for definition/references"),
|
|
9
9
|
});
|
|
@@ -16,6 +16,12 @@ function getLspCommand(filePath) {
|
|
|
16
16
|
if (filePath.endsWith('.py')) {
|
|
17
17
|
return { command: 'pylsp', args: [] };
|
|
18
18
|
}
|
|
19
|
+
if (filePath.endsWith('.go')) {
|
|
20
|
+
return { command: 'gopls', args: ['serve'] };
|
|
21
|
+
}
|
|
22
|
+
if (filePath.endsWith('.rs')) {
|
|
23
|
+
return { command: 'rust-analyzer', args: [] };
|
|
24
|
+
}
|
|
19
25
|
return null;
|
|
20
26
|
}
|
|
21
27
|
async function getClient(filePath, workingDir) {
|
|
@@ -84,6 +90,28 @@ export const DiagnosticsTool = {
|
|
|
84
90
|
const lines = refs.map(r => `${r.uri.replace('file://', '')}:${r.range.start.line + 1}:${r.range.start.character}`);
|
|
85
91
|
return { output: `${refs.length} reference(s):\n${lines.join('\n')}`, isError: false };
|
|
86
92
|
}
|
|
93
|
+
if (input.action === "hover") {
|
|
94
|
+
if (input.line === undefined || input.character === undefined) {
|
|
95
|
+
return { output: "line and character are required for hover.", isError: true };
|
|
96
|
+
}
|
|
97
|
+
await client.openFile(input.file_path);
|
|
98
|
+
// Hover uses textDocument/hover which returns MarkupContent
|
|
99
|
+
try {
|
|
100
|
+
const result = await client.send('textDocument/hover', {
|
|
101
|
+
textDocument: { uri: `file://${input.file_path.replace(/\\/g, '/')}` },
|
|
102
|
+
position: { line: input.line, character: input.character },
|
|
103
|
+
});
|
|
104
|
+
if (!result || !result.contents)
|
|
105
|
+
return { output: "No hover information.", isError: false };
|
|
106
|
+
const content = typeof result.contents === 'string'
|
|
107
|
+
? result.contents
|
|
108
|
+
: result.contents.value ?? JSON.stringify(result.contents);
|
|
109
|
+
return { output: content, isError: false };
|
|
110
|
+
}
|
|
111
|
+
catch {
|
|
112
|
+
return { output: "Hover not supported by this language server.", isError: false };
|
|
113
|
+
}
|
|
114
|
+
}
|
|
87
115
|
return { output: `Unknown action: ${input.action}`, isError: true };
|
|
88
116
|
}
|
|
89
117
|
catch (err) {
|
|
@@ -94,15 +122,16 @@ export const DiagnosticsTool = {
|
|
|
94
122
|
}
|
|
95
123
|
},
|
|
96
124
|
prompt() {
|
|
97
|
-
return `Get code intelligence from the language server. Actions:
|
|
125
|
+
return `Get code intelligence from the language server. Supports TypeScript, JavaScript, Python, Go, and Rust. Actions:
|
|
98
126
|
- diagnostics: Get errors and warnings for a file
|
|
99
|
-
- definition: Go to definition of a symbol at a given position
|
|
100
|
-
- references: Find all references to a symbol at a given position
|
|
127
|
+
- definition: Go to definition of a symbol at a given position
|
|
128
|
+
- references: Find all references to a symbol at a given position
|
|
129
|
+
- hover: Get type information and documentation for a symbol
|
|
101
130
|
Parameters:
|
|
102
131
|
- file_path (string, required): Absolute path to the file
|
|
103
|
-
- action (string): "diagnostics" | "definition" | "references" (default: diagnostics)
|
|
104
|
-
- line (number, optional): 0-indexed line for definition/references
|
|
105
|
-
- character (number, optional): 0-indexed column for definition/references`;
|
|
132
|
+
- action (string): "diagnostics" | "definition" | "references" | "hover" (default: diagnostics)
|
|
133
|
+
- line (number, optional): 0-indexed line for definition/references/hover
|
|
134
|
+
- character (number, optional): 0-indexed column for definition/references/hover`;
|
|
106
135
|
},
|
|
107
136
|
};
|
|
108
137
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import type { Tool } from "../../Tool.js";
|
|
3
|
+
/**
 * Declared type of the Monitor tool's input schema: `command` is required;
 * `pattern`, `timeout` and `maxLines` are optional.
 */
declare const inputSchema: z.ZodObject<{
    command: z.ZodString;
    pattern: z.ZodOptional<z.ZodString>;
    timeout: z.ZodOptional<z.ZodNumber>;
    maxLines: z.ZodOptional<z.ZodNumber>;
}, "strip", z.ZodTypeAny, {
    command: string;
    pattern?: string | undefined;
    timeout?: number | undefined;
    maxLines?: number | undefined;
}, {
    command: string;
    pattern?: string | undefined;
    timeout?: number | undefined;
    maxLines?: number | undefined;
}>;
|
|
19
|
+
/** Monitor tool definition, typed by its input schema. */
export declare const MonitorTool: Tool<typeof inputSchema>;
|
|
20
|
+
export {};
|
|
21
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { spawn } from "node:child_process";
|
|
3
|
+
// Input accepted by the Monitor tool: the command to run plus optional
// line filter, time limit and line limit.
const inputSchema = (() => {
    const optionalNumber = (help) => z.number().optional().describe(help);
    const shape = {
        command: z.string().describe("Background command to watch"),
        pattern: z.string().optional().describe("Regex pattern to match output lines"),
        timeout: optionalNumber("Max watch time in ms (default 60000)"),
        maxLines: optionalNumber("Max output lines to collect (default 100)"),
    };
    return z.object(shape);
})();
|
|
9
|
+
/**
 * Monitor tool: runs a command through the shell, collects its stdout and
 * stderr line by line (optionally filtered by a regex) and resolves when the
 * process ends, the line limit is reached or the timeout elapses.
 */
export const MonitorTool = {
    name: "Monitor",
    description: "Watch a background process and collect output. Optionally filter by regex pattern.",
    inputSchema,
    riskLevel: "medium",
    // The command runs through a shell and can modify anything, so the tool
    // must not advertise itself as read-only.
    isReadOnly() { return false; },
    isConcurrencySafe() { return true; },
    /**
     * @param input Parsed tool input (`command`, optional `pattern`,
     *              `timeout` in ms, `maxLines`).
     * @param context Call context; `onOutputChunk` and `callId`, when both
     *                are present, receive each collected line as it arrives.
     * @returns `{ output, isError }`; `isError` is true for a spawn failure,
     *          an invalid pattern or a non-zero exit code.
     */
    async call(input, context) {
        const timeout = input.timeout ?? 60_000;
        const maxLines = input.maxLines ?? 100;
        let pattern = null;
        if (input.pattern) {
            try {
                pattern = new RegExp(input.pattern);
            }
            catch (err) {
                // Report a bad pattern as a tool error instead of throwing.
                const reason = err instanceof Error ? err.message : String(err);
                return { output: `Monitor error: invalid pattern: ${reason}`, isError: true };
            }
        }
        return new Promise((resolve) => {
            const lines = [];
            let settled = false;
            const proc = spawn(input.command, {
                shell: true,
                stdio: ['pipe', 'pipe', 'pipe'],
                windowsHide: true,
            });
            // Resolve exactly once and stop the timeout timer.
            const finish = (result) => {
                if (settled)
                    return;
                settled = true;
                clearTimeout(timer);
                resolve(result);
            };
            const timer = setTimeout(() => {
                if (settled)
                    return;
                proc.kill();
                finish({
                    output: lines.length > 0
                        ? lines.join('\n') + `\n\n[Monitor timed out after ${timeout / 1000}s — ${lines.length} lines collected]`
                        : `[Monitor timed out after ${timeout / 1000}s — no output]`,
                    isError: false,
                });
            }, timeout);
            const handleLine = (line) => {
                if (settled)
                    return;
                if (pattern && !pattern.test(line))
                    return;
                lines.push(line.trimEnd());
                // Stream output chunk if callback available
                if (context.onOutputChunk && context.callId) {
                    context.onOutputChunk(context.callId, line + '\n');
                }
                if (lines.length >= maxLines) {
                    proc.kill();
                    finish({
                        output: lines.join('\n') + `\n\n[Collected ${maxLines} lines — stopped]`,
                        isError: false,
                    });
                }
            };
            let stdoutBuffer = '';
            proc.stdout?.on('data', (chunk) => {
                stdoutBuffer += chunk.toString();
                const parts = stdoutBuffer.split('\n');
                stdoutBuffer = parts.pop() ?? '';
                for (const line of parts)
                    handleLine(line);
            });
            let stderrBuffer = '';
            proc.stderr?.on('data', (chunk) => {
                stderrBuffer += chunk.toString();
                const parts = stderrBuffer.split('\n');
                stderrBuffer = parts.pop() ?? '';
                for (const line of parts)
                    handleLine(line);
            });
            // 'close' fires after the stdio streams have ended, whereas 'exit'
            // can fire while output is still buffered in the pipes.
            proc.on('close', (code) => {
                if (settled)
                    return;
                // Flush unterminated trailing output BEFORE settling;
                // handleLine ignores everything once `settled` is true.
                if (stdoutBuffer)
                    handleLine(stdoutBuffer);
                if (stderrBuffer)
                    handleLine(stderrBuffer);
                finish({
                    output: lines.length > 0
                        ? lines.join('\n') + `\n\n[Process exited with code ${code ?? 'unknown'} — ${lines.length} lines]`
                        : `[Process exited with code ${code ?? 'unknown'} — no output]`,
                    isError: (code ?? 0) !== 0,
                });
            });
            proc.on('error', (err) => {
                finish({
                    output: `Monitor error: ${err.message}`,
                    isError: true,
                });
            });
        });
    },
    prompt() {
        return `Watch a background process and collect its output. Optionally filter lines by regex pattern.
Parameters:
- command (string, required): The command to run and watch
- pattern (string, optional): Regex to filter output lines
- timeout (number, optional): Max time in ms (default 60000)
- maxLines (number, optional): Max lines to collect (default 100)`;
    },
};
|
|
114
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import type { Tool } from "../../Tool.js";
|
|
3
|
+
/**
 * Declared type of the PowerShell tool's input schema: `command` is
 * required and `timeout` is optional.
 */
declare const inputSchema: z.ZodObject<{
    command: z.ZodString;
    timeout: z.ZodOptional<z.ZodNumber>;
}, "strip", z.ZodTypeAny, {
    command: string;
    timeout?: number | undefined;
}, {
    command: string;
    timeout?: number | undefined;
}>;
|
|
13
|
+
/** PowerShell tool definition, typed by its input schema. */
export declare const PowerShellTool: Tool<typeof inputSchema>;
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { execFileSync, execSync } from "node:child_process";
|
|
3
|
+
// Input accepted by the PowerShell tool: the command text and an optional
// time limit in milliseconds.
const inputSchema = (() => {
    const commandField = z.string().describe("PowerShell command to execute");
    const timeoutField = z.number().optional().describe("Timeout in ms (default 120000)");
    return z.object({ command: commandField, timeout: timeoutField });
})();
|
|
7
|
+
/**
 * PowerShell tool: runs a command with powershell.exe on Windows and
 * returns its trimmed stdout, or the captured error output on failure.
 */
export const PowerShellTool = {
    name: "PowerShell",
    description: "Execute PowerShell commands (Windows only). Use for Windows-specific tasks like registry access, COM objects, or .NET calls.",
    inputSchema,
    riskLevel: "high",
    isReadOnly() { return false; },
    isConcurrencySafe() { return false; },
    /**
     * @param input Parsed tool input (`command`, optional `timeout` in ms,
     *              default 120000).
     * @returns `{ output, isError }`; `isError` is true off Windows, on a
     *          non-zero exit code and on timeout.
     */
    async call(input) {
        if (process.platform !== 'win32') {
            return { output: "PowerShell is only available on Windows. Use Bash instead.", isError: true };
        }
        const timeout = input.timeout ?? 120_000;
        try {
            // Start powershell.exe directly with an argument array. Building a
            // quoted string for the default shell let cmd.exe interpret
            // characters such as & | < > ^ and %VAR% before PowerShell saw them.
            const output = execFileSync('powershell.exe', ['-NoProfile', '-NonInteractive', '-Command', input.command], { encoding: 'utf-8', timeout, maxBuffer: 10 * 1024 * 1024, windowsHide: true });
            return { output: output.trim(), isError: false };
        }
        catch (err) {
            // A failed run usually has an empty-string stdout, which `??`
            // treats as present; gather every non-empty stream so the error
            // text written to stderr is not lost.
            const captured = [err?.stdout, err?.stderr]
                .filter((part) => typeof part === 'string' && part.length > 0)
                .join('\n');
            const output = captured || String(err?.message ?? 'PowerShell error');
            return { output: output.slice(0, 100_000), isError: true };
        }
    },
    prompt() {
        return "Execute PowerShell commands on Windows. Use for registry, COM, .NET, and Windows-specific operations.";
    },
};
|
|
32
|
+
//# sourceMappingURL=index.js.map
|
package/dist/tools.js
CHANGED
|
@@ -42,6 +42,8 @@ import { KillProcessTool } from "./tools/KillProcessTool/index.js";
|
|
|
42
42
|
import { RemoteTriggerTool } from "./tools/RemoteTriggerTool/index.js";
|
|
43
43
|
import { MultiEditTool } from "./tools/MultiEditTool/index.js";
|
|
44
44
|
import { PipelineTool } from "./tools/PipelineTool/index.js";
|
|
45
|
+
import { PowerShellTool } from "./tools/PowerShellTool/index.js";
|
|
46
|
+
import { MonitorTool } from "./tools/MonitorTool/index.js";
|
|
45
47
|
/**
|
|
46
48
|
* Returns all registered tools.
|
|
47
49
|
*
|
|
@@ -96,6 +98,8 @@ export function getAllTools() {
|
|
|
96
98
|
KillProcessTool,
|
|
97
99
|
RemoteTriggerTool,
|
|
98
100
|
MultiEditTool,
|
|
101
|
+
PowerShellTool,
|
|
102
|
+
MonitorTool,
|
|
99
103
|
];
|
|
100
104
|
return [
|
|
101
105
|
...core,
|
|
@@ -7,6 +7,13 @@ const EDIT_SAFE_TOOLS = new Set([
|
|
|
7
7
|
"FileRead", "FileWrite", "FileEdit", "Glob", "Grep", "LS",
|
|
8
8
|
"ImageRead", "NotebookEdit",
|
|
9
9
|
]);
|
|
10
|
+
/**
 * Split a tool specifier into its tool name and optional argument pattern.
 *
 * "Bash(npm run *)" yields `{ toolName: "Bash", argPattern: "npm run *" }`;
 * a specifier that does not have the `Name(pattern)` form is returned
 * unchanged as the tool name, with no `argPattern` key.
 */
function parseToolSpecifier(specifier) {
    const parts = /^(\w+)\((.+)\)$/.exec(specifier);
    if (parts === null) {
        return { toolName: specifier };
    }
    const [, toolName, argPattern] = parts;
    return { toolName, argPattern };
}
|
|
10
17
|
/** Match a tool name against a pattern (supports trailing * for prefix matching) */
|
|
11
18
|
function matchToolPattern(pattern, toolName) {
|
|
12
19
|
if (pattern.endsWith("*")) {
|
|
@@ -14,14 +21,47 @@ function matchToolPattern(pattern, toolName) {
|
|
|
14
21
|
}
|
|
15
22
|
return pattern === toolName;
|
|
16
23
|
}
|
|
24
|
+
/**
 * Match an argument pattern against a value using glob-style matching.
 *
 * Supported wildcards:
 *  - `**` matches any run of characters, including `/`
 *  - `*`  matches any run of characters except `/`
 *  - `?`  matches exactly one character except `/`
 * Every other character is matched literally. The whole value must match.
 *
 * `?` used to be copied into the regex untouched, where it acted as a
 * quantifier on the preceding character (or made the expression invalid).
 *
 * @param pattern Glob pattern taken from a permission rule.
 * @param value Command string or file path to test.
 * @returns true when the value matches the pattern.
 */
function matchArgGlob(pattern, value) {
    const regexSpecial = /[.+^${}()|[\]\\]/;
    let regexStr = '';
    // Translate in one pass so text produced for one wildcard can never be
    // re-interpreted by a later replacement step.
    for (let i = 0; i < pattern.length; i++) {
        const ch = pattern[i];
        if (ch === '*') {
            if (pattern[i + 1] === '*') {
                regexStr += '.*';
                i++; // consume the second star
            }
            else {
                regexStr += '[^/]*';
            }
        }
        else if (ch === '?') {
            regexStr += '[^/]';
        }
        else if (regexSpecial.test(ch)) {
            regexStr += `\\${ch}`;
        }
        else {
            regexStr += ch;
        }
    }
    try {
        return new RegExp(`^${regexStr}$`).test(value);
    }
    catch {
        // Deliberate best effort: an unusable pattern matches nothing.
        return false;
    }
}
|
|
17
42
|
/** Find the first matching tool permission rule */
|
|
18
43
|
function findToolRule(rules, toolName, toolInput) {
|
|
19
44
|
if (!rules || rules.length === 0)
|
|
20
45
|
return undefined;
|
|
21
46
|
return rules.find(r => {
|
|
22
|
-
|
|
47
|
+
const { toolName: specToolName, argPattern } = parseToolSpecifier(r.tool);
|
|
48
|
+
// Check tool name match (with prefix * support)
|
|
49
|
+
if (!matchToolPattern(specToolName, toolName))
|
|
23
50
|
return false;
|
|
24
|
-
// If rule has
|
|
51
|
+
// If rule has an inline argument pattern (e.g., "Bash(npm run *)")
|
|
52
|
+
if (argPattern && toolInput) {
|
|
53
|
+
const input = toolInput;
|
|
54
|
+
// For Bash: match against command string
|
|
55
|
+
if (toolName === 'Bash' && typeof input.command === 'string') {
|
|
56
|
+
return matchArgGlob(argPattern, input.command);
|
|
57
|
+
}
|
|
58
|
+
// For file tools: match against file_path
|
|
59
|
+
if (['Edit', 'Write', 'Read'].includes(toolName) && typeof input.file_path === 'string') {
|
|
60
|
+
return matchArgGlob(argPattern, input.file_path);
|
|
61
|
+
}
|
|
62
|
+
return false; // Has pattern but no matching field
|
|
63
|
+
}
|
|
64
|
+
// Legacy: separate pattern field (regex) for Bash commands
|
|
25
65
|
if (r.pattern && toolInput && toolName === "Bash") {
|
|
26
66
|
const command = toolInput?.command;
|
|
27
67
|
if (typeof command === "string") {
|