@zhijiewang/openharness 2.1.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/DeferredTool.js +3 -1
- package/dist/Tool.d.ts +1 -1
- package/dist/agents/roles.js +58 -62
- package/dist/commands/cybergotchi.d.ts +1 -1
- package/dist/commands/cybergotchi.js +30 -30
- package/dist/commands/index.js +288 -132
- package/dist/components/App.d.ts +1 -1
- package/dist/components/App.js +6 -6
- package/dist/components/CompanionFooter.d.ts +1 -1
- package/dist/components/CompanionFooter.js +6 -8
- package/dist/components/CybergotchiBubble.js +5 -5
- package/dist/components/CybergotchiPanel.d.ts +1 -1
- package/dist/components/CybergotchiPanel.js +7 -7
- package/dist/components/CybergotchiPanelConnected.js +2 -2
- package/dist/components/CybergotchiSetup.js +26 -24
- package/dist/components/CybergotchiSprite.d.ts +1 -1
- package/dist/components/CybergotchiSprite.js +8 -12
- package/dist/components/DiffView.d.ts +1 -1
- package/dist/components/DiffView.js +10 -10
- package/dist/components/ErrorBoundary.d.ts +1 -1
- package/dist/components/ErrorBoundary.js +1 -1
- package/dist/components/InitWizard.js +65 -33
- package/dist/components/Markdown.js +2 -4
- package/dist/components/Messages.js +4 -4
- package/dist/components/PermissionPrompt.d.ts +1 -1
- package/dist/components/PermissionPrompt.js +15 -17
- package/dist/components/REPL.d.ts +1 -1
- package/dist/components/REPL.js +74 -49
- package/dist/components/Spinner.js +2 -2
- package/dist/components/TextInput.js +35 -29
- package/dist/components/ToolCallDisplay.js +3 -5
- package/dist/cybergotchi/bones.d.ts +1 -1
- package/dist/cybergotchi/bones.js +8 -8
- package/dist/cybergotchi/config.d.ts +2 -2
- package/dist/cybergotchi/config.js +13 -13
- package/dist/cybergotchi/events.d.ts +5 -5
- package/dist/cybergotchi/events.js +7 -7
- package/dist/cybergotchi/needs.d.ts +2 -2
- package/dist/cybergotchi/needs.js +7 -9
- package/dist/cybergotchi/personality.d.ts +2 -2
- package/dist/cybergotchi/personality.js +2 -2
- package/dist/cybergotchi/species.d.ts +1 -1
- package/dist/cybergotchi/species.js +145 -217
- package/dist/cybergotchi/speech.d.ts +2 -2
- package/dist/cybergotchi/speech.js +43 -43
- package/dist/cybergotchi/types.d.ts +4 -4
- package/dist/cybergotchi/types.js +26 -26
- package/dist/cybergotchi/useCybergotchi.d.ts +1 -1
- package/dist/cybergotchi/useCybergotchi.js +29 -25
- package/dist/git/index.js +11 -9
- package/dist/harness/checkpoints.js +29 -21
- package/dist/harness/config.d.ts +3 -3
- package/dist/harness/config.js +15 -9
- package/dist/harness/context-warning.d.ts +1 -1
- package/dist/harness/context-warning.js +1 -1
- package/dist/harness/cost.js +1 -1
- package/dist/harness/credentials.js +13 -13
- package/dist/harness/hooks.js +7 -5
- package/dist/harness/keybindings.js +20 -18
- package/dist/harness/marketplace.d.ts +3 -3
- package/dist/harness/marketplace.js +55 -42
- package/dist/harness/memory.d.ts +23 -5
- package/dist/harness/memory.js +142 -41
- package/dist/harness/onboarding.js +30 -10
- package/dist/harness/plugins.d.ts +9 -1
- package/dist/harness/plugins.js +54 -30
- package/dist/harness/rules.js +12 -7
- package/dist/harness/sandbox.js +15 -15
- package/dist/harness/session-db.d.ts +55 -0
- package/dist/harness/session-db.js +165 -0
- package/dist/harness/session.d.ts +1 -1
- package/dist/harness/session.js +34 -15
- package/dist/harness/store.d.ts +3 -3
- package/dist/harness/store.js +6 -4
- package/dist/harness/submit-handler.d.ts +4 -4
- package/dist/harness/submit-handler.js +25 -23
- package/dist/harness/telemetry.d.ts +1 -1
- package/dist/harness/telemetry.js +23 -19
- package/dist/harness/traces.d.ts +2 -2
- package/dist/harness/traces.js +39 -33
- package/dist/harness/verification.d.ts +1 -1
- package/dist/harness/verification.js +50 -44
- package/dist/lsp/client.js +44 -40
- package/dist/main.js +114 -59
- package/dist/mcp/DeferredMcpTool.d.ts +4 -4
- package/dist/mcp/DeferredMcpTool.js +9 -5
- package/dist/mcp/McpTool.d.ts +4 -4
- package/dist/mcp/McpTool.js +8 -4
- package/dist/mcp/client.d.ts +2 -2
- package/dist/mcp/client.js +21 -21
- package/dist/mcp/loader.d.ts +1 -1
- package/dist/mcp/loader.js +17 -12
- package/dist/mcp/registry.d.ts +3 -3
- package/dist/mcp/registry.js +97 -97
- package/dist/mcp/schema.d.ts +1 -1
- package/dist/mcp/schema.js +16 -16
- package/dist/mcp/server.d.ts +1 -1
- package/dist/mcp/server.js +21 -21
- package/dist/mcp/types.d.ts +3 -3
- package/dist/providers/anthropic.d.ts +2 -2
- package/dist/providers/anthropic.js +10 -9
- package/dist/providers/base.d.ts +1 -1
- package/dist/providers/index.js +10 -3
- package/dist/providers/llamacpp.d.ts +2 -2
- package/dist/providers/llamacpp.js +1 -3
- package/dist/providers/ollama.d.ts +2 -2
- package/dist/providers/ollama.js +3 -4
- package/dist/providers/openai.d.ts +2 -2
- package/dist/providers/openai.js +3 -5
- package/dist/providers/openrouter.d.ts +2 -2
- package/dist/providers/router.d.ts +1 -1
- package/dist/providers/router.js +7 -7
- package/dist/query/compress.d.ts +2 -2
- package/dist/query/compress.js +22 -21
- package/dist/query/context-manager.d.ts +1 -1
- package/dist/query/context-manager.js +5 -5
- package/dist/query/errors.js +1 -1
- package/dist/query/index.d.ts +1 -1
- package/dist/query/index.js +42 -24
- package/dist/query/tools.js +15 -12
- package/dist/query/types.d.ts +3 -1
- package/dist/query.d.ts +1 -1
- package/dist/query.js +1 -1
- package/dist/remote/auth.d.ts +2 -2
- package/dist/remote/auth.js +8 -8
- package/dist/remote/server.d.ts +3 -3
- package/dist/remote/server.js +60 -60
- package/dist/renderer/cells.js +9 -9
- package/dist/renderer/colors.js +24 -6
- package/dist/renderer/diff.d.ts +2 -2
- package/dist/renderer/diff.js +27 -19
- package/dist/renderer/differ.d.ts +1 -1
- package/dist/renderer/differ.js +9 -9
- package/dist/renderer/image.js +19 -19
- package/dist/renderer/index.d.ts +6 -6
- package/dist/renderer/index.js +163 -93
- package/dist/renderer/input.js +66 -48
- package/dist/renderer/layout.d.ts +6 -6
- package/dist/renderer/layout.js +163 -124
- package/dist/renderer/markdown.d.ts +2 -2
- package/dist/renderer/markdown.js +173 -54
- package/dist/renderer/session-browser.d.ts +2 -2
- package/dist/renderer/session-browser.js +19 -21
- package/dist/repl.d.ts +5 -5
- package/dist/repl.js +311 -198
- package/dist/sdk/index.d.ts +5 -5
- package/dist/sdk/index.js +32 -26
- package/dist/services/AgentDispatcher.d.ts +3 -3
- package/dist/services/AgentDispatcher.js +33 -29
- package/dist/services/CronExecutor.d.ts +4 -4
- package/dist/services/CronExecutor.js +12 -8
- package/dist/services/EvaluatorLoop.d.ts +3 -3
- package/dist/services/EvaluatorLoop.js +29 -21
- package/dist/services/MetaHarness.d.ts +1 -1
- package/dist/services/MetaHarness.js +34 -32
- package/dist/services/PipelineExecutor.d.ts +1 -1
- package/dist/services/PipelineExecutor.js +23 -25
- package/dist/services/SkillExtractor.d.ts +43 -0
- package/dist/services/SkillExtractor.js +163 -0
- package/dist/services/StreamingToolExecutor.d.ts +2 -2
- package/dist/services/StreamingToolExecutor.js +11 -7
- package/dist/services/a2a.d.ts +8 -8
- package/dist/services/a2a.js +44 -34
- package/dist/services/agent-messaging.d.ts +33 -15
- package/dist/services/agent-messaging.js +65 -13
- package/dist/services/cron.js +16 -16
- package/dist/tools/AgentTool/index.d.ts +5 -2
- package/dist/tools/AgentTool/index.js +25 -39
- package/dist/tools/AskUserTool/index.js +1 -1
- package/dist/tools/BashTool/index.d.ts +2 -2
- package/dist/tools/BashTool/index.js +18 -10
- package/dist/tools/CronTool/index.js +30 -12
- package/dist/tools/DiagnosticsTool/index.js +28 -22
- package/dist/tools/EnterPlanModeTool/index.js +93 -14
- package/dist/tools/EnterWorktreeTool/index.js +7 -3
- package/dist/tools/ExitPlanModeTool/index.d.ts +22 -1
- package/dist/tools/ExitPlanModeTool/index.js +20 -5
- package/dist/tools/ExitWorktreeTool/index.js +11 -4
- package/dist/tools/FileEditTool/index.js +3 -5
- package/dist/tools/FileReadTool/index.js +16 -10
- package/dist/tools/FileWriteTool/index.js +2 -2
- package/dist/tools/GlobTool/index.js +5 -9
- package/dist/tools/GrepTool/index.d.ts +2 -2
- package/dist/tools/GrepTool/index.js +14 -9
- package/dist/tools/ImageReadTool/index.js +2 -2
- package/dist/tools/KillProcessTool/index.js +11 -7
- package/dist/tools/LSTool/index.js +3 -3
- package/dist/tools/MemoryTool/index.d.ts +5 -5
- package/dist/tools/MemoryTool/index.js +28 -14
- package/dist/tools/MonitorTool/index.js +24 -19
- package/dist/tools/MultiEditTool/index.js +9 -5
- package/dist/tools/NotebookEditTool/index.js +3 -3
- package/dist/tools/ParallelAgentTool/index.d.ts +4 -4
- package/dist/tools/ParallelAgentTool/index.js +12 -6
- package/dist/tools/PipelineTool/index.js +3 -3
- package/dist/tools/PowerShellTool/index.js +10 -6
- package/dist/tools/RemoteTriggerTool/index.js +8 -4
- package/dist/tools/ScheduleWakeupTool/index.d.ts +42 -0
- package/dist/tools/ScheduleWakeupTool/index.js +115 -0
- package/dist/tools/SendMessageTool/index.js +25 -7
- package/dist/tools/SessionSearchTool/index.d.ts +15 -0
- package/dist/tools/SessionSearchTool/index.js +36 -0
- package/dist/tools/SkillTool/index.d.ts +3 -0
- package/dist/tools/SkillTool/index.js +39 -9
- package/dist/tools/TaskCreateTool/index.d.ts +2 -2
- package/dist/tools/TaskCreateTool/index.js +2 -2
- package/dist/tools/TaskGetTool/index.js +2 -2
- package/dist/tools/TaskListTool/index.js +3 -5
- package/dist/tools/TaskOutputTool/index.js +2 -2
- package/dist/tools/TaskStopTool/index.js +3 -3
- package/dist/tools/TaskUpdateTool/index.d.ts +4 -4
- package/dist/tools/TaskUpdateTool/index.js +2 -2
- package/dist/tools/ToolSearchTool/index.js +9 -6
- package/dist/tools/WebFetchTool/index.js +1 -1
- package/dist/tools/WebSearchTool/index.js +2 -6
- package/dist/tools.js +31 -30
- package/dist/types/permissions.js +15 -9
- package/dist/utils/bash-safety.d.ts +1 -1
- package/dist/utils/bash-safety.js +64 -54
- package/dist/utils/diff-algorithm.d.ts +3 -3
- package/dist/utils/diff-algorithm.js +7 -7
- package/dist/utils/fs.js +3 -3
- package/dist/utils/safe-env.js +1 -1
- package/dist/utils/theme-data.d.ts +1 -1
- package/dist/utils/theme-data.js +1 -1
- package/dist/utils/theme.d.ts +1 -1
- package/dist/utils/theme.js +1 -1
- package/dist/utils/tool-summary.d.ts +1 -1
- package/dist/utils/tool-summary.js +27 -9
- package/package.json +10 -3
|
@@ -18,10 +18,10 @@
|
|
|
18
18
|
* - Compression strategy
|
|
19
19
|
* - Permission rules
|
|
20
20
|
*/
|
|
21
|
-
import {
|
|
22
|
-
import { copyFileSync, existsSync } from
|
|
23
|
-
import { join } from
|
|
24
|
-
import {
|
|
21
|
+
import { execSync } from "node:child_process";
|
|
22
|
+
import { copyFileSync, existsSync } from "node:fs";
|
|
23
|
+
import { join } from "node:path";
|
|
24
|
+
import { invalidateConfigCache, readOhConfig, writeOhConfig } from "../harness/config.js";
|
|
25
25
|
// ── Benchmark Runner ──
|
|
26
26
|
/**
|
|
27
27
|
* Run a benchmark command and extract a score.
|
|
@@ -31,9 +31,9 @@ export async function runBenchmark(command) {
|
|
|
31
31
|
const start = Date.now();
|
|
32
32
|
try {
|
|
33
33
|
const output = execSync(command, {
|
|
34
|
-
encoding:
|
|
34
|
+
encoding: "utf-8",
|
|
35
35
|
timeout: 300_000, // 5 minute max
|
|
36
|
-
stdio: [
|
|
36
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
37
37
|
});
|
|
38
38
|
// Parse test results to extract score
|
|
39
39
|
const score = extractScore(output);
|
|
@@ -44,7 +44,7 @@ export async function runBenchmark(command) {
|
|
|
44
44
|
};
|
|
45
45
|
}
|
|
46
46
|
catch (err) {
|
|
47
|
-
const output = String(err.stdout ?? err.stderr ?? err.message ??
|
|
47
|
+
const output = String(err.stdout ?? err.stderr ?? err.message ?? "");
|
|
48
48
|
const score = extractScore(output);
|
|
49
49
|
return {
|
|
50
50
|
score: score > 0 ? score * 0.5 : 0, // Penalty for non-zero exit
|
|
@@ -60,8 +60,8 @@ function extractScore(output) {
|
|
|
60
60
|
const passMatch = output.match(/(\d+)\s+pass/i);
|
|
61
61
|
const failMatch = output.match(/(\d+)\s+fail/i);
|
|
62
62
|
if (passMatch) {
|
|
63
|
-
const passed = parseInt(passMatch[1]);
|
|
64
|
-
const failed = failMatch ? parseInt(failMatch[1]) : 0;
|
|
63
|
+
const passed = parseInt(passMatch[1], 10);
|
|
64
|
+
const failed = failMatch ? parseInt(failMatch[1], 10) : 0;
|
|
65
65
|
const total = passed + failed;
|
|
66
66
|
return total > 0 ? passed / total : 0;
|
|
67
67
|
}
|
|
@@ -69,13 +69,13 @@ function extractScore(output) {
|
|
|
69
69
|
const tapPass = output.match(/# pass\s+(\d+)/);
|
|
70
70
|
const tapFail = output.match(/# fail\s+(\d+)/);
|
|
71
71
|
if (tapPass) {
|
|
72
|
-
const passed = parseInt(tapPass[1]);
|
|
73
|
-
const failed = tapFail ? parseInt(tapFail[1]) : 0;
|
|
72
|
+
const passed = parseInt(tapPass[1], 10);
|
|
73
|
+
const failed = tapFail ? parseInt(tapFail[1], 10) : 0;
|
|
74
74
|
const total = passed + failed;
|
|
75
75
|
return total > 0 ? passed / total : 0;
|
|
76
76
|
}
|
|
77
77
|
// Exit code 0 = 1.0, non-zero = 0
|
|
78
|
-
return output.includes(
|
|
78
|
+
return output.includes("error") || output.includes("FAIL") ? 0.3 : 0.8;
|
|
79
79
|
}
|
|
80
80
|
// ── Meta-Harness ──
|
|
81
81
|
export class MetaHarness {
|
|
@@ -94,8 +94,8 @@ export class MetaHarness {
|
|
|
94
94
|
const totalStart = Date.now();
|
|
95
95
|
const changes = [];
|
|
96
96
|
// Backup current config
|
|
97
|
-
const configPath = join(
|
|
98
|
-
const backupPath = join(
|
|
97
|
+
const configPath = join(".oh", "config.yaml");
|
|
98
|
+
const backupPath = join(".oh", "config.yaml.backup");
|
|
99
99
|
if (existsSync(configPath)) {
|
|
100
100
|
copyFileSync(configPath, backupPath);
|
|
101
101
|
}
|
|
@@ -134,8 +134,8 @@ export class MetaHarness {
|
|
|
134
134
|
const config = readOhConfig();
|
|
135
135
|
const configStr = JSON.stringify(config, null, 2);
|
|
136
136
|
const prevChangesStr = previousChanges.length > 0
|
|
137
|
-
? `\nPrevious successful changes:\n${previousChanges.map(c => `- ${c.description} (+${c.impact.toFixed(3)})`).join(
|
|
138
|
-
:
|
|
137
|
+
? `\nPrevious successful changes:\n${previousChanges.map((c) => `- ${c.description} (+${c.impact.toFixed(3)})`).join("\n")}`
|
|
138
|
+
: "";
|
|
139
139
|
const prompt = `You are optimizing an AI agent harness configuration. Current score: ${currentScore.toFixed(3)}/1.0.
|
|
140
140
|
${prevChangesStr}
|
|
141
141
|
|
|
@@ -150,14 +150,14 @@ Suggest ONE specific configuration change that might improve the benchmark score
|
|
|
150
150
|
|
|
151
151
|
Respond with JSON: {"description": "what to change", "field": "config.path", "newValue": "the new value"}`;
|
|
152
152
|
try {
|
|
153
|
-
const response = await this.provider.complete([{ role:
|
|
153
|
+
const response = await this.provider.complete([{ role: "user", content: prompt, uuid: `meta-${Date.now()}`, timestamp: Date.now() }], "You are a harness optimization engine. Respond ONLY with valid JSON.", undefined, this.model);
|
|
154
154
|
const jsonMatch = response.content.match(/\{[\s\S]*\}/);
|
|
155
155
|
if (!jsonMatch)
|
|
156
156
|
return null;
|
|
157
157
|
const parsed = JSON.parse(jsonMatch[0]);
|
|
158
158
|
return {
|
|
159
|
-
description: parsed.description ??
|
|
160
|
-
field: parsed.field ??
|
|
159
|
+
description: parsed.description ?? "unknown change",
|
|
160
|
+
field: parsed.field ?? "unknown",
|
|
161
161
|
oldValue: undefined,
|
|
162
162
|
newValue: parsed.newValue,
|
|
163
163
|
};
|
|
@@ -172,17 +172,19 @@ Respond with JSON: {"description": "what to change", "field": "config.path", "ne
|
|
|
172
172
|
const config = readOhConfig() ?? {};
|
|
173
173
|
try {
|
|
174
174
|
// Simple top-level field update (nested paths would need lodash.set)
|
|
175
|
-
const field = change.field.replace(/^config\./,
|
|
175
|
+
const field = change.field.replace(/^config\./, "");
|
|
176
176
|
config[field] = change.newValue;
|
|
177
177
|
writeOhConfig(config);
|
|
178
178
|
}
|
|
179
|
-
catch {
|
|
179
|
+
catch {
|
|
180
|
+
/* revert will handle failures */
|
|
181
|
+
}
|
|
180
182
|
}
|
|
181
|
-
revertChange(
|
|
183
|
+
revertChange(_change) {
|
|
182
184
|
invalidateConfigCache();
|
|
183
185
|
// Revert by re-reading the backup config
|
|
184
|
-
const backupPath = join(
|
|
185
|
-
const configPath = join(
|
|
186
|
+
const backupPath = join(".oh", "config.yaml.backup");
|
|
187
|
+
const configPath = join(".oh", "config.yaml");
|
|
186
188
|
if (existsSync(backupPath)) {
|
|
187
189
|
copyFileSync(backupPath, configPath);
|
|
188
190
|
invalidateConfigCache();
|
|
@@ -193,24 +195,24 @@ Respond with JSON: {"description": "what to change", "field": "config.path", "ne
|
|
|
193
195
|
export function formatOptimizationResult(result) {
|
|
194
196
|
const lines = [];
|
|
195
197
|
const improvement = result.finalScore - result.initialScore;
|
|
196
|
-
const pct = result.initialScore > 0 ? (improvement / result.initialScore * 100).toFixed(1) :
|
|
198
|
+
const pct = result.initialScore > 0 ? ((improvement / result.initialScore) * 100).toFixed(1) : "0";
|
|
197
199
|
lines.push(`Meta-Harness Optimization Complete`);
|
|
198
|
-
lines.push(`${
|
|
200
|
+
lines.push(`${"─".repeat(40)}`);
|
|
199
201
|
lines.push(`Initial score: ${result.initialScore.toFixed(3)}`);
|
|
200
|
-
lines.push(`Final score: ${result.finalScore.toFixed(3)} (${improvement >= 0 ?
|
|
202
|
+
lines.push(`Final score: ${result.finalScore.toFixed(3)} (${improvement >= 0 ? "+" : ""}${pct}%)`);
|
|
201
203
|
lines.push(`Iterations: ${result.iterations}`);
|
|
202
204
|
lines.push(`Duration: ${Math.round(result.totalDurationMs / 1000)}s`);
|
|
203
205
|
if (result.changes.length > 0) {
|
|
204
|
-
lines.push(
|
|
205
|
-
lines.push(
|
|
206
|
+
lines.push("");
|
|
207
|
+
lines.push("Applied changes:");
|
|
206
208
|
for (const c of result.changes) {
|
|
207
209
|
lines.push(` +${c.impact.toFixed(3)} ${c.description}`);
|
|
208
210
|
}
|
|
209
211
|
}
|
|
210
212
|
else {
|
|
211
|
-
lines.push(
|
|
212
|
-
lines.push(
|
|
213
|
+
lines.push("");
|
|
214
|
+
lines.push("No improvements found in this run.");
|
|
213
215
|
}
|
|
214
|
-
return lines.join(
|
|
216
|
+
return lines.join("\n");
|
|
215
217
|
}
|
|
216
218
|
//# sourceMappingURL=MetaHarness.js.map
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
*
|
|
10
10
|
* Reuses the dependency resolution pattern from AgentDispatcher.
|
|
11
11
|
*/
|
|
12
|
-
import { findToolByName } from
|
|
12
|
+
import { findToolByName } from "../Tool.js";
|
|
13
13
|
// ── Executor ──
|
|
14
14
|
export class PipelineExecutor {
|
|
15
15
|
tools;
|
|
@@ -24,30 +24,30 @@ export class PipelineExecutor {
|
|
|
24
24
|
*/
|
|
25
25
|
async execute(steps) {
|
|
26
26
|
// Validate step IDs are unique
|
|
27
|
-
const ids = new Set(steps.map(s => s.id));
|
|
27
|
+
const ids = new Set(steps.map((s) => s.id));
|
|
28
28
|
if (ids.size !== steps.length) {
|
|
29
|
-
return [{ stepId:
|
|
29
|
+
return [{ stepId: "pipeline", output: "Error: duplicate step IDs", isError: true, durationMs: 0 }];
|
|
30
30
|
}
|
|
31
31
|
const internal = new Map();
|
|
32
32
|
for (const step of steps) {
|
|
33
|
-
internal.set(step.id, { ...step, status:
|
|
33
|
+
internal.set(step.id, { ...step, status: "pending" });
|
|
34
34
|
}
|
|
35
35
|
const results = [];
|
|
36
36
|
// Process steps in dependency order
|
|
37
37
|
while (true) {
|
|
38
|
-
const ready = [...internal.values()].filter(s => s.status ===
|
|
39
|
-
const running = [...internal.values()].filter(s => s.status ===
|
|
38
|
+
const ready = [...internal.values()].filter((s) => s.status === "pending" && this.isReady(s, internal));
|
|
39
|
+
const running = [...internal.values()].filter((s) => s.status === "running");
|
|
40
40
|
if (ready.length === 0 && running.length === 0)
|
|
41
41
|
break;
|
|
42
42
|
// Execute ready steps (sequentially for safety — tools may have side effects)
|
|
43
43
|
for (const step of ready) {
|
|
44
|
-
step.status =
|
|
44
|
+
step.status = "running";
|
|
45
45
|
// Check if any blocker failed — skip this step
|
|
46
46
|
if (this.hasFailedBlocker(step, internal)) {
|
|
47
|
-
step.status =
|
|
47
|
+
step.status = "skipped";
|
|
48
48
|
const result = {
|
|
49
49
|
stepId: step.id,
|
|
50
|
-
output:
|
|
50
|
+
output: "Skipped: dependency failed",
|
|
51
51
|
isError: true,
|
|
52
52
|
durationMs: 0,
|
|
53
53
|
};
|
|
@@ -57,7 +57,7 @@ export class PipelineExecutor {
|
|
|
57
57
|
}
|
|
58
58
|
const result = await this.executeStep(step, internal);
|
|
59
59
|
step.result = result;
|
|
60
|
-
step.status = result.isError ?
|
|
60
|
+
step.status = result.isError ? "failed" : "completed";
|
|
61
61
|
results.push(result);
|
|
62
62
|
}
|
|
63
63
|
}
|
|
@@ -66,17 +66,17 @@ export class PipelineExecutor {
|
|
|
66
66
|
isReady(step, all) {
|
|
67
67
|
if (!step.dependsOn || step.dependsOn.length === 0)
|
|
68
68
|
return true;
|
|
69
|
-
return step.dependsOn.every(id => {
|
|
69
|
+
return step.dependsOn.every((id) => {
|
|
70
70
|
const dep = all.get(id);
|
|
71
|
-
return dep && (dep.status ===
|
|
71
|
+
return dep && (dep.status === "completed" || dep.status === "failed" || dep.status === "skipped");
|
|
72
72
|
});
|
|
73
73
|
}
|
|
74
74
|
hasFailedBlocker(step, all) {
|
|
75
75
|
if (!step.dependsOn)
|
|
76
76
|
return false;
|
|
77
|
-
return step.dependsOn.some(id => {
|
|
77
|
+
return step.dependsOn.some((id) => {
|
|
78
78
|
const dep = all.get(id);
|
|
79
|
-
return dep && (dep.status ===
|
|
79
|
+
return dep && (dep.status === "failed" || dep.status === "skipped");
|
|
80
80
|
});
|
|
81
81
|
}
|
|
82
82
|
async executeStep(step, all) {
|
|
@@ -128,7 +128,7 @@ export class PipelineExecutor {
|
|
|
128
128
|
*/
|
|
129
129
|
resolveArgs(args, all) {
|
|
130
130
|
const resolve = (value) => {
|
|
131
|
-
if (typeof value ===
|
|
131
|
+
if (typeof value === "string" && value.startsWith("$")) {
|
|
132
132
|
const refId = value.slice(1);
|
|
133
133
|
const refStep = all.get(refId);
|
|
134
134
|
if (refStep?.result && !refStep.result.isError) {
|
|
@@ -138,7 +138,7 @@ export class PipelineExecutor {
|
|
|
138
138
|
}
|
|
139
139
|
if (Array.isArray(value))
|
|
140
140
|
return value.map(resolve);
|
|
141
|
-
if (value && typeof value ===
|
|
141
|
+
if (value && typeof value === "object") {
|
|
142
142
|
const resolved = {};
|
|
143
143
|
for (const [k, v] of Object.entries(value)) {
|
|
144
144
|
resolved[k] = resolve(v);
|
|
@@ -157,23 +157,21 @@ export function formatPipelineResults(results) {
|
|
|
157
157
|
const lines = [];
|
|
158
158
|
let totalMs = 0;
|
|
159
159
|
for (const r of results) {
|
|
160
|
-
const status = r.isError ?
|
|
161
|
-
const duration = r.durationMs > 0 ? ` (${r.durationMs}ms)` :
|
|
160
|
+
const status = r.isError ? "✗" : "✓";
|
|
161
|
+
const duration = r.durationMs > 0 ? ` (${r.durationMs}ms)` : "";
|
|
162
162
|
lines.push(`${status} Step "${r.stepId}"${duration}`);
|
|
163
163
|
// Show truncated output
|
|
164
|
-
const output = r.output.length > 200
|
|
165
|
-
? r.output.slice(0, 200) + '...'
|
|
166
|
-
: r.output;
|
|
164
|
+
const output = r.output.length > 200 ? `${r.output.slice(0, 200)}...` : r.output;
|
|
167
165
|
if (output) {
|
|
168
|
-
for (const line of output.split(
|
|
166
|
+
for (const line of output.split("\n").slice(0, 5)) {
|
|
169
167
|
lines.push(` ${line}`);
|
|
170
168
|
}
|
|
171
169
|
}
|
|
172
|
-
lines.push(
|
|
170
|
+
lines.push("");
|
|
173
171
|
totalMs += r.durationMs;
|
|
174
172
|
}
|
|
175
|
-
const passed = results.filter(r => !r.isError).length;
|
|
173
|
+
const passed = results.filter((r) => !r.isError).length;
|
|
176
174
|
lines.push(`Pipeline: ${passed}/${results.length} steps passed (${totalMs}ms total)`);
|
|
177
|
-
return lines.join(
|
|
175
|
+
return lines.join("\n");
|
|
178
176
|
}
|
|
179
177
|
//# sourceMappingURL=PipelineExecutor.js.map
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SkillExtractor — auto-creates skill files from session message history.
|
|
3
|
+
*
|
|
4
|
+
* After a session completes, if enough tool usage was observed, we ask the LLM
|
|
5
|
+
* to identify reusable patterns and persist them as skill markdown files under
|
|
6
|
+
* .oh/skills/auto/.
|
|
7
|
+
*/
|
|
8
|
+
import type { Provider } from "../providers/base.js";
|
|
9
|
+
import type { Message } from "../types/message.js";
|
|
10
|
+
export type SkillCandidate = {
|
|
11
|
+
name: string;
|
|
12
|
+
description: string;
|
|
13
|
+
trigger: string;
|
|
14
|
+
procedure: string;
|
|
15
|
+
pitfalls: string;
|
|
16
|
+
verification: string;
|
|
17
|
+
};
|
|
18
|
+
/**
|
|
19
|
+
* Returns true if the message history has enough tool usage to warrant
|
|
20
|
+
* extracting skills (5 or more total tool calls).
|
|
21
|
+
*/
|
|
22
|
+
export declare function shouldExtract(messages: Message[]): boolean;
|
|
23
|
+
/**
|
|
24
|
+
* Ask the LLM to identify reusable patterns in the conversation and return
|
|
25
|
+
* them as an array of SkillCandidates.
|
|
26
|
+
*/
|
|
27
|
+
export declare function extractSkills(provider: Provider, messages: Message[], model?: string): Promise<SkillCandidate[]>;
|
|
28
|
+
/**
|
|
29
|
+
* Persist a skill candidate to .oh/skills/auto/<slug>.md.
|
|
30
|
+
* If a file already exists, increments the version in the frontmatter.
|
|
31
|
+
*/
|
|
32
|
+
export declare function persistSkill(candidate: SkillCandidate, sessionId: string): string;
|
|
33
|
+
/**
|
|
34
|
+
* Orchestrate the full extraction pipeline:
|
|
35
|
+
* 1. Check if extraction is warranted
|
|
36
|
+
* 2. Ask LLM to extract skill candidates
|
|
37
|
+
* 3. Deduplicate against existing skills
|
|
38
|
+
* 4. Persist each new/updated candidate
|
|
39
|
+
*
|
|
40
|
+
* Returns the list of file paths written.
|
|
41
|
+
*/
|
|
42
|
+
export declare function runExtraction(provider: Provider, messages: Message[], sessionId: string, model?: string): Promise<string[]>;
|
|
43
|
+
//# sourceMappingURL=SkillExtractor.d.ts.map
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SkillExtractor — auto-creates skill files from session message history.
|
|
3
|
+
*
|
|
4
|
+
* After a session completes, if enough tool usage was observed, we ask the LLM
|
|
5
|
+
* to identify reusable patterns and persist them as skill markdown files under
|
|
6
|
+
* .oh/skills/auto/.
|
|
7
|
+
*/
|
|
8
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
9
|
+
import { join } from "node:path";
|
|
10
|
+
import { discoverSkills, findSimilarSkill } from "../harness/plugins.js";
|
|
11
|
+
import { createUserMessage } from "../types/message.js";
|
|
12
|
+
// ── Helpers ──
|
|
13
|
+
/** Count total tool calls across all messages */
|
|
14
|
+
function countToolCalls(messages) {
|
|
15
|
+
return messages.reduce((sum, m) => sum + (m.toolCalls?.length ?? 0), 0);
|
|
16
|
+
}
|
|
17
|
+
/** Convert a skill name to a slug suitable for a filename */
|
|
18
|
+
function toSlug(name) {
|
|
19
|
+
return name
|
|
20
|
+
.toLowerCase()
|
|
21
|
+
.replace(/[^a-z0-9]+/g, "-")
|
|
22
|
+
.replace(/^-+|-+$/g, "");
|
|
23
|
+
}
|
|
24
|
+
// ── Public API ──
|
|
25
|
+
/**
|
|
26
|
+
* Returns true if the message history has enough tool usage to warrant
|
|
27
|
+
* extracting skills (5 or more total tool calls).
|
|
28
|
+
*/
|
|
29
|
+
export function shouldExtract(messages) {
|
|
30
|
+
return countToolCalls(messages) >= 5;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Ask the LLM to identify reusable patterns in the conversation and return
|
|
34
|
+
* them as an array of SkillCandidates.
|
|
35
|
+
*/
|
|
36
|
+
export async function extractSkills(provider, messages, model) {
|
|
37
|
+
const systemPrompt = `You are a skill extraction assistant. Analyze the conversation and identify reusable patterns or procedures that could be turned into skills for future sessions.
|
|
38
|
+
|
|
39
|
+
Return a JSON array of skill candidates. Each candidate must have these fields:
|
|
40
|
+
- name: short kebab-case identifier (e.g. "run-tests")
|
|
41
|
+
- description: one-line description of what the skill does
|
|
42
|
+
- trigger: a short phrase that would trigger this skill (e.g. "run the tests")
|
|
43
|
+
- procedure: step-by-step instructions as a markdown string
|
|
44
|
+
- pitfalls: common mistakes to avoid as a markdown string
|
|
45
|
+
- verification: how to verify the skill succeeded as a markdown string
|
|
46
|
+
|
|
47
|
+
Return ONLY the JSON array, no other text. If no reusable patterns exist, return [].`;
|
|
48
|
+
const prompt = createUserMessage("Analyze this conversation and extract reusable skill patterns as a JSON array.");
|
|
49
|
+
const response = await provider.complete([...messages, prompt], systemPrompt, undefined, model);
|
|
50
|
+
try {
|
|
51
|
+
// Extract JSON from response — handle code fences if present
|
|
52
|
+
const text = response.content.trim();
|
|
53
|
+
const jsonMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/) ?? null;
|
|
54
|
+
const jsonText = jsonMatch ? jsonMatch[1].trim() : text;
|
|
55
|
+
const parsed = JSON.parse(jsonText);
|
|
56
|
+
if (!Array.isArray(parsed))
|
|
57
|
+
return [];
|
|
58
|
+
return parsed.filter((item) => typeof item === "object" &&
|
|
59
|
+
item !== null &&
|
|
60
|
+
typeof item.name === "string" &&
|
|
61
|
+
typeof item.description === "string");
|
|
62
|
+
}
|
|
63
|
+
catch {
|
|
64
|
+
return [];
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Persist a skill candidate to .oh/skills/auto/<slug>.md.
|
|
69
|
+
* If a file already exists, increments the version in the frontmatter.
|
|
70
|
+
*/
|
|
71
|
+
export function persistSkill(candidate, sessionId) {
|
|
72
|
+
const autoDir = join(".oh", "skills", "auto");
|
|
73
|
+
mkdirSync(autoDir, { recursive: true });
|
|
74
|
+
const slug = toSlug(candidate.name);
|
|
75
|
+
const filePath = join(autoDir, `${slug}.md`);
|
|
76
|
+
// Determine version
|
|
77
|
+
let version = 1;
|
|
78
|
+
if (existsSync(filePath)) {
|
|
79
|
+
try {
|
|
80
|
+
const existing = readFileSync(filePath, "utf-8");
|
|
81
|
+
const versionMatch = existing.match(/^version:\s*(\d+)$/m);
|
|
82
|
+
if (versionMatch) {
|
|
83
|
+
version = parseInt(versionMatch[1], 10) + 1;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
catch {
|
|
87
|
+
/* ignore */
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
const now = new Date().toISOString();
|
|
91
|
+
const content = `---
|
|
92
|
+
name: ${candidate.name}
|
|
93
|
+
description: ${candidate.description}
|
|
94
|
+
trigger: ${candidate.trigger}
|
|
95
|
+
source: auto
|
|
96
|
+
extractedFrom: ${sessionId}
|
|
97
|
+
extractedAt: ${now}
|
|
98
|
+
version: ${version}
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Procedure
|
|
102
|
+
|
|
103
|
+
${candidate.procedure}
|
|
104
|
+
|
|
105
|
+
## Pitfalls
|
|
106
|
+
|
|
107
|
+
${candidate.pitfalls}
|
|
108
|
+
|
|
109
|
+
## Verification
|
|
110
|
+
|
|
111
|
+
${candidate.verification}
|
|
112
|
+
`;
|
|
113
|
+
writeFileSync(filePath, content, "utf-8");
|
|
114
|
+
return filePath;
|
|
115
|
+
}
|
|
116
|
+
/**
 * Quality gate: ask the model for a one-word YES/NO verdict on whether the
 * candidate skill is worth saving.
 *
 * @param provider Object exposing `complete(messages, systemPrompt, tools, model)`.
 * @param candidate Skill candidate; its name, description and procedure are
 *   included in the prompt.
 * @param model Model identifier forwarded to the provider.
 * @returns `true` when the trimmed, upper-cased reply starts with "YES", and
 *   also `true` when anything throws (the check fails open).
 */
async function isSkillWorthy(provider, candidate, model) {
    try {
        const question = [
            "Is this extracted skill worth saving for future reuse? Answer YES or NO (one word only).",
            "",
            `Name: ${candidate.name}`,
            `Description: ${candidate.description}`,
            `Procedure: ${candidate.procedure}`,
            "",
            "Criteria: Is it reusable (not a one-off)? Is the procedure clear and complete? Would it save time in future sessions?",
        ].join("\n");
        const reply = await provider.complete([createUserMessage(question)], "Answer YES or NO only.", undefined, model);
        const verdict = reply.content.trim().toUpperCase();
        return verdict.startsWith("YES");
    }
    catch {
        // On error, allow the skill through rather than dropping it.
        return true;
    }
}
|
|
133
|
+
/**
 * Orchestrate the full extraction pipeline:
 *   1. Check if extraction is warranted
 *   2. Ask LLM to extract skill candidates
 *   3. Deduplicate against existing skills, including skills persisted
 *      earlier in this same run
 *   4. Run the LLM quality gate and persist each candidate that passes
 *
 * @param provider LLM provider used for extraction and the quality gate.
 * @param messages Conversation messages to mine for skills.
 * @param sessionId Identifier recorded in each persisted skill file.
 * @param model Model identifier forwarded to the provider.
 * @returns The list of file paths written (empty when nothing was extracted).
 */
export async function runExtraction(provider, messages, sessionId, model) {
    if (!shouldExtract(messages))
        return [];
    const candidates = await extractSkills(provider, messages, model);
    if (candidates.length === 0)
        return [];
    const knownSkills = discoverSkills().map((s) => ({ name: s.name, description: s.description }));
    const written = [];
    for (const candidate of candidates) {
        if (findSimilarSkill(candidate.name, candidate.description, knownSkills))
            continue;
        // Quality gate: quick LLM check before persisting
        const worthy = await isSkillWorthy(provider, candidate, model);
        if (!worthy)
            continue;
        written.push(persistSkill(candidate, sessionId));
        // Remember what was just saved so a near-duplicate later in the same
        // batch is skipped instead of overwriting this file.
        knownSkills.push({ name: candidate.name, description: candidate.description });
    }
    return written;
}
|
|
163
|
+
//# sourceMappingURL=SkillExtractor.js.map
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* Tool execution during LLM streaming — concurrent tool execution
|
|
3
3
|
* with permission checks and queue management.
|
|
4
4
|
*/
|
|
5
|
+
import type { ToolContext, ToolResult, Tools } from "../Tool.js";
|
|
5
6
|
import type { ToolCall } from "../types/message.js";
|
|
6
|
-
import type {
|
|
7
|
-
import type { PermissionMode, AskUserFn } from "../types/permissions.js";
|
|
7
|
+
import type { AskUserFn, PermissionMode } from "../types/permissions.js";
|
|
8
8
|
export declare class StreamingToolExecutor {
|
|
9
9
|
private tools;
|
|
10
10
|
private context;
|
|
@@ -90,14 +90,16 @@ export class StreamingToolExecutor {
|
|
|
90
90
|
...this.context,
|
|
91
91
|
callId,
|
|
92
92
|
abortSignal: this.abortSignal,
|
|
93
|
-
onOutputChunk: (id, chunk) => {
|
|
93
|
+
onOutputChunk: (id, chunk) => {
|
|
94
|
+
this.outputChunks.push({ callId: id, chunk });
|
|
95
|
+
},
|
|
94
96
|
};
|
|
95
97
|
try {
|
|
96
98
|
tracked.result = await tool.call(parsed.data, callContext);
|
|
97
99
|
// Verification loop: auto-run lint/typecheck after file-modifying tools
|
|
98
|
-
if (tracked.result && !tracked.result.isError && [
|
|
100
|
+
if (tracked.result && !tracked.result.isError && ["Edit", "Write", "MultiEdit"].includes(tool.name)) {
|
|
99
101
|
try {
|
|
100
|
-
const { runVerificationForFiles, getVerificationConfig, extractFilePaths } = await import(
|
|
102
|
+
const { runVerificationForFiles, getVerificationConfig, extractFilePaths } = await import("../harness/verification.js");
|
|
101
103
|
const vConfig = getVerificationConfig();
|
|
102
104
|
if (vConfig?.enabled) {
|
|
103
105
|
const filePaths = extractFilePaths(tool.name, tracked.toolCall.arguments);
|
|
@@ -106,13 +108,13 @@ export class StreamingToolExecutor {
|
|
|
106
108
|
if (vResult.ran) {
|
|
107
109
|
if (!vResult.passed) {
|
|
108
110
|
tracked.result = {
|
|
109
|
-
output: tracked.result.output
|
|
110
|
-
isError: vConfig.mode ===
|
|
111
|
+
output: `${tracked.result.output}\n\n[Verification FAILED]\n${vResult.summary}`,
|
|
112
|
+
isError: vConfig.mode === "block",
|
|
111
113
|
};
|
|
112
114
|
}
|
|
113
115
|
else {
|
|
114
116
|
tracked.result = {
|
|
115
|
-
output: tracked.result.output
|
|
117
|
+
output: `${tracked.result.output}\n\n[Verification passed]`,
|
|
116
118
|
isError: false,
|
|
117
119
|
};
|
|
118
120
|
}
|
|
@@ -120,7 +122,9 @@ export class StreamingToolExecutor {
|
|
|
120
122
|
}
|
|
121
123
|
}
|
|
122
124
|
}
|
|
123
|
-
catch {
|
|
125
|
+
catch {
|
|
126
|
+
/* verification should never break tool execution */
|
|
127
|
+
}
|
|
124
128
|
}
|
|
125
129
|
}
|
|
126
130
|
catch (err) {
|
package/dist/services/a2a.d.ts
CHANGED
|
@@ -45,7 +45,7 @@ export type AgentCapability = {
|
|
|
45
45
|
};
|
|
46
46
|
export type AgentEndpoint = {
|
|
47
47
|
/** Transport type */
|
|
48
|
-
type:
|
|
48
|
+
type: "http" | "ipc" | "stdio";
|
|
49
49
|
/** Address (URL for http, socket path for ipc, pid for stdio) */
|
|
50
50
|
address: string;
|
|
51
51
|
/** Port for HTTP transport */
|
|
@@ -59,32 +59,32 @@ export type A2AMessage = {
|
|
|
59
59
|
/** Target agent ID or capability name */
|
|
60
60
|
to: string;
|
|
61
61
|
/** Message type */
|
|
62
|
-
type:
|
|
62
|
+
type: "task" | "result" | "status" | "cancel" | "discover";
|
|
63
63
|
/** Payload */
|
|
64
64
|
payload: A2APayload;
|
|
65
65
|
/** Timestamp */
|
|
66
66
|
timestamp: number;
|
|
67
67
|
};
|
|
68
68
|
export type A2APayload = {
|
|
69
|
-
kind:
|
|
69
|
+
kind: "task";
|
|
70
70
|
capability: string;
|
|
71
71
|
input: unknown;
|
|
72
72
|
timeout?: number;
|
|
73
73
|
} | {
|
|
74
|
-
kind:
|
|
74
|
+
kind: "result";
|
|
75
75
|
taskId: string;
|
|
76
76
|
output: unknown;
|
|
77
77
|
error?: string;
|
|
78
78
|
} | {
|
|
79
|
-
kind:
|
|
80
|
-
state:
|
|
79
|
+
kind: "status";
|
|
80
|
+
state: "idle" | "working" | "done" | "error";
|
|
81
81
|
progress?: string;
|
|
82
82
|
} | {
|
|
83
|
-
kind:
|
|
83
|
+
kind: "cancel";
|
|
84
84
|
taskId: string;
|
|
85
85
|
reason?: string;
|
|
86
86
|
} | {
|
|
87
|
-
kind:
|
|
87
|
+
kind: "discover";
|
|
88
88
|
filter?: {
|
|
89
89
|
capability?: string;
|
|
90
90
|
name?: string;
|