itermbot 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +15 -20
- package/.github/workflows/release.yml +32 -20
- package/README.md +11 -20
- package/cleanup-unused.patch +108 -0
- package/config/app.yaml +32 -13
- package/config/memory.yaml +38 -31
- package/config/model.yaml +33 -0
- package/config/skill.yaml +8 -0
- package/config/tool.yaml +50 -17
- package/config/tsconfig.json +4 -1
- package/dist/chat/builtin-commands.d.ts +8 -0
- package/dist/chat/builtin-commands.d.ts.map +1 -0
- package/dist/chat/builtin-commands.js +53 -0
- package/dist/chat/builtin-commands.js.map +1 -0
- package/dist/chat/progress.d.ts +3 -0
- package/dist/chat/progress.d.ts.map +1 -0
- package/dist/chat/progress.js +23 -0
- package/dist/chat/progress.js.map +1 -0
- package/dist/chat/response-safety.d.ts +8 -0
- package/dist/chat/response-safety.d.ts.map +1 -0
- package/dist/chat/response-safety.js +126 -0
- package/dist/chat/response-safety.js.map +1 -0
- package/dist/chat/step-display.d.ts +2 -0
- package/dist/chat/step-display.d.ts.map +1 -0
- package/dist/chat/step-display.js +50 -0
- package/dist/chat/step-display.js.map +1 -0
- package/dist/chat/tool-result.d.ts +4 -0
- package/dist/chat/tool-result.d.ts.map +1 -0
- package/dist/chat/tool-result.js +24 -0
- package/dist/chat/tool-result.js.map +1 -0
- package/dist/config.d.ts +11 -6
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +26 -12
- package/dist/config.js.map +1 -1
- package/dist/index.js +308 -151
- package/dist/index.js.map +1 -1
- package/dist/iterm/direct-command-router.d.ts +24 -0
- package/dist/iterm/direct-command-router.d.ts.map +1 -0
- package/dist/iterm/direct-command-router.js +213 -0
- package/dist/iterm/direct-command-router.js.map +1 -0
- package/dist/iterm/session-hint.d.ts +10 -0
- package/dist/iterm/session-hint.d.ts.map +1 -0
- package/dist/iterm/session-hint.js +43 -0
- package/dist/iterm/session-hint.js.map +1 -0
- package/dist/iterm/target-panel-policy.d.ts +12 -0
- package/dist/iterm/target-panel-policy.d.ts.map +1 -0
- package/dist/iterm/target-panel-policy.js +287 -0
- package/dist/iterm/target-panel-policy.js.map +1 -0
- package/dist/runtime/text-tool-call-recovery.d.ts +23 -0
- package/dist/runtime/text-tool-call-recovery.d.ts.map +1 -0
- package/dist/runtime/text-tool-call-recovery.js +211 -0
- package/dist/runtime/text-tool-call-recovery.js.map +1 -0
- package/dist/startup/colors.d.ts +37 -0
- package/dist/startup/colors.d.ts.map +1 -0
- package/dist/{startup-colors.js → startup/colors.js} +30 -15
- package/dist/startup/colors.js.map +1 -0
- package/dist/startup/diagnostics.d.ts +8 -0
- package/dist/startup/diagnostics.d.ts.map +1 -0
- package/dist/startup/diagnostics.js +18 -0
- package/dist/startup/diagnostics.js.map +1 -0
- package/dist/startup/os.d.ts +10 -0
- package/dist/startup/os.d.ts.map +1 -0
- package/dist/startup/os.js +67 -0
- package/dist/startup/os.js.map +1 -0
- package/dist/startup/ui.d.ts +11 -0
- package/dist/startup/ui.d.ts.map +1 -0
- package/dist/startup/ui.js +49 -0
- package/dist/startup/ui.js.map +1 -0
- package/package.json +23 -13
- package/scripts/internal-package-refs.mjs +158 -0
- package/scripts/patch-buildin-cache.sh +1 -4
- package/scripts/resolve-deps.js +5 -0
- package/scripts/test-llm.mjs +11 -5
- package/skills/gpu-ssh-monitor/SKILL.md +22 -3
- package/src/chat/builtin-commands.ts +70 -0
- package/src/chat/progress.ts +26 -0
- package/src/chat/response-safety.ts +134 -0
- package/src/chat/step-display.ts +54 -0
- package/src/chat/tool-result.ts +22 -0
- package/src/config.ts +48 -21
- package/src/index.ts +377 -167
- package/src/iterm/direct-command-router.ts +274 -0
- package/src/iterm/session-hint.ts +49 -0
- package/src/iterm/target-panel-policy.ts +341 -0
- package/src/runtime/text-tool-call-recovery.ts +257 -0
- package/src/{startup-colors.ts → startup/colors.ts} +42 -27
- package/src/startup/diagnostics.ts +25 -0
- package/src/startup/os.ts +63 -0
- package/src/startup/ui.ts +56 -0
- package/src/types/marked-terminal.d.ts +3 -0
- package/test/builtin-commands.test.mjs +50 -0
- package/test/chat-flow.integration.test.mjs +235 -0
- package/test/chat-progress.test.mjs +83 -0
- package/test/config.test.mjs +22 -0
- package/test/diagnostics.test.mjs +45 -0
- package/test/direct-command-router.test.mjs +149 -0
- package/test/live-iterm-llm.integration.test.mjs +153 -0
- package/test/response-safety.test.mjs +44 -0
- package/test/session-hint.test.mjs +78 -0
- package/test/startup-colors.test.mjs +145 -0
- package/test/target-panel-policy.test.mjs +180 -0
- package/test/tool-call-recovery.test.mjs +199 -0
- package/config/agent.yaml +0 -121
- package/config/models.yaml +0 -36
- package/config/skills.yaml +0 -4
- package/dist/agent.d.ts +0 -14
- package/dist/agent.d.ts.map +0 -1
- package/dist/agent.js +0 -16
- package/dist/agent.js.map +0 -1
- package/dist/context.d.ts +0 -12
- package/dist/context.d.ts.map +0 -1
- package/dist/context.js +0 -20
- package/dist/context.js.map +0 -1
- package/dist/session-hint.d.ts +0 -4
- package/dist/session-hint.d.ts.map +0 -1
- package/dist/session-hint.js +0 -25
- package/dist/session-hint.js.map +0 -1
- package/dist/startup-colors.d.ts +0 -26
- package/dist/startup-colors.d.ts.map +0 -1
- package/dist/startup-colors.js.map +0 -1
- package/dist/target-routing.d.ts +0 -15
- package/dist/target-routing.d.ts.map +0 -1
- package/dist/target-routing.js +0 -355
- package/dist/target-routing.js.map +0 -1
- package/src/agent.ts +0 -35
- package/src/context.ts +0 -35
- package/src/session-hint.ts +0 -28
- package/src/target-routing.ts +0 -419
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { enforceResponseSafety, enforceResponseSafetyWithMode } from "../dist/chat/response-safety.js";
|
|
4
|
+
|
|
5
|
+
// Default-mode safety pass: the destructive fragments must disappear from the
// output, the redaction marker and a "Safety note:" must be present, and the
// "Safety-preserving workflow" text must be absent.
test("enforceResponseSafety redacts destructive commands", () => {
  const input = [
    "Use rm -rf ./tmp and sudo rm /var/tmp to clean.",
    "Alternative: find /var/lib -maxdepth 5 -print | xargs rm -rf",
  ].join("\n");
  const out = enforceResponseSafety(input);
  assert.equal(out.includes("rm -rf"), false);
  assert.equal(out.includes("sudo rm"), false);
  assert.equal(out.includes("[redacted-destructive-command]"), true);
  assert.equal(out.includes("Safety-preserving workflow"), false);
  assert.equal(out.includes("Safety note:"), true);
});
|
|
17
|
+
|
|
18
|
+
// Text without any destructive command must come back exactly as it went in.
test("enforceResponseSafety keeps normal text unchanged", () => {
  const input = "Use your preferred inspection command to validate environment state.";
  const out = enforceResponseSafety(input);
  assert.equal(out, input);
});
|
|
23
|
+
|
|
24
|
+
// Mode "off": even a destructive command is passed through untouched.
test("enforceResponseSafetyWithMode off keeps dangerous text unchanged", () => {
  const input = "run rm -rf ./tmp now";
  const out = enforceResponseSafetyWithMode(input, "off");
  assert.equal(out, input);
});
|
|
29
|
+
|
|
30
|
+
// Mode "balanced": the command is replaced by the redaction marker, and the
// "Safety-preserving workflow" text is not added.
test("enforceResponseSafetyWithMode balanced redacts but keeps structure", () => {
  const input = "run rm -rf ./tmp now";
  const out = enforceResponseSafetyWithMode(input, "balanced");
  assert.equal(out.includes("rm -rf"), false);
  assert.equal(out.includes("[redacted-destructive-command]"), true);
  assert.equal(out.includes("Safety-preserving workflow"), false);
});
|
|
37
|
+
|
|
38
|
+
// Mode "strict": neither the command nor the redaction marker may remain, and
// the "Safety-preserving workflow" text must be present.
test("enforceResponseSafetyWithMode strict removes redacted lines and adds generic workflow", () => {
  const input = "run rm -rf ./tmp now";
  const out = enforceResponseSafetyWithMode(input, "strict");
  assert.equal(out.includes("rm -rf"), false);
  assert.equal(out.includes("[redacted-destructive-command]"), false);
  assert.equal(out.includes("Safety-preserving workflow"), true);
});
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { buildSystemPrompt } from "../dist/iterm/session-hint.js";
|
|
4
|
+
|
|
5
|
+
// With a target session known, the rendered prompt must contain the template
// text, a "## Target Panel Session" section carrying the session/window/tab
// identifiers, and the base instruction passed as the third argument.
test("buildSystemPrompt renders policy, target session, and base prompt", () => {
  const startup = {
    chatSessionId: "chat-1",
    targetSessionId: "target-9",
    windowId: 42,
    tabIndex: 3,
    colorSnapshots: [],
  };
  const templates = {
    systemPrompt: [
      "Only use target panel tools.",
      "{{targetSessionSection}}",
      "Response safety policy:",
      "- Keep to evidence.",
    ].join("\n\n"),
  };

  const out = buildSystemPrompt(templates, startup, "Base instruction");
  assert.match(out, /Only use target panel tools\./);
  assert.match(out, /## Target Panel Session/);
  assert.match(out, /sessionId: "target-9"/);
  assert.match(out, /windowId: 42/);
  assert.match(out, /tabIndex: 3/);
  assert.match(out, /Response safety policy:/);
  assert.match(out, /Base instruction/);
});
|
|
31
|
+
|
|
32
|
+
// With targetSessionId (and window/tab) set to null, the target-session heading
// must not be rendered while the rest of the template and the base instruction
// still appear.
test("buildSystemPrompt omits target section when targetSessionId is missing", () => {
  const startup = {
    chatSessionId: "chat-1",
    targetSessionId: null,
    windowId: null,
    tabIndex: null,
    colorSnapshots: [],
  };
  const templates = {
    systemPrompt: [
      "Only use target panel tools.",
      "{{targetSessionSection}}",
      "Response safety policy:",
      "- Keep to evidence.",
    ].join("\n\n"),
  };

  const out = buildSystemPrompt(templates, startup, "Base instruction");
  assert.match(out, /Only use target panel tools\./);
  assert.doesNotMatch(out, /## Target Panel Session/);
  assert.match(out, /Response safety policy:/);
  assert.match(out, /Base instruction/);
});
|
|
55
|
+
|
|
56
|
+
// Passing { targetOs: "linux" } as the fourth argument, with a
// {{targetOsSection}} placeholder in the template, must render the
// "## Target System OS" section including the OS name and compatibility hint.
test("buildSystemPrompt includes target os section when provided", () => {
  const startup = {
    chatSessionId: "chat-1",
    targetSessionId: "target-9",
    windowId: 42,
    tabIndex: 3,
    colorSnapshots: [],
  };
  const templates = {
    systemPrompt: [
      "Only use target panel tools.",
      "{{targetSessionSection}}",
      "{{targetOsSection}}",
      "Response safety policy:",
      "- Keep to evidence.",
    ].join("\n\n"),
  };

  const out = buildSystemPrompt(templates, startup, "Base instruction", { targetOs: "linux" });
  assert.match(out, /## Target System OS/);
  assert.match(out, /- os: linux/);
  assert.match(out, /Commands in plans must be compatible with this OS/);
});
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import {
|
|
4
|
+
applyStartupPanelColorsWithDeps,
|
|
5
|
+
CHAT_BG,
|
|
6
|
+
CHAT_FG,
|
|
7
|
+
TARGET_BG,
|
|
8
|
+
TARGET_FG,
|
|
9
|
+
} from "../dist/startup/colors.js";
|
|
10
|
+
|
|
11
|
+
/**
 * Wrap a payload in a `{ result }` envelope and hand it back as an
 * already-resolved promise. Used by the `listCurrentWindowSessions` stubs in
 * this file.
 *
 * @param {unknown} result - Payload to expose under the `result` key.
 * @returns {Promise<{ result: unknown }>} Promise resolved with the envelope.
 */
function makeListResult(result) {
  return Promise.resolve({ result });
}
|
|
14
|
+
|
|
15
|
+
// An empty session listing must yield null session ids, no color snapshots,
// and no error callbacks.
test("applyStartupPanelColorsWithDeps returns empty result when no sessions exist", async () => {
  const errors = [];
  const deps = {
    getProcessTty: () => "/dev/ttys001",
    listCurrentWindowSessions: () => makeListResult({ count: 0, windowId: null, sessions: [] }),
    splitPane: async () => ({ result: {} }),
    setSessionColors: async () => ({ result: {} }),
    captureSessionColors: () => null,
    onError: (message) => errors.push(String(message)),
  };

  const out = await applyStartupPanelColorsWithDeps(deps);
  assert.equal(out.chatSessionId, null);
  assert.equal(out.targetSessionId, null);
  assert.deepEqual(out.colorSnapshots, []);
  assert.equal(errors.length, 0);
});
|
|
32
|
+
|
|
33
|
+
// The first listing reports only the chat session; every later listing reports
// chat + target. The test expects exactly one split, one color call per
// session with the exported palette constants, and two captured snapshots.
test("applyStartupPanelColorsWithDeps auto-splits single panel and colors both sessions", async () => {
  const splitCalls = [];
  const colorCalls = [];
  let listCalls = 0;
  const deps = {
    getProcessTty: () => "/dev/ttys001",
    listCurrentWindowSessions: async () => {
      listCalls += 1;
      if (listCalls === 1) {
        return makeListResult({
          count: 1,
          windowId: 12,
          sessions: [
            { windowId: 12, tabIndex: 0, sessionId: "chat", tty: "/dev/ttys001", isCurrentSession: true },
          ],
        });
      }
      return makeListResult({
        count: 2,
        windowId: 12,
        sessions: [
          { windowId: 12, tabIndex: 0, sessionId: "chat", tty: "/dev/ttys001", isCurrentSession: true },
          { windowId: 12, tabIndex: 0, sessionId: "target", tty: "/dev/ttys002", isCurrentSession: false },
        ],
      });
    },
    splitPane: async (args) => {
      splitCalls.push(args);
      return { result: { ok: true } };
    },
    setSessionColors: async (args) => {
      colorCalls.push(args);
      return { result: { ok: true } };
    },
    captureSessionColors: ({ sessionId }) => ({
      windowId: 12,
      tabIndex: 0,
      sessionId,
      background: [1, 2, 3],
      foreground: [4, 5, 6],
    }),
    onError: () => {},
  };

  const out = await applyStartupPanelColorsWithDeps(deps);

  assert.equal(splitCalls.length, 1);
  assert.equal(colorCalls.length, 2);
  assert.equal(out.chatSessionId, "chat");
  assert.equal(out.targetSessionId, "target");
  assert.equal(out.windowId, 12);
  assert.equal(out.tabIndex, 0);
  assert.equal(out.colorSnapshots.length, 2);

  const chatCall = colorCalls.find((c) => c.sessionId === "chat");
  const targetCall = colorCalls.find((c) => c.sessionId === "target");
  // Guard the lookups so a missing call is reported as an assertion failure
  // instead of a TypeError on `undefined.backgroundHex`.
  assert.ok(chatCall, "expected a setSessionColors call for the chat session");
  assert.ok(targetCall, "expected a setSessionColors call for the target session");
  assert.equal(chatCall.backgroundHex, CHAT_BG);
  assert.equal(chatCall.foregroundHex, CHAT_FG);
  assert.equal(targetCall.backgroundHex, TARGET_BG);
  assert.equal(targetCall.foregroundHex, TARGET_FG);
});
|
|
94
|
+
|
|
95
|
+
// The only listed session has a different tty than the process and is not the
// current session, so the test expects null ids and exactly one error whose
// message mentions that the chat session could not be identified.
test("applyStartupPanelColorsWithDeps returns empty and logs when chat session cannot be identified", async () => {
  const errors = [];
  const deps = {
    getProcessTty: () => "/dev/ttys999",
    listCurrentWindowSessions: () =>
      makeListResult({
        count: 1,
        windowId: 3,
        sessions: [{ windowId: 3, tabIndex: 0, sessionId: "other", tty: "/dev/ttys001", isCurrentSession: false }],
      }),
    splitPane: async () => ({ result: {} }),
    setSessionColors: async () => ({ result: {} }),
    captureSessionColors: () => null,
    onError: (message) => errors.push(String(message)),
  };

  const out = await applyStartupPanelColorsWithDeps(deps);
  assert.equal(out.chatSessionId, null);
  assert.equal(out.targetSessionId, null);
  assert.equal(errors.length, 1);
  assert.match(errors[0], /could not identify chat session/i);
});
|
|
117
|
+
|
|
118
|
+
// setSessionColors throws for the target session only. The call must still
// resolve with both session ids, and the failure must reach onError with a
// "failed to set color" message.
test("applyStartupPanelColorsWithDeps logs color apply failure and continues", async () => {
  const errors = [];
  const deps = {
    getProcessTty: () => "/dev/ttys001",
    listCurrentWindowSessions: () =>
      makeListResult({
        count: 2,
        windowId: 5,
        sessions: [
          { windowId: 5, tabIndex: 1, sessionId: "chat", tty: "/dev/ttys001", isCurrentSession: true },
          { windowId: 5, tabIndex: 1, sessionId: "target", tty: "/dev/ttys002", isCurrentSession: false },
        ],
      }),
    splitPane: async () => ({ result: {} }),
    setSessionColors: async ({ sessionId }) => {
      if (sessionId === "target") throw new Error("set failed");
      return { result: { ok: true } };
    },
    captureSessionColors: () => null,
    // Record message and error together so the regex below can match either part.
    onError: (message, error) => errors.push(`${String(message)} ${String(error ?? "")}`.trim()),
  };

  const out = await applyStartupPanelColorsWithDeps(deps);
  assert.equal(out.chatSessionId, "chat");
  assert.equal(out.targetSessionId, "target");
  assert.equal(errors.some((e) => /failed to set color/i.test(e)), true);
});
|
|
145
|
+
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { enforceTargetPanelExecutionPolicy, setTargetPanelHint } from "../dist/iterm/target-panel-policy.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Build a minimal tool stub consisting only of a name and an invoke function.
 *
 * @param {string} name - Tool name as used by the tests (fully qualified ids here).
 * @param {Function} invoke - Function stored as the tool's `invoke` property.
 * @returns {{ name: string, invoke: Function }} The stub tool object.
 */
function createTool(name, invoke) {
  return { name, invoke };
}
|
|
8
|
+
|
|
9
|
+
// After the policy is enforced, invoking listDir must call
// itermRunCommandInSession exactly once with a single `find` command (no
// `&&` / `||` chaining) and return a listDir-shaped result built from the
// command output.
test("redirects listDir to itermRunCommandInSession", async () => {
  const calls = [];
  const itermTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
    async (args) => {
      calls.push(args);
      return {
        result: {
          ok: true,
          args,
          output: "./README.md\n./src",
          commandCompleted: true,
          exitCode: 0,
          sessionId: "target-session",
        },
      };
    },
  );
  const blockedTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.listDir",
    async () => ({ result: { shouldNotReach: true } }),
  );
  const ctx = { tools: [blockedTool, itermTool] };

  const unpatch = enforceTargetPanelExecutionPolicy(ctx);
  const out = await blockedTool.invoke({
    path: ".",
    recursive: true,
    includeHidden: false,
    maxDepth: 2,
    maxEntries: 50,
  });
  unpatch();

  assert.equal(calls.length, 1);
  assert.equal(typeof calls[0].command, "string");
  assert.equal(calls[0].command.includes("find"), true);
  assert.equal(calls[0].command.includes("-mindepth 1"), true);
  assert.equal(calls[0].command.includes("&&"), false);
  assert.equal(calls[0].command.includes("||"), false);
  assert.equal(out?.result?.redirected, true);
  assert.equal(out?.result?.requestedPath, ".");
  assert.equal(out?.result?.resolvedPath, ".");
  assert.equal(out?.result?.pathFallbackUsed, false);
  assert.equal(out?.result?.outputText, "./README.md\n./src");
  assert.equal(out?.result?.commandCompleted, true);
  assert.equal(out?.result?.exitCode, 0);
  assert.equal(out?.result?.sessionId, "target-session");
  assert.deepEqual(out?.result?.entries, [
    { name: "README.md", kind: "other", size: 0, mtime: "" },
    { name: "src", kind: "other", size: 0, mtime: "" },
  ]);
  assert.equal(out?.result?.totalEntries, 2);
  assert.equal(out?.result?.truncated, false);
  assert.equal(
    out?.result?.redirectedTool,
    "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
  );
});
|
|
68
|
+
|
|
69
|
+
// When the target reports a missing path, the redirect must keep the requested
// path as both requestedPath and resolvedPath (no fallback) and pass the raw
// error line through as outputText and as a single entry.
test("listDir redirect keeps requested path metadata when target path is missing", async () => {
  const itermTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
    async () => ({
      result: {
        output: "ls: sandbox: No such file or directory",
      },
    }),
  );
  const blockedTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.listDir",
    async () => ({ result: { shouldNotReach: true } }),
  );
  const ctx = { tools: [blockedTool, itermTool] };

  const unpatch = enforceTargetPanelExecutionPolicy(ctx);
  const out = await blockedTool.invoke({ path: "sandbox", recursive: true });
  unpatch();

  assert.equal(out?.result?.requestedPath, "sandbox");
  assert.equal(out?.result?.resolvedPath, "sandbox");
  assert.equal(out?.result?.pathFallbackUsed, false);
  assert.equal(out?.result?.outputText, "ls: sandbox: No such file or directory");
  assert.deepEqual(out?.result?.entries, [
    { name: "ls: sandbox: No such file or directory", kind: "other", size: 0, mtime: "" },
  ]);
});
|
|
96
|
+
|
|
97
|
+
// gitRead is expected to be blocked rather than redirected: the result must be
// flagged `blocked` and name itermRunCommandInSession as the required tool.
test("keeps non-redirectable blocked tools rejected", async () => {
  const itermTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
    async () => ({ result: { ok: true } }),
  );
  const gitReadTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.gitRead",
    async () => ({ result: { shouldNotReach: true } }),
  );
  const ctx = { tools: [gitReadTool, itermTool] };

  const unpatch = enforceTargetPanelExecutionPolicy(ctx);
  const out = await gitReadTool.invoke({});
  unpatch();
  assert.equal(out?.result?.blocked, true);
  assert.equal(out?.result?.requiredTool, "itermRunCommandInSession");
});
|
|
114
|
+
|
|
115
|
+
// With no itermRunCommandInSession tool in the context, invoking the
// redirectable listDir tool must reject with the policy error message.
test("throws clear policy error when redirectable tool has no itermRunCommandInSession", async () => {
  const blockedTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.listDir",
    async () => ({ result: { shouldNotReach: true } }),
  );
  const ctx = { tools: [blockedTool] };

  const unpatch = enforceTargetPanelExecutionPolicy(ctx);
  await assert.rejects(
    () =>
      blockedTool.invoke({
        path: ".",
        recursive: false,
      }),
    /blocked in iTermBot policy/i,
  );
  unpatch();
});
|
|
133
|
+
|
|
134
|
+
// While patched, runCommand is redirected; after unpatch() the same tool object
// must run its original invoke implementation again.
test("unpatch restores original invoke behavior", async () => {
  const itermTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
    async () => ({ result: { ok: true } }),
  );
  const blockedTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.runCommand",
    async (args) => ({ result: { original: true, args } }),
  );
  const ctx = { tools: [blockedTool, itermTool] };

  const unpatch = enforceTargetPanelExecutionPolicy(ctx);
  const redirected = await blockedTool.invoke({ command: "pwd" });
  assert.equal(redirected?.result?.redirected, true);

  unpatch();
  const restored = await blockedTool.invoke({ command: "pwd" });
  assert.equal(restored?.result?.original, true);
});
|
|
153
|
+
|
|
154
|
+
// With a target panel hint configured, the redirected call must carry the
// hint's windowId, tabIndex and sessionId.
test("redirect includes target panel hint when configured", async () => {
  const calls = [];
  const itermTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
    async (args) => {
      calls.push(args);
      return { result: { ok: true } };
    },
  );
  const blockedTool = createTool(
    "npm.easynet.agent.tool.buildin.0.0.70.runCommand",
    async () => ({ result: { shouldNotReach: true } }),
  );
  const ctx = { tools: [blockedTool, itermTool] };

  // NOTE(review): the hint is set without reference to ctx, so it is presumably
  // shared state inside the policy module. The finally blocks make sure the
  // hint and the patch are undone even if invoke throws, so a failure here
  // cannot leak into other tests.
  let out;
  setTargetPanelHint({ windowId: 9, tabIndex: 2, sessionId: "target-session" });
  try {
    const unpatch = enforceTargetPanelExecutionPolicy(ctx);
    try {
      out = await blockedTool.invoke({ command: "pwd" });
    } finally {
      unpatch();
    }
  } finally {
    setTargetPanelHint(null);
  }

  assert.equal(calls.length, 1);
  assert.equal(calls[0].windowId, 9);
  assert.equal(calls[0].tabIndex, 2);
  assert.equal(calls[0].sessionId, "target-session");
  assert.equal(out?.result?.redirected, true);
});
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
import test, { beforeEach } from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { AgentContextTokens } from "@easynet/agent-common/context";
|
|
4
|
+
import { runWithTextToolCallRecovery } from "../dist/runtime/text-tool-call-recovery.js";
|
|
5
|
+
|
|
6
|
+
// Clear ITB_BOOTSTRAP_EVIDENCE_COMMAND before every test so a value inherited
// from the developer's shell or set by an earlier test cannot affect a run.
beforeEach(() => {
  delete process.env.ITB_BOOTSTRAP_EVIDENCE_COMMAND;
});
|
|
9
|
+
|
|
10
|
+
/**
 * Build a stub agent runtime for the recovery tests.
 *
 * @param {object} options
 * @param {Array<object>} [options.tools=[]] - Value returned by
 *   `context.get(AgentContextTokens.Tools)`.
 * @param {Function} options.runImpl - Function exposed as the runtime's `run`.
 * @returns {{ context: { get(token: unknown): unknown }, run: Function }}
 *   Runtime whose context answers only the Tools token; every other token
 *   yields `undefined`.
 */
function makeRuntime({ tools = [], runImpl }) {
  return {
    context: {
      get(token) {
        if (token === AgentContextTokens.Tools) return tools;
        return undefined;
      },
    },
    run: runImpl,
  };
}
|
|
21
|
+
|
|
22
|
+
// A plain-text model answer with no tool-call block must be returned as-is and
// flagged as not recovered.
test("runWithTextToolCallRecovery returns original text when no tool-call block", async () => {
  const runtime = makeRuntime({
    runImpl: async () => ({ text: "plain answer" }),
  });
  const out = await runWithTextToolCallRecovery(runtime, "hello", () => {});
  assert.equal(out.recovered, false);
  assert.equal(out.text, "plain answer");
});
|
|
30
|
+
|
|
31
|
+
// The model answers with a textual <tool-call> block naming "listDir". The
// test expects the matching tool to be invoked once with the parsed arguments
// and the tool output to appear in the returned text.
test("runWithTextToolCallRecovery executes recovered tool call and formats deterministic recovery output", async () => {
  const calls = [];
  const runtime = makeRuntime({
    tools: [
      {
        name: "npm.easynet.agent.tool.buildin.0.0.70.listDir",
        async invoke(args) {
          calls.push(args);
          return { result: { output: "fileA\nfileB" } };
        },
      },
    ],
    runImpl: async (input) => {
      if (input === "list all files and do analysis") {
        return {
          text: [
            "<tool-call>",
            "{\"name\":\"listDir\",\"arguments\":{\"path\":\".\",\"maxEntries\":10}}",
            "</tool-call>",
          ].join("\n"),
        };
      }
      // Any other prompt returns a sentinel text that the assertions below do not expect.
      return { text: "should-not-be-used-for-finalize" };
    },
  });

  const out = await runWithTextToolCallRecovery(runtime, "list all files and do analysis", () => {});
  assert.equal(out.recovered, true);
  assert.equal(out.text.includes("fileA"), true);
  assert.equal(calls.length, 1);
  assert.equal(calls[0].path, ".");
  assert.equal(calls[0].maxEntries, 10);
});
|
|
64
|
+
|
|
65
|
+
// A tool is available, but the model answers in plain text: nothing may be
// invoked and the first-pass text must be returned unchanged.
test("runWithTextToolCallRecovery returns first-pass text when no tool-call block is present", async () => {
  const calls = [];
  // Built with the file's makeRuntime helper instead of a hand-rolled stub.
  const runtime = makeRuntime({
    tools: [
      {
        name: "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
        async invoke(args) {
          calls.push(args);
          return { result: { output: "/workspace\n" } };
        },
      },
    ],
    runImpl: async () => ({
      text: "need more context",
      messages: [{ type: "ai", content: "need more context" }],
    }),
  });

  const out = await runWithTextToolCallRecovery(runtime, "list files", () => {});
  assert.equal(out.recovered, false);
  assert.equal(calls.length, 0);
  assert.equal(out.text, "need more context");
});
|
|
97
|
+
|
|
98
|
+
// Even though itermRunCommandInSession is available, a plain-text answer must
// not trigger any command on its own: zero tool invocations are expected.
test("runWithTextToolCallRecovery does not execute fallback baseline commands", async () => {
  const calls = [];
  // Built with the file's makeRuntime helper instead of a hand-rolled stub.
  const runtime = makeRuntime({
    tools: [
      {
        name: "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
        async invoke(args) {
          calls.push(args);
          return { result: { output: "/workspace\n" } };
        },
      },
    ],
    runImpl: async () => ({
      text: "need more context",
      messages: [{ type: "ai", content: "need more context" }],
    }),
  });

  const out = await runWithTextToolCallRecovery(runtime, "list files", () => {});
  assert.equal(out.recovered, false);
  assert.equal(calls.length, 0);
  assert.equal(out.text, "need more context");
});
|
|
130
|
+
|
|
131
|
+
// The stub tool would return df-style output if it were called, but the model
// answers in plain text, so the tool must stay untouched and the text must be
// returned unchanged.
test("runWithTextToolCallRecovery ignores available tools when response is plain text", async () => {
  const calls = [];
  // Built with the file's makeRuntime helper instead of a hand-rolled stub.
  const runtime = makeRuntime({
    tools: [
      {
        name: "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
        async invoke(args) {
          calls.push(args);
          return {
            result: {
              output: [
                "(py312) .../>df -h",
                "Filesystem Size Used Avail Capacity iused ifree %iused Mounted on",
                "/dev/disk3s5 460Gi 409Gi 27Gi 94% 9.7M 283M 3% /System/Volumes/Data",
              ].join("\n"),
            },
          };
        },
      },
    ],
    runImpl: async () => ({
      text: "need more context",
      messages: [{ type: "ai", content: "need more context" }],
    }),
  });

  const out = await runWithTextToolCallRecovery(runtime, "investigate disk usage", () => {});
  assert.equal(out.recovered, false);
  assert.equal(calls.length, 0);
  assert.equal(out.text, "need more context");
});
|
|
171
|
+
|
|
172
|
+
// When runtime.run rejects, the error must propagate to the caller unchanged
// and no tool may be invoked.
test("runWithTextToolCallRecovery rethrows runtime invoke failures", async () => {
  // `calls` was referenced by the stub tool without being declared, which would
  // have raised a ReferenceError had the tool ever been invoked.
  const calls = [];
  const runtime = makeRuntime({
    tools: [
      {
        name: "npm.easynet.agent.tool.buildin.0.0.70.itermRunCommandInSession",
        async invoke(args) {
          calls.push(args);
          return { result: { output: "/workspace\n" } };
        },
      },
    ],
    runImpl: async () => {
      throw new Error("Agent invoke timed out after 90000ms");
    },
  });

  await assert.rejects(
    () => runWithTextToolCallRecovery(runtime, "investigate disk usage", () => {}),
    /Agent invoke timed out after 90000ms/,
  );
  assert.equal(calls.length, 0);
});
|