@polderlabs/bizar-plugin 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +448 -0
- package/bun.lock +88 -0
- package/index.ts +1113 -0
- package/package.json +42 -0
- package/scripts/check-forbidden-imports.sh +33 -0
- package/src/background-state.ts +463 -0
- package/src/background.ts +964 -0
- package/src/commands-impl.ts +369 -0
- package/src/commands.ts +880 -0
- package/src/event-stream.ts +574 -0
- package/src/fingerprint.ts +120 -0
- package/src/handoff.ts +79 -0
- package/src/http-client.ts +467 -0
- package/src/logger.ts +144 -0
- package/src/loop.ts +176 -0
- package/src/options.ts +421 -0
- package/src/plan-fs.ts +323 -0
- package/src/report.ts +178 -0
- package/src/research-prompt.ts +35 -0
- package/src/serve.ts +476 -0
- package/src/settings.ts +349 -0
- package/src/state.ts +298 -0
- package/src/tools/bg-collect.ts +104 -0
- package/src/tools/bg-get-comments.ts +239 -0
- package/src/tools/bg-kill.ts +87 -0
- package/src/tools/bg-spawn.ts +263 -0
- package/src/tools/bg-status.ts +99 -0
- package/src/tools/plan-action.ts +767 -0
- package/src/tools/wait-for-feedback.ts +402 -0
- package/tests/attach-handler-bug.test.ts +166 -0
- package/tests/background-state.test.ts +277 -0
- package/tests/background.test.ts +402 -0
- package/tests/block.test.ts +193 -0
- package/tests/canonical-key-order.test.ts +71 -0
- package/tests/commands-impl.test.ts +442 -0
- package/tests/commands.test.ts +548 -0
- package/tests/config.test.ts +122 -0
- package/tests/dispose.test.ts +336 -0
- package/tests/event-stream.test.ts +409 -0
- package/tests/event.test.ts +262 -0
- package/tests/fingerprint.test.ts +161 -0
- package/tests/http-client.test.ts +403 -0
- package/tests/init-helpers.test.ts +203 -0
- package/tests/integration/slash-command.test.ts +348 -0
- package/tests/integration/tool-routing.test.ts +314 -0
- package/tests/loop.test.ts +397 -0
- package/tests/options.test.ts +274 -0
- package/tests/serve.test.ts +335 -0
- package/tests/settings.test.ts +351 -0
- package/tests/stall-think.test.ts +749 -0
- package/tests/state.test.ts +275 -0
- package/tests/tools/bg-collect.test.ts +337 -0
- package/tests/tools/bg-get-comments.test.ts +485 -0
- package/tests/tools/bg-kill.test.ts +231 -0
- package/tests/tools/bg-spawn.test.ts +311 -0
- package/tests/tools/bg-status.test.ts +216 -0
- package/tests/tools/plan-action.test.ts +599 -0
- package/tests/tools/wait-for-feedback.test.ts +390 -0
- package/tsconfig.json +29 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Block (threshold-12) tests.
|
|
3
|
+
*
|
|
4
|
+
* Spec contract (tests/block.test.ts per §12.1):
|
|
5
|
+
* - Pre-populate state with 11 entries sharing fingerprint F.
|
|
6
|
+
* - The 12th identical call triggers the block action.
|
|
7
|
+
* - The block action's `reason` contains the tool name.
|
|
8
|
+
* - The block action's `reason` contains the substring `loop`
|
|
9
|
+
* (case-insensitive) OR the substring `escalate`.
|
|
10
|
+
*
|
|
11
|
+
* We test the `decide()` function directly. The `tool.execute.before`
|
|
12
|
+
* hook in `index.ts` throws `new Error(decision.reason)` whenever
|
|
13
|
+
* `decide()` returns `{ action: "block", … }`, so the block-message
|
|
14
|
+
* contract is fully owned by `decide()` + `handoff.ts`. Testing
|
|
15
|
+
* `decide()` keeps this file self-contained and independent of
|
|
16
|
+
* Thor's `state.ts` and `fingerprint.ts` modules.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { describe, test, expect } from "bun:test";
|
|
20
|
+
|
|
21
|
+
import { decide } from "../src/loop.js";
|
|
22
|
+
import {
|
|
23
|
+
DEFAULT_OPTIONS,
|
|
24
|
+
type NormalizedOptions,
|
|
25
|
+
} from "../src/options.js";
|
|
26
|
+
import { blockMessage } from "../src/handoff.js";
|
|
27
|
+
|
|
28
|
+
// Local re-declaration of SessionState (see tests/loop.test.ts for rationale).
|
|
29
|
+
/**
 * One recorded tool invocation, as stored in `SessionState.toolCalls`.
 *
 * This is a test-local declaration; it only needs to be structurally
 * compatible with what `decide()` reads.
 */
interface ToolCall {
  /** Tool name recorded for the call (the tests use "read" and "bash"). */
  tool: string;
  /** Fingerprint string of the call; the tests seed the same value they pass to `decide()`. */
  fingerprint: string;
  /** Numeric timestamp of the call — presumably epoch milliseconds; confirm against src/state.ts. */
  at: number;
  /** Optional result marker; never set by the tests in this file. */
  outcome?: "ok" | "error";
}
|
|
35
|
+
/**
 * Per-session bookkeeping handed to `decide()` as its first argument.
 *
 * Test-local declaration. The field meanings below are inferred from the
 * names and from how this file uses them — verify against src/state.ts.
 */
interface SessionState {
  /** Identifier of the session. */
  sessionId: string;
  /** Name of the parent agent, or `null` when there is none. */
  parentAgent: string | null;
  /** Numeric timestamp at which the session started (same unit as `ToolCall.at`). */
  startedAt: number;
  /** Numeric timestamp of the most recent activity. */
  lastActivityAt: number;
  /** Turn counter; starts at 0 in these tests. */
  turnCount: number;
  /** Recorded tool calls; each test seeds this list before calling `decide()`. */
  toolCalls: ToolCall[];
  /** Counter of warnings issued; starts at 0 in these tests. */
  warningsIssued: number;
  /** Counter of blocks triggered; starts at 0 in these tests. */
  blocksTriggered: number;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Creates a fresh session state for a test: no recorded tool calls and
 * every counter at zero.
 *
 * A new object (with a new `toolCalls` array) is returned on every call,
 * so tests can mutate the result freely.
 *
 * @returns A `SessionState` for session "sess-block" with parent agent "odin".
 */
function emptyState(): SessionState {
  // Start and last-activity timestamps are deliberately identical.
  const sessionStart = 1_700_000_000_000;
  const fresh: SessionState = {
    sessionId: "sess-block",
    parentAgent: "odin",
    startedAt: sessionStart,
    lastActivityAt: sessionStart,
    turnCount: 0,
    toolCalls: [],
    warningsIssued: 0,
    blocksTriggered: 0,
  };
  return fresh;
}
|
|
58
|
+
|
|
59
|
+
// Shared fixtures for the tests in this file.

// Fingerprint used for every seeded call and passed to `decide()`.
const FP = "fp:read:loop";
// Default tool name recorded on seeded calls; the block message is expected to contain it.
const TOOL = "read";
// "Current time" argument for `decide()`: 500_000 later than the
// timestamps `emptyState()` starts from.
const NOW = 1_700_000_500_000;
|
|
63
|
+
|
|
64
|
+
// Options used by the block tests: the defaults with a wider window.
//
// The block band can only fire once the in-window count reaches the block
// threshold (12). The default `loopWindowSize` is 10 (spec §6.1); the
// spec's `loopThresholdBlock <= loopWindowSize + 2` constraint permits
// block=12 with window=10, but in that configuration the count is capped
// by the window and can never reach 12. That default-config gap is a known
// limitation (see README "Limitations" §13 #11 and the handoff.ts header),
// so these tests widen the window to make the block band reachable.
const BLOCK_TEST_OPTS: NormalizedOptions = { ...DEFAULT_OPTIONS, loopWindowSize: 15 };
|
|
76
|
+
|
|
77
|
+
describe("block — threshold 12 throw (spec §12.1)", () => {
  /**
   * Builds a fresh state holding `count` calls that all share fingerprint
   * `FP`, recorded under `tool`, with timestamps `firstAt`, `firstAt + 1`, …
   */
  const seededState = (count: number, tool: string = TOOL, firstAt: number = NOW) => {
    const state = emptyState();
    for (let offset = 0; offset < count; offset += 1) {
      state.toolCalls.push({ tool, fingerprint: FP, at: firstAt + offset });
    }
    return state;
  };

  /** Spec §12.1: the message must mention "loop" (any case) or "escalate". */
  const mentionsLoopOrEscalate = (message: string): boolean =>
    /loop/i.test(message) || /escalate/i.test(message);

  test("11 prior identical calls + 12th identical call triggers block", () => {
    // Pre-populate with 11 entries sharing fingerprint F.
    const state = seededState(11, TOOL, 1_700_000_000_000);
    expect(state.toolCalls).toHaveLength(11);

    // The hook appends the current call to the state before it calls
    // decide(), so the 12th call must already be present in the state.
    state.toolCalls.push({
      tool: TOOL,
      fingerprint: FP,
      at: NOW,
    });
    expect(state.toolCalls).toHaveLength(12);

    const decision = decide(state, FP, NOW, BLOCK_TEST_OPTS);

    expect(decision.action).toBe("block");
    // Narrow the decision so `reason` is accessible below.
    if (decision.action !== "block") return;

    // Rejection message properties (spec §12.1):
    // 1. It contains the tool name.
    expect(decision.reason).toContain(TOOL);
    // 2. It contains "loop" (case-insensitive) OR "escalate".
    const matches = mentionsLoopOrEscalate(decision.reason);
    expect(matches).toBe(true);
  });

  test("block reason matches the canonical block template", () => {
    const decision = decide(seededState(12), FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;
    expect(decision.reason).toBe(blockMessage(TOOL));
  });

  test("block fires regardless of which tool name, as long as fingerprint matches", () => {
    const decision = decide(seededState(12, "bash"), FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;
    // The tool name in the message is recovered from the matching
    // window entries — here, "bash".
    expect(decision.reason).toContain("bash");
    expect(mentionsLoopOrEscalate(decision.reason)).toBe(true);
  });

  test("block fires with custom-configured block threshold", () => {
    const state = seededState(8);
    // Custom block threshold of 8, with the lower bands scaled down to
    // match. The window (12) is larger than the threshold, so the count
    // can reach 8.
    const opts: NormalizedOptions = {
      ...DEFAULT_OPTIONS,
      loopThresholdBlock: 8,
      loopThresholdEscalate: 6,
      loopThresholdWarn: 4,
      loopWindowSize: 12,
    };
    const decision = decide(state, FP, NOW, opts);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;
    // The canonical template still says "12" regardless of the
    // configured threshold (this is a known limitation, see README
    // "Limitations" §13 and handoff.ts header).
    expect(decision.reason).toBe(blockMessage(TOOL));
  });

  test("the hook path would throw — simulate by checking decide() shape", () => {
    // Pins the contract `index.ts` relies on: when `decide()` returns
    // `{ action: "block", reason }`, the hook throws `new Error(reason)`.
    // Only the shape is checked here; the throw itself is wired in
    // index.ts.
    const decision = decide(seededState(12), FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;

    // Simulate the hook's `throw new Error(d.reason)`: the Error's
    // message must be exactly the decision's reason.
    const thrown = new Error(decision.reason);
    expect(thrown.message).toBe(decision.reason);
    expect(thrown.message).toContain(TOOL);
    expect(mentionsLoopOrEscalate(thrown.message)).toBe(true);
  });

  test("11 prior calls (no 12th) does NOT block", () => {
    // 11 calls total — one short of the block threshold of 12.
    const decision = decide(seededState(11), FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).not.toBe("block");
    // It should be escalate (count=11 >= escalate threshold of 8).
    expect(decision.action).toBe("escalate");
  });
});
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* canonical-key-order.test.ts
|
|
3
|
+
*
|
|
4
|
+
* Verifies that fingerprint.ts sorts object keys canonically (alphabetically)
|
|
5
|
+
* before JSON stringify, so that {a:1, b:2} and {b:2, a:1} produce the same
|
|
6
|
+
* fingerprint. Per LOW finding 34 / §12.1.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, test, expect } from "bun:test";
|
|
10
|
+
import { fingerprint } from "../src/fingerprint";
|
|
11
|
+
|
|
12
|
+
describe("fingerprint — canonical key order", () => {
  test("flat object: same keys/values in different insertion order produce the same fingerprint", () => {
    // Same keys and values; only the insertion order differs.
    const original = { path: "/tmp/foo.ts", recursive: false, limit: 10 };
    const reordered = { limit: 10, recursive: false, path: "/tmp/foo.ts" };

    const left = fingerprint("read", original, "/tmp");
    const right = fingerprint("read", reordered, "/tmp");

    expect(left).toBe(right);
  });

  test("nested objects: different insertion order at both levels also match", () => {
    // Key order differs at the top level and inside `meta`.
    const original = { meta: { z: 1, a: 2 }, path: "/x" };
    const reordered = { path: "/x", meta: { a: 2, z: 1 } };

    const left = fingerprint("edit", original, "/tmp");
    const right = fingerprint("edit", reordered, "/tmp");

    expect(left).toBe(right);
  });

  test("deeply nested: three levels of differing key order all resolve to same fingerprint", () => {
    // Key order differs inside `outer` and inside `outer.middle`.
    const original = {
      outer: {
        middle: {
          innerKey: "value",
          otherKey: 42,
        },
        alpha: "x",
      },
    };
    const reordered = {
      outer: {
        alpha: "x",
        middle: {
          otherKey: 42,
          innerKey: "value",
        },
      },
    };

    const left = fingerprint("bash", original, "/tmp");
    const right = fingerprint("bash", reordered, "/tmp");

    expect(left).toBe(right);
  });

  test("array order is preserved (arrays of same values in same order match)", () => {
    // Two distinct but equal argument objects.
    const first = { commands: ["echo a", "echo b"] };
    const second = { commands: ["echo a", "echo b"] };

    const left = fingerprint("bash", first, "/tmp");
    const right = fingerprint("bash", second, "/tmp");

    expect(left).toBe(right);
  });

  test("array with different order produces different fingerprint", () => {
    // Unlike object keys, array element order is significant.
    const forward = { commands: ["echo a", "echo b"] };
    const swapped = { commands: ["echo b", "echo a"] };

    const left = fingerprint("bash", forward, "/tmp");
    const right = fingerprint("bash", swapped, "/tmp");

    expect(left).not.toBe(right);
  });
});
|