@polderlabs/bizar-plugin 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +448 -0
  3. package/bun.lock +88 -0
  4. package/index.ts +1113 -0
  5. package/package.json +42 -0
  6. package/scripts/check-forbidden-imports.sh +33 -0
  7. package/src/background-state.ts +463 -0
  8. package/src/background.ts +964 -0
  9. package/src/commands-impl.ts +369 -0
  10. package/src/commands.ts +880 -0
  11. package/src/event-stream.ts +574 -0
  12. package/src/fingerprint.ts +120 -0
  13. package/src/handoff.ts +79 -0
  14. package/src/http-client.ts +467 -0
  15. package/src/logger.ts +144 -0
  16. package/src/loop.ts +176 -0
  17. package/src/options.ts +421 -0
  18. package/src/plan-fs.ts +323 -0
  19. package/src/report.ts +178 -0
  20. package/src/research-prompt.ts +35 -0
  21. package/src/serve.ts +476 -0
  22. package/src/settings.ts +349 -0
  23. package/src/state.ts +298 -0
  24. package/src/tools/bg-collect.ts +104 -0
  25. package/src/tools/bg-get-comments.ts +239 -0
  26. package/src/tools/bg-kill.ts +87 -0
  27. package/src/tools/bg-spawn.ts +263 -0
  28. package/src/tools/bg-status.ts +99 -0
  29. package/src/tools/plan-action.ts +767 -0
  30. package/src/tools/wait-for-feedback.ts +402 -0
  31. package/tests/attach-handler-bug.test.ts +166 -0
  32. package/tests/background-state.test.ts +277 -0
  33. package/tests/background.test.ts +402 -0
  34. package/tests/block.test.ts +193 -0
  35. package/tests/canonical-key-order.test.ts +71 -0
  36. package/tests/commands-impl.test.ts +442 -0
  37. package/tests/commands.test.ts +548 -0
  38. package/tests/config.test.ts +122 -0
  39. package/tests/dispose.test.ts +336 -0
  40. package/tests/event-stream.test.ts +409 -0
  41. package/tests/event.test.ts +262 -0
  42. package/tests/fingerprint.test.ts +161 -0
  43. package/tests/http-client.test.ts +403 -0
  44. package/tests/init-helpers.test.ts +203 -0
  45. package/tests/integration/slash-command.test.ts +348 -0
  46. package/tests/integration/tool-routing.test.ts +314 -0
  47. package/tests/loop.test.ts +397 -0
  48. package/tests/options.test.ts +274 -0
  49. package/tests/serve.test.ts +335 -0
  50. package/tests/settings.test.ts +351 -0
  51. package/tests/stall-think.test.ts +749 -0
  52. package/tests/state.test.ts +275 -0
  53. package/tests/tools/bg-collect.test.ts +337 -0
  54. package/tests/tools/bg-get-comments.test.ts +485 -0
  55. package/tests/tools/bg-kill.test.ts +231 -0
  56. package/tests/tools/bg-spawn.test.ts +311 -0
  57. package/tests/tools/bg-status.test.ts +216 -0
  58. package/tests/tools/plan-action.test.ts +599 -0
  59. package/tests/tools/wait-for-feedback.test.ts +390 -0
  60. package/tsconfig.json +29 -0
@@ -0,0 +1,193 @@
1
+ /**
2
+ * Block (threshold-12) tests.
3
+ *
4
+ * Spec contract (tests/block.test.ts per §12.1):
5
+ * - Pre-populate state with 11 entries sharing fingerprint F.
6
+ * - The 12th identical call triggers the block action.
7
+ * - The block action's `reason` contains the tool name.
8
+ * - The block action's `reason` contains the substring `loop`
9
+ * (case-insensitive) OR the substring `escalate`.
10
+ *
11
+ * We test the `decide()` function directly. The `tool.execute.before`
12
+ * hook in `index.ts` throws `new Error(decision.reason)` whenever
13
+ * `decide()` returns `{ action: "block", … }`, so the block-message
14
+ * contract is fully owned by `decide()` + `handoff.ts`. Testing
15
+ * `decide()` keeps this file self-contained and independent of
16
+ * Thor's `state.ts` and `fingerprint.ts` modules.
17
+ */
18
+
19
+ import { describe, test, expect } from "bun:test";
20
+
21
+ import { decide } from "../src/loop.js";
22
+ import {
23
+ DEFAULT_OPTIONS,
24
+ type NormalizedOptions,
25
+ } from "../src/options.js";
26
+ import { blockMessage } from "../src/handoff.js";
27
+
// Session-state shapes mirrored locally instead of imported; the reasoning
// for the re-declaration is written up in tests/loop.test.ts.

/** A single entry of `SessionState.toolCalls`. */
interface ToolCall {
  tool: string;
  fingerprint: string;
  at: number;
  outcome?: "ok" | "error";
}

/** The state object these tests hand to `decide()` as its first argument. */
interface SessionState {
  sessionId: string;
  parentAgent: string | null;
  startedAt: number;
  lastActivityAt: number;
  turnCount: number;
  toolCalls: ToolCall[];
  warningsIssued: number;
  blocksTriggered: number;
}

/**
 * Creates a pristine session state: fixed ids, zeroed counters and a brand
 * new (unshared) `toolCalls` array on every call, so each test can push
 * entries without leaking them into another test.
 *
 * @returns A fresh `SessionState` with no recorded tool calls.
 */
function emptyState(): SessionState {
  // `startedAt` and `lastActivityAt` start out at the same instant.
  const sessionStart = 1_700_000_000_000;
  const fresh: SessionState = {
    sessionId: "sess-block",
    parentAgent: "odin",
    startedAt: sessionStart,
    lastActivityAt: sessionStart,
    turnCount: 0,
    toolCalls: [],
    warningsIssued: 0,
    blocksTriggered: 0,
  };
  return fresh;
}
58
+
/** Fingerprint stamped on every seeded call and passed to `decide()`. */
const FP = "fp:read:loop";
/** Tool name recorded on seeded calls (one test substitutes "bash"). */
const TOOL = "read";
/** Third argument of every `decide()` call here — presumably "current time" in epoch ms; confirm against src/loop.ts. */
const NOW = 1_700_000_500_000;
63
+
// Options shared by the block tests.
//
// The block band is only reachable when the window can hold at least as many
// entries as the block threshold. The default `loopWindowSize` is 10 (spec
// §6.1); the spec's `loopThresholdBlock <= loopWindowSize + 2` rule still
// permits block=12 alongside window=10, yet in that setup the count is capped
// by the window and never gets to 12. These tests therefore widen the window
// to 15. The unreachable default is a known limitation — see README
// "Limitations" §13 #11 and the handoff.ts header.
const BLOCK_TEST_OPTS: NormalizedOptions = { ...DEFAULT_OPTIONS, loopWindowSize: 15 };
76
+
describe("block — threshold 12 throw (spec §12.1)", () => {
  /**
   * Returns a fresh state holding `count` calls that all carry fingerprint
   * `FP`, recorded for `tool` and stamped `firstAt`, `firstAt + 1`, ….
   */
  const seededState = (count: number, tool: string, firstAt: number): SessionState => {
    const seeded = emptyState();
    for (let offset = 0; offset < count; offset++) {
      seeded.toolCalls.push({ tool, fingerprint: FP, at: firstAt + offset });
    }
    return seeded;
  };

  /** True when `message` contains "loop" or "escalate", ignoring case. */
  const mentionsLoopOrEscalate = (message: string): boolean =>
    /loop/i.test(message) || /escalate/i.test(message);

  test("11 prior identical calls + 12th identical call triggers block", () => {
    // Eleven earlier entries, all with fingerprint F.
    const state = seededState(11, TOOL, 1_700_000_000_000);
    expect(state.toolCalls).toHaveLength(11);

    // The hook records the in-flight call before consulting decide(), so
    // the 12th call is already part of the state when decide() runs.
    state.toolCalls.push({ tool: TOOL, fingerprint: FP, at: NOW });
    expect(state.toolCalls).toHaveLength(12);

    const decision = decide(state, FP, NOW, BLOCK_TEST_OPTS);

    expect(decision.action).toBe("block");
    // Unreachable after the expect above; kept to narrow the union type.
    if (decision.action !== "block") return;

    // Rejection-message properties required by spec §12.1:
    // (1) it names the tool …
    expect(decision.reason).toContain(TOOL);
    // (2) … and mentions "loop" (any case) or "escalate".
    const matches = mentionsLoopOrEscalate(decision.reason);
    expect(matches).toBe(true);
  });

  test("block reason matches the canonical block template", () => {
    const state = seededState(12, TOOL, NOW);
    const decision = decide(state, FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;
    expect(decision.reason).toBe(blockMessage(TOOL));
  });

  test("block fires regardless of which tool name, as long as fingerprint matches", () => {
    const state = seededState(12, "bash", NOW);
    const decision = decide(state, FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;
    // The message takes its tool name from the matching window entries,
    // which were all recorded as "bash".
    expect(decision.reason).toContain("bash");
    expect(mentionsLoopOrEscalate(decision.reason)).toBe(true);
  });

  test("block fires with custom-configured block threshold", () => {
    const state = seededState(8, TOOL, NOW);
    // Block threshold lowered to 8; the window is widened to 12 so the
    // count is able to reach it.
    const customOpts: NormalizedOptions = {
      ...DEFAULT_OPTIONS,
      loopThresholdBlock: 8,
      loopThresholdEscalate: 6,
      loopThresholdWarn: 4,
      loopWindowSize: 12,
    };
    const decision = decide(state, FP, NOW, customOpts);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;
    // Known limitation (README "Limitations" §13, handoff.ts header): the
    // canonical template keeps saying "12" whatever threshold is configured.
    expect(decision.reason).toBe(blockMessage(TOOL));
  });

  test("the hook path would throw — simulate by checking decide() shape", () => {
    // Pins the contract `index.ts` relies on: a `{ action: "block", reason }`
    // decision is turned into `throw new Error(reason)` by the hook. Only
    // the decision shape is checked here; the throw lives in index.ts.
    const state = seededState(12, TOOL, NOW);
    const decision = decide(state, FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).toBe("block");
    if (decision.action !== "block") return;

    // Stand-in for the hook's `throw new Error(d.reason)`: the Error's
    // message has to be exactly the decision's reason.
    const thrown = new Error(decision.reason);
    expect(thrown.message).toBe(decision.reason);
    expect(thrown.message).toContain(TOOL);
    expect(mentionsLoopOrEscalate(thrown.message)).toBe(true);
  });

  test("11 prior calls (no 12th) does NOT block", () => {
    // Eleven calls in total: one short of the block threshold of 12.
    const state = seededState(11, TOOL, NOW);
    const decision = decide(state, FP, NOW, BLOCK_TEST_OPTS);
    expect(decision.action).not.toBe("block");
    // Expected band is escalate (count=11 >= escalate threshold of 8).
    expect(decision.action).toBe("escalate");
  });
});
@@ -0,0 +1,71 @@
1
+ /**
2
+ * canonical-key-order.test.ts
3
+ *
4
+ * Verifies that fingerprint.ts sorts object keys canonically (alphabetically)
5
+ * before JSON stringify, so that {a:1, b:2} and {b:2, a:1} produce the same
6
+ * fingerprint. Per LOW finding 34 / §12.1.
7
+ */
8
+
9
+ import { describe, test, expect } from "bun:test";
10
+ import { fingerprint } from "../src/fingerprint";
11
+
describe("fingerprint — canonical key order", () => {
  // Third argument handed to every fingerprint() call in this suite.
  const TMP = "/tmp";

  test("flat object: same keys/values in different insertion order produce the same fingerprint", () => {
    const first = {
      tool: "read",
      args: { path: "/tmp/foo.ts", recursive: false, limit: 10 },
    };
    const second = {
      tool: "read",
      args: { limit: 10, recursive: false, path: "/tmp/foo.ts" },
    };
    // Identical keys and values; only the insertion order differs.
    const fpFirst = fingerprint(first.tool, first.args, TMP);
    const fpSecond = fingerprint(second.tool, second.args, TMP);
    expect(fpFirst).toBe(fpSecond);
  });

  test("nested objects: different insertion order at both levels also match", () => {
    const first = { tool: "edit", args: { meta: { z: 1, a: 2 }, path: "/x" } };
    const second = { tool: "edit", args: { path: "/x", meta: { a: 2, z: 1 } } };
    const fpFirst = fingerprint(first.tool, first.args, TMP);
    const fpSecond = fingerprint(second.tool, second.args, TMP);
    expect(fpFirst).toBe(fpSecond);
  });

  test("deeply nested: three levels of differing key order all resolve to same fingerprint", () => {
    const first = {
      tool: "bash",
      args: {
        outer: {
          middle: {
            innerKey: "value",
            otherKey: 42,
          },
          alpha: "x",
        },
      },
    };
    const second = {
      tool: "bash",
      args: {
        outer: {
          alpha: "x",
          middle: {
            otherKey: 42,
            innerKey: "value",
          },
        },
      },
    };
    const fpFirst = fingerprint(first.tool, first.args, TMP);
    const fpSecond = fingerprint(second.tool, second.args, TMP);
    expect(fpFirst).toBe(fpSecond);
  });

  test("array order is preserved (arrays of same values in same order match)", () => {
    const first = { tool: "bash", args: { commands: ["echo a", "echo b"] } };
    const second = { tool: "bash", args: { commands: ["echo a", "echo b"] } };
    const fpFirst = fingerprint(first.tool, first.args, TMP);
    const fpSecond = fingerprint(second.tool, second.args, TMP);
    expect(fpFirst).toBe(fpSecond);
  });

  test("array with different order produces different fingerprint", () => {
    // Same elements, swapped positions: the fingerprints must differ.
    const first = { tool: "bash", args: { commands: ["echo a", "echo b"] } };
    const second = { tool: "bash", args: { commands: ["echo b", "echo a"] } };
    const fpFirst = fingerprint(first.tool, first.args, TMP);
    const fpSecond = fingerprint(second.tool, second.args, TMP);
    expect(fpFirst).not.toBe(fpSecond);
  });
});