@blackbelt-technology/pi-agent-dashboard 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/AGENTS.md +80 -32
  2. package/README.md +7 -3
  3. package/docs/architecture.md +361 -12
  4. package/package.json +7 -7
  5. package/packages/extension/package.json +7 -2
  6. package/packages/extension/src/__tests__/ask-user-schema-discriminator.test.ts +141 -0
  7. package/packages/extension/src/__tests__/ask-user-tool.test.ts +51 -7
  8. package/packages/extension/src/__tests__/multiselect-dashboard-routing.test.ts +203 -0
  9. package/packages/extension/src/__tests__/multiselect-polyfill.test.ts +92 -0
  10. package/packages/extension/src/__tests__/no-tui-multiselect-arm-regression.test.ts +81 -0
  11. package/packages/extension/src/__tests__/openspec-activity-detector.test.ts +37 -0
  12. package/packages/extension/src/__tests__/ui-decorators.test.ts +309 -0
  13. package/packages/extension/src/__tests__/ui-modules.test.ts +293 -0
  14. package/packages/extension/src/ask-user-tool.ts +165 -57
  15. package/packages/extension/src/bridge.ts +97 -4
  16. package/packages/extension/src/multiselect-decode.ts +40 -0
  17. package/packages/extension/src/multiselect-polyfill.ts +38 -8
  18. package/packages/extension/src/ui-modules.ts +272 -0
  19. package/packages/server/package.json +9 -3
  20. package/packages/server/src/__tests__/auto-attach.test.ts +61 -8
  21. package/packages/server/src/__tests__/browse-endpoint.test.ts +295 -19
  22. package/packages/server/src/__tests__/cli-bootstrap.test.ts +36 -0
  23. package/packages/server/src/__tests__/directory-service-refresh-force.test.ts +163 -0
  24. package/packages/server/src/__tests__/directory-service-specs-mtime.test.ts +315 -0
  25. package/packages/server/src/__tests__/directory-service-toctou.test.ts +303 -0
  26. package/packages/server/src/__tests__/directory-service.test.ts +174 -0
  27. package/packages/server/src/__tests__/installed-package-enricher.test.ts +225 -0
  28. package/packages/server/src/__tests__/package-manager-wrapper-move.test.ts +414 -0
  29. package/packages/server/src/__tests__/package-routes.test.ts +136 -3
  30. package/packages/server/src/__tests__/package-source-helpers.test.ts +101 -0
  31. package/packages/server/src/__tests__/pending-attach-registry.test.ts +123 -0
  32. package/packages/server/src/__tests__/pending-resume-intent-registry.test.ts +138 -0
  33. package/packages/server/src/__tests__/pi-core-checker.test.ts +73 -30
  34. package/packages/server/src/__tests__/pi-gateway-consume-pending-attach.test.ts +112 -0
  35. package/packages/server/src/__tests__/post-install-openspec-refresh.test.ts +180 -0
  36. package/packages/server/src/__tests__/post-install-rescan.test.ts +134 -0
  37. package/packages/server/src/__tests__/proposal-attach-naming.test.ts +79 -0
  38. package/packages/server/src/__tests__/session-action-handler-spawn-with-attach.test.ts +108 -0
  39. package/packages/server/src/__tests__/session-order-manager.test.ts +55 -0
  40. package/packages/server/src/__tests__/session-order-reboot.test.ts +242 -0
  41. package/packages/server/src/__tests__/session-scanner.test.ts +44 -0
  42. package/packages/server/src/__tests__/subscription-handler.test.ts +40 -0
  43. package/packages/server/src/__tests__/translate-path-source.test.ts +77 -0
  44. package/packages/server/src/__tests__/ui-decorators-replay.test.ts +209 -0
  45. package/packages/server/src/__tests__/ui-modules-replay.test.ts +221 -0
  46. package/packages/server/src/browse.ts +118 -13
  47. package/packages/server/src/browser-gateway.ts +19 -0
  48. package/packages/server/src/browser-handlers/__tests__/session-meta-handler.test.ts +183 -0
  49. package/packages/server/src/browser-handlers/directory-handler.ts +7 -1
  50. package/packages/server/src/browser-handlers/handler-context.ts +15 -0
  51. package/packages/server/src/browser-handlers/session-action-handler.ts +29 -3
  52. package/packages/server/src/browser-handlers/session-meta-handler.ts +46 -12
  53. package/packages/server/src/browser-handlers/subscription-handler.ts +46 -1
  54. package/packages/server/src/cli.ts +5 -6
  55. package/packages/server/src/directory-service.ts +156 -15
  56. package/packages/server/src/event-wiring.ts +111 -10
  57. package/packages/server/src/installed-package-enricher.ts +143 -0
  58. package/packages/server/src/package-manager-wrapper.ts +305 -8
  59. package/packages/server/src/package-source-helpers.ts +104 -0
  60. package/packages/server/src/pending-attach-registry.ts +112 -0
  61. package/packages/server/src/pending-resume-intent-registry.ts +107 -0
  62. package/packages/server/src/pi-core-checker.ts +9 -14
  63. package/packages/server/src/pi-gateway.ts +14 -0
  64. package/packages/server/src/proposal-attach-naming.ts +47 -0
  65. package/packages/server/src/routes/file-routes.ts +29 -3
  66. package/packages/server/src/routes/package-routes.ts +72 -3
  67. package/packages/server/src/routes/plugin-config-routes.ts +129 -0
  68. package/packages/server/src/routes/system-routes.ts +2 -0
  69. package/packages/server/src/server.ts +339 -10
  70. package/packages/server/src/session-api.ts +30 -5
  71. package/packages/server/src/session-order-manager.ts +22 -0
  72. package/packages/server/src/session-scanner.ts +10 -1
  73. package/packages/shared/package.json +9 -2
  74. package/packages/shared/src/__tests__/browser-protocol-types.test.ts +59 -0
  75. package/packages/shared/src/__tests__/config-plugins.test.ts +68 -0
  76. package/packages/shared/src/__tests__/extension-ui-module-shape.test.ts +265 -0
  77. package/packages/shared/src/__tests__/no-raw-openspec-status-in-skills.test.ts +81 -0
  78. package/packages/shared/src/__tests__/openspec-design-evidence.test.ts +288 -0
  79. package/packages/shared/src/__tests__/openspec-effective-status-script.test.ts +174 -0
  80. package/packages/shared/src/__tests__/openspec-poller-design-override.test.ts +225 -0
  81. package/packages/shared/src/__tests__/openspec-poller-specs-override.test.ts +284 -0
  82. package/packages/shared/src/__tests__/openspec-specs-evidence.test.ts +144 -0
  83. package/packages/shared/src/__tests__/platform/is-appimage-self-hit.test.ts +164 -0
  84. package/packages/shared/src/__tests__/plugin-bridge-register-extended.test.ts +72 -0
  85. package/packages/shared/src/__tests__/plugin-bridge-register.test.ts +113 -0
  86. package/packages/shared/src/__tests__/plugin-config-update-protocol.test.ts +41 -0
  87. package/packages/shared/src/__tests__/publish-workflow-contract.test.ts +123 -0
  88. package/packages/shared/src/__tests__/recommended-extensions.test.ts +5 -1
  89. package/packages/shared/src/__tests__/spawn-session-attach-proposal.test.ts +47 -0
  90. package/packages/shared/src/__tests__/tool-registry-strategies-appimage.test.ts +118 -0
  91. package/packages/shared/src/browser-protocol.ts +110 -4
  92. package/packages/shared/src/config.ts +45 -0
  93. package/packages/shared/src/dashboard-plugin/index.ts +11 -0
  94. package/packages/shared/src/dashboard-plugin/manifest-types.ts +58 -0
  95. package/packages/shared/src/dashboard-plugin/plugin-status.ts +26 -0
  96. package/packages/shared/src/dashboard-plugin/slot-props.ts +92 -0
  97. package/packages/shared/src/dashboard-plugin/slot-types.ts +151 -0
  98. package/packages/shared/src/openspec-activity-detector.ts +18 -22
  99. package/packages/shared/src/openspec-design-evidence.ts +109 -0
  100. package/packages/shared/src/openspec-poller.ts +117 -3
  101. package/packages/shared/src/openspec-specs-evidence.ts +79 -0
  102. package/packages/shared/src/platform/binary-lookup.ts +96 -1
  103. package/packages/shared/src/plugin-bridge-register.ts +139 -0
  104. package/packages/shared/src/protocol.ts +56 -2
  105. package/packages/shared/src/recommended-extensions.ts +7 -1
  106. package/packages/shared/src/rest-api.ts +68 -3
  107. package/packages/shared/src/state-replay.ts +11 -1
  108. package/packages/shared/src/tool-registry/strategies.ts +17 -3
  109. package/packages/shared/src/types.ts +160 -0
@@ -0,0 +1,315 @@
1
+ /**
2
+ * Regression tests for the specs/** mtime watch set in the directory-service
3
+ * gated cache.
4
+ *
5
+ * The bug being fixed: `perChangeArtifactPaths` previously only watched
6
+ * `<change>/`, `tasks.md`, `proposal.md`, and `design.md`. Authoring
7
+ * `specs/<cap>/spec.md` did not bump any of those mtimes (POSIX dir-mtime
8
+ * does not propagate up past the immediate parent), so the cache could
9
+ * stamp `specs: ready` on the first poll and never invalidate. The fix
10
+ * extends the watch set to include `specs/`, every immediate
11
+ * `specs/<cap>/`, and every `specs/<cap>/spec.md`.
12
+ *
13
+ * See change: fix-openspec-specs-mtime-gate-blind-spot.
14
+ */
15
+ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
16
+ import * as fs from "node:fs";
17
+ import * as os from "node:os";
18
+ import * as path from "node:path";
19
+ import { createDirectoryService, type DirectoryService } from "../directory-service.js";
20
+ import type { PreferencesStore } from "../preferences-store.js";
21
+ import type { SessionManager } from "../memory-session-manager.js";
22
+ import type { DashboardSession } from "@blackbelt-technology/pi-dashboard-shared/types.js";
23
+
24
+ vi.mock("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js", async (importOriginal) => {
25
+ const actual = await importOriginal<typeof import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js")>();
26
+ return {
27
+ ...actual,
28
+ pollOpenSpecAsync: vi.fn(async () => ({ initialized: false, changes: [] })),
29
+ runOpenSpecList: vi.fn(async () => null),
30
+ runOpenSpecStatus: vi.fn(async () => null),
31
+ };
32
+ });
33
+
34
+ vi.mock("../pi-resource-scanner.js", () => ({
35
+ scanPiResources: vi.fn(async () => ({ local: { extensions: [], skills: [], prompts: [] }, global: { extensions: [], skills: [], prompts: [] }, packages: [] })),
36
+ }));
37
+
38
+ vi.mock("@blackbelt-technology/pi-dashboard-shared/state-replay.js", () => ({
39
+ replayEntriesAsEvents: vi.fn(() => []),
40
+ }));
41
+
42
+ vi.mock("../session-discovery.js", () => ({
43
+ discoverSessionsForCwd: vi.fn(() => []),
44
+ }));
45
+
46
+ vi.mock("../session-file-reader.js", () => ({
47
+ loadSessionEntries: vi.fn(() => []),
48
+ }));
49
+
50
+ vi.mock("@mariozechner/pi-coding-agent", () => ({
51
+ SessionManager: {
52
+ list: vi.fn(async () => []),
53
+ open: vi.fn(() => ({ getBranch: vi.fn(() => []) })),
54
+ },
55
+ }));
56
+
57
+ function createMockPreferencesStore(): PreferencesStore {
58
+ return {
59
+ getPinnedDirectories: () => [],
60
+ getSessionOrder: () => ({}),
61
+ setSessionOrder: vi.fn(),
62
+ setPinnedDirectories: vi.fn(),
63
+ pinDirectory: vi.fn(),
64
+ unpinDirectory: vi.fn(),
65
+ reorderPinnedDirs: vi.fn(),
66
+ flush: vi.fn(),
67
+ dispose: vi.fn(),
68
+ };
69
+ }
70
+
71
+ function createMockSessionManager(sessions: DashboardSession[] = []): SessionManager {
72
+ const map = new Map<string, DashboardSession>();
73
+ for (const s of sessions) map.set(s.id, s);
74
+ return {
75
+ register: vi.fn(),
76
+ restore: vi.fn(),
77
+ unregister: vi.fn(),
78
+ update: vi.fn(),
79
+ get: (id: string) => map.get(id),
80
+ listActive: () => [],
81
+ listAll: () => Array.from(map.values()),
82
+ } as unknown as SessionManager;
83
+ }
84
+
85
+ /** Test helper: push the atime and mtime of an existing path into the future, strictly past
86
+ * every earlier bump made with the same `deltaMs`. `bumpCounter` (module-level, incremented on
87
+ * every call) adds a further 1000 ms per call, so successive calls in the same millisecond
88
+ * still produce strictly-increasing mtimes (the previous `Date.now()`-based implementation
89
+ * flaked when two bumps landed in the same ms, since the gate uses `===` equality against the cached mtime). */
90
+ let bumpCounter = 0;
91
+ function bumpMtime(p: string, deltaMs = 60_000) {
92
+ bumpCounter += 1;
93
+ const future = new Date(Date.now() + deltaMs + bumpCounter * 1000);
94
+ fs.utimesSync(p, future, future);
95
+ }
96
+
97
+ describe("DirectoryService specs/** mtime watch set", () => {
98
+ let tmpDir: string;
99
+ let cwd: string;
100
+ let changesDir: string;
101
+ let changeDir: string;
102
+ let service: DirectoryService;
103
+
104
+ beforeEach(() => {
105
+ vi.clearAllMocks();
106
+ tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ds-specs-mtime-"));
107
+ cwd = tmpDir;
108
+ changesDir = path.join(cwd, "openspec", "changes");
109
+ changeDir = path.join(changesDir, "foo");
110
+ fs.mkdirSync(changeDir, { recursive: true });
111
+ fs.writeFileSync(path.join(changeDir, "proposal.md"), "## Why\n");
112
+ fs.writeFileSync(path.join(changeDir, "design.md"), "## Context\n");
113
+ fs.writeFileSync(path.join(changeDir, "tasks.md"), "- [ ] 1.1 a\n");
114
+ });
115
+
116
+ afterEach(() => {
117
+ service?.stopPolling();
118
+ if (fs.existsSync(tmpDir)) fs.rmSync(tmpDir, { recursive: true, force: true });
119
+ });
120
+
121
+ it("specs file creation invalidates per-change cache", async () => {
122
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
123
+ (runOpenSpecList as any).mockResolvedValue({
124
+ changes: [{ name: "foo", status: "in-progress", completedTasks: 0, totalTasks: 1 }],
125
+ });
126
+
127
+ // Before any specs files exist, the CLI reports specs: ready.
128
+ (runOpenSpecStatus as any).mockImplementation(async () => {
129
+ // The mock reads the live filesystem to decide what to return — this
130
+ // simulates the real openspec CLI's fast-glob-based check.
131
+ const hasSpec = fs.existsSync(path.join(changeDir, "specs", "cap-a", "spec.md"))
132
+ || fs.existsSync(path.join(changeDir, "specs", "cap-b", "spec.md"));
133
+ return {
134
+ artifacts: [
135
+ { id: "proposal", status: "done" },
136
+ { id: "design", status: "done" },
137
+ { id: "specs", status: hasSpec ? "done" : "ready" },
138
+ { id: "tasks", status: "ready" },
139
+ ],
140
+ isComplete: false,
141
+ };
142
+ });
143
+
144
+ service = createDirectoryService(createMockPreferencesStore(), createMockSessionManager());
145
+
146
+ // First poll: no specs files → specs: ready.
147
+ await service.pollDirectoryGated(cwd);
148
+ {
149
+ const data = service.getOpenSpecData(cwd);
150
+ const foo = data?.changes.find((c) => c.name === "foo");
151
+ expect(foo?.artifacts.find((a) => a.id === "specs")?.status).toBe("ready");
152
+ }
153
+
154
+ (runOpenSpecList as any).mockClear();
155
+ (runOpenSpecStatus as any).mockClear();
156
+
157
+ // Author specs/cap-a/spec.md AFTER the first poll. This is the user's
158
+ // mid-flight authoring; the change directory's mtime DOES advance (since
159
+ // `specs/` itself is a new entry directly inside it), but the bug pre-fix was that the
160
+ // gate's signal didn't react to specs/<cap>/spec.md edits in general.
161
+ fs.mkdirSync(path.join(changeDir, "specs", "cap-a"), { recursive: true });
162
+ fs.writeFileSync(path.join(changeDir, "specs", "cap-a", "spec.md"), "## ADDED\n");
163
+ bumpMtime(path.join(changeDir, "specs", "cap-a", "spec.md"));
164
+
165
+ // Second poll: gate must invalidate, runOpenSpecStatus must re-spawn,
166
+ // and the new state must be reflected.
167
+ await service.pollDirectoryGated(cwd);
168
+ expect(runOpenSpecStatus).toHaveBeenCalledTimes(1);
169
+ {
170
+ const data = service.getOpenSpecData(cwd);
171
+ const foo = data?.changes.find((c) => c.name === "foo");
172
+ expect(foo?.artifacts.find((a) => a.id === "specs")?.status).toBe("done");
173
+ }
174
+ });
175
+
176
+ it("in-place edit to existing spec.md invalidates per-change cache", async () => {
177
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
178
+ (runOpenSpecList as any).mockResolvedValue({
179
+ changes: [{ name: "foo", status: "in-progress", completedTasks: 0, totalTasks: 1 }],
180
+ });
181
+ (runOpenSpecStatus as any).mockImplementation(async () => ({
182
+ artifacts: [
183
+ { id: "proposal", status: "done" },
184
+ { id: "design", status: "done" },
185
+ { id: "specs", status: "done" },
186
+ { id: "tasks", status: "ready" },
187
+ ],
188
+ isComplete: false,
189
+ }));
190
+
191
+ // Author specs/cap-a/spec.md before the first poll so we exercise the
192
+ // "in-place edit" path specifically (not the "creation" path).
193
+ fs.mkdirSync(path.join(changeDir, "specs", "cap-a"), { recursive: true });
194
+ const specPath = path.join(changeDir, "specs", "cap-a", "spec.md");
195
+ fs.writeFileSync(specPath, "v1");
196
+
197
+ service = createDirectoryService(createMockPreferencesStore(), createMockSessionManager());
198
+
199
+ await service.pollDirectoryGated(cwd);
200
+ (runOpenSpecList as any).mockClear();
201
+ (runOpenSpecStatus as any).mockClear();
202
+
203
+ // No-op poll: nothing changed → gate must hit, zero CLI calls.
204
+ await service.pollDirectoryGated(cwd);
205
+ expect(runOpenSpecStatus).not.toHaveBeenCalled();
206
+
207
+ // Edit in place. POSIX bumps the file's mtime but NOT the parent dir's.
208
+ // Without specs/<cap>/spec.md in the watch set, the gate would miss this.
209
+ fs.writeFileSync(specPath, "v2");
210
+ bumpMtime(specPath);
211
+
212
+ await service.pollDirectoryGated(cwd);
213
+ expect(runOpenSpecStatus).toHaveBeenCalledTimes(1);
214
+ });
215
+
216
+ it("deletion of specs/<cap> invalidates per-change cache", async () => {
217
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
218
+ (runOpenSpecList as any).mockResolvedValue({
219
+ changes: [{ name: "foo", status: "in-progress", completedTasks: 0, totalTasks: 1 }],
220
+ });
221
+ (runOpenSpecStatus as any).mockImplementation(async () => {
222
+ const hasSpec = fs.existsSync(path.join(changeDir, "specs", "cap-a", "spec.md"));
223
+ return {
224
+ artifacts: [
225
+ { id: "proposal", status: "done" },
226
+ { id: "design", status: "done" },
227
+ { id: "specs", status: hasSpec ? "done" : "ready" },
228
+ { id: "tasks", status: "ready" },
229
+ ],
230
+ isComplete: false,
231
+ };
232
+ });
233
+
234
+ fs.mkdirSync(path.join(changeDir, "specs", "cap-a"), { recursive: true });
235
+ fs.writeFileSync(path.join(changeDir, "specs", "cap-a", "spec.md"), "v1");
236
+ bumpMtime(path.join(changeDir, "specs", "cap-a", "spec.md"));
237
+
238
+ service = createDirectoryService(createMockPreferencesStore(), createMockSessionManager());
239
+
240
+ await service.pollDirectoryGated(cwd);
241
+ {
242
+ const data = service.getOpenSpecData(cwd);
243
+ expect(data?.changes[0].artifacts.find((a) => a.id === "specs")?.status).toBe("done");
244
+ }
245
+
246
+ (runOpenSpecList as any).mockClear();
247
+ (runOpenSpecStatus as any).mockClear();
248
+
249
+ // Remove the entire capability subtree. specs/ mtime advances (entry-
250
+ // delete semantics) so the gate must invalidate.
251
+ fs.rmSync(path.join(changeDir, "specs", "cap-a"), { recursive: true });
252
+ bumpMtime(path.join(changeDir, "specs"));
253
+
254
+ await service.pollDirectoryGated(cwd);
255
+ expect(runOpenSpecStatus).toHaveBeenCalledTimes(1);
256
+ {
257
+ const data = service.getOpenSpecData(cwd);
258
+ expect(data?.changes[0].artifacts.find((a) => a.id === "specs")?.status).toBe("ready");
259
+ }
260
+ });
261
+
262
+ it("change with no specs/ directory at all does not throw", async () => {
263
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
264
+ (runOpenSpecList as any).mockResolvedValue({
265
+ changes: [{ name: "foo", status: "in-progress", completedTasks: 0, totalTasks: 1 }],
266
+ });
267
+ (runOpenSpecStatus as any).mockResolvedValue({
268
+ artifacts: [
269
+ { id: "proposal", status: "done" },
270
+ { id: "design", status: "ready" },
271
+ { id: "specs", status: "ready" },
272
+ { id: "tasks", status: "blocked" },
273
+ ],
274
+ isComplete: false,
275
+ });
276
+
277
+ // beforeEach already created the change without a specs/ directory.
278
+ service = createDirectoryService(createMockPreferencesStore(), createMockSessionManager());
279
+
280
+ await expect(service.pollDirectoryGated(cwd)).resolves.not.toThrow();
281
+ const data = service.getOpenSpecData(cwd);
282
+ expect(data?.changes[0].artifacts.find((a) => a.id === "specs")?.status).toBe("ready");
283
+ });
284
+
285
+ it("specs override promotes ready→done when local files exist (defense in depth)", async () => {
286
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
287
+ (runOpenSpecList as any).mockResolvedValue({
288
+ changes: [{ name: "foo", status: "in-progress", completedTasks: 0, totalTasks: 1 }],
289
+ });
290
+ // CLI lies and says ready even though spec files exist on disk. The
291
+ // local-evidence override at the buildOpenSpecData layer should still
292
+ // promote to done.
293
+ (runOpenSpecStatus as any).mockResolvedValue({
294
+ artifacts: [
295
+ { id: "proposal", status: "done" },
296
+ { id: "design", status: "done" },
297
+ { id: "specs", status: "ready" }, // ← stale CLI verdict
298
+ { id: "tasks", status: "ready" },
299
+ ],
300
+ isComplete: false,
301
+ });
302
+
303
+ fs.mkdirSync(path.join(changeDir, "specs", "cap-a"), { recursive: true });
304
+ fs.writeFileSync(path.join(changeDir, "specs", "cap-a", "spec.md"), "## ADDED\n");
305
+
306
+ service = createDirectoryService(createMockPreferencesStore(), createMockSessionManager());
307
+
308
+ await service.pollDirectoryGated(cwd);
309
+ const data = service.getOpenSpecData(cwd);
310
+ const foo = data?.changes.find((c) => c.name === "foo");
311
+ // The override fired: local evidence promoted ready → done despite the
312
+ // CLI's stale verdict.
313
+ expect(foo?.artifacts.find((a) => a.id === "specs")?.status).toBe("done");
314
+ });
315
+ });
@@ -0,0 +1,303 @@
1
+ /**
2
+ * TOCTOU race regression tests for the directory-service mtime gate.
3
+ *
4
+ * The bug being fixed: `pollOne` used to compute the per-change cache mtime
5
+ * AFTER `openspec status` returned. A write that landed during the CLI call
6
+ * would stamp `{ mtimeMs: post-write, status: pre-write }` into the cache,
7
+ * after which the gate's invariant ("mtime equal => CLI result equal") was
8
+ * broken for that entry forever — the cache would happily reuse the stale
9
+ * status because `current mtime == cached mtime` from then on.
10
+ *
11
+ * See change: fix-openspec-mtime-gate-toctou.
12
+ */
13
+ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
14
+ import * as fs from "node:fs";
15
+ import * as os from "node:os";
16
+ import * as path from "node:path";
17
+ import { createDirectoryService, type DirectoryService } from "../directory-service.js";
18
+ import type { PreferencesStore } from "../preferences-store.js";
19
+ import type { SessionManager } from "../memory-session-manager.js";
20
+ import type { DashboardSession } from "@blackbelt-technology/pi-dashboard-shared/types.js";
21
+
22
+ // Mock the shared openspec poller so we don't shell out.
23
+ vi.mock("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js", async (importOriginal) => {
24
+ const actual = await importOriginal<typeof import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js")>();
25
+ return {
26
+ ...actual,
27
+ pollOpenSpecAsync: vi.fn(async () => ({ initialized: false, changes: [] })),
28
+ runOpenSpecList: vi.fn(async () => null),
29
+ runOpenSpecStatus: vi.fn(async () => null),
30
+ };
31
+ });
32
+
33
+ vi.mock("../pi-resource-scanner.js", () => ({
34
+ scanPiResources: vi.fn(async () => ({ local: { extensions: [], skills: [], prompts: [] }, global: { extensions: [], skills: [], prompts: [] }, packages: [] })),
35
+ }));
36
+
37
+ vi.mock("@blackbelt-technology/pi-dashboard-shared/state-replay.js", () => ({
38
+ replayEntriesAsEvents: vi.fn(() => []),
39
+ }));
40
+
41
+ vi.mock("../session-discovery.js", () => ({
42
+ discoverSessionsForCwd: vi.fn(() => []),
43
+ }));
44
+
45
+ vi.mock("../session-file-reader.js", () => ({
46
+ loadSessionEntries: vi.fn(() => []),
47
+ }));
48
+
49
+ vi.mock("@mariozechner/pi-coding-agent", () => ({
50
+ SessionManager: {
51
+ list: vi.fn(async () => []),
52
+ open: vi.fn(() => ({ getBranch: vi.fn(() => []) })),
53
+ },
54
+ }));
55
+
56
+ function createMockPreferencesStore(): PreferencesStore {
57
+ return {
58
+ getPinnedDirectories: () => [],
59
+ getSessionOrder: () => ({}),
60
+ setSessionOrder: vi.fn(),
61
+ setPinnedDirectories: vi.fn(),
62
+ pinDirectory: vi.fn(),
63
+ unpinDirectory: vi.fn(),
64
+ reorderPinnedDirs: vi.fn(),
65
+ flush: vi.fn(),
66
+ dispose: vi.fn(),
67
+ };
68
+ }
69
+
70
+ function createMockSessionManager(sessions: DashboardSession[] = []): SessionManager {
71
+ const map = new Map<string, DashboardSession>();
72
+ for (const s of sessions) map.set(s.id, s);
73
+ return {
74
+ register: vi.fn(),
75
+ restore: vi.fn(),
76
+ unregister: vi.fn(),
77
+ update: vi.fn(),
78
+ get: (id: string) => map.get(id),
79
+ listActive: () => [],
80
+ listAll: () => Array.from(map.values()),
81
+ } as unknown as SessionManager;
82
+ }
83
+
84
+ describe("DirectoryService TOCTOU race (fix-openspec-mtime-gate-toctou)", () => {
85
+ let tmpDir: string;
86
+ let cwd: string;
87
+ let changesDir: string;
88
+ let service: DirectoryService;
89
+
90
+ beforeEach(() => {
91
+ vi.clearAllMocks();
92
+ tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ds-toctou-"));
93
+ cwd = tmpDir;
94
+ changesDir = path.join(cwd, "openspec", "changes");
95
+ fs.mkdirSync(path.join(changesDir, "change-a"), { recursive: true });
96
+ fs.writeFileSync(path.join(changesDir, "change-a", "tasks.md"), "- [ ] 1.1 a\n");
97
+ });
98
+
99
+ afterEach(() => {
100
+ service?.stopPolling();
101
+ if (fs.existsSync(tmpDir)) fs.rmSync(tmpDir, { recursive: true, force: true });
102
+ });
103
+
104
+ it("write-during-CLI is detected and the cache is NOT poisoned", async () => {
105
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
106
+ (runOpenSpecList as any).mockResolvedValue({ changes: [
107
+ { name: "change-a", status: "in-progress", completedTasks: 0, totalTasks: 1 },
108
+ ] });
109
+
110
+ // Simulate a write happening DURING the CLI call: bump tasks.md mtime
111
+ // inside the mocked status implementation. Pre-call stat saw the original
112
+ // mtime; post-call stat will see the bumped one.
113
+ (runOpenSpecStatus as any).mockImplementation(async () => {
114
+ const future = new Date(Date.now() + 60_000);
115
+ fs.utimesSync(path.join(changesDir, "change-a", "tasks.md"), future, future);
116
+ // Return the (now stale) status the CLI would have computed before the write.
117
+ return {
118
+ artifacts: [
119
+ { id: "proposal", status: "done" },
120
+ { id: "design", status: "done" },
121
+ { id: "specs", status: "done" },
122
+ { id: "tasks", status: "ready" }, // ← the racy/stale value
123
+ ],
124
+ isComplete: false,
125
+ };
126
+ });
127
+
128
+ const stateStore = createMockPreferencesStore();
129
+ const sessionManager = createMockSessionManager();
130
+ service = createDirectoryService(stateStore, sessionManager);
131
+
132
+ // First poll. The TOCTOU guard should detect the in-flight write and
133
+ // refuse to stamp the cache for change-a — so the next gated call must
134
+ // still spawn the CLI (no stale entry to short-circuit on).
135
+ await service.pollDirectoryGated(cwd);
136
+ (runOpenSpecList as any).mockClear();
137
+ (runOpenSpecStatus as any).mockClear();
138
+
139
+ // Now resolve the race: subsequent CLI calls return the post-write status.
140
+ (runOpenSpecStatus as any).mockImplementation(async () => ({
141
+ artifacts: [
142
+ { id: "proposal", status: "done" },
143
+ { id: "design", status: "done" },
144
+ { id: "specs", status: "done" },
145
+ { id: "tasks", status: "done" },
146
+ ],
147
+ isComplete: true,
148
+ }));
149
+
150
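+ // Re-arm the list mock for the second poll (original intent: keep the list step from short-circuiting on its
151
+ // own cached signal). NOTE(review): the payload is identical to the first poll's and mockClear() keeps the implementation — TODO confirm this is needed.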
152
+ (runOpenSpecList as any).mockResolvedValue({ changes: [
153
+ { name: "change-a", status: "in-progress", completedTasks: 0, totalTasks: 1 },
154
+ ] });
155
+
156
+ await service.pollDirectoryGated(cwd);
157
+ // Status MUST have been re-spawned because the racy entry was discarded.
158
+ expect(runOpenSpecStatus).toHaveBeenCalledTimes(1);
159
+ expect((runOpenSpecStatus as any).mock.calls[0][1]).toBe("change-a");
160
+ const data = service.getOpenSpecData(cwd);
161
+ const ca = data?.changes.find((c) => c.name === "change-a");
162
+ expect(ca?.artifacts.find((a) => a.id === "tasks")?.status).toBe("done");
163
+ });
164
+
165
+ it("happy path (no race): cache is stamped with preCallMtime and gate hits on the next tick", async () => {
166
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
167
+ (runOpenSpecList as any).mockResolvedValue({ changes: [
168
+ { name: "change-a", status: "in-progress", completedTasks: 0, totalTasks: 1 },
169
+ ] });
170
+ (runOpenSpecStatus as any).mockResolvedValue({
171
+ artifacts: [
172
+ { id: "proposal", status: "done" },
173
+ { id: "design", status: "done" },
174
+ { id: "specs", status: "done" },
175
+ { id: "tasks", status: "done" },
176
+ ],
177
+ isComplete: true,
178
+ });
179
+
180
+ const stateStore = createMockPreferencesStore();
181
+ const sessionManager = createMockSessionManager();
182
+ service = createDirectoryService(stateStore, sessionManager);
183
+
184
+ await service.pollDirectoryGated(cwd);
185
+ (runOpenSpecList as any).mockClear();
186
+ (runOpenSpecStatus as any).mockClear();
187
+
188
+ // Second poll — nothing changed on disk → gate must hit, zero CLI calls.
189
+ await service.pollDirectoryGated(cwd);
190
+ expect(runOpenSpecList).not.toHaveBeenCalled();
191
+ expect(runOpenSpecStatus).not.toHaveBeenCalled();
192
+ });
193
+
194
+ it("bulk fast-forward authoring does not poison the cache (W1)", async () => {
195
+ // Simulates `/opsx:ff` writing all 4 artifact files in succession while
196
+ // periodic polls fire mid-stream. Each `runOpenSpecStatus` call sees a
197
+ // different snapshot of disk: the first sees only proposal/design, the
198
+ // second sees specs added, etc. Each interleaved write bumps the file's
199
+ // mtime AFTER the CLI mock is entered — reproducing the TOCTOU window.
200
+ // The TOCTOU guard MUST discard each racy result, and the cache MUST
201
+ // converge to the final post-authoring statuses by the next gated tick
202
+ // after the writes stop.
203
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
204
+ (runOpenSpecList as any).mockResolvedValue({ changes: [
205
+ { name: "change-a", status: "in-progress", completedTasks: 0, totalTasks: 1 },
206
+ ] });
207
+
208
+ // Track which artifacts the "CLI" should consider authored on the next call.
209
+ // Each FF step writes the next artifact and the mock reports it as done.
210
+ let ffStep = 0;
211
+ const ffArtifacts = [
212
+ ["proposal"],
213
+ ["proposal", "design"],
214
+ ["proposal", "design", "specs"],
215
+ ["proposal", "design", "specs", "tasks"],
216
+ ];
217
+ (runOpenSpecStatus as any).mockImplementation(async () => {
218
+ // Simulate a write happening DURING this CLI call — the next FF step
219
+ // bumps tasks.md's mtime. This is exactly the race window the proposal
220
+ // targets, fired once per poll while authoring is in flight.
221
+ if (ffStep < ffArtifacts.length - 1) {
222
+ const future = new Date(Date.now() + (ffStep + 1) * 60_000);
223
+ fs.utimesSync(path.join(changesDir, "change-a", "tasks.md"), future, future);
224
+ }
225
+ const authored = new Set(ffArtifacts[ffStep]);
226
+ ffStep++;
227
+ return {
228
+ artifacts: [
229
+ { id: "proposal", status: authored.has("proposal") ? "done" : "ready" },
230
+ { id: "design", status: authored.has("design") ? "done" : "ready" },
231
+ { id: "specs", status: authored.has("specs") ? "done" : "ready" },
232
+ { id: "tasks", status: authored.has("tasks") ? "done" : "ready" },
233
+ ],
234
+ isComplete: authored.size === 4,
235
+ };
236
+ });
237
+
238
+ const stateStore = createMockPreferencesStore();
239
+ const sessionManager = createMockSessionManager();
240
+ service = createDirectoryService(stateStore, sessionManager);
241
+
242
+ // Three poll ticks fire while authoring is mid-stream — each races and
243
+ // gets discarded. None of them should poison the cache with a stale
244
+ // status (e.g. tasks=ready) that future ticks would reuse.
245
+ await service.pollDirectoryGated(cwd);
246
+ await service.pollDirectoryGated(cwd);
247
+ await service.pollDirectoryGated(cwd);
248
+
249
+ // Authoring done. The mock now returns the final "all done" state and
250
+ // does not bump tasks.md anymore (ffStep === 3 → last branch in the if).
251
+ // One more gated tick should converge the cache.
252
+ await service.pollDirectoryGated(cwd);
253
+
254
+ const data = service.getOpenSpecData(cwd);
255
+ const ca = data?.changes.find((c) => c.name === "change-a");
256
+ expect(ca, "change-a should be present").toBeDefined();
257
+ expect(ca?.artifacts.find((a) => a.id === "proposal")?.status).toBe("done");
258
+ expect(ca?.artifacts.find((a) => a.id === "design")?.status).toBe("done");
259
+ expect(ca?.artifacts.find((a) => a.id === "specs")?.status).toBe("done");
260
+ expect(ca?.artifacts.find((a) => a.id === "tasks")?.status).toBe("done");
261
+ });
262
+
263
+ it("DEBUG-gated warn: emits exactly one line per discard when DEBUG matches; silent otherwise", async () => {
264
+ const { runOpenSpecList, runOpenSpecStatus } = await import("@blackbelt-technology/pi-dashboard-shared/openspec-poller.js");
265
+ (runOpenSpecList as any).mockResolvedValue({ changes: [
266
+ { name: "change-a", status: "in-progress", completedTasks: 0, totalTasks: 1 },
267
+ ] });
268
+ (runOpenSpecStatus as any).mockImplementation(async () => {
269
+ const future = new Date(Date.now() + 60_000);
270
+ fs.utimesSync(path.join(changesDir, "change-a", "tasks.md"), future, future);
271
+ return { artifacts: [], isComplete: false };
272
+ });
273
+
274
+ const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
275
+ const originalDebug = process.env.DEBUG;
276
+
277
+ try {
278
+ // DEBUG unset → silent
279
+ delete process.env.DEBUG;
280
+ const stateStore = createMockPreferencesStore();
281
+ const sessionManager = createMockSessionManager();
282
+ service = createDirectoryService(stateStore, sessionManager);
283
+ await service.pollDirectoryGated(cwd);
284
+ expect(warnSpy).not.toHaveBeenCalled();
285
+ service.stopPolling();
286
+
287
+ warnSpy.mockClear();
288
+
289
+ // DEBUG=pi-dashboard → exactly one warn for the racy change
290
+ process.env.DEBUG = "pi-dashboard";
291
+ // Reset file mtime so the next poll re-arms the race.
292
+ fs.utimesSync(path.join(changesDir, "change-a", "tasks.md"), new Date(), new Date());
293
+ service = createDirectoryService(createMockPreferencesStore(), createMockSessionManager());
294
+ await service.pollDirectoryGated(cwd);
295
+ expect(warnSpy).toHaveBeenCalledTimes(1);
296
+ expect(warnSpy.mock.calls[0][0]).toMatch(/fix-openspec-mtime-gate-toctou.*change-a/);
297
+ } finally {
298
+ if (originalDebug === undefined) delete process.env.DEBUG;
299
+ else process.env.DEBUG = originalDebug;
300
+ warnSpy.mockRestore();
301
+ }
302
+ });
303
+ });