@tmustier/pi-agent-teams 0.4.0-beta.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/CHANGELOG.md +26 -0
  2. package/README.md +72 -9
  3. package/WORKFLOW.md +110 -0
  4. package/docs/claude-parity.md +18 -13
  5. package/docs/hook-contract.md +183 -0
  6. package/docs/smoke-test-plan.md +26 -7
  7. package/extensions/teams/activity-tracker.ts +296 -8
  8. package/extensions/teams/cleanup.ts +216 -3
  9. package/extensions/teams/hooks.ts +57 -5
  10. package/extensions/teams/leader-attach-commands.ts +8 -4
  11. package/extensions/teams/leader-inbox.ts +162 -4
  12. package/extensions/teams/leader-info-commands.ts +105 -3
  13. package/extensions/teams/leader-lifecycle-commands.ts +205 -3
  14. package/extensions/teams/leader-messaging-commands.ts +19 -7
  15. package/extensions/teams/leader-spawn-command.ts +5 -1
  16. package/extensions/teams/leader-team-command.ts +51 -2
  17. package/extensions/teams/leader-teams-tool.ts +387 -11
  18. package/extensions/teams/leader.ts +126 -52
  19. package/extensions/teams/mailbox.ts +6 -1
  20. package/extensions/teams/model-policy.ts +117 -0
  21. package/extensions/teams/spawn-types.ts +4 -0
  22. package/extensions/teams/teammate-rpc.ts +14 -0
  23. package/extensions/teams/teams-panel.ts +117 -19
  24. package/extensions/teams/teams-ui-shared.ts +205 -2
  25. package/extensions/teams/teams-widget.ts +67 -14
  26. package/extensions/teams/worker.ts +18 -6
  27. package/extensions/teams/worktree.ts +143 -0
  28. package/package.json +4 -2
  29. package/scripts/integration-cleanup-test.mts +419 -0
  30. package/scripts/integration-hooks-remediation-test.mts +382 -0
  31. package/scripts/integration-spawn-overrides-test.mts +10 -0
  32. package/scripts/smoke-test.mts +701 -3
  33. package/skills/agent-teams/SKILL.md +28 -7
@@ -0,0 +1,382 @@
1
+ /**
2
+ * Integration test: end-to-end quality-gate remediation loop.
3
+ *
4
+ * Validates a deterministic leader-only flow:
5
+ * 1) Team-level hooks policy set to reopen+followup
6
+ * 2) Completed task emits idle_notification -> task_completed hook runs and fails
7
+ * 3) Leader auto-remediates:
8
+ * - marks task gate failure metadata
9
+ * - reopens original task to pending
10
+ * - creates + assigns follow-up task
11
+ * - sends remediation nudge mailbox message
12
+ *
13
+ * Usage:
14
+ * npx tsx scripts/integration-hooks-remediation-test.mts
15
+ * npx tsx scripts/integration-hooks-remediation-test.mts --timeoutSec 90
16
+ */
17
+
18
+ import * as fs from "node:fs";
19
+ import * as os from "node:os";
20
+ import * as path from "node:path";
21
+ import * as readline from "node:readline";
22
+ import { spawn, type ChildProcess } from "node:child_process";
23
+ import { fileURLToPath } from "node:url";
24
+
25
+ import { writeToMailbox, getInboxPath } from "../extensions/teams/mailbox.js";
26
+ import { TEAM_MAILBOX_NS } from "../extensions/teams/protocol.js";
27
+ import { createTask, getTask, listTasks, updateTask, type TeamTask } from "../extensions/teams/task-store.js";
28
+ import { updateTeamHooksPolicy } from "../extensions/teams/team-config.js";
29
+ import { sleep, terminateAll } from "./lib/pi-workers.js";
30
+
31
/** Command shape accepted by `send`; this test only ever issues `get_state`. */
type RpcCommand = { type: "get_state"; id?: string };

/** A JSON line from the leader's stdout that answers a command (validated by `isRpcResponse`). */
type RpcResponse = {
  type: "response";
  id?: string;
  command: string;
  success: boolean;
  data?: unknown;
  error?: string;
};

/** Bookkeeping for one in-flight request: the promise settlers plus its timeout timer. */
type PendingRequest = {
  timeout: NodeJS.Timeout;
  resolve: (v: RpcResponse) => void;
  reject: (e: Error) => void;
};

/** One entry of a mailbox JSON file as read back from disk (validated by `isMailboxMessageWire`). */
type MailboxMessageWire = {
  from: string;
  timestamp: string;
  text: string;
  read?: boolean;
  color?: string;
};
55
+
56
/**
 * Reads `--timeoutSec <n>` from the argument list.
 *
 * The token following each `--timeoutSec` is consumed as its value (the last
 * occurrence wins). Values that do not parse to a finite integer, or that are
 * below 20, fall back to the default of 90 seconds.
 *
 * @param argv Command-line arguments without the node/script prefix.
 * @returns The effective timeout in seconds.
 */
function parseArgs(argv: readonly string[]): { timeoutSec: number } {
  const fallbackSec = 90;
  const minimumSec = 20;

  let requested = fallbackSec;
  let index = 0;
  while (index < argv.length) {
    if (argv[index] !== "--timeoutSec") {
      index += 1;
      continue;
    }
    const rawValue = argv[index + 1];
    if (rawValue) requested = Number.parseInt(rawValue, 10);
    // Skip the flag and the token consumed as its value.
    index += 2;
  }

  const usable = Number.isFinite(requested) && requested >= minimumSec;
  return { timeoutSec: usable ? requested : fallbackSec };
}
69
+
70
/**
 * Minimal assertion helper: throws an `Error` carrying `message` when
 * `condition` is false.
 *
 * Declared as a TypeScript assertion function (`asserts condition`) so that
 * the checked expression also narrows types for the code following the call,
 * e.g. `assert(value !== null, "...")` leaves `value` non-null afterwards.
 *
 * @param condition Result of the check that must hold.
 * @param message Error message used when the check fails.
 * @throws Error when `condition` is false.
 */
function assert(condition: boolean, message: string): asserts condition {
  if (!condition) throw new Error(message);
}
73
+
74
/**
 * Type guard for "any non-null object".
 *
 * Note that arrays are objects too, so they also pass this check.
 */
function isRecord(v: unknown): v is Record<string, unknown> {
  if (v === null) return false;
  return typeof v === "object";
}
77
+
78
/**
 * Parses a JSON string without throwing.
 *
 * @param line Text expected to contain one JSON value.
 * @returns The decoded value, or `null` when the text is not valid JSON.
 *          A literal JSON `null` is therefore indistinguishable from a parse failure.
 */
function safeJsonParse(line: string): unknown | null {
  let decoded: unknown = null;
  try {
    decoded = JSON.parse(line);
  } catch {
    // Malformed input: keep the null default.
  }
  return decoded;
}
85
+
86
/**
 * Type guard for RPC response lines.
 *
 * Requires `type === "response"`, a string `command` and a boolean `success`;
 * `id` and `error` may be absent but must be strings when present.
 * `data` is intentionally left unchecked.
 */
function isRpcResponse(v: unknown): v is RpcResponse {
  if (!isRecord(v)) return false;

  const isOptionalString = (field: unknown): boolean => field === undefined || typeof field === "string";

  return (
    v.type === "response" &&
    typeof v.command === "string" &&
    typeof v.success === "boolean" &&
    isOptionalString(v.id) &&
    isOptionalString(v.error)
  );
}
95
+
96
/**
 * Type guard for mailbox entries.
 *
 * `from`, `text` and `timestamp` must be strings; `read` (boolean) and
 * `color` (string) are optional but type-checked when present.
 */
function isMailboxMessageWire(v: unknown): v is MailboxMessageWire {
  if (!isRecord(v)) return false;

  const hasRequiredStrings =
    typeof v.from === "string" && typeof v.text === "string" && typeof v.timestamp === "string";
  const readOk = v.read === undefined || typeof v.read === "boolean";
  const colorOk = v.color === undefined || typeof v.color === "string";

  return hasRequiredStrings && readOk && colorOk;
}
105
+
106
/**
 * Pulls `data.sessionId` out of an RPC response.
 *
 * @returns The session id, or `null` when `data` is not an object or
 *          `sessionId` is not a string.
 */
function extractSessionId(resp: RpcResponse): string | null {
  const candidate = isRecord(resp.data) ? resp.data.sessionId : null;
  return typeof candidate === "string" ? candidate : null;
}
111
+
112
/**
 * Polls `fn` until it reports `true` or the timeout elapses.
 *
 * The predicate is always evaluated at least once (even for `timeoutMs <= 0`)
 * and is evaluated one final time after the last sleep, so a condition that
 * becomes true during the final poll interval is not misreported as a timeout.
 * The last sleep is shortened to the time remaining so the deadline is not
 * overshot by up to `pollMs`.
 *
 * @param fn Predicate to poll; may be synchronous or asynchronous. Errors it throws propagate.
 * @param opts.timeoutMs Maximum time to keep polling, in milliseconds.
 * @param opts.pollMs Delay between two evaluations, in milliseconds.
 * @param opts.label Human-readable name used in the timeout error message.
 * @throws Error `Timeout waiting for <label>` when the predicate never became true.
 */
async function waitFor(
  fn: () => boolean | Promise<boolean>,
  opts: { timeoutMs: number; pollMs: number; label: string },
): Promise<void> {
  const { timeoutMs, pollMs, label } = opts;
  const deadline = Date.now() + timeoutMs;

  for (;;) {
    if (await fn()) return;

    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) break;

    // Never sleep past the deadline; the loop re-checks `fn` right after waking up.
    await sleep(Math.min(pollMs, remainingMs));
  }

  throw new Error(`Timeout waiting for ${label}`);
}
124
+
125
/**
 * Reads a mailbox JSON file and returns its well-formed entries.
 *
 * Best-effort by design: a missing/unreadable file, invalid JSON, or a
 * non-array payload all yield an empty list, and array items that fail
 * `isMailboxMessageWire` are dropped.
 *
 * @param filePath Path of the mailbox JSON file.
 */
function loadMailboxMessages(filePath: string): MailboxMessageWire[] {
  let decoded: unknown;
  try {
    decoded = JSON.parse(fs.readFileSync(filePath, "utf8"));
  } catch {
    return [];
  }
  return Array.isArray(decoded) ? decoded.filter(isMailboxMessageWire) : [];
}
135
+
136
/**
 * Finds the follow-up task created for a failed quality gate.
 *
 * A task matches when its subject starts with `Quality gate failed:` and
 * contains the marker `(task #<originalTaskId>)`.
 *
 * @returns The first matching task in list order, or `null` if there is none.
 */
function findFollowupTask(tasks: readonly TeamTask[], originalTaskId: string): TeamTask | null {
  const marker = `(task #${originalTaskId})`;
  const match = tasks.find(
    (candidate) => candidate.subject.startsWith("Quality gate failed:") && candidate.subject.includes(marker),
  );
  return match ?? null;
}
143
+
144
// --- Fixture setup -----------------------------------------------------------
// Everything lives under a fresh temp directory that is removed again at the
// end of the script.
const { timeoutSec } = parseArgs(process.argv.slice(2));

const teamsRootDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-teams-hooks-remediation-"));
const hooksDir = path.join(teamsRootDir, "_hooks");
fs.mkdirSync(hooksDir, { recursive: true });

// The hook script always fails (exit code 17) after dropping a sentinel file,
// which lets the test later prove that the hook actually ran.
const hookSentinelFile = path.join(teamsRootDir, "hook-sentinel.txt");
const failingHookSource = [
  "const fs = require('node:fs');",
  `fs.writeFileSync(${JSON.stringify(hookSentinelFile)}, 'failed\\n', 'utf8');`,
  "console.error('integration hook failure sentinel');",
  "process.exit(17);",
]
  .map((statement) => `${statement}\n`)
  .join("");
fs.writeFileSync(path.join(hooksDir, "on_task_completed.js"), failingHookSource, "utf8");

// Resolve the teams extension entry point relative to this script's location.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(scriptDir, "..");
const entryPath = path.join(repoRoot, "extensions", "teams", "index.ts");

const entryExists = fs.existsSync(entryPath);
if (!entryExists) {
  throw new Error(`Teams extension entry not found: ${entryPath}`);
}

console.log(`teamsRootDir: ${teamsRootDir}`);
console.log(`hooksDir: ${hooksDir}`);
172
// --- Leader process ----------------------------------------------------------
// Worker-identity variables are blanked and PI_TEAMS_WORKER is forced to "0";
// presumably so the child starts as a leader even if this script itself runs
// inside a worker environment (NOTE(review): confirm against the extension).
const blankedWorkerVars = Object.fromEntries(
  [
    "PI_TEAMS_TEAM_ID",
    "PI_TEAMS_AGENT_NAME",
    "PI_TEAMS_TASK_LIST_ID",
    "PI_TEAMS_LEAD_NAME",
    "PI_TEAMS_AUTO_CLAIM",
  ].map((name) => [name, ""]),
);

const leaderEnv = {
  ...process.env,
  PI_TEAMS_ROOT_DIR: teamsRootDir,
  PI_TEAMS_HOOKS_ENABLED: "1",
  PI_TEAMS_HOOK_TIMEOUT_MS: "10000",
  PI_TEAMS_WORKER: "0",
  ...blankedWorkerVars,
};

// RPC mode with only the teams extension loaded.
const leaderArgs = [
  "--mode", "rpc",
  "--no-session",
  "--no-tools",
  "--provider", "openai-codex",
  "--model", "gpt-5.1-codex-mini",
  "--thinking", "minimal",
  "--no-extensions",
  "-e", entryPath,
];

const leader = spawn("pi", leaderArgs, {
  cwd: repoRoot,
  env: leaderEnv,
  stdio: ["pipe", "pipe", "pipe"],
});

// Processes handed to terminateAll() during cleanup.
const procs: ChildProcess[] = [leader];

// Accumulated leader stderr, appended to error messages for diagnostics.
let stderr = "";
leader.stderr.on("data", (chunk: Buffer | string) => {
  stderr = stderr + chunk.toString();
});
212
+
213
/** In-flight RPC requests keyed by request id. */
const pending = new Map<string, PendingRequest>();
/** Counter used to generate `req-<n>` ids for commands that do not carry one. */
let nextId = 1;

/**
 * Rejects every in-flight request (cancelling its timeout timer) and empties
 * the map. `describe` builds the error message for a given request id.
 */
const rejectAllPending = (describe: (id: string) => string): void => {
  for (const [id, req] of pending.entries()) {
    clearTimeout(req.timeout);
    req.reject(new Error(describe(id)));
  }
  pending.clear();
};

// 'error' is emitted when the child could not be spawned (e.g. `pi` is not on
// PATH). Without a listener Node turns it into an uncaught exception, which
// would skip the `finally` cleanup below and leak the temp directory. Failing
// the in-flight requests instead lets the normal error path run.
leader.on("error", (err: Error) => {
  console.error(`[leader:error] ${err.message}`);
  rejectAllPending((id) => `Leader process error before response id=${id}: ${err.message}. stderr=${stderr}`);
});

// If the leader goes away, nobody will ever answer the outstanding requests.
leader.on("close", () => {
  rejectAllPending((id) => `Leader closed before response id=${id}. stderr=${stderr}`);
});
223
+
224
// Line-oriented reader over the leader's stdout. Each line is expected to be
// one JSON value; anything that does not parse is ignored.
const rl = readline.createInterface({ input: leader.stdout, crlfDelay: Infinity });
rl.on("line", (rawLine: string) => {
  const event = safeJsonParse(rawLine);
  if (event === null) return;

  // RPC responses settle the matching in-flight request (if any) and are
  // never treated as notifications.
  if (isRpcResponse(event)) {
    const waiter = event.id ? pending.get(event.id) : undefined;
    if (event.id && waiter) {
      pending.delete(event.id);
      clearTimeout(waiter.timeout);
      waiter.resolve(event);
    }
    return;
  }

  // Echo extension notifications so they show up in the test output.
  if (
    isRecord(event) &&
    event.type === "extension_ui_request" &&
    event.method === "notify" &&
    typeof event.message === "string"
  ) {
    const level = typeof event.notifyType === "string" ? event.notifyType : "info";
    console.log(`[notify:${level}] ${event.message}`);
  }
});
244
+
245
/**
 * Sends one RPC command to the leader over stdin and waits for its response.
 *
 * The command is written as a single JSON line. If it carries no `id`, a
 * sequential `req-<n>` id is assigned. The returned promise settles when the
 * stdout reader sees a response with the same id, and rejects if none arrives
 * within `timeoutSec` seconds.
 */
const send = async (command: RpcCommand): Promise<RpcResponse> => {
  const id = command.id ?? `req-${nextId++}`;
  const wireLine = `${JSON.stringify({ ...command, id })}\n`;
  leader.stdin.write(wireLine);

  const reply = new Promise<RpcResponse>((resolve, reject) => {
    const onExpire = (): void => {
      // Only reject if the request is still outstanding.
      if (pending.delete(id)) {
        reject(new Error(`Timeout waiting for response to ${command.type}. stderr=${stderr}`));
      }
    };
    pending.set(id, { resolve, reject, timeout: setTimeout(onExpire, timeoutSec * 1000) });
  });

  return await reply;
};
258
+
259
try {
  // The team id is the leader's session id.
  const state = await send({ type: "get_state" });
  if (!state.success) throw new Error(`get_state failed: ${state.error ?? "unknown error"}`);

  const sessionId = extractSessionId(state);
  if (!sessionId) throw new Error(`No sessionId in get_state response: ${JSON.stringify(state)}`);
  const teamId = sessionId;
  const teamDir = path.join(teamsRootDir, teamId);
  const leadName = "team-lead";
  const workerName = "agent1";
  const overallTimeoutMs = timeoutSec * 1000;

  console.log(`teamId: ${teamId}`);
  console.log(`teamDir: ${teamDir}`);

  await waitFor(() => fs.existsSync(path.join(teamDir, "config.json")), {
    timeoutMs: overallTimeoutMs,
    pollMs: 100,
    label: "team config",
  });

  // 1) Team-level hooks policy: reopen the task and create a follow-up on failure.
  const policyCfg = await updateTeamHooksPolicy(teamDir, () => ({
    failureAction: "reopen_followup",
    maxReopensPerTask: 2,
    followupOwner: "member",
  }));
  assert(policyCfg !== null, "failed to set team hooks policy");

  // 2) Create a task owned by the worker, mark it completed, then tell the
  //    leader about it via an idle_notification in its mailbox.
  const originalTask = await createTask(teamDir, teamId, {
    subject: "Integration gate remediation task",
    description: "Task used by integration-hooks-remediation-test",
    owner: workerName,
  });

  const completed = await updateTask(teamDir, teamId, originalTask.id, (cur) => {
    const metadata = { ...(cur.metadata ?? {}) };
    metadata.completedAt = new Date().toISOString();
    return { ...cur, status: "completed", metadata };
  });
  assert(completed !== null, "failed to mark original task completed");

  const notifiedAt = new Date().toISOString();
  const idleNotification = {
    type: "idle_notification",
    from: workerName,
    timestamp: notifiedAt,
    completedTaskId: originalTask.id,
    completedStatus: "completed",
  };
  await writeToMailbox(teamDir, TEAM_MAILBOX_NS, leadName, {
    from: workerName,
    text: JSON.stringify(idleNotification),
    timestamp: notifiedAt,
  });

  // 3) Expect the leader to record the gate failure, reopen the original task
  //    and create a follow-up task owned by the same worker.
  let followupTaskId = "";

  await waitFor(
    async () => {
      const reopened = await getTask(teamDir, teamId, originalTask.id);
      if (!reopened) return false;

      const gateRecorded =
        reopened.status === "pending" &&
        reopened.metadata?.["qualityGateStatus"] === "failed" &&
        reopened.metadata?.["reopenedByQualityGateCount"] === 1;
      if (!gateRecorded) return false;

      const followup = findFollowupTask(await listTasks(teamDir, teamId), originalTask.id);
      if (!followup || followup.owner !== workerName) return false;

      followupTaskId = followup.id;
      return true;
    },
    { timeoutMs: overallTimeoutMs, pollMs: 200, label: "task reopened + follow-up created" },
  );

  assert(followupTaskId.length > 0, "missing follow-up task id");

  // The worker's inbox must contain both the follow-up assignment and the
  // remediation nudge.
  const workerInboxPath = getInboxPath(teamDir, teamId, workerName);
  await waitFor(
    () => {
      const messages = loadMailboxMessages(workerInboxPath);

      const hasAssignment = messages.some((msg) => {
        const payload = safeJsonParse(msg.text);
        return isRecord(payload) && payload.type === "task_assignment" && payload.taskId === followupTaskId;
      });
      const hasNudge = messages.some((msg) =>
        msg.text.includes("Please remediate automatically and continue without waiting for user intervention."),
      );

      return hasAssignment && hasNudge;
    },
    { timeoutMs: overallTimeoutMs, pollMs: 200, label: "follow-up assignment + remediation nudge" },
  );

  // The failing hook must really have executed and left a log file behind.
  assert(fs.existsSync(hookSentinelFile), "hook script did not run");

  const hookLogsDir = path.join(teamDir, "hook-logs");
  await waitFor(
    () => {
      try {
        return fs
          .readdirSync(hookLogsDir, { withFileTypes: true })
          .some((entry) => entry.isFile() && entry.name.includes("task_completed"));
      } catch {
        // Directory not created yet; keep polling.
        return false;
      }
    },
    { timeoutMs: overallTimeoutMs, pollMs: 200, label: "hook log file" },
  );

  console.log("PASS: integration hooks remediation flow passed");
} finally {
  // Best-effort cleanup: stop reading, stop the leader, remove the temp root.
  try {
    rl.close();
  } catch {
    // ignore
  }
  await terminateAll(procs);
  try {
    fs.rmSync(teamsRootDir, { recursive: true, force: true });
  } catch {
    // ignore
  }
}
@@ -364,6 +364,16 @@ try {
364
364
  assert(messagesContain(invalidModelEvents, "invalid model override"), "expected invalid model override notification");
365
365
  console.log("OK: invalid model override rejected");
366
366
 
367
+ // 5) Deprecated model override is rejected by model policy.
368
+ const beforeDeprecatedModel = notifications.length;
369
+ await sendPrompt("/team spawn erin fresh --model anthropic/claude-sonnet-4 --thinking low");
370
+ await sleep(150);
371
+ const deprecatedModelEvents = notifications.slice(beforeDeprecatedModel);
372
+
373
+ assert(findMember(teamDir, "erin") === null, "erin should not be spawned for deprecated model override");
374
+ assert(messagesContain(deprecatedModelEvents, "deprecated"), "expected deprecated model override notification");
375
+ console.log("OK: deprecated model override rejected");
376
+
367
377
  // Shutdown spawned teammates.
368
378
  await sendPrompt("/team shutdown");
369
379
  await waitForMemberStatus("alice", "offline");