@a5c-ai/babysitter-paperclip 0.0.2-staging.02a0ee21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/worker.ts ADDED
@@ -0,0 +1,595 @@
1
+ /**
2
+ * Babysitter Paperclip plugin worker.
3
+ *
4
+ * Handles the server-side logic: event routing from Paperclip agent runs,
5
+ * babysitter run lifecycle management, breakpoint resolution, and data
6
+ * serving for the UI components.
7
+ *
8
+ * ## Breakpoint interception model
9
+ *
10
+ * Paperclip wraps underlying harnesses (Claude Code, OpenClaw, etc.). Each
11
+ * harness has its own babysitter plugin that drives the orchestration loop:
12
+ *
13
+ * - Claude Code: stop-hook pauses between iterations. When only breakpoints
14
+ * are pending, the stop hook allows exit (approve decision) because the
15
+ * user must approve externally.
16
+ * - OpenClaw: agent_end hook fires async iteration. before_prompt_build
17
+ * injects breakpoint context.
18
+ *
19
+ * The Paperclip plugin SUPPLEMENTS (not replaces) this by:
20
+ * 1. Monitoring run state for pending breakpoints via run:status / task:list
21
+ * 2. Reading full breakpoint metadata from task.json (question, options,
22
+ * expert routing, tags, strategy)
23
+ * 3. Surfacing breakpoints in the Paperclip dashboard UI
24
+ * 4. Allowing approve/reject through Paperclip action handlers
25
+ * 5. Posting results via task:post --status ok (ALWAYS ok, even for reject)
26
+ * 6. The underlying harness picks up the resolved effect on next iteration
27
+ *
28
+ * ## Harness plugin installation
29
+ *
30
+ * On agent.run.started, we detect the underlying harness and check if the
31
+ * babysitter plugin is installed for that harness. If not, we log a warning
32
+ * and attempt installation via `babysitter harness:install-plugin <name>`.
33
+ */
34
+
35
+ import { definePlugin, runWorker } from "@paperclipai/plugin-sdk";
36
+ import * as bridge from "./babysitter-bridge";
37
+ import { detectHarness } from "./delegating-adapter";
38
+ import {
39
+ checkHarnessPluginStatus,
40
+ installHarnessPlugin as installViaInstaller,
41
+ } from "./harness-plugin-installer";
42
+ import type {
43
+ TrackedRun,
44
+ RunsOverview,
45
+ RunDetail,
46
+ PendingBreakpoint,
47
+ } from "./types";
48
+
49
+ /** Interval for polling run state for breakpoints (ms). */
50
+ const BREAKPOINT_POLL_INTERVAL = 5_000;
51
+
52
+ /** Active breakpoint polling timers keyed by runId. */
53
+ const activePollers = new Map<string, ReturnType<typeof setInterval>>();
54
+
55
+ const plugin = definePlugin({
56
+ async setup(ctx) {
57
+ // ---------------------------------------------------------------
58
+ // Event handlers — react to Paperclip agent lifecycle events
59
+ // ---------------------------------------------------------------
60
+
61
+ ctx.events.on("agent.run.started", async (event) => {
62
+ const agentId = event.entityId as string;
63
+ const companyId = (event as Record<string, unknown>).companyId as string;
64
+
65
+ ctx.logger.info("Agent run started", { agentId, companyId });
66
+
67
+ // Look up agent to determine adapter type
68
+ let adapterType: string | undefined;
69
+ try {
70
+ const agent = await ctx.agents.read(agentId, companyId);
71
+ adapterType = (agent as Record<string, unknown>).adapterType as
72
+ | string
73
+ | undefined;
74
+ } catch (err) {
75
+ ctx.logger.warn("Could not read agent metadata", { agentId, err });
76
+ }
77
+
78
+ // Detect underlying harness
79
+ const detection = detectHarness(adapterType, {
80
+ defaultHarness: ctx.config?.defaultHarness as string | undefined,
81
+ });
82
+
83
+ ctx.logger.info("Detected harness", {
84
+ agentId,
85
+ harnessName: detection.harnessName,
86
+ tier: detection.detectionTier,
87
+ confidence: detection.confidence,
88
+ });
89
+
90
+ // Store detection result for this agent session
91
+ await ctx.state.set(`agent:${agentId}:harness`, {
92
+ ...detection,
93
+ agentId,
94
+ companyId,
95
+ startedAt: new Date().toISOString(),
96
+ });
97
+
98
+ // Check if babysitter plugin is installed for the detected harness.
99
+ // The underlying harness plugin is what drives the stop-hook iteration
100
+ // loop and breakpoint presentation. Without it, orchestration won't work.
101
+ try {
102
+ const status = await checkHarnessPluginStatus(detection.harnessName);
103
+ if (!status.pluginInstalled) {
104
+ ctx.logger.warn(
105
+ `Babysitter plugin not installed for harness ${detection.harnessName}. ` +
106
+ `Attempting installation...`,
107
+ { harnessName: detection.harnessName, installCommand: status.installCommand }
108
+ );
109
+
110
+ const installResult = await installViaInstaller(detection.harnessName);
111
+ if (installResult.success) {
112
+ ctx.logger.info("Harness plugin installed", {
113
+ harnessName: detection.harnessName,
114
+ });
115
+ } else {
116
+ ctx.logger.warn(
117
+ `Could not auto-install babysitter plugin for ${detection.harnessName}. ` +
118
+ `Run: ${status.installCommand}`,
119
+ { output: installResult.output }
120
+ );
121
+ }
122
+ } else {
123
+ ctx.logger.info("Harness plugin already installed", {
124
+ harnessName: detection.harnessName,
125
+ });
126
+ }
127
+ } catch (err) {
128
+ ctx.logger.warn("Harness plugin check failed", { err });
129
+ }
130
+ });
131
+
132
+ ctx.events.on("agent.run.finished", async (event) => {
133
+ const agentId = event.entityId as string;
134
+ ctx.logger.info("Agent run finished", { agentId });
135
+ await ctx.state.delete(`agent:${agentId}:harness`);
136
+ stopBreakpointPolling(agentId);
137
+ });
138
+
139
+ ctx.events.on("agent.run.failed", async (event) => {
140
+ const agentId = event.entityId as string;
141
+ ctx.logger.warn("Agent run failed", { agentId });
142
+ await ctx.state.delete(`agent:${agentId}:harness`);
143
+ stopBreakpointPolling(agentId);
144
+ });
145
+
146
+ ctx.events.on("agent.run.cancelled", async (event) => {
147
+ const agentId = event.entityId as string;
148
+ ctx.logger.info("Agent run cancelled", { agentId });
149
+ await ctx.state.delete(`agent:${agentId}:harness`);
150
+ stopBreakpointPolling(agentId);
151
+ });
152
+
153
+ // ---------------------------------------------------------------
154
+ // Data handlers — serve state to UI components
155
+ // ---------------------------------------------------------------
156
+
157
+ ctx.data.register("runs-overview", async () => {
158
+ const tracked = await getTrackedRuns(ctx);
159
+
160
+ const pendingBreakpoints = tracked.reduce(
161
+ (sum, r) => sum + r.pendingBreakpoints.length,
162
+ 0
163
+ );
164
+
165
+ return {
166
+ activeRuns: tracked.filter(
167
+ (r) => r.status === "running" || r.status === "waiting"
168
+ ),
169
+ pendingBreakpoints,
170
+ totalRuns: tracked.length,
171
+ } satisfies RunsOverview;
172
+ });
173
+
174
+ ctx.data.register("run-detail", async (params) => {
175
+ const runId = params.runId as string;
176
+ const runsDir = (ctx.config?.runsDir as string) ?? ".a5c/runs";
177
+ const runDir = bridge.buildRunDir(runsDir, runId);
178
+
179
+ const [status, events, pendingTasks] = await Promise.all([
180
+ bridge.getRunStatus(runDir),
181
+ bridge.getRunEvents(runDir, 50),
182
+ bridge.listPendingTasks(runDir),
183
+ ]);
184
+
185
+ // Get full breakpoint metadata from task.json files
186
+ const breakpoints = await bridge.getPendingBreakpoints(runDir);
187
+
188
+ const tracked = (await ctx.state.get(`run:${runId}`)) as TrackedRun | null;
189
+
190
+ return {
191
+ run: tracked ?? {
192
+ runId,
193
+ processId: "unknown",
194
+ agentId: "unknown",
195
+ companyId: "unknown",
196
+ harnessName: "unknown",
197
+ status: status.state as TrackedRun["status"],
198
+ createdAt: "unknown",
199
+ pendingBreakpoints: breakpoints,
200
+ },
201
+ events,
202
+ pendingEffects: pendingTasks,
203
+ } satisfies RunDetail;
204
+ });
205
+
206
+ ctx.data.register("pending-breakpoints", async () => {
207
+ const runsDir = (ctx.config?.runsDir as string) ?? ".a5c/runs";
208
+ const tracked = await getTrackedRuns(ctx);
209
+ const allBreakpoints: Array<PendingBreakpoint & { runId: string }> = [];
210
+
211
+ for (const run of tracked) {
212
+ if (run.status !== "waiting") continue;
213
+
214
+ // Fetch live breakpoint data with full metadata from task.json
215
+ try {
216
+ const runDir = bridge.buildRunDir(runsDir, run.runId);
217
+ const bps = await bridge.getPendingBreakpoints(runDir);
218
+ for (const bp of bps) {
219
+ allBreakpoints.push({ ...bp, runId: run.runId });
220
+ }
221
+ } catch {
222
+ // Fall back to cached data
223
+ for (const bp of run.pendingBreakpoints) {
224
+ allBreakpoints.push({ ...bp, runId: run.runId });
225
+ }
226
+ }
227
+ }
228
+
229
+ return allBreakpoints;
230
+ });
231
+
232
+ // ---------------------------------------------------------------
233
+ // Action handlers — respond to UI interactions
234
+ // ---------------------------------------------------------------
235
+
236
+ ctx.actions.register("approve-breakpoint", async (params) => {
237
+ const { runId, effectId, response } = params as {
238
+ runId: string;
239
+ effectId: string;
240
+ response?: string;
241
+ companyId?: string;
242
+ };
243
+ const runsDir = (ctx.config?.runsDir as string) ?? ".a5c/runs";
244
+ const runDir = bridge.buildRunDir(runsDir, runId);
245
+
246
+ // Approve: --status ok with { approved: true }
247
+ // CRITICAL: Always use --status ok. Never --status error.
248
+ const result = await bridge.approveBreakpoint(runDir, effectId, response);
249
+
250
+ ctx.events.emit(
251
+ "plugin.babysitter.breakpoint.resolved",
252
+ (params as Record<string, unknown>).companyId as string,
253
+ { runId, effectId, approved: true }
254
+ );
255
+
256
+ // Auto-iterate if enabled — the underlying harness will pick up the
257
+ // resolved effect and continue the orchestration loop
258
+ if (ctx.config?.autoIterate !== false) {
259
+ await iterateAndStream(ctx, runId, runDir);
260
+ }
261
+
262
+ return result;
263
+ });
264
+
265
+ ctx.actions.register("reject-breakpoint", async (params) => {
266
+ const { runId, effectId, feedback } = params as {
267
+ runId: string;
268
+ effectId: string;
269
+ feedback: string;
270
+ companyId?: string;
271
+ };
272
+ const runsDir = (ctx.config?.runsDir as string) ?? ".a5c/runs";
273
+ const runDir = bridge.buildRunDir(runsDir, runId);
274
+
275
+ // Reject: --status ok with { approved: false, feedback }
276
+ // CRITICAL: Rejection uses --status ok, NOT --status error.
277
+ // --status error signals a task execution failure and triggers RUN_FAILED,
278
+ // requiring manual journal surgery to recover.
279
+ const result = await bridge.rejectBreakpoint(runDir, effectId, feedback);
280
+
281
+ ctx.events.emit(
282
+ "plugin.babysitter.breakpoint.resolved",
283
+ (params as Record<string, unknown>).companyId as string,
284
+ { runId, effectId, approved: false, feedback }
285
+ );
286
+
287
+ // Auto-iterate — process will loop back with the rejection feedback
288
+ // via the retry/refine pattern (previousFeedback, attempt fields)
289
+ if (ctx.config?.autoIterate !== false) {
290
+ await iterateAndStream(ctx, runId, runDir);
291
+ }
292
+
293
+ return result;
294
+ });
295
+
296
+ ctx.actions.register("create-run", async (params) => {
297
+ const { processId, entry, inputsFile, agentId, companyId } = params as {
298
+ processId: string;
299
+ entry: string;
300
+ inputsFile: string;
301
+ agentId: string;
302
+ companyId: string;
303
+ };
304
+ const runsDir = (ctx.config?.runsDir as string) ?? ".a5c/runs";
305
+
306
+ const result = await bridge.createRun({
307
+ processId,
308
+ entry,
309
+ inputsFile,
310
+ runsDir,
311
+ });
312
+
313
+ // Track the run
314
+ const harnessState = (await ctx.state.get(
315
+ `agent:${agentId}:harness`
316
+ )) as { harnessName: string } | null;
317
+
318
+ const tracked: TrackedRun = {
319
+ runId: result.runId,
320
+ processId,
321
+ agentId,
322
+ companyId,
323
+ harnessName: harnessState?.harnessName ?? "unknown",
324
+ status: "running",
325
+ createdAt: new Date().toISOString(),
326
+ pendingBreakpoints: [],
327
+ };
328
+
329
+ await ctx.state.set(`run:${result.runId}`, tracked);
330
+
331
+ ctx.events.emit("plugin.babysitter.run.created", companyId, {
332
+ runId: result.runId,
333
+ processId,
334
+ agentId,
335
+ });
336
+
337
+ // Start breakpoint polling for this run
338
+ startBreakpointPolling(ctx, result.runId, companyId, runsDir);
339
+
340
+ return result;
341
+ });
342
+
343
+ // ---------------------------------------------------------------
344
+ // Action: check and install harness plugin
345
+ // ---------------------------------------------------------------
346
+
347
+ ctx.actions.register("check-harness-plugin", async (params) => {
348
+ const { harnessName } = params as { harnessName: string };
349
+ return checkHarnessPluginStatus(harnessName);
350
+ });
351
+
352
+ ctx.actions.register("install-harness-plugin", async (params) => {
353
+ const { harnessName } = params as { harnessName: string };
354
+ return installViaInstaller(harnessName);
355
+ });
356
+
357
+ // ---------------------------------------------------------------
358
+ // Stream handler — real-time run events
359
+ // ---------------------------------------------------------------
360
+
361
+ ctx.actions.register("subscribe-run-events", async (params) => {
362
+ const { runId, companyId } = params as {
363
+ runId: string;
364
+ companyId: string;
365
+ };
366
+
367
+ const channel = `run-events:${runId}`;
368
+ ctx.streams.open(channel, companyId);
369
+
370
+ const runsDir = (ctx.config?.runsDir as string) ?? ".a5c/runs";
371
+ const runDir = bridge.buildRunDir(runsDir, runId);
372
+
373
+ // Emit current state
374
+ try {
375
+ const status = await bridge.getRunStatus(runDir);
376
+ ctx.streams.emit(channel, { type: "status", data: status });
377
+
378
+ const events = await bridge.getRunEvents(runDir, 20);
379
+ for (const event of events) {
380
+ ctx.streams.emit(channel, { type: "event", data: event });
381
+ }
382
+
383
+ // Check for pending breakpoints and emit them
384
+ const bpCheck = await bridge.hasOnlyBreakpointsPending(runDir);
385
+ if (bpCheck.onlyBreakpoints) {
386
+ const breakpoints = await bridge.getPendingBreakpoints(runDir);
387
+ ctx.streams.emit(channel, {
388
+ type: "breakpoints-pending",
389
+ data: { breakpoints, onlyBreakpoints: true },
390
+ });
391
+ }
392
+ } catch (err) {
393
+ ctx.streams.emit(channel, {
394
+ type: "error",
395
+ data: { message: String(err) },
396
+ });
397
+ }
398
+
399
+ return { channel };
400
+ });
401
+
402
+ // ---------------------------------------------------------------
403
+ // Tool handler — babysitter status for agents
404
+ // ---------------------------------------------------------------
405
+
406
+ ctx.tools.register(
407
+ "babysitter-status",
408
+ {
409
+ displayName: "Babysitter Status",
410
+ description:
411
+ "Check the status of babysitter orchestration runs, including pending breakpoints and effects.",
412
+ parametersSchema: {
413
+ type: "object",
414
+ properties: {
415
+ runId: {
416
+ type: "string",
417
+ description:
418
+ "Specific run ID to check. If omitted, returns overview.",
419
+ },
420
+ },
421
+ },
422
+ },
423
+ async (params) => {
424
+ const runId = (params as { runId?: string }).runId;
425
+ const runsDir = (ctx.config?.runsDir as string) ?? ".a5c/runs";
426
+
427
+ if (runId) {
428
+ const runDir = bridge.buildRunDir(runsDir, runId);
429
+ const [status, breakpoints] = await Promise.all([
430
+ bridge.getRunStatus(runDir),
431
+ bridge.getPendingBreakpoints(runDir),
432
+ ]);
433
+
434
+ const bpSummary =
435
+ breakpoints.length > 0
436
+ ? `\nPending breakpoints: ${breakpoints.map((b) => b.title).join(", ")}`
437
+ : "";
438
+
439
+ return {
440
+ content:
441
+ `Run ${runId}: ${status.state}. ` +
442
+ `Pending: ${JSON.stringify(status.pendingByKind)}${bpSummary}`,
443
+ data: { ...status, breakpoints },
444
+ };
445
+ }
446
+
447
+ const tracked = await getTrackedRuns(ctx);
448
+ const active = tracked.filter(
449
+ (r) => r.status === "running" || r.status === "waiting"
450
+ );
451
+ const totalBps = tracked.reduce(
452
+ (sum, r) => sum + r.pendingBreakpoints.length,
453
+ 0
454
+ );
455
+ return {
456
+ content:
457
+ `${active.length} active runs, ${tracked.length} total. ` +
458
+ `${totalBps} pending breakpoints.`,
459
+ data: {
460
+ activeRuns: active.length,
461
+ totalRuns: tracked.length,
462
+ pendingBreakpoints: totalBps,
463
+ },
464
+ };
465
+ }
466
+ );
467
+ },
468
+ });
469
+
470
+ // ---------------------------------------------------------------
471
+ // Breakpoint polling
472
+ // ---------------------------------------------------------------
473
+
474
+ /**
475
+ * Start polling a run for pending breakpoints.
476
+ *
477
+ * This is how the Paperclip plugin intercepts breakpoints from the underlying
478
+ * harness. The harness's stop hook has already paused the orchestration loop
479
+ * (because only breakpoints are pending). We poll run:status to detect this
480
+ * state and stream breakpoint details to the UI.
481
+ */
482
+ function startBreakpointPolling(
483
+ ctx: {
484
+ streams: { open: (ch: string, id: string) => void; emit: (ch: string, evt: unknown) => void };
485
+ state: { get: (k: string) => Promise<unknown>; set: (k: string, v: unknown) => Promise<void> };
486
+ events: { emit: (name: string, companyId: string, payload: unknown) => void };
487
+ logger: { info: (msg: string, data?: Record<string, unknown>) => void };
488
+ },
489
+ runId: string,
490
+ companyId: string,
491
+ runsDir: string
492
+ ): void {
493
+ if (activePollers.has(runId)) return;
494
+
495
+ const runDir = bridge.buildRunDir(runsDir, runId);
496
+ const channel = `run-events:${runId}`;
497
+ let lastBreakpointIds = new Set<string>();
498
+
499
+ const timer = setInterval(async () => {
500
+ try {
501
+ const bpCheck = await bridge.hasOnlyBreakpointsPending(runDir);
502
+
503
+ if (bpCheck.onlyBreakpoints) {
504
+ const breakpoints = await bridge.getPendingBreakpoints(runDir);
505
+ const currentIds = new Set(breakpoints.map((b) => b.effectId));
506
+
507
+ // Emit only newly discovered breakpoints
508
+ const newBreakpoints = breakpoints.filter(
509
+ (b) => !lastBreakpointIds.has(b.effectId)
510
+ );
511
+
512
+ if (newBreakpoints.length > 0) {
513
+ ctx.streams.emit(channel, {
514
+ type: "breakpoints-pending",
515
+ data: { breakpoints: newBreakpoints, onlyBreakpoints: true },
516
+ });
517
+
518
+ for (const bp of newBreakpoints) {
519
+ ctx.events.emit(
520
+ "plugin.babysitter.breakpoint.requested",
521
+ companyId,
522
+ { runId, effectId: bp.effectId, title: bp.title, question: bp.question }
523
+ );
524
+ }
525
+
526
+ ctx.logger.info("New breakpoints detected", {
527
+ runId,
528
+ count: newBreakpoints.length,
529
+ });
530
+ }
531
+
532
+ lastBreakpointIds = currentIds;
533
+ }
534
+
535
+ // Check for completion
536
+ const status = await bridge.getRunStatus(runDir);
537
+ if (status.completionProof || status.state === "completed" || status.state === "failed") {
538
+ stopBreakpointPolling(runId);
539
+ }
540
+ } catch {
541
+ // Poll failures are non-fatal - run may not exist yet or may have been cleaned up
542
+ }
543
+ }, BREAKPOINT_POLL_INTERVAL);
544
+
545
+ activePollers.set(runId, timer);
546
+ }
547
+
548
+ /** Stop polling for a run. */
549
+ function stopBreakpointPolling(key: string): void {
550
+ const timer = activePollers.get(key);
551
+ if (timer) {
552
+ clearInterval(timer);
553
+ activePollers.delete(key);
554
+ }
555
+ }
556
+
557
+ // ---------------------------------------------------------------
558
+ // Helpers
559
+ // ---------------------------------------------------------------
560
+
561
+ /** Retrieve all tracked runs from plugin state. */
562
+ async function getTrackedRuns(ctx: {
563
+ state: { get: (key: string) => Promise<unknown> };
564
+ }): Promise<TrackedRun[]> {
565
+ const runs = (await ctx.state.get("tracked-runs")) as TrackedRun[] | null;
566
+ return runs ?? [];
567
+ }
568
+
569
+ /** Iterate a run and stream events to the UI channel. */
570
+ async function iterateAndStream(
571
+ ctx: {
572
+ streams: { emit: (channel: string, event: unknown) => void };
573
+ state: { set: (key: string, value: unknown) => Promise<void> };
574
+ logger: { info: (msg: string, data?: Record<string, unknown>) => void };
575
+ },
576
+ runId: string,
577
+ runDir: string
578
+ ): Promise<void> {
579
+ try {
580
+ const result = await bridge.iterateRun(runDir);
581
+ const channel = `run-events:${runId}`;
582
+
583
+ ctx.streams.emit(channel, {
584
+ type: "iteration",
585
+ data: { status: result.status, nextActions: result.nextActions },
586
+ });
587
+
588
+ ctx.logger.info("Run iterated", { runId, status: result.status });
589
+ } catch (err) {
590
+ ctx.logger.info("Iteration failed", { runId, error: String(err) });
591
+ }
592
+ }
593
+
594
+ export default plugin;
595
+ runWorker(plugin, import.meta.url);
package/tsconfig.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "jsx": "react-jsx",
7
+ "strict": true,
8
+ "esModuleInterop": true,
9
+ "skipLibCheck": true,
10
+ "outDir": "dist",
11
+ "rootDir": "src",
12
+ "declaration": true,
13
+ "declarationMap": true,
14
+ "sourceMap": true,
15
+ "resolveJsonModule": true
16
+ },
17
+ "include": ["src/**/*.ts", "src/**/*.tsx"],
18
+ "exclude": ["dist", "node_modules", "**/__tests__/**"]
19
+ }
package/versions.json ADDED
@@ -0,0 +1,3 @@
1
+ {
2
+ "sdkVersion": "0.0.187-staging.02a0ee21"
3
+ }