@a5c-ai/babysitter-omp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,256 @@
1
+ /**
2
+ * The orchestration loop driver.
3
+ *
4
+ * On every `agent_end` event the loop driver decides whether to continue
5
+ * iterating: it checks guards, invokes `orchestrateIteration` via the SDK
6
+ * bridge, maps pending effects to a continuation prompt, and schedules a
7
+ * follow-up turn via `pi.sendUserMessage()` if there is more work to do.
8
+ *
9
+ * This module uses the babysitter SDK directly -- no CLI subprocesses are
10
+ * spawned, no JSON is scraped from stdout. Everything runs in-process,
11
+ * which is marginally less depressing than the alternative.
12
+ *
13
+ * @module loop-driver
14
+ */
15
+
16
+ import type { ExtensionAPI } from './types.js';
17
+ import type { IterationResult, EffectAction } from '@a5c-ai/babysitter-sdk';
18
+ import { iterate, postResult, getRunStatus } from './sdk-bridge.js';
19
+ import { getActiveRun, setActiveRun, clearActiveRun } from './session-binder.js';
20
+ import { checkGuards, recordIterationDigest, resetDigests, recordIterationOutcome, recordPendingCount } from './guards.js';
21
+
22
+ // ---------------------------------------------------------------------------
23
+ // Promise-tag extraction
24
+ // ---------------------------------------------------------------------------
25
+
26
/**
 * Pull the payload out of the first `<promise>...</promise>` tag in `text`.
 *
 * By babysitter convention an agent emits this tag to signal that it is
 * done; the loop driver stops iterating when one is found.
 *
 * The payload must be non-empty and must not contain a `<` character,
 * otherwise the tag is not recognised.
 *
 * @param text - Raw agent output to scan.
 * @returns The text between the tags, or `null` when no tag matches.
 */
export function extractPromiseTag(text: string): string | null {
  const found = text.match(/<promise>([^<]+)<\/promise>/);
  if (found === null) {
    return null;
  }
  return found[1];
}
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // Continuation prompt builder
43
+ // ---------------------------------------------------------------------------
44
+
45
/**
 * Per-kind guidance lines used by {@link buildContinuationPrompt}.
 *
 * Kept in a `Map` rather than a plain object so that an effect kind which
 * happens to share its name with an `Object.prototype` member (for example
 * `"constructor"` or `"toString"`) falls through to the generic instruction
 * instead of resolving to an inherited property.
 */
const INSTRUCTIONS_BY_KIND: ReadonlyMap<string, string> = new Map([
  ['node', 'Execute the Node.js task and capture its output.'],
  ['shell', 'Run the shell command and capture stdout/stderr.'],
  ['agent', 'Delegate to a sub-agent and collect its response.'],
  ['breakpoint', 'This is a human approval gate. Approve or reject to continue.'],
  ['sleep', 'Wait for the specified duration, then post an OK result.'],
  ['orchestrator_task', 'Delegate to the orchestrator sub-process.'],
  ['skill', 'Invoke the named skill and return its result.'],
]);

/**
 * Build the continuation prompt injected into the conversation to keep the
 * orchestration loop alive.
 *
 * The prompt consists of a header (iteration number and run id), one line per
 * pending effect (kind, title and effect id), one instruction line per
 * distinct effect kind, and a closing reminder to post results. When there
 * are no pending effects a short "waiting for external resolution" message is
 * returned instead.
 *
 * An effect's title is taken from `taskDef.title`, then `label`, then a
 * placeholder built from its `effectId`. Kinds without a dedicated
 * instruction get a generic `Handle the "<kind>" effect.` line.
 *
 * @param iterationResult - The SDK iteration result containing `nextActions`.
 * @param runState - The run id and iteration number shown in the header.
 * @returns A prompt string for the next agent turn.
 */
export function buildContinuationPrompt(
  iterationResult: Extract<IterationResult, { status: 'waiting' }>,
  runState: { runId: string; iteration: number },
): string {
  const actions = iterationResult.nextActions;
  const header =
    `[babysitter] Iteration ${runState.iteration} | Run ${runState.runId} | ` +
    `Continue orchestration`;

  if (actions.length === 0) {
    return `${header}\n\nNo pending effects. Waiting for external resolution.`;
  }

  const effectLines = actions.map((action: EffectAction, idx: number) => {
    const title =
      action.taskDef?.title ?? action.label ?? `(effect ${action.effectId})`;
    return ` ${idx + 1}. [${action.kind}] ${title} (effectId: ${action.effectId})`;
  });

  // One instruction line per distinct kind, in order of first appearance.
  const uniqueKinds = [...new Set(actions.map((a: EffectAction) => a.kind))];
  const instructions = uniqueKinds
    .map((kind) => {
      const instruction =
        INSTRUCTIONS_BY_KIND.get(kind) ?? `Handle the "${kind}" effect.`;
      return ` - ${kind}: ${instruction}`;
    })
    .join('\n');

  return [
    header,
    '',
    `Pending effects (${actions.length}):`,
    ...effectLines,
    '',
    'Instructions by effect kind:',
    instructions,
    '',
    'Execute the effects, post results, then stop.',
  ].join('\n');
}
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // agent_end handler
108
+ // ---------------------------------------------------------------------------
109
+
110
/**
 * Render a thrown or reported error value as a log-friendly string.
 *
 * - `Error` instances yield their `message`; when the error carries a
 *   non-nullish `cause` property, the cause is appended so that wrapper
 *   errors do not hide the underlying failure.
 * - Other objects yield their string `message` property if they have one,
 *   otherwise their JSON form (instead of `[object Object]`).
 * - Everything else is passed through `String()`.
 *
 * @param err - The value to describe.
 * @param depth - Current position in the `cause` chain (internal).
 * @returns A human-readable description of `err`.
 */
function describeIterationError(err: unknown, depth = 0): string {
  if (err instanceof Error) {
    const cause = (err as { cause?: unknown }).cause;
    // Cap the chain so a self-referential `cause` cannot recurse forever.
    if (cause === undefined || cause === null || depth >= 5) {
      return err.message;
    }
    return `${err.message} (caused by: ${describeIterationError(cause, depth + 1)})`;
  }

  if (typeof err === 'object' && err !== null) {
    const message = (err as { message?: unknown }).message;
    if (typeof message === 'string') {
      return message;
    }
    try {
      return JSON.stringify(err) ?? String(err);
    } catch {
      // Circular structures and similar: fall back to the default rendering.
      return String(err);
    }
  }

  return String(err);
}

/**
 * Handle the `agent_end` event from oh-my-pi.
 *
 * This is the core orchestration loop driver. When the LLM finishes a
 * turn, we:
 *
 * 1. Look up the active babysitter run for the current session
 *    (session id defaults to `'default'`); return if there is none.
 * 2. Check the agent output for a `<promise>` completion tag; if present,
 *    log it, clear the active run and stop.
 * 3. Run guard checks via `checkGuards`; if they do not pass, log the
 *    reason, clear the active run and stop.
 * 4. Run one orchestration iteration via the SDK bridge's `iterate`.
 * 5. Based on the result, either clean up (completed/failed) or send a
 *    continuation prompt as a user message to keep the loop going (waiting).
 *
 * @param event - The `agent_end` event payload from oh-my-pi.
 * @param pi - The oh-my-pi {@link ExtensionAPI} handle.
 */
export async function onAgentEnd(
  event: {
    sessionId?: string;
    output?: string;
    text?: string;
  },
  pi: ExtensionAPI,
): Promise<void> {
  const sessionId = event.sessionId ?? 'default';

  // 1. Look up active run
  const run = getActiveRun(sessionId);
  if (!run) {
    return; // No active babysitter run -- nothing to do.
  }

  // 2. Extract agent output and check for completion proof
  const agentOutput = event.output ?? event.text ?? '';
  const promise = extractPromiseTag(agentOutput);

  if (promise) {
    // The agent declared itself done. The tag is accepted as-is (nothing is
    // verified here); log it and wrap up.
    pi.appendEntry({
      type: 'info',
      content: `[babysitter] Completion proof received: "${promise}". Finalising run ${run.runId}.`,
    });
    clearActiveRun(sessionId);
    resetDigests();
    return;
  }

  // 3. Guard checks
  const guardResult = checkGuards(run);
  if (!guardResult.passed) {
    pi.appendEntry({
      type: 'warning',
      content: `[babysitter] Guard tripped: ${guardResult.reason}. Stopping run ${run.runId}.`,
    });
    clearActiveRun(sessionId);
    resetDigests();
    return;
  }

  // 4. Run SDK orchestration iteration
  let iterResult: IterationResult;
  const iterStart = Date.now();
  try {
    iterResult = await iterate(run.runDir);
  } catch (err: unknown) {
    // The bridge wraps SDK failures, so include the cause chain in the log;
    // the wrapper's own message alone does not say what went wrong.
    const errMsg = describeIterationError(err);
    pi.appendEntry({
      type: 'error',
      content: `[babysitter] Iteration failed for run ${run.runId}: ${errMsg}`,
    });
    // Update state to reflect the error but don't kill the run on one failure.
    // The run stays registered; no follow-up turn is scheduled from here.
    recordIterationOutcome(false);
    run.iteration += 1;
    run.iterationTimes.push(Date.now() - iterStart);
    setActiveRun(run);
    return;
  }

  // Record timing and increment iteration
  recordIterationOutcome(true);
  run.iteration += 1;
  run.iterationTimes.push(Date.now() - iterStart);

  // Record digest (the list of pending effect ids) and pending count for
  // the guards module.
  if (iterResult.status === 'waiting') {
    recordIterationDigest(
      JSON.stringify(iterResult.nextActions.map((a: EffectAction) => a.effectId)),
    );
    recordPendingCount(iterResult.nextActions.length);
  }

  // 5. Handle the result
  switch (iterResult.status) {
    case 'completed': {
      pi.appendEntry({
        type: 'info',
        content: `[babysitter] Run ${run.runId} completed successfully after ${run.iteration} iteration(s).`,
      });
      run.status = 'completed';
      setActiveRun(run);
      clearActiveRun(sessionId);
      resetDigests();
      return;
    }

    case 'failed': {
      const failErr = describeIterationError(iterResult.error ?? 'unknown error');
      pi.appendEntry({
        type: 'error',
        content: `[babysitter] Run ${run.runId} failed: ${failErr}`,
      });
      run.status = 'failed';
      setActiveRun(run);
      clearActiveRun(sessionId);
      resetDigests();
      return;
    }

    case 'waiting': {
      run.status = 'running';
      setActiveRun(run);

      const prompt = buildContinuationPrompt(iterResult, {
        runId: run.runId,
        iteration: run.iteration,
      });

      pi.sendUserMessage({ role: 'user', content: prompt });
      return;
    }

    default: {
      // Exhaustiveness guard -- should never happen, but the universe
      // has a talent for producing things that should never happen.
      const _exhaustive: never = iterResult;
      pi.appendEntry({
        type: 'warning',
        content: `[babysitter] Unexpected iteration status. This should not happen.`,
      });
      void _exhaustive;
      return;
    }
  }
}
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Posts task results back to the babysitter runtime.
3
+ *
4
+ * After an effect has been executed (by the effect-executor or externally)
5
+ * the result must be committed to the run's journal so the next iteration
6
+ * can replay it. This module calls `commitEffectResult` from the SDK
7
+ * directly — no CLI subprocess, no JSON scraping, no carrier pigeons.
8
+ *
9
+ * @module result-poster
10
+ */
11
+
12
+ import {
13
+ commitEffectResult,
14
+ type CommitEffectResultOptions,
15
+ type CommitEffectResultArtifacts,
16
+ } from '@a5c-ai/babysitter-sdk';
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Types
20
+ // ---------------------------------------------------------------------------
21
+
22
/**
 * Input for {@link postResult}: identifies the effect being resolved and
 * carries its outcome.
 */
export interface PostResultOptions {
  /** Absolute path to the run directory. */
  runDir: string;

  /** Identifier of the effect this result resolves. */
  effectId: string;

  /** Outcome of the task: `'ok'` on success, `'error'` on failure. */
  status: 'ok' | 'error';

  /** Return value of the task; only forwarded when `status` is `'ok'`. */
  value?: unknown;

  /** Error payload; only forwarded when `status` is `'error'`. */
  error?: unknown;

  /** Captured standard output, if any. */
  stdout?: string;

  /** Captured standard error, if any. */
  stderr?: string;

  /** ISO-8601 timestamp marking the start of task execution. */
  startedAt?: string;

  /** ISO-8601 timestamp marking the end of task execution. */
  finishedAt?: string;
}

/**
 * What {@link postResult} resolves with once a result has been committed;
 * an alias of the SDK's commit-artifacts type.
 */
export type PostResultArtifacts = CommitEffectResultArtifacts;
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // postResult
49
+ // ---------------------------------------------------------------------------
50
+
51
/**
 * Persist the outcome of an executed effect through the SDK.
 *
 * Builds a {@link CommitEffectResultOptions} from `opts` and hands it to the
 * SDK's `commitEffectResult`. `value` is forwarded only for `'ok'` results
 * and `error` only for `'error'` results; the other field is sent as
 * `undefined`. Output streams and timestamps are passed through unchanged.
 *
 * @param opts - The effect to resolve and its outcome.
 * @returns Whatever `commitEffectResult` resolves with.
 */
export async function postResult(
  opts: PostResultOptions,
): Promise<PostResultArtifacts> {
  const { status } = opts;
  const succeeded = status === 'ok';
  const failed = status === 'error';

  const request: CommitEffectResultOptions = {
    runDir: opts.runDir,
    effectId: opts.effectId,
    result: {
      status,
      value: succeeded ? opts.value : undefined,
      error: failed ? opts.error : undefined,
      stdout: opts.stdout,
      stderr: opts.stderr,
      startedAt: opts.startedAt,
      finishedAt: opts.finishedAt,
    },
  };

  const artifacts = await commitEffectResult(request);
  return artifacts;
}
78
+
79
+ // ---------------------------------------------------------------------------
80
+ // postOkResult
81
+ // ---------------------------------------------------------------------------
82
+
83
/**
 * Shorthand for {@link postResult} with `status: 'ok'`.
 *
 * The artifacts returned by `postResult` are discarded.
 *
 * @param runDir - Absolute path to the run directory.
 * @param effectId - Identifier of the effect being resolved.
 * @param value - The value the task returned.
 */
export async function postOkResult(
  runDir: string,
  effectId: string,
  value: unknown,
): Promise<void> {
  const success = { runDir, effectId, status: 'ok' as const, value };
  await postResult(success);
}
97
+
98
+ // ---------------------------------------------------------------------------
99
+ // postErrorResult
100
+ // ---------------------------------------------------------------------------
101
+
102
/**
 * Shorthand for {@link postResult} with `status: 'error'`.
 *
 * The artifacts returned by `postResult` are discarded.
 *
 * @param runDir - Absolute path to the run directory.
 * @param effectId - Identifier of the effect being resolved.
 * @param error - Payload describing the failure.
 */
export async function postErrorResult(
  runDir: string,
  effectId: string,
  error: unknown,
): Promise<void> {
  const failure = { runDir, effectId, status: 'error' as const, error };
  await postResult(failure);
}
@@ -0,0 +1,243 @@
1
+ /**
2
+ * Babysitter SDK bridge for the oh-my-pi extension.
3
+ *
4
+ * Replaces the former `cli-wrapper` module with direct SDK function calls.
5
+ * No child processes are spawned, no stdout is parsed, and no JSON is
6
+ * scraped from a subprocess pipe. Instead we import the runtime and
7
+ * storage layers from `@a5c-ai/babysitter-sdk` and call them in-process.
8
+ *
9
+ * Every other module in this extension that needs to talk to babysitter
10
+ * should go through this bridge so there is exactly one place to handle
11
+ * option translation and error mapping.
12
+ *
13
+ * @module sdk-bridge
14
+ */
15
+
16
+ import {
17
+ createRun,
18
+ orchestrateIteration,
19
+ commitEffectResult,
20
+ type CreateRunOptions,
21
+ type CreateRunResult,
22
+ type OrchestrateOptions,
23
+ type IterationResult,
24
+ type CommitEffectResultOptions,
25
+ type CommitEffectResultArtifacts,
26
+ type EffectAction,
27
+ } from '@a5c-ai/babysitter-sdk';
28
+
29
+ import {
30
+ loadJournal,
31
+ readRunMetadata,
32
+ } from '@a5c-ai/babysitter-sdk';
33
+
34
+ // ---------------------------------------------------------------------------
35
+ // Error wrapper
36
+ // ---------------------------------------------------------------------------
37
+
38
/**
 * Error type thrown by the SDK bridge functions.
 *
 * Carries a bridge-level message plus, in {@link SdkBridgeError.cause}, the
 * value that was originally thrown.
 */
export class SdkBridgeError extends Error {
  /** The value originally thrown by the SDK; `undefined` when not supplied. */
  readonly cause: unknown;

  /**
   * @param message - Bridge-level description of what failed.
   * @param cause - The underlying thrown value, if there is one.
   */
  constructor(message: string, cause?: unknown) {
    super(message);
    // Assign `name` before `cause` to keep the own-property order stable.
    this.name = 'SdkBridgeError';
    this.cause = cause;
  }
}
49
+
50
+ // ---------------------------------------------------------------------------
51
+ // createNewRun
52
+ // ---------------------------------------------------------------------------
53
+
54
/**
 * Start a new babysitter run.
 *
 * Maps the flat option bag onto the SDK's {@link CreateRunOptions} (the
 * process fields are nested under `process`) and returns the result of
 * {@link createRun} unchanged.
 *
 * @param opts - Runs directory, process location and optional inputs/prompt.
 * @returns The value `createRun` resolves with.
 * @throws SdkBridgeError when anything inside the call throws; the original
 *   error is attached as `cause`.
 */
export async function createNewRun(opts: {
  runsDir: string;
  processId: string;
  importPath: string;
  exportName?: string;
  inputs?: unknown;
  prompt?: string;
}): Promise<CreateRunResult> {
  try {
    const request: CreateRunOptions = {
      runsDir: opts.runsDir,
      process: {
        processId: opts.processId,
        importPath: opts.importPath,
        exportName: opts.exportName,
      },
      inputs: opts.inputs,
      prompt: opts.prompt,
    };
    const created = await createRun(request);
    return created;
  } catch (cause) {
    const message = `Failed to create run for process "${opts.processId}"`;
    throw new SdkBridgeError(message, cause);
  }
}
87
+
88
+ // ---------------------------------------------------------------------------
89
+ // iterate
90
+ // ---------------------------------------------------------------------------
91
+
92
/**
 * Execute one orchestration iteration for an existing run.
 *
 * The run is addressed by its directory (`runDir`), not by its run id.
 * `inputs` and `context` are forwarded to {@link orchestrateIteration} when
 * given and are `undefined` otherwise.
 *
 * @param runDir - Directory of the run to iterate.
 * @param opts - Optional inputs and context for this iteration.
 * @returns The value `orchestrateIteration` resolves with.
 * @throws SdkBridgeError when the SDK call throws; the original error is
 *   attached as `cause`.
 */
export async function iterate(
  runDir: string,
  opts?: {
    inputs?: unknown;
    context?: Record<string, unknown>;
  },
): Promise<IterationResult> {
  try {
    const request: OrchestrateOptions = {
      runDir,
      inputs: opts?.inputs,
      context: opts?.context,
    };
    const outcome = await orchestrateIteration(request);
    return outcome;
  } catch (cause) {
    const message = `Iteration failed for runDir "${runDir}"`;
    throw new SdkBridgeError(message, cause);
  }
}
119
+
120
+ // ---------------------------------------------------------------------------
121
+ // postResult
122
+ // ---------------------------------------------------------------------------
123
+
124
/**
 * Commit an effect result (task completion) for a run via the SDK.
 *
 * `value` is forwarded only when `status` is `'ok'` and `error` only when
 * `status` is `'error'`; the other field is sent as `undefined`.
 *
 * @param opts - Run directory, effect id and the outcome to record.
 * @returns The value `commitEffectResult` resolves with.
 * @throws SdkBridgeError when the SDK call throws; the original error is
 *   attached as `cause`.
 */
export async function postResult(opts: {
  runDir: string;
  effectId: string;
  status: 'ok' | 'error';
  value?: unknown;
  error?: unknown;
}): Promise<CommitEffectResultArtifacts> {
  try {
    const succeeded = opts.status === 'ok';
    const failed = opts.status === 'error';
    const request: CommitEffectResultOptions = {
      runDir: opts.runDir,
      effectId: opts.effectId,
      result: {
        status: opts.status,
        value: succeeded ? opts.value : undefined,
        error: failed ? opts.error : undefined,
      },
    };
    const artifacts = await commitEffectResult(request);
    return artifacts;
  } catch (cause) {
    const message = `Failed to post result for effect "${opts.effectId}"`;
    throw new SdkBridgeError(message, cause);
  }
}
154
+
155
+ // ---------------------------------------------------------------------------
156
+ // getRunStatus
157
+ // ---------------------------------------------------------------------------
158
+
159
/**
 * Summarise a run from its metadata and journal.
 *
 * The status starts as `"running"` and is overwritten by each terminal
 * journal entry encountered, so the last terminal entry wins:
 * - `RUN_COMPLETED` -> `"completed"`
 * - `RUN_FAILED`    -> `"failed"`
 *
 * An effect is pending when the journal holds an `EFFECT_REQUESTED` entry
 * for its id and no `EFFECT_RESOLVED` entry for the same id. Effect entries
 * without a `data` payload are ignored. Pending effects are returned in the
 * order their ids were first requested.
 *
 * @param runDir - Directory of the run to inspect.
 * @returns Run id and process id from the metadata, the derived status, and
 *   the list of pending effects.
 * @throws SdkBridgeError when reading or processing fails; the original
 *   error is attached as `cause`.
 */
export async function getRunStatus(runDir: string): Promise<{
  runId: string;
  processId: string;
  status: string;
  pendingEffects: EffectAction[];
}> {
  try {
    const metadata = await readRunMetadata(runDir);
    const journal = await loadJournal(runDir);

    let status = 'running';
    const resolvedIds = new Set<string>();
    const requestedById = new Map<string, EffectAction>();

    // Single pass: terminal entries update the status, effect entries feed
    // the requested/resolved bookkeeping.
    for (const entry of journal) {
      const payload = entry.data as Record<string, unknown> | undefined;

      switch (entry.type) {
        case 'RUN_COMPLETED':
          status = 'completed';
          break;

        case 'RUN_FAILED':
          status = 'failed';
          break;

        case 'EFFECT_RESOLVED':
          if (payload) {
            resolvedIds.add(payload.effectId as string);
          }
          break;

        case 'EFFECT_REQUESTED': {
          if (!payload) {
            break;
          }
          const effectId = payload.effectId as string;
          const action: EffectAction = {
            effectId,
            invocationKey: (payload.invocationKey as string) ?? '',
            kind: (payload.kind as string) ?? 'unknown',
            label: payload.label as string | undefined,
            taskDef: (payload.taskDef ?? {}) as EffectAction['taskDef'],
          };
          requestedById.set(effectId, action);
          break;
        }

        default:
          break;
      }
    }

    // Requested but never resolved.
    const pendingEffects: EffectAction[] = [...requestedById.values()].filter(
      (action) => !resolvedIds.has(action.effectId),
    );

    return {
      runId: metadata.runId,
      processId: metadata.processId,
      status,
      pendingEffects,
    };
  } catch (cause) {
    const message = `Failed to read run status for "${runDir}"`;
    throw new SdkBridgeError(message, cause);
  }
}
232
+
233
+ // ---------------------------------------------------------------------------
234
+ // getPendingEffects
235
+ // ---------------------------------------------------------------------------
236
+
237
/**
 * Return just the `pendingEffects` part of {@link getRunStatus} for a run.
 *
 * @param runDir - Directory of the run to inspect.
 * @returns The effects that have been requested but not yet resolved.
 */
export async function getPendingEffects(runDir: string): Promise<EffectAction[]> {
  const summary = await getRunStatus(runDir);
  return summary.pendingEffects;
}