oh-my-workflow 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/runtime.ts ADDED
@@ -0,0 +1,235 @@
1
+ // makeRuntime assembles the 5 hooks over an injected AgentPort + journal — this
2
+ // is the whole core. The orchestration script the host agent writes is plain JS;
3
+ // these hooks are the only surface it touches. The load-bearing invariant is the
4
+ // null-contract: agent() NEVER throws; a terminal failure resolves to null and a
5
+ // journal entry carrying the failure `kind`, so the authoring agent can read its
6
+ // own failure and repair its own script. Workflow patterns (filter(Boolean),
7
+ // abstain quorums) stand on top of that contract.
8
+
9
+ import type { AgentPort, AgentResult } from "./adapters/types";
10
+ import type { Journal } from "./journal";
11
+ import { promptHash, optsHash } from "./journal";
12
+ import type { ResumeIndex } from "./resume";
13
+ import { schemaGate, makeValidator, type GateCall, type GateFeedback } from "./schema-gate";
14
+
15
/** Per-call options accepted by `agent()`. Every field is optional. */
export type AgentOpts = {
  /** JSON Schema for the node's output. When set, the call goes through the
   *  schema gate and resolves to the validated value; when absent, the raw
   *  text is returned. */
  schema?: object;
  /** Retry budget handed to the schema gate (schema path only). */
  maxRetries?: number;
  /** Forwarded to the adapter's `invoke` call. */
  model?: string;
  /** Forwarded to the adapter's `invoke` call. */
  cwd?: string;
  /** Forwarded to the adapter's `invoke` call. */
  timeoutMs?: number;
  /** Recorded on the journal's agent-start entry. */
  label?: string;
  /** Overrides the runtime's current phase for this one call. */
  phase?: string;
};
24
+
25
// A pipeline stage receives the previous stage's result (`prev`), the original
// item (`item`) and the item's position (`index`). `prev`/`item` are `any` on
// purpose: orchestration scripts are plain JS written by the host agent, so a
// stage is free to declare concrete parameter types (`x: number`) without
// fighting the type system. The runtime itself treats every value opaquely.
export type Stage = (
  prev: any,
  item: any,
  index: number,
) => unknown | Promise<unknown>;
29
+
30
/** The hooks an orchestration script is given. */
export type Runtime = {
  /** Start a new phase: journals it and makes it the default phase of later `agent()` calls. */
  phase(title: string): void;
  /** Write a free-form message to the journal. */
  log(msg: string): void;
  /** Run one node. Resolves to its result, or to `null` when the node fails. */
  agent(prompt: string, opts?: AgentOpts): Promise<unknown | null>;
  /** Run every thunk; a thunk that throws contributes `null` at its position. */
  parallel(thunks: Array<() => Promise<unknown>>): Promise<unknown[]>;
  /** Feed each item through `stages` in order; an item whose stage throws becomes `null`. */
  pipeline(items: unknown[], ...stages: Stage[]): Promise<unknown[]>;
};
37
+
38
/**
 * Bounded-concurrency gate: at most `max` bodies run at once; the rest queue in
 * FIFO order.
 *
 * Canonical counting-semaphore: a release HANDS its slot directly to the next
 * waiter (`active` unchanged) rather than decrementing first — otherwise a fresh
 * caller could slip past the `active >= max` check between the wake and the
 * woken waiter resuming, pushing in-flight above `max` (a TOCTOU race).
 *
 * @param max Maximum number of bodies in flight. Must be greater than 0.
 * @returns `run(fn)`: waits for a slot, runs `fn`, and settles the way `fn` does.
 *   The slot is released whether `fn` resolves, rejects, or throws synchronously.
 * @throws RangeError when `max` is NaN, zero or negative. Zero or a negative
 *   value would make every caller queue forever; NaN would silently disable the
 *   bound. Both are configuration errors, so they fail at construction.
 */
export function makeLimiter(max: number) {
  // `!(max > 0)` is deliberately not `max <= 0`: it also rejects NaN.
  if (!(max > 0)) {
    throw new RangeError(`makeLimiter: max must be a number greater than 0 (got ${max})`);
  }
  let active = 0;
  const waiters: Array<() => void> = [];
  return async function run<T>(fn: () => Promise<T>): Promise<T> {
    if (active >= max) {
      await new Promise<void>((res) => waiters.push(res)); // slot transferred to us
    } else {
      active++;
    }
    try {
      return await fn();
    } finally {
      const next = waiters.shift();
      if (next) next(); // hand our slot to the next waiter; active stays the same
      else active--;
    }
  };
}
61
+
62
/** Message text for any thrown value: `.message` for Errors, `String(value)` otherwise. */
const errMsg = (e: unknown): string => {
  if (e instanceof Error) {
    return e.message;
  }
  return String(e);
};
63
+
64
/**
 * Build the prompt for a retry after a failed validation.
 *
 * @param original The prompt of the first attempt.
 * @param feedback Validation errors from the previous attempt, one bullet each.
 * @param fresh `true` when the retry starts a new conversation, so the original
 *   prompt is repeated ahead of the correction note; `false` sends the note alone.
 */
function retryPrompt(original: string, feedback: GateFeedback, fresh: boolean): string {
  const bullets = feedback.errors.map((e) => `- ${e}`).join("\n");
  const note = `Your previous output failed validation:\n${bullets}\nReturn ONLY corrected JSON, no prose.`;
  if (!fresh) {
    return note;
  }
  return `${original}\n\n${note}`;
}
71
+
72
/**
 * Build the runtime an orchestration script talks to — `agent`, `pipeline`,
 * `parallel`, `phase` and `log` — closing over the injected adapter and journal.
 *
 * - `agent()` journals a start entry, then exactly one end entry, for every
 *   call. An adapter failure, a schema-gate failure or an unexpected throw
 *   inside the limited section resolves to `null` rather than rejecting.
 * - Only `agent()` takes a limiter slot; `concurrency` (default 4) bounds how
 *   many adapter invocations are in flight.
 * - `parallel` / `pipeline` turn a throwing thunk / stage into `null` for that
 *   position and log the error to the journal.
 *
 * @param deps.adapter The node backend. `invoke` is required; `followUp` is
 *   used for in-session schema retries when present.
 * @param deps.journal Sink for start/end/attempt/phase/log entries.
 * @param deps.concurrency Limiter size, default 4.
 * @param deps.resume Optional lookup over a prior run's journal.
 */
export function makeRuntime(deps: {
  adapter: AgentPort;
  journal: Journal;
  concurrency?: number;
  /** A prior run's journal as a lookup. When a node's (call, promptHash,
   *  optsHash) key hits, the adapter is skipped and the cached result returned —
   *  the longest-unchanged-prefix resume model. A miss (incl. a prior failure)
   *  runs live, so resume only re-executes failed/changed nodes. */
  resume?: ResumeIndex;
}): Runtime {
  const { adapter, journal, resume } = deps;
  const limit = makeLimiter(deps.concurrency ?? 4);
  let callCounter = 0;
  let currentPhase: string | undefined;

  async function agent(prompt: string, opts: AgentOpts = {}): Promise<unknown | null> {
    // Orchestration scripts are plain JS, so `null` can arrive here despite the
    // type, and the default parameter only covers `undefined`. Treat null as
    // "no options" instead of throwing on the first property read.
    const o: AgentOpts = opts ?? {};
    const call = ++callCounter;
    const phase = o.phase ?? currentPhase;
    const pHash = promptHash(prompt);
    const oHash = optsHash(o);
    journal.agentStart({
      call,
      label: o.label,
      phase,
      adapter: adapter.name,
      promptHash: pHash,
      optsHash: oHash,
    });

    // Resume short-circuit: a hit skips the limiter + adapter entirely, but still
    // emits agent_end so every start has a matching end (the spine invariant).
    if (resume) {
      const hit = resume.lookup({ call, promptHash: pHash, optsHash: oHash });
      if (hit.found) {
        journal.agentEnd({ call, ok: true, result: hit.value, durationMs: 0, cached: true });
        return hit.value;
      }
    }

    return limit(async () => {
      // Sum of adapter-reported durations across every attempt of this call.
      let durationMs = 0;
      const account = (r: AgentResult) => {
        durationMs += r.ok ? r.meta.durationMs : (r.meta?.durationMs ?? 0);
      };

      try {
        // No schema: one shot, raw text out (or null).
        if (!o.schema) {
          let r: AgentResult;
          try {
            r = await adapter.invoke({
              prompt,
              model: o.model,
              cwd: o.cwd,
              timeoutMs: o.timeoutMs,
            });
          } catch (e) {
            // A throw at the adapter boundary IS an adapter failure.
            journal.agentEnd({ call, ok: false, kind: "spawn_failure", stderr: errMsg(e), durationMs });
            return null;
          }
          account(r);
          if (r.ok) {
            journal.agentEnd({ call, ok: true, result: r.text, durationMs });
            return r.text;
          }
          journal.agentEnd({ call, ok: false, kind: r.kind, stderr: r.stderr, durationMs });
          return null;
        }

        // Schema path: gate retries node-level noise; followUp in-session if we
        // have a sessionId, else fresh+error. The authoring agent never sees this.
        const validate = makeValidator(o.schema);
        let lastSessionId: string | undefined;
        // Message of an exception thrown by the current gate attempt, if any.
        // Used as the stderr fallback so the schema path journals the cause of
        // a spawn_failure just like the no-schema path does.
        let thrownMsg: string | undefined;
        const gateCall: GateCall = async (_n, feedback) => {
          thrownMsg = undefined; // never carry a message over from an earlier attempt
          try {
            let r: AgentResult;
            if (feedback && lastSessionId && adapter.followUp) {
              r = await adapter.followUp(lastSessionId, retryPrompt(prompt, feedback, false));
            } else {
              const p = feedback ? retryPrompt(prompt, feedback, true) : prompt;
              r = await adapter.invoke({ prompt: p, model: o.model, cwd: o.cwd, timeoutMs: o.timeoutMs });
            }
            account(r);
            if (r.ok && r.meta.sessionId) lastSessionId = r.meta.sessionId;
            return r;
          } catch (e) {
            thrownMsg = errMsg(e);
            throw e; // classification stays with the gate (spawn_failure)
          }
        };
        // Prefer what the gate reports; fall back to the captured throw message.
        const stderrFor = (kind: unknown, stderr: string | undefined): string | undefined =>
          stderr ?? (kind === "spawn_failure" ? thrownMsg : undefined);

        const outcome = await schemaGate({
          call: gateCall,
          validate,
          maxRetries: o.maxRetries,
          onAttempt: (a) =>
            journal.attempt({
              call,
              n: a.n,
              kind: a.kind,
              errors: a.errors,
              stderr: stderrFor(a.kind, a.stderr),
              rawText: a.rawText,
            }),
        });

        if (outcome.ok) {
          journal.agentEnd({ call, ok: true, result: outcome.value, durationMs });
          return outcome.value;
        }
        journal.agentEnd({
          call,
          ok: false,
          kind: outcome.kind,
          stderr: stderrFor(outcome.kind, outcome.stderr),
          rawText: outcome.rawText,
          durationMs,
        });
        return null;
      } catch (e) {
        // Last-resort guard: the null-contract holds even on an unexpected throw
        // in OUR code (e.g. an invalid schema fails to compile). Labeled
        // internal_error — distinct from adapter failures — so the authoring
        // agent doesn't misread a schema bug as a flaky node.
        journal.agentEnd({ call, ok: false, kind: "internal_error", error: errMsg(e), durationMs });
        return null;
      }
    });
  }

  // NOTE: parallel/pipeline do NOT acquire the limiter themselves — the limiter
  // is held at the agent() boundary (the heavy subprocess node). Wrapping these
  // combinators too would deadlock: their thunks call agent(), which would wait
  // for a slot the combinator already holds. Bounding cheap glue is a non-goal.
  async function parallel(thunks: Array<() => Promise<unknown>>): Promise<unknown[]> {
    return Promise.all(
      thunks.map((t, i) =>
        // Promise.resolve().then(t) also converts a synchronous throw in `t`
        // into a rejection, so the catch below sees it.
        Promise.resolve()
          .then(t)
          .catch((e) => {
            journal.log(`parallel thunk ${i} threw: ${errMsg(e)}`);
            return null;
          }),
      ),
    );
  }

  async function pipeline(items: unknown[], ...stages: Stage[]): Promise<unknown[]> {
    return Promise.all(
      items.map(async (item, index) => {
        let acc: unknown = item;
        for (const stage of stages) {
          try {
            acc = await stage(acc, item, index);
          } catch (e) {
            // One failing stage drops this item only; other items keep going.
            journal.log(`pipeline item ${index} stage threw: ${errMsg(e)}`);
            return null;
          }
        }
        return acc;
      }),
    );
  }

  return {
    agent,
    parallel,
    pipeline,
    phase: (title: string) => {
      currentPhase = title;
      journal.phase(title);
    },
    log: (msg: string) => journal.log(msg),
  };
}
package/src/schema-gate.ts ADDED
@@ -0,0 +1,164 @@
1
+ // The schema gate turns probabilistic node output into a validated object — or
2
+ // null. Extraction MUST be deterministic so the same text always yields the same
3
+ // result (the journal/resume model depends on it). Precedence:
4
+ // 1. the LAST fenced code block that parses as JSON, else
5
+ // 2. the LARGEST balanced-brace span that parses as JSON, else
6
+ // 3. undefined.
7
+
8
+ import Ajv from "ajv";
9
+ import type { AgentResult, AgentFailureKind } from "./adapters/types";
10
+
11
/** Parse `s` as JSON after trimming; `undefined` for blank input or a parse error. */
function tryParse(s: string): unknown | undefined {
  const candidate = s.trim();
  if (candidate.length === 0) {
    return undefined;
  }
  let parsed: unknown;
  try {
    parsed = JSON.parse(candidate);
  } catch {
    return undefined;
  }
  return parsed;
}
20
+
21
/**
 * Collect the top-level balanced `{...}` substrings of `text`, in order of
 * appearance. Braces inside double-quoted string literals (with backslash
 * escapes) do not count towards nesting. An opening brace that never closes is
 * skipped and scanning resumes at the next character, so a well-formed span
 * nested inside it can still be found.
 */
function balancedBraceSpans(text: string): string[] {
  // Index of the `}` that closes the `{` at `open`, or -1 if it never closes.
  const closingIndex = (open: number): number => {
    let depth = 0;
    let quoted = false;
    let escaped = false;
    for (let k = open; k < text.length; k++) {
      const ch = text[k];
      if (quoted) {
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') quoted = false;
        continue;
      }
      switch (ch) {
        case '"':
          quoted = true;
          break;
        case "{":
          depth += 1;
          break;
        case "}":
          depth -= 1;
          if (depth === 0) return k;
          break;
      }
    }
    return -1;
  };

  const found: string[] = [];
  let cursor = text.indexOf("{");
  while (cursor !== -1) {
    const close = closingIndex(cursor);
    if (close === -1) {
      // never closed — skip this brace
      cursor = text.indexOf("{", cursor + 1);
    } else {
      found.push(text.slice(cursor, close + 1));
      cursor = text.indexOf("{", close + 1);
    }
  }
  return found;
}
56
+
57
/**
 * Pull a JSON value out of free-form node output.
 *
 * Precedence: the last fenced code block that parses as JSON; failing that,
 * the longest balanced-brace span that parses (the earliest one wins a tie);
 * otherwise `undefined`.
 */
export function extractJson(text: string): unknown | undefined {
  // 1) Fenced code blocks — walk them back to front, first parseable wins.
  const fencePattern = /```[^\n]*\n([\s\S]*?)```/g;
  const bodies: string[] = [];
  for (const match of text.matchAll(fencePattern)) {
    bodies.push(match[1] ?? "");
  }
  for (const body of bodies.reverse()) {
    const fromFence = tryParse(body);
    if (fromFence !== undefined) {
      return fromFence;
    }
  }

  // 2) Longest balanced-brace span that parses.
  let winner: unknown;
  let winnerLen = -1;
  for (const span of balancedBraceSpans(text)) {
    if (span.length <= winnerLen) {
      continue; // cannot beat the current winner; ties keep the earlier span
    }
    const value = tryParse(span);
    if (value !== undefined) {
      winner = value;
      winnerLen = span.length;
    }
  }
  return winner;
}
74
+
75
+ // ── validation ──────────────────────────────────────────────────────────────
76
+
77
/** Result of validating one value: the verdict plus human-readable error strings. */
export type Validation = {
  valid: boolean;
  errors: string[];
};

/** Checks a value that has already been parsed from JSON. */
export type ValidateFn = (value: unknown) => Validation;
79
+
80
/**
 * Compile a JSON Schema once and return a reusable validate function.
 *
 * Each Ajv error is flattened to `"<instancePath> <message>"`, with `/`
 * standing in for an empty path and `is invalid` for a missing message.
 * Compilation happens here, so an invalid schema throws from this call rather
 * than from the returned function.
 */
export function makeValidator(schema: object): ValidateFn {
  const compiled = new Ajv({ allErrors: true, strict: false }).compile(schema);
  return function validate(value: unknown): Validation {
    const valid = compiled(value) as boolean;
    const errors: string[] = [];
    for (const issue of compiled.errors ?? []) {
      const where = issue.instancePath || "/";
      const what = issue.message ?? "is invalid";
      errors.push(`${where} ${what}`.trim());
    }
    return { valid, errors };
  };
}
92
+
93
+ // ── the retry loop ────────────────────────────────────────────────────────────
94
+
95
/** How a single attempt ended: "ok", a schema-level miss, or an adapter failure kind. */
export type AttemptKind = AgentFailureKind | "schema_violation" | "no_json" | "ok";

/** One attempt as reported to `onAttempt`; `n` counts attempts from 1. */
export type GateAttempt = {
  n: number;
  kind: AttemptKind;
  /** Validation errors (schema_violation). */
  errors?: string[];
  /** Adapter stderr (adapter failure). */
  stderr?: string;
  /** The node's raw text (no_json / schema_violation). */
  rawText?: string;
};

/** What the previous attempt got wrong, handed to the next call so it can be corrected. */
export type GateFeedback = {
  errors: string[];
  rawText: string;
};

/** Produce one node result. `n` is 1-based; `feedback` is null on the first try.
 *  The runtime supplies this — it decides followUp (in-session) vs fresh+error. */
export type GateCall = (n: number, feedback: GateFeedback | null) => Promise<AgentResult>;

/** Final verdict of the gate. */
export type GateOutcome =
  | { ok: true; value: unknown }
  // On failure we carry the diagnostic payload up so the runtime can journal it:
  // adapter stderr for hard failures, the node's raw text for schema/no_json.
  | {
      ok: false;
      kind: Exclude<AttemptKind, "ok">;
      stderr?: string;
      rawText?: string;
    };
111
+
112
/**
 * Run a node through the gate. Schema noise (no_json / schema_violation) is
 * retried up to `maxRetries` times, each retry receiving the previous attempt's
 * errors and raw text as feedback. A hard adapter failure — a non-ok result or
 * a throw from `call` — short-circuits with its own kind and is never retried.
 *
 * @param opts.call Produces one node result per attempt.
 * @param opts.validate Checks the JSON extracted from a successful result.
 * @param opts.maxRetries Retries after the first attempt; default 2. Negative
 *   values count as 0 and NaN falls back to the default, so the node is always
 *   invoked at least once.
 * @param opts.onAttempt Observer called once per attempt, before the gate
 *   moves on or returns.
 * @returns `{ ok: true, value }` on the first valid extraction; otherwise
 *   `{ ok: false, kind, ... }` with adapter stderr (or the thrown error's
 *   message) for hard failures, or the last raw text when retries run out.
 */
export async function schemaGate(opts: {
  call: GateCall;
  validate: ValidateFn;
  maxRetries?: number;
  onAttempt?: (a: GateAttempt) => void;
}): Promise<GateOutcome> {
  // A negative or NaN budget would make the loop below run zero times and
  // report a schema_violation for a node that was never invoked.
  const requested = opts.maxRetries ?? 2;
  const maxRetries = Number.isNaN(requested) ? 2 : Math.max(0, requested);
  let feedback: GateFeedback | null = null;
  let lastKind: Exclude<AttemptKind, "ok"> = "schema_violation";
  let lastRawText: string | undefined;

  for (let n = 1; n <= maxRetries + 1; n++) {
    let result: AgentResult;
    try {
      result = await opts.call(n, feedback);
    } catch (e) {
      // A throw at the call boundary is a spawn failure; keep its message so
      // the journal shows the cause instead of a bare kind.
      const stderr = e instanceof Error ? e.message : String(e);
      opts.onAttempt?.({ n, kind: "spawn_failure", stderr });
      return { ok: false, kind: "spawn_failure", stderr };
    }

    if (!result.ok) {
      // Adapter-level failure is not schema noise — short-circuit, keep stderr.
      opts.onAttempt?.({ n, kind: result.kind, stderr: result.stderr });
      return { ok: false, kind: result.kind, stderr: result.stderr };
    }

    lastRawText = result.text;
    const extracted = extractJson(result.text);
    if (extracted === undefined) {
      lastKind = "no_json";
      opts.onAttempt?.({ n, kind: "no_json", rawText: result.text });
      feedback = { errors: ["output contained no extractable JSON"], rawText: result.text };
      continue;
    }

    const { valid, errors } = opts.validate(extracted);
    if (valid) {
      opts.onAttempt?.({ n, kind: "ok" });
      return { ok: true, value: extracted };
    }

    lastKind = "schema_violation";
    opts.onAttempt?.({ n, kind: "schema_violation", errors, rawText: result.text });
    feedback = { errors, rawText: result.text };
  }

  // Retry budget exhausted: report the last schema-level miss and its text.
  return { ok: false, kind: lastKind, rawText: lastRawText };
}