oh-my-workflow 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +178 -0
- package/examples/deep-research/workflow.ts +82 -0
- package/package.json +60 -0
- package/skill/SKILL.md +491 -0
- package/src/adapters/claude.ts +146 -0
- package/src/adapters/codex.ts +149 -0
- package/src/adapters/fake.ts +70 -0
- package/src/adapters/types.ts +43 -0
- package/src/cli/omw.ts +37 -0
- package/src/cli/replay.ts +98 -0
- package/src/cli/run.ts +371 -0
- package/src/cli/validate.ts +110 -0
- package/src/journal.ts +138 -0
- package/src/resume.ts +48 -0
- package/src/runtime.ts +235 -0
- package/src/schema-gate.ts +164 -0
package/src/runtime.ts
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
// makeRuntime assembles the 5 hooks over an injected AgentPort + journal — this
|
|
2
|
+
// is the whole core. The orchestration script the host agent writes is plain JS;
|
|
3
|
+
// these hooks are the only surface it touches. The load-bearing invariant is the
|
|
4
|
+
// null-contract: agent() NEVER throws; a terminal failure resolves to null and a
|
|
5
|
+
// journal entry carrying the failure `kind`, so the authoring agent can read its
|
|
6
|
+
// own failure and repair its own script. Workflow patterns (filter(Boolean),
|
|
7
|
+
// abstain quorums) stand on top of that contract.
|
|
8
|
+
|
|
9
|
+
import type { AgentPort, AgentResult } from "./adapters/types";
|
|
10
|
+
import type { Journal } from "./journal";
|
|
11
|
+
import { promptHash, optsHash } from "./journal";
|
|
12
|
+
import type { ResumeIndex } from "./resume";
|
|
13
|
+
import { schemaGate, makeValidator, type GateCall, type GateFeedback } from "./schema-gate";
|
|
14
|
+
|
|
15
|
+
/** Per-call options accepted by `agent()`. Every field is optional. */
export type AgentOpts = {
  /** Recorded on this call's `agentStart` journal entry. */
  label?: string;
  /** Phase recorded for this call; when omitted, the runtime's current phase
   *  (as last set through `phase()`) is used instead. */
  phase?: string;
  /** When present, a validator is compiled from it and the node output goes
   *  through the schema gate; when absent, the adapter's raw text is returned. */
  schema?: object;
  /** Forwarded unchanged to `adapter.invoke`. */
  model?: string;
  /** Forwarded unchanged to `adapter.invoke`. */
  cwd?: string;
  /** Forwarded unchanged to `adapter.invoke`. */
  timeoutMs?: number;
  /** Forwarded to the schema gate; only read on the schema path. */
  maxRetries?: number;
};
|
|
24
|
+
|
|
25
|
+
// One step of a `pipeline`. It is called as `stage(prev, item, index)`, where
// `prev` is the previous stage's result (the item itself for the first stage),
// `item` is the original input item and `index` is its position in the input.
//
// `prev`/`item` are intentionally `any`: orchestration scripts are plain JS the
// host agent authors, so a stage may declare concrete param types (`x: number`)
// without fighting the type system. The runtime treats every value opaquely.
export type Stage = (prev: any, item: any, index: number) => unknown | Promise<unknown>;
|
|
29
|
+
|
|
30
|
+
/** The hook surface handed to an orchestration script. */
export type Runtime = {
  /** Run one node. Resolves to the node's result (raw text, or the validated
   *  value when `opts.schema` is set) or to `null` on a terminal failure. */
  agent(prompt: string, opts?: AgentOpts): Promise<unknown | null>;
  /** Feed every item through `stages` in order; items are processed
   *  concurrently. An item whose stage throws resolves to `null`. */
  pipeline(items: unknown[], ...stages: Stage[]): Promise<unknown[]>;
  /** Run all thunks concurrently; a thunk that throws or rejects yields `null`
   *  at its index. */
  parallel(thunks: Array<() => Promise<unknown>>): Promise<unknown[]>;
  /** Set the phase applied to subsequent `agent()` calls and journal it. */
  phase(title: string): void;
  /** Write a free-form message to the journal. */
  log(msg: string): void;
};
|
|
37
|
+
|
|
38
|
+
/**
 * Bounded-concurrency gate: at most `max` bodies run at once; the rest queue.
 *
 * Canonical counting-semaphore: a release HANDS its slot directly to the next
 * waiter (active unchanged) rather than decrementing first — otherwise a fresh
 * caller could slip past the `active >= capacity` check between the wake and
 * the woken waiter resuming, pushing in-flight above the bound (a TOCTOU race).
 *
 * @param max Desired concurrency. It is normalized to an integer of at least 1:
 *   a value of 0 or below would make every call queue with nobody left to
 *   release it, `NaN` would make the bound check always false (unbounded), and
 *   a fractional value would admit one body more than intended. `Infinity`
 *   stays unbounded.
 * @returns `run(fn)`, which resolves or rejects with whatever `fn()` does once
 *   a slot is available. The slot is released even when `fn` rejects.
 */
export function makeLimiter(max: number) {
  const capacity = Number.isNaN(max) ? 1 : Math.max(1, Math.floor(max));
  let active = 0;
  const waiters: Array<() => void> = [];
  return async function run<T>(fn: () => Promise<T>): Promise<T> {
    if (active >= capacity) {
      await new Promise<void>((res) => waiters.push(res)); // slot transferred to us
    } else {
      active++;
    }
    try {
      return await fn();
    } finally {
      const next = waiters.shift();
      if (next) next(); // hand our slot to the next waiter; active stays the same
      else active--;
    }
  };
}
|
|
61
|
+
|
|
62
|
+
/**
 * Render a caught value as a one-line message for the journal.
 *
 * `Error` instances yield their `message` and primitives are stringified, as
 * before. Other objects used to collapse to "[object Object]" (or make
 * `String(e)` itself throw for null-prototype objects); they now fall back, in
 * order, to an error-like `message` string, a custom `toString()`, their JSON
 * form, and finally the `Object.prototype.toString` tag, which cannot throw.
 */
const errMsg = (e: unknown): string => {
  if (e instanceof Error) return e.message;
  if (typeof e !== "object" || e === null) return String(e);

  // Error-like values that are not `Error` instances (plain `{ message }` objects).
  const { message } = e as { message?: unknown };
  if (typeof message === "string" && message !== "") return message;

  try {
    const text = String(e);
    // Keep meaningful custom toString() output (arrays included); skip the default tag.
    if (!/^\[object .*\]$/.test(text)) return text;
  } catch {
    // No usable toString (e.g. Object.create(null)) — fall through.
  }
  try {
    const json = JSON.stringify(e);
    if (typeof json === "string") return json;
  } catch {
    // Circular structure or BigInt member — fall through.
  }
  return Object.prototype.toString.call(e);
};
|
|
63
|
+
|
|
64
|
+
/**
 * Build the prompt for a retry after a failed validation.
 *
 * @param original The prompt the node was first given.
 * @param feedback Validation errors from the previous attempt; each one becomes
 *   a `- ` bullet in the correction note.
 * @param fresh `true` when the retry starts without the earlier conversation,
 *   so the original prompt is repeated ahead of the note; `false` sends the
 *   note on its own.
 * @returns The text to send for the retry.
 */
function retryPrompt(original: string, feedback: GateFeedback, fresh: boolean): string {
  const bullets = feedback.errors.map((e) => `- ${e}`).join("\n");
  const correction = `Your previous output failed validation:\n${bullets}\nReturn ONLY corrected JSON, no prose.`;
  if (!fresh) return correction;
  return `${original}\n\n${correction}`;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Assemble the runtime hooks (`agent`, `parallel`, `pipeline`, `phase`, `log`)
 * over an injected adapter and journal.
 *
 * @param deps.adapter Port used to run a node (`invoke`, and `followUp` when
 *   the adapter provides it).
 * @param deps.journal Sink for agent start/end, attempt, phase and log entries.
 * @param deps.concurrency Bound on concurrently running `agent()` bodies;
 *   defaults to 4.
 * @param deps.resume Optional lookup over a prior run (see the field comment).
 * @returns The `Runtime` object whose hooks close over the shared call counter
 *   and current phase.
 */
export function makeRuntime(deps: {
  adapter: AgentPort;
  journal: Journal;
  concurrency?: number;
  /** A prior run's journal as a lookup. When a node's (call, promptHash,
   *  optsHash) key hits, the adapter is skipped and the cached result returned —
   *  the longest-unchanged-prefix resume model. A miss (incl. a prior failure)
   *  runs live, so resume only re-executes failed/changed nodes. */
  resume?: ResumeIndex;
}): Runtime {
  const { adapter, journal, resume } = deps;
  const limit = makeLimiter(deps.concurrency ?? 4);
  let callCounter = 0; // 1-based id handed to each agent() call, in call order
  let currentPhase: string | undefined;

  /**
   * Run one node. Resolves to the raw text (no schema), the validated value
   * (schema), or `null` after journaling the failure kind.
   */
  async function agent(prompt: string, opts: AgentOpts = {}): Promise<unknown | null> {
    const call = ++callCounter;
    const phase = opts.phase ?? currentPhase;
    const pHash = promptHash(prompt);
    const oHash = optsHash(opts);
    journal.agentStart({
      call,
      label: opts.label,
      phase,
      adapter: adapter.name,
      promptHash: pHash,
      optsHash: oHash,
    });

    // Resume short-circuit: a hit skips the limiter + adapter entirely, but still
    // emits agent_end so every start has a matching end (the spine invariant).
    if (resume) {
      const hit = resume.lookup({ call, promptHash: pHash, optsHash: oHash });
      if (hit.found) {
        journal.agentEnd({ call, ok: true, result: hit.value, durationMs: 0, cached: true });
        return hit.value;
      }
    }

    return limit(async () => {
      // Sum of the durations reported by every adapter result for this call
      // (a schema-path call may make several attempts).
      let durationMs = 0;
      const account = (r: AgentResult) => {
        durationMs += r.ok ? r.meta.durationMs : (r.meta?.durationMs ?? 0);
      };

      try {
        // No schema: one shot, raw text out (or null).
        if (!opts.schema) {
          let r: AgentResult;
          try {
            r = await adapter.invoke({
              prompt,
              model: opts.model,
              cwd: opts.cwd,
              timeoutMs: opts.timeoutMs,
            });
          } catch (e) {
            // A throw at the adapter boundary IS an adapter failure.
            journal.agentEnd({ call, ok: false, kind: "spawn_failure", stderr: errMsg(e), durationMs });
            return null;
          }
          account(r);
          if (r.ok) {
            journal.agentEnd({ call, ok: true, result: r.text, durationMs });
            return r.text;
          }
          journal.agentEnd({ call, ok: false, kind: r.kind, stderr: r.stderr, durationMs });
          return null;
        }

        // Schema path: gate retries node-level noise; followUp in-session if we
        // have a sessionId, else fresh+error. The authoring agent never sees this.
        const validate = makeValidator(opts.schema);
        let lastSessionId: string | undefined;
        const gateCall: GateCall = async (_n, feedback) => {
          let r: AgentResult;
          if (feedback && lastSessionId && adapter.followUp) {
            r = await adapter.followUp(lastSessionId, retryPrompt(prompt, feedback, false));
          } else {
            const p = feedback ? retryPrompt(prompt, feedback, true) : prompt;
            r = await adapter.invoke({ prompt: p, model: opts.model, cwd: opts.cwd, timeoutMs: opts.timeoutMs });
          }
          account(r);
          // Remember the newest session so the next retry can continue in it.
          if (r.ok && r.meta.sessionId) lastSessionId = r.meta.sessionId;
          return r;
        };

        const outcome = await schemaGate({
          call: gateCall,
          validate,
          maxRetries: opts.maxRetries,
          onAttempt: (a) =>
            journal.attempt({ call, n: a.n, kind: a.kind, errors: a.errors, stderr: a.stderr, rawText: a.rawText }),
        });

        if (outcome.ok) {
          journal.agentEnd({ call, ok: true, result: outcome.value, durationMs });
          return outcome.value;
        }
        journal.agentEnd({
          call,
          ok: false,
          kind: outcome.kind,
          stderr: outcome.stderr,
          rawText: outcome.rawText,
          durationMs,
        });
        return null;
      } catch (e) {
        // Last-resort guard: the null-contract holds even on an unexpected throw
        // in OUR code (e.g. an invalid schema fails to compile). Labeled
        // internal_error — distinct from adapter failures — so the authoring
        // agent doesn't misread a schema bug as a flaky node.
        journal.agentEnd({ call, ok: false, kind: "internal_error", error: errMsg(e), durationMs });
        return null;
      }
    });
  }

  // NOTE: parallel/pipeline do NOT acquire the limiter themselves — the limiter
  // is held at the agent() boundary (the heavy subprocess node). Wrapping these
  // combinators too would deadlock: their thunks call agent(), which would wait
  // for a slot the combinator already holds. Bounding cheap glue is a non-goal.

  /**
   * Run every thunk concurrently and collect the results in input order. A
   * thunk that throws or rejects is logged and contributes `null`.
   */
  async function parallel(thunks: Array<() => Promise<unknown>>): Promise<unknown[]> {
    return Promise.all(
      thunks.map((t, i) =>
        Promise.resolve()
          // Scripts are plain JS, so an entry may not be a function (typically an
          // already-started promise: `parallel([agent(a), agent(b)])`). Passing
          // such a value straight to `.then()` would ignore it and resolve the
          // slot to `undefined`; await it as a value instead.
          .then(() => (typeof t === "function" ? t() : t))
          .catch((e) => {
            journal.log(`parallel thunk ${i} threw: ${errMsg(e)}`);
            return null;
          }),
      ),
    );
  }

  /**
   * Thread each item through `stages` in order, all items concurrently. The
   * first stage receives the item as `prev`; each later stage receives the
   * previous stage's result. A throwing stage is logged and that item resolves
   * to `null` without running its remaining stages.
   */
  async function pipeline(items: unknown[], ...stages: Stage[]): Promise<unknown[]> {
    return Promise.all(
      items.map(async (item, index) => {
        let acc: unknown = item;
        for (const stage of stages) {
          try {
            acc = await stage(acc, item, index);
          } catch (e) {
            journal.log(`pipeline item ${index} stage threw: ${errMsg(e)}`);
            return null;
          }
        }
        return acc;
      }),
    );
  }

  return {
    agent,
    parallel,
    pipeline,
    // Later agent() calls that do not pass opts.phase pick up this title.
    phase: (title: string) => {
      currentPhase = title;
      journal.phase(title);
    },
    log: (msg: string) => journal.log(msg),
  };
}
|
|
package/src/schema-gate.ts
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
// The schema gate turns probabilistic node output into a validated object — or
|
|
2
|
+
// null. Extraction MUST be deterministic so the same text always yields the same
|
|
3
|
+
// result (the journal/resume model depends on it). Precedence:
|
|
4
|
+
// 1. the LAST fenced code block that parses as JSON, else
|
|
5
|
+
// 2. the LARGEST balanced-brace span that parses as JSON, else
|
|
6
|
+
// 3. undefined.
|
|
7
|
+
|
|
8
|
+
import Ajv from "ajv";
|
|
9
|
+
import type { AgentResult, AgentFailureKind } from "./adapters/types";
|
|
10
|
+
|
|
11
|
+
/**
 * Parse `s` as JSON after trimming surrounding whitespace.
 *
 * @returns The parsed value (which may itself be `null`), or `undefined` when
 *   the trimmed text is empty or is not valid JSON.
 */
function tryParse(s: string): unknown | undefined {
  const candidate = s.trim();
  if (candidate.length === 0) return undefined;

  let value: unknown;
  try {
    value = JSON.parse(candidate);
  } catch {
    return undefined;
  }
  return value;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Collect the top-level balanced `{...}` substrings of `text`, left to right.
 * Braces inside double-quoted string literals (with backslash escapes) are not
 * counted. An opening brace that is never closed is skipped and the scan
 * resumes at the next `{` after it; after a closed span the scan resumes past
 * its closing brace, so spans nested inside a returned span are not reported.
 */
function balancedBraceSpans(text: string): string[] {
  // Index of the `}` that closes the `{` at `open`, or -1 when it never closes.
  const closingIndex = (open: number): number => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let k = open; k < text.length; k++) {
      const ch = text[k];
      if (inString) {
        if (escaped) escaped = false;
        else if (ch === "\\") escaped = true;
        else if (ch === '"') inString = false;
      } else if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth += 1;
      } else if (ch === "}") {
        depth -= 1;
        if (depth === 0) return k;
      }
    }
    return -1;
  };

  const found: string[] = [];
  let cursor = text.indexOf("{");
  while (cursor !== -1) {
    const close = closingIndex(cursor);
    if (close === -1) {
      // Never closed — move on to the next opening brace.
      cursor = text.indexOf("{", cursor + 1);
    } else {
      found.push(text.slice(cursor, close + 1));
      cursor = text.indexOf("{", close + 1);
    }
  }
  return found;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Pull a JSON value out of free-form node output.
 *
 * Precedence: the last fenced code block whose body parses as JSON; otherwise
 * the longest balanced-brace span that parses (the earliest one wins a tie);
 * otherwise `undefined`.
 */
export function extractJson(text: string): unknown | undefined {
  // 1) Fenced code blocks, scanned from the last one backwards.
  const fenceBodies = Array.from(text.matchAll(/```[^\n]*\n([\s\S]*?)```/g), (m) => m[1] ?? "");
  for (const body of fenceBodies.reverse()) {
    const value = tryParse(body);
    if (value !== undefined) return value;
  }

  // 2) Longest parseable balanced-brace span.
  let winner: unknown;
  let winnerLength = -1;
  for (const span of balancedBraceSpans(text)) {
    const value = tryParse(span);
    if (value === undefined) continue;
    if (span.length > winnerLength) {
      winner = value;
      winnerLength = span.length;
    }
  }
  return winner;
}
|
|
74
|
+
|
|
75
|
+
// ── validation ──────────────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
/** Result of checking one value: `errors` holds one string per reported
 *  problem and is empty when nothing was reported. */
export type Validation = { valid: boolean; errors: string[] };
/** A reusable check of an extracted value, as produced by `makeValidator`. */
export type ValidateFn = (value: unknown) => Validation;
|
|
79
|
+
|
|
80
|
+
/**
 * Compile a JSON Schema once and return a function that checks values against
 * it. Each reported problem is rendered as "<instancePath> <message>", using
 * "/" when the path is empty and "is invalid" when there is no message.
 */
export function makeValidator(schema: object): ValidateFn {
  const compiled = new Ajv({ allErrors: true, strict: false }).compile(schema);

  return function validate(value: unknown): Validation {
    const valid = compiled(value) as boolean;
    const errors: string[] = [];
    for (const issue of compiled.errors ?? []) {
      const where = issue.instancePath || "/";
      const what = issue.message ?? "is invalid";
      errors.push(`${where} ${what}`.trim());
    }
    return { valid, errors };
  };
}
|
|
92
|
+
|
|
93
|
+
// ── the retry loop ────────────────────────────────────────────────────────────
|
|
94
|
+
|
|
95
|
+
/** Why a single attempt ended. "ok" plus the terminal kinds we journal. */
export type AttemptKind = "ok" | "no_json" | "schema_violation" | AgentFailureKind;

/** One attempt as reported to `onAttempt`. `n` is the 1-based attempt number;
 *  `errors` carries the validator messages of a schema violation, `stderr` the
 *  adapter's stderr on an adapter failure, and `rawText` the node's text on
 *  `no_json` / `schema_violation`. */
export type GateAttempt = { n: number; kind: AttemptKind; errors?: string[]; stderr?: string; rawText?: string };

/** What the previous failed attempt hands to the next call: the problems found
 *  and the text that produced them. */
export type GateFeedback = { errors: string[]; rawText: string };

/** Produce one node result. `n` is 1-based; `feedback` is null on the first try.
 *  The runtime supplies this — it decides followUp (in-session) vs fresh+error. */
export type GateCall = (n: number, feedback: GateFeedback | null) => Promise<AgentResult>;

/** Final result of the gate: the validated value, or the kind of the failure
 *  that ended it together with its diagnostic payload. */
export type GateOutcome =
  | { ok: true; value: unknown }
  // On failure we carry the diagnostic payload up so the runtime can journal it:
  // adapter stderr for hard failures, the node's raw text for schema/no_json.
  | { ok: false; kind: Exclude<AttemptKind, "ok">; stderr?: string; rawText?: string };
|
|
111
|
+
|
|
112
|
+
/**
 * Run a node through the gate. Schema noise (no_json / schema_violation) is
 * retried up to `maxRetries` times; a hard adapter failure short-circuits with
 * its own kind. A throw from `opts.call` resolves to `spawn_failure` with the
 * thrown message carried as `stderr`, so the cause reaches the journal.
 * Exhaustion or error resolves to { ok:false }.
 *
 * @param opts.call Produces one node result; receives the 1-based attempt
 *   number and the previous attempt's feedback (`null` on the first try).
 * @param opts.validate Checks the extracted JSON value.
 * @param opts.maxRetries Extra attempts allowed after the first; defaults to 2.
 *   It is normalized to a non-negative integer (at least one attempt is always
 *   made); a non-finite value falls back to the default so the loop is bounded.
 * @param opts.onAttempt Called once per attempt with how it ended.
 * @returns The validated value, or the last failure kind with its diagnostic
 *   payload (`stderr` for adapter failures, `rawText` for schema noise).
 *
 * Note: `opts.validate` and `opts.onAttempt` are invoked unguarded here, so an
 * exception from either propagates to the caller.
 */
export async function schemaGate(opts: {
  call: GateCall;
  validate: ValidateFn;
  maxRetries?: number;
  onAttempt?: (a: GateAttempt) => void;
}): Promise<GateOutcome> {
  const requested = opts.maxRetries ?? 2;
  // Negative or NaN used to skip the loop entirely and report a
  // "schema_violation" for a node that never ran.
  const maxRetries = Number.isFinite(requested) ? Math.max(0, Math.floor(requested)) : 2;
  let feedback: GateFeedback | null = null;
  let lastKind: Exclude<AttemptKind, "ok"> = "schema_violation";
  let lastRawText: string | undefined;

  for (let n = 1; n <= maxRetries + 1; n++) {
    let result: AgentResult;
    try {
      result = await opts.call(n, feedback);
    } catch (e) {
      // A throw at the adapter boundary is an adapter failure; keep its message.
      const stderr = e instanceof Error ? e.message : String(e);
      opts.onAttempt?.({ n, kind: "spawn_failure", stderr });
      return { ok: false, kind: "spawn_failure", stderr };
    }

    if (!result.ok) {
      // Adapter-level failure is not schema noise — short-circuit, keep stderr.
      opts.onAttempt?.({ n, kind: result.kind, stderr: result.stderr });
      return { ok: false, kind: result.kind, stderr: result.stderr };
    }

    lastRawText = result.text;
    const extracted = extractJson(result.text);
    if (extracted === undefined) {
      lastKind = "no_json";
      opts.onAttempt?.({ n, kind: "no_json", rawText: result.text });
      feedback = { errors: ["output contained no extractable JSON"], rawText: result.text };
      continue;
    }

    const { valid, errors } = opts.validate(extracted);
    if (valid) {
      opts.onAttempt?.({ n, kind: "ok" });
      return { ok: true, value: extracted };
    }

    lastKind = "schema_violation";
    opts.onAttempt?.({ n, kind: "schema_violation", errors, rawText: result.text });
    feedback = { errors, rawText: result.text };
  }

  // Retry budget exhausted: report the last kind of noise and the last text seen.
  return { ok: false, kind: lastKind, rawText: lastRawText };
}
|