oh-my-workflow 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +178 -0
- package/examples/deep-research/workflow.ts +82 -0
- package/package.json +60 -0
- package/skill/SKILL.md +491 -0
- package/src/adapters/claude.ts +146 -0
- package/src/adapters/codex.ts +149 -0
- package/src/adapters/fake.ts +70 -0
- package/src/adapters/types.ts +43 -0
- package/src/cli/omw.ts +37 -0
- package/src/cli/replay.ts +98 -0
- package/src/cli/run.ts +371 -0
- package/src/cli/validate.ts +110 -0
- package/src/journal.ts +138 -0
- package/src/resume.ts +48 -0
- package/src/runtime.ts +235 -0
- package/src/schema-gate.ts +164 -0
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// The codex adapter (EXPERIMENTAL). A node is a whole `codex exec` run. codex
|
|
2
|
+
// streams its result as JSONL dot-notation events on stdout:
|
|
3
|
+
// thread.started{thread_id} → turn.started → item.completed{item:agent_message}
|
|
4
|
+
// → turn.completed{usage}
|
|
5
|
+
// We take the LAST agent_message's text as the result and thread_id as the
|
|
6
|
+
// sessionId (for `exec resume` follow-ups). There is no cost field (tokens only),
|
|
7
|
+
// so costUsd stays undefined.
|
|
8
|
+
//
|
|
9
|
+
// Per openai/codex#15451 the stream can include malformed lines; parseCodexJsonl
|
|
10
|
+
// tolerates them line-by-line and, if no final agent_message is found, fails
|
|
11
|
+
// ACTIONABLY (surfaces the reason) rather than silently returning empty — the
|
|
12
|
+
// authoring agent can read WHY in the journal.
|
|
13
|
+
|
|
14
|
+
import type { AgentPort, AgentResult, InvokeRequest } from "./types";
|
|
15
|
+
import type { ClaudeSpawn as Spawn, ClaudeSpawnResult as SpawnResult } from "./claude";
|
|
16
|
+
|
|
17
|
+
/** Render any thrown value as text: an Error's `message`, otherwise `String(value)`. */
const errMsg = (e: unknown): string => {
  if (e instanceof Error) return e.message;
  return String(e);
};
|
|
18
|
+
|
|
19
|
+
/**
 * Parse the JSONL event stream captured from a codex run's stdout.
 *
 * - The `thread_id` of a `thread.started` event becomes `meta.sessionId`.
 * - The text of the LAST `item.completed` event whose item is an
 *   `agent_message` becomes the result text.
 * - An `error` or `turn.failed` event makes the whole run a `nonzero_exit`
 *   failure, even if an agent_message was also seen.
 *
 * Lines that are not valid JSON, or that are valid JSON but not an object
 * (`null`, numbers, strings, arrays), are counted as malformed and skipped
 * instead of aborting the parse.
 *
 * @param stdout Raw stdout text of the codex process.
 * @returns `ok: true` with the final message, or `ok: false` with the reason in
 *   `stderr`. `meta.durationMs` is always 0 here; the parser has no timing data.
 */
export function parseCodexJsonl(stdout: string): AgentResult {
  const lines = stdout.split("\n").map((l) => l.trim()).filter(Boolean);
  let threadId: string | undefined;
  let lastMessage: string | undefined;
  let failure: string | undefined;
  let malformed = 0;

  for (const line of lines) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      malformed++; // #15451 tolerance — skip junk, keep parsing
      continue;
    }
    // JSON.parse also accepts `null`, numbers, strings and arrays. None of them
    // is an event, and reading `.type` off `null` would throw.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      malformed++;
      continue;
    }
    const ev = parsed as Record<string, unknown>;

    switch (ev.type) {
      case "thread.started":
        // Only accept a real string id; anything else would poison followUp.
        if (typeof ev.thread_id === "string") threadId = ev.thread_id;
        break;
      case "item.completed": {
        const item = ev.item as { type?: unknown; text?: unknown } | null | undefined;
        if (item?.type === "agent_message" && typeof item.text === "string") lastMessage = item.text;
        break;
      }
      case "error":
        // Fall back to the whole event when the message is missing or empty, so
        // the failure is never reported without a reason.
        failure = typeof ev.message === "string" && ev.message !== "" ? ev.message : JSON.stringify(ev);
        break;
      case "turn.failed": {
        const err = ev.error as { message?: unknown } | null | undefined;
        const reason = typeof err?.message === "string" && err.message !== "" ? err.message : "turn failed";
        // An earlier `error` event carries the more specific reason; keep it.
        failure = failure ?? reason;
        break;
      }
    }
  }

  if (failure !== undefined) {
    // No refusal kind for codex: its stream has no distinct decline signal. A
    // hard failure (`error` / `turn.failed`) is nonzero_exit; a SOFT decline
    // ("I can't help with that") instead arrives as a normal agent_message and
    // returns ok:true below — an invisible abstention we can't tell from a real
    // answer here. So codex declines are NOT nonzero_exit; refusal is claude-only.
    return { ok: false, kind: "nonzero_exit", stderr: `codex: ${failure}`, meta: { durationMs: 0 } };
  }
  if (lastMessage === undefined) {
    const hint = malformed > 0 ? ` (${malformed} malformed JSONL line(s))` : "";
    return {
      ok: false,
      kind: "nonzero_exit",
      stderr: `codex produced no agent_message${hint}`,
      meta: { durationMs: 0 },
    };
  }
  return { ok: true, text: lastMessage, meta: { durationMs: 0, sessionId: threadId } };
}
|
|
73
|
+
|
|
74
|
+
export type CodexAdapterDeps = {
|
|
75
|
+
spawn?: Spawn;
|
|
76
|
+
bin?: string;
|
|
77
|
+
/** codex sandbox policy. Defaults to workspace-write (a coding node needs to
|
|
78
|
+
* write); override to read-only for safe demos or danger-full-access. */
|
|
79
|
+
sandbox?: "read-only" | "workspace-write" | "danger-full-access";
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
/**
 * Build the default process launcher: runs `bin` with the given args through
 * `Bun.spawn`, captures stdout/stderr as text, and reports the exit code.
 *
 * @param bin Executable to launch.
 * @returns A spawn function resolving to `{ code, stdout, stderr, timedOut }`.
 *   `timedOut` is true when the `timeoutMs` deadline fired and the child was
 *   killed.
 */
function defaultSpawn(bin: string): Spawn {
  return async (args, opts) => {
    const proc = Bun.spawn([bin, ...args], {
      cwd: opts?.cwd,
      stdin: "ignore", // codex reads stdin otherwise and hangs waiting for EOF
      stdout: "pipe",
      stderr: "pipe",
    });
    let timedOut = false;
    let timer: ReturnType<typeof setTimeout> | undefined;
    if (opts?.timeoutMs && opts.timeoutMs > 0) {
      timer = setTimeout(() => {
        timedOut = true;
        proc.kill();
      }, opts.timeoutMs);
    }
    try {
      // Drain both pipes concurrently, then wait for the exit code.
      const [stdout, stderr] = await Promise.all([
        new Response(proc.stdout).text(),
        new Response(proc.stderr).text(),
      ]);
      const code = await proc.exited;
      return { code, stdout, stderr, timedOut };
    } finally {
      // Clear the deadline on every path, including a failed read, so no stale
      // timer is left pending after this call has settled.
      if (timer !== undefined) clearTimeout(timer);
    }
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Create the codex `AgentPort`.
 *
 * `invoke` runs `exec --json -s <sandbox> [-m <model>] <prompt>`; `followUp`
 * runs `exec resume <sessionId> --json -s <sandbox> <prompt>`. Both go through
 * the injected `deps.spawn`, or a Bun-based default bound to `deps.bin`
 * (default `"codex"`).
 *
 * Every result carries the measured wall-clock `meta.durationMs`. The cwd and
 * timeout of the call that produced a session id are remembered, so a later
 * `followUp` on that session runs under the same directory and deadline.
 *
 * @param deps Optional spawn override, binary name and sandbox policy
 *   (default `"workspace-write"`).
 * @returns An adapter named `"codex"`.
 */
export function makeCodexAdapter(deps: CodexAdapterDeps = {}): AgentPort {
  const spawn = deps.spawn ?? defaultSpawn(deps.bin ?? "codex");
  const sandbox = deps.sandbox ?? "workspace-write";
  // session id -> cwd/timeout of the call that issued it. followUp receives only
  // (sessionId, prompt), so this is its only way to recover the original context.
  const sessionContext = new Map<string, { cwd?: string; timeoutMs?: number }>();

  /** Copy `r` with `meta.durationMs` replaced by the measured value. */
  const withDuration = (r: AgentResult, durationMs: number): AgentResult =>
    r.ok ? { ...r, meta: { ...r.meta, durationMs } } : { ...r, meta: { durationMs } };

  async function run(args: string[], cwd?: string, timeoutMs?: number): Promise<AgentResult> {
    const startedAt = Date.now();
    const elapsed = (): number => Date.now() - startedAt;

    let res: SpawnResult;
    try {
      res = await spawn(args, { cwd, timeoutMs });
    } catch (e) {
      return { ok: false, kind: "spawn_failure", stderr: errMsg(e), meta: { durationMs: elapsed() } };
    }
    if (res.timedOut) {
      return {
        ok: false,
        kind: "timeout",
        stderr: res.stderr || `timed out after ${timeoutMs}ms`,
        meta: { durationMs: elapsed() },
      };
    }
    if (!res.stdout.trim()) {
      // No JSONL at all → fall back to the process-level failure.
      return {
        ok: false,
        kind: "nonzero_exit",
        stderr: res.stderr || `codex exited ${res.code} with no output`,
        meta: { durationMs: elapsed() },
      };
    }
    // JSONL present (even on a non-zero exit, e.g. turn.failed) → let the parser
    // decide ok/fail and surface the reason.
    const result = withDuration(parseCodexJsonl(res.stdout), elapsed());
    if (result.ok && result.meta.sessionId !== undefined) {
      sessionContext.set(result.meta.sessionId, { cwd, timeoutMs });
    }
    return result;
  }

  return {
    name: "codex",
    invoke(req: InvokeRequest): Promise<AgentResult> {
      const args = ["exec", "--json", "-s", sandbox];
      if (req.model) args.push("-m", req.model);
      args.push(req.prompt);
      return run(args, req.cwd, req.timeoutMs);
    },
    followUp(sessionId: string, prompt: string): Promise<AgentResult> {
      const args = ["exec", "resume", sessionId, "--json", "-s", sandbox, prompt];
      // Unknown session ids fall back to no cwd and no timeout.
      const ctx = sessionContext.get(sessionId);
      return run(args, ctx?.cwd, ctx?.timeoutMs);
    },
  };
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// The fake adapter is two things at once: the test double for the whole suite,
|
|
2
|
+
// AND the engine behind `--agent fake` — the free, deterministic, no-API-key
|
|
3
|
+
// try-it path that proves the spine (pipeline/parallel/phase + journal +
|
|
4
|
+
// self-repair loop) for a stranger before they touch a real agent. So it is
|
|
5
|
+
// real product code, not just a mock.
|
|
6
|
+
|
|
7
|
+
import type { AgentPort, AgentResult, AgentFailureKind, InvokeRequest } from "./types";
|
|
8
|
+
|
|
9
|
+
export type FakeResponse =
|
|
10
|
+
| { text: string; sessionId?: string; costUsd?: number }
|
|
11
|
+
| { fail: AgentFailureKind; stderr?: string };
|
|
12
|
+
|
|
13
|
+
export type FakeRule = {
|
|
14
|
+
match: (prompt: string) => boolean;
|
|
15
|
+
responses: FakeResponse[];
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
export type FakeAdapterOptions = {
|
|
19
|
+
rules?: FakeRule[];
|
|
20
|
+
default?: FakeResponse;
|
|
21
|
+
durationMs?: number;
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
/**
 * Convert a scripted fake response into an `AgentResult`.
 *
 * @param r Either a success (`text`, optional `sessionId` / `costUsd`) or a
 *   failure (`fail` kind, optional `stderr`).
 * @param durationMs Duration reported in the result's `meta`.
 */
function toResult(r: FakeResponse, durationMs: number): AgentResult {
  // Success is the shape without a `fail` key.
  if (!("fail" in r)) {
    const { text, sessionId, costUsd } = r;
    return { ok: true, text, meta: { durationMs, sessionId, costUsd } };
  }
  const { fail: kind, stderr } = r;
  return { ok: false, kind, stderr, meta: { durationMs } };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Build the deterministic fake adapter.
 *
 * A prompt is answered by the first rule whose `match` accepts it. Each rule
 * replays its `responses` in order across calls and then keeps returning the
 * last one. With no matching rule, or a matching rule with no responses, the
 * `default` response is used (`{ text: "{}" }` when none is configured).
 *
 * @param opts Rules, fallback response and the duration reported in results.
 * @returns An adapter named `"fake"`.
 */
export function makeFakeAdapter(opts: FakeAdapterOptions = {}): AgentPort {
  const scripted = opts.rules ?? [];
  const reportedMs = opts.durationMs ?? 0;
  const fallback: FakeResponse = opts.default ?? { text: "{}" };
  // How many times each rule has been consumed so far.
  const positions = new Map<FakeRule, number>();
  // Which rule handed out a given session id, so followUp stays on that rule's
  // script even when the follow-up prompt would not match its predicate.
  const owners = new Map<string, FakeRule>();

  /** Take the rule's next response, sticking on the final one. */
  function step(rule: FakeRule): FakeResponse {
    const used = positions.get(rule) ?? 0;
    positions.set(rule, used + 1);
    const lastIndex = rule.responses.length - 1;
    const picked = rule.responses[used < lastIndex ? used : lastIndex] ?? fallback;
    if ("text" in picked && picked.sessionId) {
      owners.set(picked.sessionId, rule);
    }
    return picked;
  }

  /** Answer a prompt from the first matching rule, else the fallback. */
  function answer(prompt: string): FakeResponse {
    for (const rule of scripted) {
      if (!rule.match(prompt)) continue;
      return rule.responses.length === 0 ? fallback : step(rule);
    }
    return fallback;
  }

  return {
    name: "fake",
    async invoke(req: InvokeRequest): Promise<AgentResult> {
      return toResult(answer(req.prompt), reportedMs);
    },
    async followUp(sessionId: string, prompt: string): Promise<AgentResult> {
      const owner = owners.get(sessionId);
      const next = owner ? step(owner) : answer(prompt);
      return toResult(next, reportedMs);
    },
  };
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
// The contract every adapter implements. omw's only job is to be the thin
|
|
2
|
+
// deterministic glue between an orchestration script and these subprocess nodes.
|
|
3
|
+
// Kept tiny on purpose: a node is a whole coding agent, not a single LLM call.
|
|
4
|
+
|
|
5
|
+
/** Why an invocation failed. The journal records this `kind` so the authoring
|
|
6
|
+
* agent can read WHICH failure happened and repair its own script.
|
|
7
|
+
* `refusal` is a DECLINE (the model said no — HTTP 200, `stop_reason:"refusal"`),
|
|
8
|
+
* kept distinct from a crash so an abstain-quorum can treat declined ≠ failed. */
|
|
9
|
+
export type AgentFailureKind = "timeout" | "nonzero_exit" | "spawn_failure" | "refusal";
|
|
10
|
+
|
|
11
|
+
export type AgentResult =
|
|
12
|
+
| {
|
|
13
|
+
ok: true;
|
|
14
|
+
text: string;
|
|
15
|
+
meta: {
|
|
16
|
+
durationMs: number;
|
|
17
|
+
/** Present when the adapter supports session resume (claude --resume). */
|
|
18
|
+
sessionId?: string;
|
|
19
|
+
costUsd?: number;
|
|
20
|
+
};
|
|
21
|
+
}
|
|
22
|
+
| {
|
|
23
|
+
ok: false;
|
|
24
|
+
kind: AgentFailureKind;
|
|
25
|
+
stderr?: string;
|
|
26
|
+
meta?: { durationMs: number };
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
export type InvokeRequest = {
|
|
30
|
+
prompt: string;
|
|
31
|
+
/** Tier alias ("fast" | "smart") or a raw model string passed through. */
|
|
32
|
+
model?: string;
|
|
33
|
+
cwd?: string;
|
|
34
|
+
timeoutMs?: number;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
export type AgentPort = {
|
|
38
|
+
name: string;
|
|
39
|
+
invoke(req: InvokeRequest): Promise<AgentResult>;
|
|
40
|
+
/** Optional in-session follow-up (claude --resume). When absent, the runtime
|
|
41
|
+
* re-invokes fresh with the error feedback appended to the prompt. */
|
|
42
|
+
followUp?(sessionId: string, prompt: string): Promise<AgentResult>;
|
|
43
|
+
};
|
package/src/cli/omw.ts
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
// The `omw` entry. Runs the .ts directly under bun (no build step) so
|
|
3
|
+
// `bunx oh-my-workflow run …` works on a stranger's machine. Dispatches the
|
|
4
|
+
// subcommand; the heavy lifting lives in the tested run/replay libraries.
|
|
5
|
+
|
|
6
|
+
import { runCommand } from "./run";
|
|
7
|
+
import { replayCommand } from "./replay";
|
|
8
|
+
import { validateCommand } from "./validate";
|
|
9
|
+
|
|
10
|
+
// Output sinks handed to every subcommand: plain writes to the process streams.
const io = {
  stdout(s: string) {
    return process.stdout.write(s);
  },
  stderr(s: string) {
    return process.stderr.write(s);
  },
};
|
|
14
|
+
|
|
15
|
+
/**
 * Dispatch an `omw` subcommand.
 *
 * @param argv Arguments after the program name; `argv[0]` is the subcommand.
 * @returns The process exit code: whatever the subcommand returns, 0 after an
 *   explicit help request (`help`, `--help`, `-h`), and 2 for a missing or
 *   unknown command.
 */
async function main(argv: string[]): Promise<number> {
  const usage =
    "usage: omw <command>\n\n" +
    "commands:\n" +
    "  run <workflow> --agent <fake|claude|codex|pi> [--args JSON] [--concurrency N] [--resume <journal.jsonl>] [--pretty]\n" +
    "  replay <journal.jsonl> [--json]\n" +
    "  validate <workflow> [--json]\n\n" +
    "free demo (no API key): omw run examples/deep-research --agent fake\n";

  const [cmd, ...rest] = argv;
  switch (cmd) {
    case "run":
      return runCommand(rest, io);
    case "replay":
      return replayCommand(rest, io);
    case "validate":
      return validateCommand(rest, io);
    case "help":
    case "--help":
    case "-h":
      // Asking for help is not an error: usage goes to stdout with exit 0.
      io.stdout(usage);
      return 0;
    default:
      // Name the offending command so a typo is distinguishable from no command.
      if (cmd !== undefined) io.stderr(`omw: unknown command: ${cmd}\n\n`);
      io.stderr(usage);
      return 2;
  }
}
|
|
36
|
+
|
|
37
|
+
// Exit with the subcommand's status. A subcommand that throws is reported on
// stderr and mapped to exit 1 instead of surfacing as an unhandled rejection.
main(process.argv.slice(2)).then(
  (code) => process.exit(code),
  (e: unknown) => {
    const detail = e instanceof Error ? e.stack ?? e.message : String(e);
    process.stderr.write(`omw: ${detail}\n`);
    process.exit(1);
  },
);
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
// `omw replay <journal.jsonl> [--json]`. Re-derives the phase / fan-out / stats
|
|
2
|
+
// view from a recorded journal. This is honestly a FIXTURE REPLAY — reading back
|
|
3
|
+
// what a run already recorded — NOT a live resume (re-executing from the longest
|
|
4
|
+
// unchanged prefix), which is v2 and layers on without a format change.
|
|
5
|
+
|
|
6
|
+
import { readFileSync } from "node:fs";
|
|
7
|
+
import type { Io } from "./run";
|
|
8
|
+
import { parseJournalLines } from "../journal";
|
|
9
|
+
import { renderTree } from "./run";
|
|
10
|
+
|
|
11
|
+
/** Parsed `omw replay` arguments: the journal path and whether `--json` was given. */
export type ReplayArgs = { path: string; json: boolean };
|
|
12
|
+
|
|
13
|
+
/** Outcome of argument parsing: the parsed arguments, or a usage error message. */
export type ReplayParse = { ok: true; value: ReplayArgs } | { ok: false; error: string };
|
|
14
|
+
|
|
15
|
+
/**
 * Parse the arguments of `omw replay <journal.jsonl> [--json]`.
 *
 * `--json` may appear anywhere. Exactly one positional journal path is
 * required. Any other dash-prefixed token is rejected as an unknown option
 * rather than being taken as the journal path, so a misspelled flag produces a
 * usage error instead of a confusing read failure.
 *
 * @param argv Arguments after the `replay` subcommand.
 * @returns `{ ok: true, value }` on success, `{ ok: false, error }` otherwise.
 */
export function parseReplayArgs(argv: string[]): ReplayParse {
  let path: string | undefined;
  let json = false;
  for (const tok of argv) {
    if (tok === "--json") {
      json = true;
      continue;
    }
    // A lone "-" is still allowed through as a positional argument.
    if (tok.length > 1 && tok.startsWith("-")) {
      return { ok: false, error: `unknown option: ${tok}` };
    }
    if (path !== undefined) return { ok: false, error: `unexpected argument: ${tok}` };
    path = tok;
  }
  if (path === undefined) return { ok: false, error: "missing journal path" };
  return { ok: true, value: { path, json } };
}
|
|
26
|
+
|
|
27
|
+
export type ReplaySummary = {
|
|
28
|
+
run?: string;
|
|
29
|
+
wf?: string;
|
|
30
|
+
phases: string[];
|
|
31
|
+
calls: { total: number; ok: number; failed: number };
|
|
32
|
+
failures: Array<{ call: number; kind?: string }>;
|
|
33
|
+
ok?: boolean;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
/**
 * Fold recorded journal lines into a `ReplaySummary`.
 *
 * Takes the run id and workflow from `run_start`, collects `phase` titles in
 * order, tallies `agent_end` events into ok/failed counts (recording the call
 * number and kind of each failure), and takes the overall outcome from
 * `run_end`. Other event types are ignored.
 *
 * @param lines Raw journal lines, as split from the journal file.
 */
export function summarizeJournal(lines: string[]): ReplaySummary {
  const phases: string[] = [];
  const failures: Array<{ call: number; kind?: string }> = [];
  const calls = { total: 0, ok: 0, failed: 0 };
  let runId: string | undefined;
  let workflow: string | undefined;
  let outcome: boolean | undefined;

  for (const entry of parseJournalLines(lines)) {
    if (entry.ev === "run_start") {
      runId = entry.run;
      workflow = entry.wf;
    } else if (entry.ev === "phase") {
      phases.push(entry.title);
    } else if (entry.ev === "agent_end") {
      calls.total += 1;
      if (entry.ok) {
        calls.ok += 1;
      } else {
        calls.failed += 1;
        failures.push({ call: entry.call, kind: entry.kind });
      }
    } else if (entry.ev === "run_end") {
      outcome = entry.ok;
    }
  }

  return { run: runId, wf: workflow, phases, calls, failures, ok: outcome };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Read a journal file and print either its reconstructed tree (default) or a
 * structured summary (`--json`).
 *
 * @param argv Arguments after the `replay` subcommand.
 * @param io Output sinks for stdout / stderr.
 * @returns 2 on a usage error, 1 if the file is unreadable, 0 otherwise.
 */
export function replayCommand(argv: string[], io: Io): number {
  const parsed = parseReplayArgs(argv);
  if (!parsed.ok) {
    io.stderr(JSON.stringify({ error: "usage", message: parsed.error }) + "\n");
    io.stderr("usage: omw replay <journal.jsonl> [--json]\n");
    return 2;
  }

  let lines: string[];
  try {
    lines = readFileSync(parsed.value.path, "utf8").split("\n");
  } catch (e) {
    // Report WHY the read failed (missing file, permissions, is a directory …)
    // instead of discarding the caught error.
    const message = e instanceof Error ? e.message : String(e);
    io.stderr(JSON.stringify({ error: "read_failed", path: parsed.value.path, message }) + "\n");
    return 1;
  }

  if (parsed.value.json) {
    io.stdout(JSON.stringify(summarizeJournal(lines)) + "\n");
  } else {
    io.stdout(renderTree(lines) + "\n");
    io.stderr("(fixture replay — reconstructed from recorded journal, not a live resume)\n");
  }
  return 0;
}
|