@cordfuse/crosstalkd 7.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GUIDE-CLI.md +315 -0
- package/GUIDE-PROMPTS.md +107 -0
- package/README.md +118 -0
- package/bin/crosstalkd.js +101 -0
- package/package.json +48 -0
- package/src/activation.ts +104 -0
- package/src/api.ts +430 -0
- package/src/channel.ts +202 -0
- package/src/dispatch.ts +430 -0
- package/src/dispatchers.ts +91 -0
- package/src/filenames.ts +28 -0
- package/src/frontmatter.ts +26 -0
- package/src/init.ts +108 -0
- package/src/invoke.ts +148 -0
- package/src/models.ts +86 -0
- package/src/replies.ts +73 -0
- package/src/run.ts +236 -0
- package/src/state.ts +159 -0
- package/src/status.ts +84 -0
- package/src/stop.ts +37 -0
- package/src/transport.ts +236 -0
- package/src/workflow.ts +458 -0
- package/template/CLAUDE.md +10 -0
- package/template/CROSSTALK-VERSION +1 -0
- package/template/CROSSTALK.md +242 -0
- package/template/PROTOCOL.md +66 -0
- package/template/README.md +69 -0
- package/template/data/models.yaml +27 -0
- package/template/gitignore +4 -0
package/src/transport.ts
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
// Git transport layer. The dispatcher's commits contain ONLY data/ —
|
|
2
|
+
// machine-local state lives in the state dir (state.ts), so there is
|
|
3
|
+
// nothing to exclude, untrack, or heal. Push rejection means another
|
|
4
|
+
// machine won the race: pull --rebase and retry at the call site.
|
|
5
|
+
|
|
6
|
+
import { existsSync, readdirSync, readFileSync, statSync } from 'fs';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { spawnSync } from 'child_process';
|
|
9
|
+
import { parseFrontmatter } from './frontmatter.js';
|
|
10
|
+
import { logError } from './state.js';
|
|
11
|
+
|
|
12
|
+
/**
 * One parsed message file found inside a channel directory, as returned by
 * `listChannelMessages`.
 */
export interface ChannelMessage {
  /** Path relative to the channel directory, built with `/` separators. */
  relPath: string;
  /** Path of the same file on disk (channel directory joined with the entry names). */
  fullPath: string;
  /** Frontmatter fields as returned by the frontmatter parser. */
  data: Record<string, unknown>;
  /** Message text as returned by the frontmatter parser. */
  body: string;
}
|
|
18
|
+
|
|
19
|
+
/** Outcome of a git operation that either succeeds or fails with a message. */
export interface GitResult {
  ok: boolean;
  /** Trimmed git output describing the failure; set only when `ok` is false. */
  error?: string;
}

/**
 * Outcome of `gitCommitAndPush`: the plain `GitResult` fields plus a record
 * of which steps actually happened.
 */
export interface GitPushResult extends GitResult {
  /** True when a new commit was created. */
  committed: boolean;
  /** True when the push succeeded. */
  pushed: boolean;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Run `git <args>` synchronously in `cwd` and capture its output.
 *
 * - The C locale is forced so that callers matching on git's stderr text
 *   see the untranslated messages regardless of the user's LANG / LC_*.
 * - When the process cannot be spawned at all (git not installed, `cwd`
 *   missing) there is no exit status and no output; `status` is then 1 and
 *   the spawn error message is surfaced through `stderr` instead of being
 *   silently dropped.
 *
 * @param cwd  Directory to run git in.
 * @param args Arguments passed to git verbatim (no shell involved).
 * @returns Exit status plus captured stdout/stderr (never null).
 */
function captureGit(cwd: string, args: string[]): { status: number; stdout: string; stderr: string } {
  const r = spawnSync('git', args, {
    cwd,
    encoding: 'utf-8',
    env: { ...process.env, LC_ALL: 'C' },
  });
  const stderr = r.stderr ?? '';
  return {
    status: r.status ?? 1,
    stdout: r.stdout ?? '',
    // Prefer git's own stderr; fall back to the spawn error when there is none.
    stderr: stderr || (r.error ? r.error.message : ''),
  };
}
|
|
35
|
+
|
|
36
|
+
/**
 * Clean up after a git operation that was killed part-way through.
 *
 * Looks, in order, for the marker paths git leaves behind during a rebase,
 * merge or cherry-pick. For the first marker that exists, the matching
 * `--abort` command is run and the outcome (including the exit status) is
 * recorded through `logError`. Later markers are not examined.
 *
 * @param transportRoot Root of the transport repository.
 * @returns true when a marker was found and an abort was attempted,
 *          false when no half-finished operation was detected.
 */
export function recoverInterruptedGit(transportRoot: string): boolean {
  const leftovers: { dir: string; abortArgs: string[] }[] = [
    { dir: '.git/rebase-merge', abortArgs: ['rebase', '--abort'] },
    { dir: '.git/rebase-apply', abortArgs: ['rebase', '--abort'] },
    { dir: '.git/MERGE_HEAD', abortArgs: ['merge', '--abort'] },
    { dir: '.git/CHERRY_PICK_HEAD', abortArgs: ['cherry-pick', '--abort'] },
  ];

  const found = leftovers.find((entry) => existsSync(join(transportRoot, entry.dir)));
  if (found === undefined) return false;

  const outcome = captureGit(transportRoot, found.abortArgs);
  const command = found.abortArgs.join(' ');
  logError(
    transportRoot,
    `recovered from interrupted git state at ${found.dir} via 'git ${command}' (exit=${outcome.status})`,
  );
  return true;
}
|
|
57
|
+
|
|
58
|
+
/**
 * The commit that cursors anchor to.
 *
 * Prefers the origin tip: origin history is append-only, so a cursor
 * pointing there can never be orphaned by a local `pull --rebase` rewriting
 * unpushed commits. `HEAD` is the fallback for transports without a remote.
 *
 * Each candidate is resolved with `rev-parse --verify --quiet <ref>^{commit}`.
 * Without `--verify`, `rev-parse` may echo an argument that is not a
 * revision but happens to name a path in the working tree and still exit 0,
 * which would hand back a non-SHA string as the baseline.
 *
 * @param transportRoot Root of the transport repository.
 * @returns The resolved commit id, or null when none of the refs resolve.
 */
export function cursorBaseline(transportRoot: string): string | null {
  for (const ref of ['origin/HEAD', 'origin/main', 'HEAD']) {
    const r = captureGit(transportRoot, ['rev-parse', '--verify', '--quiet', `${ref}^{commit}`]);
    const sha = r.stdout.trim();
    if (r.status === 0 && sha) return sha;
  }
  return null;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Repo-relative paths of message files added between `sinceCommit` and HEAD.
 *
 * Returns null when the commit is unknown to this clone (state dir copied
 * across transports, history rewritten), so the caller can fall back to a
 * full channel scan. An empty cursor, or one starting with `-` (which git
 * would read as an option), is treated the same way.
 *
 * Flags:
 *  - `-z`: NUL-separated output, so names containing non-ASCII or special
 *    characters are returned verbatim instead of C-quoted.
 *  - `--no-renames`: an added file that resembles a deleted one is reported
 *    as an addition rather than a rename, so `--diff-filter=A` keeps it.
 *
 * @param transportRoot Root of the transport repository.
 * @param sinceCommit   Commit id the cursor was anchored to.
 * @returns Added paths under `data/channels/`, or null if the diff failed.
 */
export function newFilesSince(transportRoot: string, sinceCommit: string): string[] | null {
  if (!sinceCommit || sinceCommit.startsWith('-')) return null;
  const r = captureGit(transportRoot, [
    'diff', '--name-only', '-z', '--no-renames', '--diff-filter=A',
    `${sinceCommit}..HEAD`, '--', 'data/channels/',
  ]);
  if (r.status !== 0) return null;
  return r.stdout.split('\0').filter(Boolean);
}
|
|
81
|
+
|
|
82
|
+
/**
 * Fetch from `origin` and rebase local commits onto `origin/main`.
 *
 * Transports without a remote are valid (single-machine, local-only): a
 * fetch that fails because no usable `origin` exists, or a rebase that
 * fails because `origin/main` does not exist, both count as success.
 *
 * Any other failure is returned as `{ ok: false, error }` with git's output
 * capped at 500 characters. After a failed rebase the repository is put
 * back into a clean state immediately, so later git commands do not run in
 * the middle of a stopped rebase.
 *
 * @param transportRoot Root of the transport repository.
 */
export function gitPull(transportRoot: string): GitResult {
  recoverInterruptedGit(transportRoot);

  const fetch = captureGit(transportRoot, ['fetch', 'origin', '--quiet']);
  if (fetch.status !== 0) {
    // No origin remote configured → not an error; the dispatcher just
    // operates on local commits.
    // NOTE(review): "Could not read from remote" is also what git prints for
    // network/auth failures, so those are reported as ok here too — confirm
    // that offline tolerance is intended.
    const stderr = fetch.stderr.trim();
    const noRemote =
      stderr.includes('does not appear to be a git repository') ||
      stderr.includes('Could not read from remote') ||
      (stderr.includes('origin') && stderr.includes('does not appear'));
    if (noRemote) return { ok: true };
    return { ok: false, error: (fetch.stderr || fetch.stdout).trim().slice(0, 500) };
  }

  const rebase = captureGit(transportRoot, ['rebase', 'origin/main']);
  if (rebase.status === 0) return { ok: true };

  // No origin/main (e.g. remote exists but has no main branch yet) → not an
  // error. "invalid upstream" is the wording current git uses for this case.
  const missingUpstream = ['invalid upstream', 'unknown revision', 'not a valid object name']
    .some((needle) => rebase.stderr.includes(needle));
  if (missingUpstream) return { ok: true };

  const error = (rebase.stderr || rebase.stdout).trim().slice(0, 500);
  // A conflicting rebase stops half-way; abort it now rather than leaving
  // the working tree in that state until the next pull.
  recoverInterruptedGit(transportRoot);
  return { ok: false, error };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Stage `data/` only, commit, then push.
 *
 * - Nothing changed under `data/` → `{ ok: true, committed: false, pushed: false }`.
 * - No remote configured → the first push fails with "No configured push
 *   destination" (or "does not appear to be a git repository"); that is
 *   treated as success because the commit is already local, which is the
 *   intended state for single-machine transports.
 * - Push rejected → normal under concurrent writers. Up to five rounds of
 *   jittered pause + `gitPull` + re-push; collision-free filenames keep the
 *   rebase clean, so racing writers converge within a few rounds.
 *
 * The pause uses `Atomics.wait` rather than spawning an external `sleep`,
 * so the backoff does not depend on a `sleep` binary that accepts
 * fractional seconds being present.
 *
 * @param transportRoot Root of the transport repository.
 * @param message       Commit message.
 * @returns Which steps happened, plus git's output (max 500 chars) on failure.
 */
export function gitCommitAndPush(transportRoot: string, message: string): GitPushResult {
  // Uniform failure shape: prefer stderr, fall back to stdout, cap the length.
  const failure = (r: { stdout: string; stderr: string }, committed: boolean): GitPushResult => ({
    ok: false,
    committed,
    pushed: false,
    error: (r.stderr || r.stdout).trim().slice(0, 500),
  });

  const status = captureGit(transportRoot, ['status', '--porcelain', '--', 'data/']);
  if (status.status !== 0) return failure(status, false);
  if (!status.stdout.trim()) {
    return { ok: true, committed: false, pushed: false };
  }

  const add = captureGit(transportRoot, ['add', '--', 'data/']);
  if (add.status !== 0) return failure(add, false);

  const commit = captureGit(transportRoot, ['commit', '-m', message, '--', 'data/']);
  if (commit.status !== 0) {
    const noop = commit.stdout.includes('nothing to commit') ||
      commit.stderr.includes('nothing to commit');
    if (noop) return { ok: true, committed: false, pushed: false };
    return failure(commit, false);
  }

  let push = captureGit(transportRoot, ['push', '--quiet']);
  if (push.status !== 0) {
    const noRemote = push.stderr.includes('No configured push destination') ||
      push.stderr.includes('does not appear to be a git repository');
    if (noRemote) return { ok: true, committed: true, pushed: false };
  }

  for (let attempt = 0; push.status !== 0 && attempt < 5; attempt++) {
    // Jittered backoff: 50 ms plus up to 300 ms per attempt number.
    const pauseMs = Math.round((0.05 + Math.random() * 0.3 * (attempt + 1)) * 1000);
    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, pauseMs);
    const pull = gitPull(transportRoot);
    if (!pull.ok) continue;
    push = captureGit(transportRoot, ['push', '--quiet']);
  }

  if (push.status !== 0) return failure(push, true);
  return { ok: true, committed: true, pushed: true };
}
|
|
166
|
+
|
|
167
|
+
/**
 * List the channel names of a transport: every entry of
 * `<transportRoot>/data/channels` that is a directory.
 *
 * A missing channels directory yields an empty list. A failing directory
 * read is logged through `logError` and also yields an empty list. Entries
 * that cannot be stat'ed are skipped.
 *
 * @param transportRoot Root of the transport repository.
 * @returns Directory names in the order the directory read returned them.
 */
export function discoverChannels(transportRoot: string): string[] {
  const channelsDir = join(transportRoot, 'data', 'channels');
  if (!existsSync(channelsDir)) return [];

  let names: string[];
  try {
    names = readdirSync(channelsDir);
  } catch (err) {
    logError(transportRoot, `discoverChannels readdir failed on ${channelsDir}: ${(err as Error).message}`);
    return [];
  }

  const isDirectory = (name: string): boolean => {
    try {
      return statSync(join(channelsDir, name)).isDirectory();
    } catch {
      return false;
    }
  };

  const channels: string[] = [];
  for (const name of names) {
    if (isDirectory(name)) channels.push(name);
  }
  return channels;
}
|
|
185
|
+
|
|
186
|
+
/**
 * v7 frontmatter contract (see transport/CROSSTALK.md §Messages):
 *   required: from (string), to (string or array of strings), timestamp (string)
 *   optional: re, as, type ('workflow' only), failed, error, child_channel
 *
 * v6 required `type: text` on every message; v7 omits `type:` on regular
 * messages. Readers must ignore UNKNOWN fields, but known fields with
 * invalid values are protocol violations and are rejected at parse time.
 * This catches LLM models that revert to v6 muscle memory (writing
 * `type: text` in hand-crafted frontmatter) — a real failure mode surfaced
 * by the S10 fan-out workflow test.
 *
 * An array `to` is accepted only when every element is a string; an array
 * holding other values is an invalid value for a known field.
 *
 * @param data Parsed frontmatter of one message.
 * @returns true when the required fields are present with valid types and
 *          `type`, if present, is `'workflow'`.
 */
function isValidMessageFrontmatter(data: Record<string, unknown>): boolean {
  if (typeof data['from'] !== 'string') return false;

  const to = data['to'];
  const toIsValid =
    typeof to === 'string' ||
    (Array.isArray(to) && to.every((recipient) => typeof recipient === 'string'));
  if (!toIsValid) return false;

  if (typeof data['timestamp'] !== 'string') return false;

  // `type:` is optional; only 'workflow' is valid in v7.
  const type = data['type'];
  return type === undefined || type === 'workflow';
}
|
|
204
|
+
|
|
205
|
+
/**
 * Recursively collect every message file of one channel.
 *
 * A message file is any `*.md` file under
 * `<transportRoot>/data/channels/<channelUuid>` except `CHANNEL.md`.
 * Files whose frontmatter fails to parse, or fails
 * `isValidMessageFrontmatter`, are logged through `logError` and skipped.
 *
 * Filesystem errors on a single directory or file (for example an entry
 * that disappears between listing and reading) are logged and skipped too,
 * so one unreadable entry does not abort the whole listing.
 *
 * @param transportRoot Root of the transport repository.
 * @param channelUuid   Channel directory name.
 * @returns Messages sorted by `relPath`; empty when the channel directory
 *          does not exist.
 */
export function listChannelMessages(transportRoot: string, channelUuid: string): ChannelMessage[] {
  const channelDir = join(transportRoot, 'data', 'channels', channelUuid);
  if (!existsSync(channelDir)) return [];

  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));
  const results: ChannelMessage[] = [];

  const walk = (dir: string, prefix: string): void => {
    let entries: string[];
    try {
      entries = readdirSync(dir);
    } catch (err) {
      logError(transportRoot, `listChannelMessages readdir failed on ${dir}: ${describe(err)}`);
      return;
    }
    for (const entry of entries) {
      const full = join(dir, entry);
      // relPath always uses '/', independent of the platform separator.
      const rel = prefix ? `${prefix}/${entry}` : entry;
      let stat;
      try { stat = statSync(full); } catch { continue; }
      if (stat.isDirectory()) {
        walk(full, rel);
        continue;
      }
      if (!entry.endsWith('.md') || entry === 'CHANNEL.md') continue;

      let raw: string;
      try {
        raw = readFileSync(full, 'utf-8');
      } catch (err) {
        logError(transportRoot, `message read failed in ${channelUuid}/${rel}: ${describe(err)}`);
        continue;
      }
      let parsed: ReturnType<typeof parseFrontmatter>;
      try {
        parsed = parseFrontmatter(raw);
      } catch (err) {
        logError(transportRoot, `frontmatter parse failed in ${channelUuid}/${rel}: ${describe(err)}`);
        continue;
      }
      if (!isValidMessageFrontmatter(parsed.data)) {
        // Covers both missing/mistyped required fields and an invalid `type`.
        logError(transportRoot, `invalid message frontmatter in ${channelUuid}/${rel}: required field(s) (from, to, timestamp) missing or mistyped, or unsupported 'type' value`);
        continue;
      }
      results.push({ relPath: rel, fullPath: full, data: parsed.data, body: parsed.body });
    }
  };

  walk(channelDir, '');
  return results.sort((a, b) => a.relPath.localeCompare(b.relPath));
}
|
package/src/workflow.ts
ADDED
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
// workflow.ts — runtime workflow executor.
|
|
2
|
+
//
|
|
3
|
+
// A workflow marker is a `type: workflow` message addressed to the reserved
|
|
4
|
+
// recipient `workflow`. The runtime processes it directly: no model wakes
|
|
5
|
+
// for it via the activation loop. The persona-orchestrator pattern from
|
|
6
|
+
// pre-alpha v7 is gone — workflows are now compile-pass + deterministic
|
|
7
|
+
// runtime execution, so every supported agent CLI works regardless of
|
|
8
|
+
// model's instruction-following discipline.
|
|
9
|
+
//
|
|
10
|
+
// One phase advances per dispatcher tick. State is derived from files
|
|
11
|
+
// each tick — no in-memory workflow registry, no recovery logic needed
|
|
12
|
+
// across dispatcher restarts.
|
|
13
|
+
//
|
|
14
|
+
// 1. compile — invoke the first claimed model with COMPILE_PROMPT
|
|
15
|
+
// and the workflow body. Parse JSON plan. Save to
|
|
16
|
+
// data/channels/<child>/PLAN.json, or fail the workflow.
|
|
17
|
+
// 2. fanout — write `plan.fanout.count` sub-primitives into the
|
|
18
|
+
// child channel addressed to `plan.fanout.to`.
|
|
19
|
+
// 3. synthesize — once all fanout replies are in, write one synthesis
|
|
20
|
+
// sub-primitive into the child channel with body =
|
|
21
|
+
// plan.synthesize.body + the concatenated reply bodies.
|
|
22
|
+
// 4. route — once the synthesis reply lands, write a final reply
|
|
23
|
+
// in the PARENT channel addressed to the workflow
|
|
24
|
+
// marker's `from:`, re:-linked to the marker. Drop a
|
|
25
|
+
// COMPLETE side file so future ticks skip this marker.
|
|
26
|
+
|
|
27
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
|
|
28
|
+
import { join } from 'path';
|
|
29
|
+
import { now, messageFilename } from './filenames.js';
|
|
30
|
+
import { serializeFrontmatter } from './frontmatter.js';
|
|
31
|
+
import { invokeModelCli } from './invoke.js';
|
|
32
|
+
import { listChannelMessages, type ChannelMessage } from './transport.js';
|
|
33
|
+
import { logError } from './state.js';
|
|
34
|
+
import { dispatchersForModel } from './dispatchers.js';
|
|
35
|
+
import type { ModelEntry } from './models.js';
|
|
36
|
+
|
|
37
|
+
/** Reserved recipient name that workflow markers are addressed to. */
export const WORKFLOW_RECIPIENT = 'workflow';

/** Compiled form of a workflow document; the shape `validatePlan` checks. */
export interface WorkflowPlan {
  /** Task `body` written `count` times into the child channel, addressed to model `to`. */
  fanout: { to: string; count: number; body: string };
  /** Instruction `body` for model `to`, which receives the fanout replies. */
  synthesize: { to: string; body: string };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Prompt handed to the compile model together with the workflow body.
 * It asks for a single JSON object matching the plan shape and ends with a
 * newline after the final line.
 */
const COMPILE_PROMPT =
  'Compile the workflow markdown document below into a single JSON object\n' +
  'with this exact shape:\n' +
  '\n' +
  '{"fanout":{"to":"<model-name>","count":<integer 1-10>,"body":"<task>"},\n' +
  ' "synthesize":{"to":"<model-name>","body":"<instruction>"}}\n' +
  '\n' +
  'Field meaning:\n' +
  ' fanout.to the model name (from data/models.yaml) to fan out to\n' +
  ' fanout.count how many parallel workers, integer 1-10\n' +
  ' fanout.body the task each fanout worker performs\n' +
  ' synthesize.to the model name that picks/merges the fanout replies\n' +
  ' synthesize.body the instruction the synthesizer follows\n' +
  '\n' +
  'Output ONLY the JSON object. No prose, no markdown fences, no explanation.\n' +
  'The workflow document follows:\n';
|
|
62
|
+
|
|
63
|
+
/** Directory of one channel: `<transportRoot>/data/channels/<channelUuid>`. */
function channelDir(transportRoot: string, channelUuid: string): string {
  const segments = [transportRoot, 'data', 'channels', channelUuid];
  return join(...segments);
}

/** Location of the compiled plan (`PLAN.json`) inside a workflow's child channel. */
function planPath(transportRoot: string, childUuid: string): string {
  const dir = channelDir(transportRoot, childUuid);
  return join(dir, 'PLAN.json');
}

/** Location of the `COMPLETE` side file inside a workflow's child channel. */
function completePath(transportRoot: string, childUuid: string): string {
  const dir = channelDir(transportRoot, childUuid);
  return join(dir, 'COMPLETE');
}
|
|
74
|
+
|
|
75
|
+
/**
 * Type guard for a compiled workflow plan.
 *
 * Accepts an object with
 *  - `fanout.to`: non-empty string, `fanout.count`: integer from 1 to 10,
 *    `fanout.body`: string (may be empty);
 *  - `synthesize.to`: non-empty string, `synthesize.body`: string (may be empty).
 *
 * @param p Any value, typically the result of `JSON.parse`.
 * @returns true when `p` has the plan shape.
 */
export function validatePlan(p: unknown): p is WorkflowPlan {
  if (p === null || p === undefined || typeof p !== 'object') return false;

  const { fanout, synthesize } = p as {
    fanout?: Record<string, unknown>;
    synthesize?: Record<string, unknown>;
  };
  if (!fanout || !synthesize) return false;

  const isNonEmptyString = (v: unknown): boolean => typeof v === 'string' && v.length > 0;

  const count = fanout['count'];
  const countOk =
    typeof count === 'number' && Number.isInteger(count) && count >= 1 && count <= 10;

  return (
    isNonEmptyString(fanout['to']) &&
    countOk &&
    typeof fanout['body'] === 'string' &&
    isNonEmptyString(synthesize['to']) &&
    typeof synthesize['body'] === 'string'
  );
}
|
|
89
|
+
|
|
90
|
+
/**
 * Read the saved plan of a workflow from its child channel.
 *
 * @returns The plan, or null when `PLAN.json` is absent, unreadable, not
 *          valid JSON, or does not pass `validatePlan`.
 */
function loadPlan(transportRoot: string, childUuid: string): WorkflowPlan | null {
  const file = planPath(transportRoot, childUuid);
  if (!existsSync(file)) return null;

  let candidate: unknown;
  try {
    candidate = JSON.parse(readFileSync(file, 'utf-8'));
  } catch {
    // Unreadable or malformed file: treated the same as "no plan yet".
    return null;
  }
  return validatePlan(candidate) ? candidate : null;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Index of the `}` that closes the `{` at `start`, or -1 when the braces
 * never balance. Braces inside double-quoted strings are ignored, with
 * backslash escapes honoured, matching JSON string rules.
 */
function balancedObjectEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return i;
    }
  }
  return -1;
}

/**
 * Pull a workflow plan out of a model's stdout.
 *
 * Models occasionally wrap JSON in markdown fences or add a sentence around
 * the object despite the prompt's "JSON only" instruction. The whole output
 * is tried first; failing that, every `{` is taken as a possible start, the
 * balanced `{...}` block beginning there is parsed, and the first block that
 * passes `validatePlan` is returned. Scanning block by block means braces in
 * surrounding prose do not defeat the extraction.
 *
 * @param stdout Raw model output.
 * @returns The first valid plan found, or null.
 */
export function extractPlanFromOutput(stdout: string): WorkflowPlan | null {
  const text = stdout.trim();

  const tryParse = (candidate: string): WorkflowPlan | null => {
    try {
      const parsed: unknown = JSON.parse(candidate);
      return validatePlan(parsed) ? parsed : null;
    } catch {
      return null;
    }
  };

  const whole = tryParse(text);
  if (whole) return whole;

  for (let start = text.indexOf('{'); start !== -1; start = text.indexOf('{', start + 1)) {
    const end = balancedObjectEnd(text, start);
    if (end === -1) continue;
    const plan = tryParse(text.slice(start, end + 1));
    if (plan) return plan;
  }
  return null;
}
|
|
121
|
+
|
|
122
|
+
/** An open workflow, as collected by `findOpenWorkflows` from a marker message. */
export interface WorkflowMarker {
  /** Channel in which the marker message was found. */
  parentChannelUuid: string;
  /** Value of the marker's `child_channel` field. */
  childChannelUuid: string;
  /** `relPath` of the marker message inside the parent channel. */
  markerRelPath: string;
  /** The marker's `from:` value (`'unknown'` when it is not a string). */
  markerFrom: string;
  /** Body of the marker message, i.e. the workflow document. */
  body: string;
  /**
   * When set, only the dispatcher whose alias matches progresses this
   * workflow's phases. Markers without dispatch_host fall back to
   * race-based progression (acceptable for single-host transports).
   */
  dispatchHost?: string;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Collect every workflow marker that still needs runtime work.
 *
 * A marker is a message with `type: workflow` and a string `child_channel`.
 * It is skipped when:
 *  - the child channel already contains a `COMPLETE` side file;
 *  - it pins a `dispatch_host` that differs from `alias` (other dispatchers
 *    see the marker — it is a normal message — but leave the runtime work
 *    to its owner);
 *  - its `child_channel` is not a plain directory name. The value comes
 *    from message frontmatter and is joined into filesystem paths that are
 *    later written to, so empty values, `.`/`..` and anything containing a
 *    path separator are ignored rather than allowed to point outside
 *    `data/channels/`.
 *
 * @param transportRoot Root of the transport repository.
 * @param channels      Parent channels to scan.
 * @param alias         This dispatcher's alias; when omitted, no ownership
 *                      filtering is applied.
 * @returns Open markers, in channel order then message order.
 */
export function findOpenWorkflows(
  transportRoot: string,
  channels: string[],
  alias?: string,
): WorkflowMarker[] {
  const isPlainDirName = (name: string): boolean =>
    name !== '' && name !== '.' && name !== '..' && !/[\\/]/.test(name);

  const out: WorkflowMarker[] = [];
  for (const parentUuid of channels) {
    for (const m of listChannelMessages(transportRoot, parentUuid)) {
      if (m.data['type'] !== 'workflow') continue;

      const childUuid = m.data['child_channel'];
      if (typeof childUuid !== 'string' || !isPlainDirName(childUuid)) continue;
      if (existsSync(completePath(transportRoot, childUuid))) continue;

      const host = m.data['dispatch_host'];
      const dispatchHost = typeof host === 'string' ? host : undefined;
      if (dispatchHost && alias && dispatchHost !== alias) continue;

      const sender = m.data['from'];
      out.push({
        parentChannelUuid: parentUuid,
        childChannelUuid: childUuid,
        markerRelPath: m.relPath,
        markerFrom: typeof sender === 'string' ? sender : 'unknown',
        body: m.body,
        dispatchHost,
      });
    }
  }
  return out;
}
|
|
168
|
+
|
|
169
|
+
/** The message's `from:` value, or `'unknown'` when it is not a string. */
function senderOf(msg: ChannelMessage): string {
  const sender = msg.data['from'];
  if (typeof sender === 'string') return sender;
  return 'unknown';
}
|
|
172
|
+
|
|
173
|
+
/**
 * Messages in the child channel that this dispatcher's workflow runtime
 * wrote for the given phase: sender is `workflow@<alias>` and the
 * `workflow_phase` field equals `phase`.
 */
function workflowDispatches(
  transportRoot: string,
  childUuid: string,
  alias: string,
  phase: 'fanout' | 'synthesize',
): ChannelMessage[] {
  const runtimeSender = `workflow@${alias}`;
  const matches: ChannelMessage[] = [];
  for (const msg of listChannelMessages(transportRoot, childUuid)) {
    if (senderOf(msg) !== runtimeSender) continue;
    if (msg.data['workflow_phase'] !== phase) continue;
    matches.push(msg);
  }
  return matches;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Messages in the child channel whose `re:` field names at least one of
 * `targetRelPaths`. `re:` may be a single string or an array; non-string
 * array elements are ignored.
 */
function repliesTo(
  transportRoot: string,
  childUuid: string,
  targetRelPaths: string[],
): ChannelMessage[] {
  const targets = new Set(targetRelPaths);
  const replies: ChannelMessage[] = [];
  for (const msg of listChannelMessages(transportRoot, childUuid)) {
    const re = msg.data['re'];
    const refs: unknown[] = typeof re === 'string' ? [re] : Array.isArray(re) ? re : [];
    const hit = refs.some((ref) => typeof ref === 'string' && targets.has(ref));
    if (hit) replies.push(msg);
  }
  return replies;
}
|
|
198
|
+
|
|
199
|
+
/** Input of `writeRuntimeMessage`. */
interface WriteOpts {
  transportRoot: string;
  /** Channel the message file is written into. */
  channelUuid: string;
  /** Frontmatter `from:` value. */
  from: string;
  /** Frontmatter `to:` value. */
  to: string;
  /** Message body. */
  body: string;
  /** Written as `workflow_phase:` when set. */
  workflowPhase?: 'fanout' | 'synthesize';
  /** Written as `re:` when set (and non-empty). */
  re?: string;
  /** When set, the message carries `failed: true` and `error:` (first 2000 characters). */
  failed?: { error: string };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Write one runtime-authored message file into a channel.
 *
 * The file goes into `<channel>/<pathDate>/` (created if needed), under the
 * name `messageFilename` produces for the current timestamp. Frontmatter
 * always carries `from`, `to` and `timestamp`; `re`, `workflow_phase` and
 * `failed`/`error` are added only when the matching option is set.
 *
 * @returns The new file's path relative to the channel directory
 *          (`pathDate` joined with the filename).
 */
function writeRuntimeMessage(opts: WriteOpts): string {
  const { transportRoot, channelUuid, from, to, body, re, workflowPhase, failed } = opts;

  const stamp = now();
  const dayDir = join(channelDir(transportRoot, channelUuid), stamp.pathDate);
  mkdirSync(dayDir, { recursive: true });

  const frontmatter: Record<string, unknown> = { from, to, timestamp: stamp.iso };
  if (re) frontmatter['re'] = re;
  if (workflowPhase) frontmatter['workflow_phase'] = workflowPhase;
  if (failed) {
    frontmatter['failed'] = true;
    frontmatter['error'] = failed.error.slice(0, 2000);
  }

  const name = messageFilename(stamp);
  writeFileSync(join(dayDir, name), serializeFrontmatter(frontmatter, body));
  return join(stamp.pathDate, name);
}
|
|
229
|
+
|
|
230
|
+
/**
 * Drop the `COMPLETE` side file into the workflow's child channel so that
 * later ticks skip the marker. The file holds the completion time in ISO
 * format.
 *
 * The child channel directory is created first when it is missing.
 * Otherwise a marker pointing at a child channel that was never created
 * could not be closed: the write would throw and the same marker would be
 * picked up again on every tick.
 */
function markComplete(transportRoot: string, childUuid: string): void {
  mkdirSync(channelDir(transportRoot, childUuid), { recursive: true });
  writeFileSync(completePath(transportRoot, childUuid), new Date().toISOString() + '\n');
}
|
|
233
|
+
|
|
234
|
+
/**
 * Close a workflow as failed.
 *
 * Writes a `failed` reply into the PARENT channel — from `workflow@<alias>`,
 * addressed to the marker's sender, `re:`-linked to the marker, with
 * `error` as both body and error field — and then marks the child channel
 * complete.
 */
function failWorkflow(
  transportRoot: string,
  marker: WorkflowMarker,
  alias: string,
  error: string,
): void {
  const failureNotice: WriteOpts = {
    transportRoot,
    channelUuid: marker.parentChannelUuid,
    from: `workflow@${alias}`,
    to: marker.markerFrom,
    body: error,
    re: marker.markerRelPath,
    failed: { error },
  };
  writeRuntimeMessage(failureNotice);
  markComplete(transportRoot, marker.childChannelUuid);
}
|
|
251
|
+
|
|
252
|
+
/** Per-tick environment handed to `workflowTick`. */
export interface WorkflowTickContext {
  /** Root of the transport repository. */
  transportRoot: string;
  /** This dispatcher's alias; runtime messages are sent as `workflow@<alias>`. */
  alias: string;
  /** Models claimed by this dispatcher, looked up by the names a plan addresses. */
  claimed: Map<string, ModelEntry>;
  /** Structured event logger: event name plus optional fields. */
  log: (event: string, fields?: Record<string, unknown>) => void;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Advance every open workflow by at most one phase.
 *
 * A workflow whose step throws is logged, reported through
 * `ctx.log('workflow_crash', …)` and closed via `failWorkflow`. Two guards
 * keep one bad workflow from taking the whole tick down:
 *  - the thrown value is not assumed to be an `Error`;
 *  - a failure inside `failWorkflow` itself is logged instead of escaping,
 *    so the remaining workflows are still processed.
 *
 * @param ctx      Dispatcher context for this tick.
 * @param channels Parent channels to scan for workflow markers.
 * @returns true when at least one workflow changed state (or crashed)
 *          during this tick.
 */
export async function workflowTick(
  ctx: WorkflowTickContext,
  channels: string[],
): Promise<boolean> {
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  const open = findOpenWorkflows(ctx.transportRoot, channels, ctx.alias);
  if (open.length === 0) return false;

  let progressed = false;
  for (const marker of open) {
    try {
      const did = await advanceOne(ctx, marker);
      if (did) progressed = true;
    } catch (err) {
      const msg = describe(err);
      logError(ctx.transportRoot, `workflow ${marker.markerRelPath} crashed: ${msg}`);
      ctx.log('workflow_crash', { marker: marker.markerRelPath, error: msg.slice(0, 200) });
      // The crashed step or the failure notice may already have written
      // files, so report progress even if closing the workflow fails below.
      progressed = true;
      try {
        failWorkflow(ctx.transportRoot, marker, ctx.alias, `runtime error: ${msg.slice(0, 500)}`);
      } catch (closeErr) {
        logError(
          ctx.transportRoot,
          `workflow ${marker.markerRelPath} could not be closed after crash: ${describe(closeErr)}`,
        );
      }
    }
  }
  return progressed;
}
|
|
280
|
+
|
|
281
|
+
/**
 * Match replies to the dispatched messages they answer.
 *
 * Replies are visited in `relPath` order; a reply is kept when its `re:`
 * names at least one dispatched message that has no reply yet. Extra
 * replies to an already-answered message are dropped.
 *
 * @returns `answered`: how many of `targetRelPaths` have a reply;
 *          `replies`: the kept replies, in `relPath` order.
 */
function firstReplyPerTarget(
  targetRelPaths: string[],
  replies: ChannelMessage[],
): { answered: number; replies: ChannelMessage[] } {
  const pending = new Set(targetRelPaths);
  const total = pending.size;
  const kept: ChannelMessage[] = [];
  const ordered = replies.slice().sort((a, b) => a.relPath.localeCompare(b.relPath));
  for (const reply of ordered) {
    const re = reply.data['re'];
    const refs: unknown[] = typeof re === 'string' ? [re] : Array.isArray(re) ? re : [];
    let answersNewTarget = false;
    for (const ref of refs) {
      if (typeof ref === 'string' && pending.delete(ref)) answersNewTarget = true;
    }
    if (answersNewTarget) kept.push(reply);
  }
  return { answered: total - pending.size, replies: kept };
}

/**
 * Advance one workflow by at most one phase, deriving its current phase
 * from the files in the child channel:
 *
 *  1. compile    — no valid PLAN.json yet: invoke the first claimed model,
 *                  parse its output, save the plan (or fail the workflow).
 *  2. fanout     — no fanout dispatches yet: write `plan.fanout.count`
 *                  sub-tasks into the child channel.
 *  3. wait       — until `plan.fanout.count` DISTINCT fanout messages have a
 *                  reply. Duplicate replies to one sub-task (possible when
 *                  several dispatchers pick up a bare recipient) do not
 *                  count twice.
 *  4. synthesize — no synthesize dispatch yet: write one message carrying
 *                  the instruction plus one candidate per answered sub-task.
 *  5. wait       — until the synthesize message has a reply.
 *  6. route      — copy the first synthesis reply into the PARENT channel,
 *                  addressed to the marker's sender, and mark complete.
 *
 * @returns true when a phase was executed (files were written), false when
 *          the workflow is still waiting for replies.
 */
async function advanceOne(ctx: WorkflowTickContext, marker: WorkflowMarker): Promise<boolean> {
  const { transportRoot, alias, claimed } = ctx;
  const fromIdentity = `workflow@${alias}`;

  // Phase 1: compile.
  let plan = loadPlan(transportRoot, marker.childChannelUuid);
  if (!plan) {
    const firstClaimed = claimed.values().next().value as ModelEntry | undefined;
    if (!firstClaimed) {
      failWorkflow(transportRoot, marker, alias, 'no claimed model available to compile the workflow');
      ctx.log('workflow_compile_no_model', { marker: marker.markerRelPath });
      return true;
    }
    ctx.log('workflow_compile_start', {
      marker: marker.markerRelPath,
      compile_model: firstClaimed.name,
    });
    const result = await invokeModelCli(firstClaimed, COMPILE_PROMPT, marker.body, {});
    if (result.status !== 0) {
      failWorkflow(transportRoot, marker, alias,
        `compile model exit=${result.status}: ${result.stderr.slice(0, 500)}`);
      ctx.log('workflow_compile_failed', { marker: marker.markerRelPath, exit: result.status });
      return true;
    }
    const parsed = extractPlanFromOutput(result.stdout);
    if (!parsed) {
      failWorkflow(transportRoot, marker, alias,
        `could not parse workflow prose. Compiler returned:\n${result.stdout.slice(0, 800)}`);
      ctx.log('workflow_compile_invalid', { marker: marker.markerRelPath });
      return true;
    }
    // Fail fast if the compiled plan addresses a model this dispatcher
    // doesn't claim. The alternative is silently hanging on un-deliverable
    // sub-primitives.
    if (!claimed.has(parsed.fanout.to)) {
      failWorkflow(transportRoot, marker, alias,
        `compiled plan addresses model '${parsed.fanout.to}' for fanout — not claimed on this dispatcher`);
      return true;
    }
    if (!claimed.has(parsed.synthesize.to)) {
      failWorkflow(transportRoot, marker, alias,
        `compiled plan addresses model '${parsed.synthesize.to}' for synthesize — not claimed on this dispatcher`);
      return true;
    }
    // The child channel directory may not exist yet; create it so that
    // saving the plan cannot fail on a missing parent directory.
    mkdirSync(channelDir(transportRoot, marker.childChannelUuid), { recursive: true });
    writeFileSync(planPath(transportRoot, marker.childChannelUuid), JSON.stringify(parsed, null, 2) + '\n');
    plan = parsed;
    ctx.log('workflow_compiled', {
      marker: marker.markerRelPath,
      fanout: `${plan.fanout.to}x${plan.fanout.count}`,
      synthesize: plan.synthesize.to,
    });
    return true;
  }

  // Phase 2: fanout dispatch.
  //
  // Distribution: read the dispatcher registry to find every dispatcher
  // claiming the fanout model and round-robin the N sub-primitives across
  // them, scoping each as `to: <model>@<alias>`. Without this, bare
  // `to: <model>` dispatches get claimed by EVERY dispatcher (at-least-once
  // activation), turning a fan-out into N× duplicated work for 1×
  // throughput.
  //
  // Fallback: if the registry is empty (single-host with no dispatcher
  // publishing, or a transient race between dispatcher start and workflow
  // dispatch), use the bare recipient. On multi-host this degrades to
  // duplicate work with a logged warning rather than failing the workflow.
  const fanouts = workflowDispatches(transportRoot, marker.childChannelUuid, alias, 'fanout');
  if (fanouts.length === 0) {
    const candidates = dispatchersForModel(transportRoot, plan.fanout.to);
    const distribution = candidates.length === 0 ? null : candidates;
    if (distribution === null) {
      ctx.log('workflow_fanout_no_registry_fallback', {
        marker: marker.markerRelPath,
        model: plan.fanout.to,
      });
    }
    for (let i = 0; i < plan.fanout.count; i++) {
      const to = distribution === null
        ? plan.fanout.to
        : `${plan.fanout.to}@${distribution[i % distribution.length]}`;
      writeRuntimeMessage({
        transportRoot,
        channelUuid: marker.childChannelUuid,
        from: fromIdentity,
        to,
        body: plan.fanout.body,
        workflowPhase: 'fanout',
      });
    }
    ctx.log('workflow_fanout_dispatched', {
      marker: marker.markerRelPath,
      count: plan.fanout.count,
      to: plan.fanout.to,
      dispatchers: distribution ?? 'bare',
    });
    return true;
  }

  // Phase 3: wait for fanout replies. Failed replies count toward the total
  // so we don't stall on individual worker failures — the synthesizer sees
  // them as FAILED candidates and decides. Only one reply per sub-task is
  // counted, so duplicated work on one sub-task cannot mask a missing reply
  // on another.
  const fanoutRelPaths = fanouts.map((m) => m.relPath);
  const fanoutOutcome = firstReplyPerTarget(
    fanoutRelPaths,
    repliesTo(transportRoot, marker.childChannelUuid, fanoutRelPaths),
  );
  if (fanoutOutcome.answered < plan.fanout.count) {
    return false;
  }
  const fanoutReplies = fanoutOutcome.replies;

  // Phase 4: synthesize dispatch.
  //
  // Pin to one specific dispatcher — the first one returned by the
  // registry — so the synthesis (another model call, another routed reply)
  // is not duplicated. Falls back to the bare recipient on an empty
  // registry, accepting the duplication risk described for fanout above.
  const synthesizes = workflowDispatches(transportRoot, marker.childChannelUuid, alias, 'synthesize');
  if (synthesizes.length === 0) {
    const synthCandidates = dispatchersForModel(transportRoot, plan.synthesize.to);
    const synthTo = synthCandidates.length === 0
      ? plan.synthesize.to
      : `${plan.synthesize.to}@${synthCandidates[0]}`;
    // fanoutReplies is already in relPath order.
    const candidatesText = fanoutReplies
      .map((m, i) => {
        const failed = m.data['failed'] === true ? ' (FAILED)' : '';
        return `--- candidate ${i + 1}${failed} ---\n${m.body}`;
      })
      .join('\n\n');
    const synthBody = `${plan.synthesize.body}\n\n${candidatesText}`;
    writeRuntimeMessage({
      transportRoot,
      channelUuid: marker.childChannelUuid,
      from: fromIdentity,
      to: synthTo,
      body: synthBody,
      workflowPhase: 'synthesize',
    });
    ctx.log('workflow_synthesize_dispatched', {
      marker: marker.markerRelPath,
      to: synthTo,
      candidates: fanoutReplies.length,
    });
    return true;
  }

  // Phase 5: wait for synthesis reply.
  const synthRelPaths = synthesizes.map((m) => m.relPath);
  const synthReplies = repliesTo(transportRoot, marker.childChannelUuid, synthRelPaths);
  if (synthReplies.length === 0) return false;

  // Phase 6: route final reply back to the operator who launched the workflow.
  const finalSource = synthReplies
    .slice()
    .sort((a, b) => a.relPath.localeCompare(b.relPath))[0]!;
  const finalFailed = finalSource.data['failed'] === true;
  const finalError = finalSource.data['error'];
  writeRuntimeMessage({
    transportRoot,
    channelUuid: marker.parentChannelUuid,
    from: fromIdentity,
    to: marker.markerFrom,
    body: finalSource.body,
    re: marker.markerRelPath,
    failed: finalFailed
      ? { error: typeof finalError === 'string' ? finalError : 'synthesize failed' }
      : undefined,
  });
  markComplete(transportRoot, marker.childChannelUuid);
  ctx.log('workflow_complete', {
    marker: marker.markerRelPath,
    final_failed: finalFailed,
  });
  return true;
}
|