@cordfuse/crosstalk 5.0.0-alpha.7 → 6.0.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/crosstalk.js +34 -78
- package/package.json +4 -4
- package/src/activation.ts +104 -0
- package/src/attach.ts +1 -1
- package/src/channel.ts +8 -21
- package/src/chat.ts +52 -115
- package/src/dispatch.ts +265 -660
- package/src/dlq.ts +68 -136
- package/src/init.ts +17 -41
- package/src/open.ts +55 -31
- package/src/replies.ts +59 -0
- package/src/send.ts +48 -67
- package/src/state.ts +173 -0
- package/src/status.ts +18 -57
- package/src/stop.ts +37 -0
- package/src/transport.ts +68 -198
- package/src/turnq.ts +64 -32
- package/src/upgrade.ts +9 -11
- package/src/wake.ts +5 -6
- package/src/cursor.ts +0 -48
- package/template/.amazonq/rules/crosstalk.md +0 -2
- package/template/.continue/rules/crosstalk.md +0 -7
- package/template/.cursor/rules/crosstalk.mdc +0 -7
- package/template/.github/copilot-instructions.md +0 -2
- package/template/.windsurfrules +0 -2
- package/template/AGENTS.md +0 -2
- package/template/ANTIGRAVITY.md +0 -2
- package/template/CLAUDE.md +0 -2
- package/template/GEMINI.md +0 -2
- package/template/OPENCODE.md +0 -2
- package/template/QWEN.md +0 -2
- package/template/README.md +0 -22
- package/template/local/CROSSTALK.md +0 -4
- package/template/upstream/CROSSTALK-VERSION +0 -1
- package/template/upstream/CROSSTALK.md +0 -589
- package/template/upstream/JITTER.md +0 -24
- package/template/upstream/OPERATOR.md +0 -60
- package/template/upstream/PROTOCOL.md +0 -260
- package/template/upstream/actors/cloud-architect.md +0 -83
- package/template/upstream/actors/concierge.md +0 -130
- package/template/upstream/actors/devops-engineer.md +0 -83
- package/template/upstream/actors/documentation-engineer.md +0 -107
- package/template/upstream/actors/infrastructure-engineer.md +0 -83
- package/template/upstream/actors/junior-developer.md +0 -83
- package/template/upstream/actors/precise-generalist.md +0 -48
- package/template/upstream/actors/product-manager.md +0 -83
- package/template/upstream/actors/qa-engineer.md +0 -83
- package/template/upstream/actors/security-engineer.md +0 -92
- package/template/upstream/actors/senior-generalist-engineer.md +0 -111
- package/template/upstream/actors/senior-software-engineer.md +0 -94
- package/template/upstream/actors/skeptic.md +0 -89
- package/template/upstream/actors/technical-writer.md +0 -89
- package/template/upstream/actors/ux-designer.md +0 -83
package/src/dispatch.ts
CHANGED
|
@@ -1,30 +1,19 @@
|
|
|
1
|
+
// crosstalk dispatch — the loop.
|
|
2
|
+
//
|
|
3
|
+
// Tick: pull → for each local actor, scan channels for messages past the
|
|
4
|
+
// cursor → decideWake (activation.ts, the one rule) → invoke the actor's
|
|
5
|
+
// CLI per batch → write replies (re: linked per sender) → commit+push.
|
|
6
|
+
//
|
|
7
|
+
// Only the commit+push is locked, and the lock is advisory (turnq.ts) —
|
|
8
|
+
// git arbitrates correctness. Cursors, DLQ, heartbeat and the error log
|
|
9
|
+
// live in the machine-local state dir (state.ts), so a tick's commit only
|
|
10
|
+
// ever contains data/ and there is no self-inflicted git deadlock to heal.
|
|
11
|
+
|
|
1
12
|
import { resolve, join, dirname } from 'path';
|
|
2
13
|
import { spawn } from 'child_process';
|
|
3
|
-
import {
|
|
4
|
-
mkdirSync,
|
|
5
|
-
writeFileSync,
|
|
6
|
-
readFileSync,
|
|
7
|
-
existsSync,
|
|
8
|
-
appendFileSync,
|
|
9
|
-
openSync,
|
|
10
|
-
closeSync,
|
|
11
|
-
} from 'fs';
|
|
14
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, appendFileSync } from 'fs';
|
|
12
15
|
import { watch } from 'fs/promises';
|
|
13
16
|
import { fileURLToPath } from 'url';
|
|
14
|
-
|
|
15
|
-
// Read runtime version from the installed package's package.json at startup
|
|
16
|
-
// so dispatch_start logs and heartbeat content always match the actual
|
|
17
|
-
// installed @cordfuse/crosstalk version. Avoids hand-editing on every release.
|
|
18
|
-
const RUNTIME_VERSION: string = (() => {
|
|
19
|
-
try {
|
|
20
|
-
const thisFileDir = dirname(fileURLToPath(import.meta.url));
|
|
21
|
-
const pkgPath = join(thisFileDir, '..', 'package.json');
|
|
22
|
-
const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string };
|
|
23
|
-
return pkg.version ?? 'unknown';
|
|
24
|
-
} catch {
|
|
25
|
-
return 'unknown';
|
|
26
|
-
}
|
|
27
|
-
})();
|
|
28
17
|
import {
|
|
29
18
|
findHostFile,
|
|
30
19
|
loadActorProfile,
|
|
@@ -38,15 +27,33 @@ import {
|
|
|
38
27
|
listChannelMessages,
|
|
39
28
|
gitPull,
|
|
40
29
|
gitCommitAndPush,
|
|
41
|
-
|
|
42
|
-
|
|
30
|
+
cursorBaseline,
|
|
31
|
+
newFilesSince,
|
|
43
32
|
type ChannelMessage,
|
|
44
33
|
} from './transport.js';
|
|
45
|
-
import {
|
|
34
|
+
import {
|
|
35
|
+
stateDir,
|
|
36
|
+
readCursor,
|
|
37
|
+
writeCursor,
|
|
38
|
+
writeHeartbeat,
|
|
39
|
+
writePidfile,
|
|
40
|
+
removePidfile,
|
|
41
|
+
logError,
|
|
42
|
+
} from './state.js';
|
|
43
|
+
import { recipients, reList, decideWake, splitForConcurrency } from './activation.js';
|
|
46
44
|
import { now, messageFilename } from './filenames.js';
|
|
47
45
|
import { serializeFrontmatter } from './frontmatter.js';
|
|
48
46
|
import { withLock } from './turnq.js';
|
|
49
|
-
import { writeDlqEntry, isQuarantined
|
|
47
|
+
import { writeDlqEntry, isQuarantined } from './dlq.js';
|
|
48
|
+
|
|
49
|
+
const RUNTIME_VERSION: string = (() => {
|
|
50
|
+
try {
|
|
51
|
+
const pkgPath = join(dirname(fileURLToPath(import.meta.url)), '..', 'package.json');
|
|
52
|
+
return (JSON.parse(readFileSync(pkgPath, 'utf-8')) as { version?: string }).version ?? 'unknown';
|
|
53
|
+
} catch {
|
|
54
|
+
return 'unknown';
|
|
55
|
+
}
|
|
56
|
+
})();
|
|
50
57
|
|
|
51
58
|
const transportRoot = resolve(process.cwd());
|
|
52
59
|
const argv = process.argv.slice(2);
|
|
@@ -63,27 +70,9 @@ const hostOverride = flag('--host');
|
|
|
63
70
|
const pollSeconds = Number(flag('--poll')) || 30;
|
|
64
71
|
const logFile = flag('--log-file');
|
|
65
72
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const
|
|
69
|
-
const BACKOFF_GRACE = 2; // first N failures don't trigger backoff
|
|
70
|
-
|
|
71
|
-
// Per-tick heal: when N consecutive infra failures pile up, the dispatch
|
|
72
|
-
// loop is stuck in a deadlock that entrypoint's boot-time auto-recovery
|
|
73
|
-
// can't break (because dispatch is already running). At HEAL_THRESHOLD
|
|
74
|
-
// consecutive failures, attempt a `git fetch && reset --hard origin/<branch>
|
|
75
|
-
// && clean -fd` from inside the tick loop. Mirrors the entrypoint logic.
|
|
76
|
-
// Throttled — won't reattempt until fully BACKOFF_GRACE+HEAL_THRESHOLD more
|
|
77
|
-
// failures pile up after a heal, to avoid heal-loop-storms.
|
|
78
|
-
const HEAL_THRESHOLD = 5;
|
|
79
|
-
let lastHealAtFailureCount = 0;
|
|
80
|
-
|
|
81
|
-
// Stale-read-receipt sweep config — runs at most every SWEEP_INTERVAL_MS
|
|
82
|
-
// of wall-clock to surface read receipts that never produced a reply
|
|
83
|
-
// (indicates dispatch crashed mid-tick or CLI hung silently).
|
|
84
|
-
const SWEEP_INTERVAL_MS = 5 * 60_000;
|
|
85
|
-
const STALE_RECEIPT_THRESHOLD_MS = 5 * 60_000;
|
|
86
|
-
let lastSweepAt = 0;
|
|
73
|
+
const CLI_TIMEOUT_MS = 5 * 60_000;
|
|
74
|
+
const MAX_BACKOFF_MULTIPLIER = 10;
|
|
75
|
+
const BACKOFF_GRACE = 2;
|
|
87
76
|
|
|
88
77
|
function log(event: string, fields: Record<string, unknown> = {}): void {
|
|
89
78
|
let line: string;
|
|
@@ -101,185 +90,24 @@ function log(event: string, fields: Record<string, unknown> = {}): void {
|
|
|
101
90
|
}
|
|
102
91
|
}
|
|
103
92
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
93
|
+
// Config errors (bad host file, bad actor profile) repeat every tick until
|
|
94
|
+
// fixed — log each distinct one once per process run, not once per tick.
|
|
95
|
+
const loggedConfigErrors = new Set<string>();
|
|
96
|
+
function logConfigError(scope: string, message: string): void {
|
|
97
|
+
const key = `${scope}::${message}`;
|
|
98
|
+
if (loggedConfigErrors.has(key)) return;
|
|
99
|
+
loggedConfigErrors.add(key);
|
|
100
|
+
logError(transportRoot, 'parse', `${scope}: ${message}`);
|
|
101
|
+
log('config_error', { scope, message: message.slice(0, 200) });
|
|
111
102
|
}
|
|
112
103
|
|
|
113
|
-
|
|
104
|
+
const protocolPrompt = (() => {
|
|
114
105
|
const p = join(transportRoot, 'upstream', 'PROTOCOL.md');
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
const protocolPrompt = loadProtocolPrompt();
|
|
120
|
-
|
|
121
|
-
function recipients(toField: unknown): string[] {
|
|
122
|
-
if (Array.isArray(toField)) return toField.map(String);
|
|
123
|
-
if (typeof toField === 'string') return [toField];
|
|
124
|
-
return [];
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
// A `to:` recipient is either a bare actor name (`junior-developer`) or
|
|
128
|
-
// an actor@host pair (`junior-developer@cachy`). Bare names broadcast to
|
|
129
|
-
// every host that declares the actor; @host narrows to one host.
|
|
130
|
-
//
|
|
131
|
-
// Documented in concierge.md "Host-aware routing"; honored by the runtime
|
|
132
|
-
// as of alpha.7 step 1. Prior to this, the recipient string was matched
|
|
133
|
-
// verbatim against the actor name, so `junior-developer@cachy` never
|
|
134
|
-
// matched the cachy dispatcher's `junior-developer` actor declaration —
|
|
135
|
-
// the harness's first cross-host bug.
|
|
136
|
-
function extractActor(recipient: string): string {
|
|
137
|
-
const at = recipient.indexOf('@');
|
|
138
|
-
return at === -1 ? recipient : recipient.slice(0, at);
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
function targetHost(recipient: string): string | null {
|
|
142
|
-
const at = recipient.indexOf('@');
|
|
143
|
-
return at === -1 ? null : recipient.slice(at + 1);
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
// Does `recipientList` address `actorName` on `thisHost`? Returns the match
|
|
147
|
-
// outcome plus a flag for "actor was named but every instance targeted a
|
|
148
|
-
// different host" — useful as a diagnostic so silent wrong-host routes are
|
|
149
|
-
// logged rather than dropped without trace.
|
|
150
|
-
function matchHostRouting(
|
|
151
|
-
recipientList: string[],
|
|
152
|
-
actorName: string,
|
|
153
|
-
thisHost: string,
|
|
154
|
-
): { addressed: boolean; wrongHost: boolean } {
|
|
155
|
-
let addressed = false;
|
|
156
|
-
let actorNamedAtAll = false;
|
|
157
|
-
for (const r of recipientList) {
|
|
158
|
-
if (extractActor(r) !== actorName) continue;
|
|
159
|
-
actorNamedAtAll = true;
|
|
160
|
-
const host = targetHost(r);
|
|
161
|
-
if (host === null || host === thisHost) {
|
|
162
|
-
addressed = true;
|
|
163
|
-
break;
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
return { addressed, wrongHost: !addressed && actorNamedAtAll };
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// Host-agnostic actor name check, used by causality scans (isCausalReply,
|
|
170
|
-
// hasPriorWork) where the question is "does this recipient list name actor
|
|
171
|
-
// X at all?" — host doesn't matter because the `from` field of replies
|
|
172
|
-
// doesn't carry a host suffix either.
|
|
173
|
-
function namesActor(recipientList: string[], actorName: string): boolean {
|
|
174
|
-
for (const r of recipientList) {
|
|
175
|
-
if (extractActor(r) === actorName) return true;
|
|
176
|
-
}
|
|
177
|
-
return false;
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
// Declared lifecycle kind for a message. `work` (default for legacy messages
|
|
181
|
-
// without the field) is the as-tagged intent. The runtime does NOT trust this
|
|
182
|
-
// value directly for the activation decision — see effectiveKind() below.
|
|
183
|
-
// Kept for use as the seed of the effective-kind computation.
|
|
184
|
-
function messageKind(msg: ChannelMessage): 'work' | 'result' {
|
|
185
|
-
const raw = msg.data['kind'];
|
|
186
|
-
return raw === 'result' ? 'result' : 'work';
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
// Is `msg` causally a reply to a prior ask? True iff some message strictly
|
|
190
|
-
// before `msg` was sent FROM one of `msg`'s recipients TO `msg`'s sender with
|
|
191
|
-
// declared kind `work`. If so, `msg` is that recipient's answer coming back —
|
|
192
|
-
// regardless of how its sender (a fallible LLM actor, or `crosstalk send`'s
|
|
193
|
-
// `work` default) labelled it.
|
|
194
|
-
//
|
|
195
|
-
// Conservative on multi-recipient `to:` lists: if ANY recipient previously
|
|
196
|
-
// tasked the sender, the message is treated as causally a reply for all
|
|
197
|
-
// recipients. The per-addressee asymmetry in hasPriorWork (below) compensates
|
|
198
|
-
// — only the recipient that actually asked wakes on it. Known v1 limitation:
|
|
199
|
-
// genuine multi-recipient fan-out where one recipient happens to have prior
|
|
200
|
-
// unrelated work to the sender will be demoted to result and suppress wakes
|
|
201
|
-
// for the other recipients. Not observed in Monte Carlo; revisit if it
|
|
202
|
-
// surfaces.
|
|
203
|
-
function isCausalReply(channelMessages: ChannelMessage[], msg: ChannelMessage): boolean {
|
|
204
|
-
const sender = typeof msg.data['from'] === 'string' ? msg.data['from'] : '';
|
|
205
|
-
if (!sender) return false;
|
|
206
|
-
const toList = recipients(msg.data['to']);
|
|
207
|
-
for (const m of channelMessages) {
|
|
208
|
-
if (m.relPath >= msg.relPath) break;
|
|
209
|
-
// Read receipts are bookkeeping, never causal evidence. The activation
|
|
210
|
-
// scan already filters them out before considering a message for
|
|
211
|
-
// dispatch — this filter is the same guard at the causality-helper
|
|
212
|
-
// level, so a receipt from one of msg's recipients to msg's sender
|
|
213
|
-
// can't forge a false causal-reply edge (which would then demote a
|
|
214
|
-
// legitimate `work` to `result` and silently skip it). This was the
|
|
215
|
-
// alpha.7 step 2 finding from the cross-host harness — receipts
|
|
216
|
-
// pre-existing in the channel from cachy's first dispatch burst
|
|
217
|
-
// misclassified mac's subsequent fan-out msgs as replies.
|
|
218
|
-
if (m.data['type'] === 'read') continue;
|
|
219
|
-
const mFrom = typeof m.data['from'] === 'string' ? m.data['from'] : '';
|
|
220
|
-
// Host-agnostic actor name match: `from` fields are bare actor names,
|
|
221
|
-
// but `to` fields may include `@host` suffixes that don't change
|
|
222
|
-
// causal semantics.
|
|
223
|
-
if (!namesActor(toList, mFrom)) continue;
|
|
224
|
-
if ((m.data['kind'] ?? 'work') === 'result') continue;
|
|
225
|
-
if (namesActor(recipients(m.data['to']), sender)) return true;
|
|
226
|
-
}
|
|
227
|
-
return false;
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
// Effective lifecycle kind. The runtime INFERS kind from the causality graph
|
|
231
|
-
// rather than trusting the declared field: a message that is causally a reply
|
|
232
|
-
// is a `result` even if it was labelled `work` (actors routinely report
|
|
233
|
-
// results via `crosstalk send`, which defaults to `work`, and that mislabel
|
|
234
|
-
// forges false reply-causality edges → wake-up loops). Genuine unsolicited
|
|
235
|
-
// tasks (kickoffs, fresh dispatches) have no prior opposite-direction work
|
|
236
|
-
// and keep their `work` kind. See PROTOCOL.md "Message kinds".
|
|
237
|
-
//
|
|
238
|
-
// This is the load-bearing principle the rest of the activation rule rides
|
|
239
|
-
// on: the dispatcher derives semantics from the interaction graph; it never
|
|
240
|
-
// trusts an actor's declaration.
|
|
241
|
-
function effectiveKind(channelMessages: ChannelMessage[], msg: ChannelMessage): 'work' | 'result' {
|
|
242
|
-
if (messageKind(msg) === 'result') return 'result';
|
|
243
|
-
return isCausalReply(channelMessages, msg) ? 'result' : 'work';
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
// Reply causality — does `addressee` have a prior `kind: work` outbound to
|
|
247
|
-
// `sender` somewhere in the channel's history strictly before `before`? If
|
|
248
|
-
// yes, an inbound `kind: result` from `sender` to `addressee` is the answer
|
|
249
|
-
// to that ask, and the addressee should wake on it. If no, the result is
|
|
250
|
-
// unsolicited from addressee's POV and is informational only.
|
|
251
|
-
//
|
|
252
|
-
// Uses effectiveKind (not messageKind) when checking prior messages — a
|
|
253
|
-
// mislabeled "work" reply from a prior peer would otherwise forge a false
|
|
254
|
-
// causality edge here, which was the ping-pong root.
|
|
255
|
-
//
|
|
256
|
-
// The channel is already sorted by relPath ascending in
|
|
257
|
-
// listChannelMessages(), so the scan walks chronologically.
|
|
258
|
-
function hasPriorWork(
|
|
259
|
-
channelMessages: ChannelMessage[],
|
|
260
|
-
addressee: string,
|
|
261
|
-
sender: string,
|
|
262
|
-
before: string,
|
|
263
|
-
): boolean {
|
|
264
|
-
for (const m of channelMessages) {
|
|
265
|
-
if (m.relPath >= before) break;
|
|
266
|
-
// Same receipt filter as isCausalReply — a receipt from `addressee`
|
|
267
|
-
// to `sender` would otherwise look like a prior work outbound and
|
|
268
|
-
// forge a false causal edge here too. Defense against the same
|
|
269
|
-
// bug class at every causality-walking helper.
|
|
270
|
-
if (m.data['type'] === 'read') continue;
|
|
271
|
-
if (typeof m.data['from'] !== 'string' || m.data['from'] !== addressee) continue;
|
|
272
|
-
if (effectiveKind(channelMessages, m) !== 'work') continue;
|
|
273
|
-
const toList = recipients(m.data['to']);
|
|
274
|
-
if (namesActor(toList, sender)) return true;
|
|
275
|
-
}
|
|
276
|
-
return false;
|
|
277
|
-
}
|
|
106
|
+
return existsSync(p) ? readFileSync(p, 'utf-8').trim() : '';
|
|
107
|
+
})();
|
|
278
108
|
|
|
279
109
|
function composeSystemPrompt(actorPrompt: string): string {
|
|
280
|
-
return [protocolPrompt, actorPrompt]
|
|
281
|
-
.filter((p) => p.length > 0)
|
|
282
|
-
.join('\n\n---\n\n');
|
|
110
|
+
return [protocolPrompt, actorPrompt].filter((p) => p.length > 0).join('\n\n---\n\n');
|
|
283
111
|
}
|
|
284
112
|
|
|
285
113
|
function actorConcurrency(tiers: HostActorTiers): number {
|
|
@@ -291,6 +119,10 @@ function actorConcurrency(tiers: HostActorTiers): number {
|
|
|
291
119
|
return 1;
|
|
292
120
|
}
|
|
293
121
|
|
|
122
|
+
function messageSender(msg: ChannelMessage): string {
|
|
123
|
+
return typeof msg.data['from'] === 'string' ? (msg.data['from'] as string) : 'unknown';
|
|
124
|
+
}
|
|
125
|
+
|
|
294
126
|
interface CliResult {
|
|
295
127
|
status: number;
|
|
296
128
|
stdout: string;
|
|
@@ -301,7 +133,7 @@ function invokeCli(
|
|
|
301
133
|
cli: string,
|
|
302
134
|
systemPrompt: string,
|
|
303
135
|
userMessage: string,
|
|
304
|
-
|
|
136
|
+
env: Record<string, string>,
|
|
305
137
|
): Promise<CliResult> {
|
|
306
138
|
return new Promise((res) => {
|
|
307
139
|
const fullPrompt = `${systemPrompt}\n\n---\n\n${userMessage}`;
|
|
@@ -310,15 +142,13 @@ function invokeCli(
|
|
|
310
142
|
res({ status: 1, stdout: '', stderr: 'tokenized cli is empty' });
|
|
311
143
|
return;
|
|
312
144
|
}
|
|
313
|
-
// detached:
|
|
314
|
-
//
|
|
315
|
-
//
|
|
316
|
-
|
|
317
|
-
// the dispatched actor invokes `crosstalk send` without explicit --from.
|
|
318
|
-
const child = spawn(parts[0], parts.slice(1), {
|
|
145
|
+
// detached: new process group, so the timeout SIGKILL takes the actor's
|
|
146
|
+
// children with it — orphans writing to the transport after a timeout
|
|
147
|
+
// was an observed v5 hazard.
|
|
148
|
+
const child = spawn(parts[0]!, parts.slice(1), {
|
|
319
149
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
320
150
|
detached: true,
|
|
321
|
-
env: { ...process.env,
|
|
151
|
+
env: { ...process.env, ...env },
|
|
322
152
|
});
|
|
323
153
|
let stdout = '';
|
|
324
154
|
let stderr = '';
|
|
@@ -326,20 +156,14 @@ function invokeCli(
|
|
|
326
156
|
const timeout = setTimeout(() => {
|
|
327
157
|
if (resolved) return;
|
|
328
158
|
resolved = true;
|
|
329
|
-
// SIGKILL the process group (negative pid) so any children the actor
|
|
330
|
-
// spawned (e.g. crosstalk send subprocesses) die with the parent.
|
|
331
|
-
// Fallback to single-pid kill if the group signal fails (some envs).
|
|
332
159
|
try {
|
|
333
|
-
if (typeof child.pid === 'number')
|
|
334
|
-
|
|
335
|
-
} else {
|
|
336
|
-
child.kill('SIGKILL');
|
|
337
|
-
}
|
|
160
|
+
if (typeof child.pid === 'number') process.kill(-child.pid, 'SIGKILL');
|
|
161
|
+
else child.kill('SIGKILL');
|
|
338
162
|
} catch {
|
|
339
163
|
try { child.kill('SIGKILL'); } catch { /* already dead */ }
|
|
340
164
|
}
|
|
341
165
|
res({ status: 124, stdout, stderr: stderr + '\n[timeout]' });
|
|
342
|
-
},
|
|
166
|
+
}, CLI_TIMEOUT_MS);
|
|
343
167
|
child.stdout.on('data', (d) => { stdout += d.toString(); });
|
|
344
168
|
child.stderr.on('data', (d) => { stderr += d.toString(); });
|
|
345
169
|
child.on('close', (code) => {
|
|
@@ -354,158 +178,53 @@ function invokeCli(
|
|
|
354
178
|
clearTimeout(timeout);
|
|
355
179
|
res({ status: 1, stdout, stderr: stderr + '\n' + err.message });
|
|
356
180
|
});
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
child.stdin.on('error', () => { /* EPIPE/etc. — child closed stdin */ });
|
|
361
|
-
try {
|
|
362
|
-
child.stdin.write(fullPrompt);
|
|
363
|
-
} catch { /* same: child closed stdin before we could write */ }
|
|
364
|
-
try {
|
|
365
|
-
child.stdin.end();
|
|
366
|
-
} catch { /* ignore */ }
|
|
181
|
+
child.stdin.on('error', () => { /* child closed stdin */ });
|
|
182
|
+
try { child.stdin.write(fullPrompt); } catch { /* same */ }
|
|
183
|
+
try { child.stdin.end(); } catch { /* ignore */ }
|
|
367
184
|
});
|
|
368
185
|
}
|
|
369
186
|
|
|
370
187
|
function writeReply(
|
|
371
188
|
channelUuid: string,
|
|
372
189
|
fromActor: string,
|
|
373
|
-
toActor: string
|
|
190
|
+
toActor: string,
|
|
191
|
+
re: string | string[],
|
|
374
192
|
body: string,
|
|
375
193
|
): void {
|
|
376
194
|
const ts = now();
|
|
377
195
|
const dir = join(transportRoot, 'data', 'channels', channelUuid, ts.pathDate);
|
|
378
196
|
mkdirSync(dir, { recursive: true });
|
|
379
|
-
// Auto-replies emitted via stdout are `kind: result` by default — the actor
|
|
380
|
-
// is answering, not initiating new work. Recipients only wake on a result if
|
|
381
|
-
// they previously asked the sender for work in this channel (reply
|
|
382
|
-
// causality, see activation rule below). Actors that want to dispatch new
|
|
383
|
-
// work do so explicitly via `crosstalk send --kind work`.
|
|
384
197
|
const content = serializeFrontmatter(
|
|
385
|
-
{ from: fromActor, to: toActor, type: 'text',
|
|
198
|
+
{ from: fromActor, to: toActor, type: 'text', timestamp: ts.iso, re },
|
|
386
199
|
body,
|
|
387
200
|
);
|
|
388
201
|
writeFileSync(join(dir, messageFilename(ts)), content);
|
|
389
202
|
}
|
|
390
203
|
|
|
391
|
-
function writeReadReceipt(
|
|
392
|
-
channelUuid: string,
|
|
393
|
-
fromActor: string,
|
|
394
|
-
toActor: string,
|
|
395
|
-
ref: string,
|
|
396
|
-
): void {
|
|
397
|
-
const ts = now();
|
|
398
|
-
const dir = join(transportRoot, 'data', 'channels', channelUuid, ts.pathDate);
|
|
399
|
-
mkdirSync(dir, { recursive: true });
|
|
400
|
-
const content = serializeFrontmatter(
|
|
401
|
-
{ from: fromActor, to: toActor, type: 'read', ref, timestamp: ts.iso },
|
|
402
|
-
'',
|
|
403
|
-
);
|
|
404
|
-
writeFileSync(join(dir, messageFilename(ts)), content);
|
|
405
|
-
}
|
|
406
|
-
|
|
407
|
-
interface PendingDispatch {
|
|
408
|
-
actorName: string;
|
|
409
|
-
channelUuid: string;
|
|
410
|
-
msgs: ChannelMessage[]; // all unread messages addressed to this actor in this channel
|
|
411
|
-
tiers: HostActorTiers;
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
function messageSender(msg: ChannelMessage): string {
|
|
415
|
-
return typeof msg.data['from'] === 'string' ? msg.data['from'] : 'unknown';
|
|
416
|
-
}
|
|
417
|
-
|
|
418
204
|
function formatBatchedUserMessage(msgs: ChannelMessage[]): string {
|
|
419
|
-
if (msgs.length === 1) return msgs[0]
|
|
420
|
-
const
|
|
421
|
-
const parts: string[] = [header];
|
|
205
|
+
if (msgs.length === 1) return msgs[0]!.body;
|
|
206
|
+
const parts = [`You have ${msgs.length} new messages in this channel. Process them collectively and reply once.`];
|
|
422
207
|
for (let i = 0; i < msgs.length; i++) {
|
|
423
|
-
const m = msgs[i]
|
|
424
|
-
const
|
|
425
|
-
|
|
426
|
-
parts.push(`--- Message ${i + 1} of ${msgs.length} (from: ${from}, ref: ${m.relPath}${ts ? `, ts: ${ts}` : ''}) ---`);
|
|
208
|
+
const m = msgs[i]!;
|
|
209
|
+
const ts = typeof m.data['timestamp'] === 'string' ? `, ts: ${m.data['timestamp']}` : '';
|
|
210
|
+
parts.push(`--- Message ${i + 1} of ${msgs.length} (from: ${messageSender(m)}, ref: ${m.relPath}${ts}) ---`);
|
|
427
211
|
parts.push(m.body);
|
|
428
212
|
}
|
|
429
213
|
return parts.join('\n\n');
|
|
430
214
|
}
|
|
431
215
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
//
|
|
438
|
-
// When pending fits within concurrency, every batch is a single message
|
|
439
|
-
// (preserves parallel fan-out — junior-developer with count: 10 and 10
|
|
440
|
-
// pending fan-out messages dispatches 10 parallel CLI invocations of 1
|
|
441
|
-
// message each). When pending exceeds concurrency, batches collapse pending
|
|
442
|
-
// into ~concurrency parallel invocations, each handling ceil(N/concurrency)
|
|
443
|
-
// messages (preserves the fan-in collapse — concierge with count: 1 and 10
|
|
444
|
-
// pending replies dispatches 1 invocation of 10 messages).
|
|
445
|
-
function splitForConcurrency(
|
|
446
|
-
msgs: ChannelMessage[],
|
|
447
|
-
concurrency: number,
|
|
448
|
-
): ChannelMessage[][] {
|
|
449
|
-
if (concurrency <= 1 || msgs.length <= 1) return [msgs];
|
|
450
|
-
const chunkSize = Math.max(1, Math.ceil(msgs.length / concurrency));
|
|
451
|
-
const out: ChannelMessage[][] = [];
|
|
452
|
-
for (let i = 0; i < msgs.length; i += chunkSize) {
|
|
453
|
-
out.push(msgs.slice(i, i + chunkSize));
|
|
454
|
-
}
|
|
455
|
-
return out;
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
function distinctSenders(msgs: ChannelMessage[]): string[] {
|
|
459
|
-
const seen = new Set<string>();
|
|
460
|
-
const out: string[] = [];
|
|
461
|
-
for (const m of msgs) {
|
|
462
|
-
const s = messageSender(m);
|
|
463
|
-
if (s !== 'unknown' && !seen.has(s)) {
|
|
464
|
-
seen.add(s);
|
|
465
|
-
out.push(s);
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
return out;
|
|
216
|
+
interface PendingDispatch {
|
|
217
|
+
actorName: string;
|
|
218
|
+
channelUuid: string;
|
|
219
|
+
msgs: ChannelMessage[];
|
|
220
|
+
tiers: HostActorTiers;
|
|
469
221
|
}
|
|
470
222
|
|
|
471
223
|
async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
const firstMsg = p.msgs[0];
|
|
477
|
-
const lastMsg = p.msgs[p.msgs.length - 1];
|
|
478
|
-
const preferredTier = typeof firstMsg.data['tier'] === 'string'
|
|
479
|
-
? (firstMsg.data['tier'] as string)
|
|
480
|
-
: undefined;
|
|
481
|
-
let resolved;
|
|
482
|
-
try {
|
|
483
|
-
resolved = pickTier(p.tiers, preferredTier);
|
|
484
|
-
} catch (err) {
|
|
485
|
-
const r = writeDlqEntry(
|
|
486
|
-
transportRoot,
|
|
487
|
-
'config',
|
|
488
|
-
p.actorName,
|
|
489
|
-
'(config)',
|
|
490
|
-
'(config)',
|
|
491
|
-
`tier selection failed: ${(err as Error).message}`,
|
|
492
|
-
);
|
|
493
|
-
log('actor_config_error', {
|
|
494
|
-
actor: p.actorName,
|
|
495
|
-
dlq_id: r.id,
|
|
496
|
-
attempts: r.attempts,
|
|
497
|
-
quarantined: r.quarantined,
|
|
498
|
-
});
|
|
499
|
-
return false;
|
|
500
|
-
}
|
|
501
|
-
const cli = resolved.cli;
|
|
502
|
-
|
|
503
|
-
// Quarantine check uses the LAST message's relPath as the batch's identity.
|
|
504
|
-
// Per-message quarantine semantics are preserved because batch boundaries
|
|
505
|
-
// align with cursor checkpoints; if a single message in a batch keeps
|
|
506
|
-
// failing, the cursor never advances past it and it surfaces as a singleton
|
|
507
|
-
// batch on the next tick.
|
|
508
|
-
if (isQuarantined(transportRoot, 'dispatch', p.actorName, p.channelUuid, lastMsg.relPath)) {
|
|
224
|
+
const firstMsg = p.msgs[0]!;
|
|
225
|
+
const lastMsg = p.msgs[p.msgs.length - 1]!;
|
|
226
|
+
|
|
227
|
+
if (isQuarantined(transportRoot, p.actorName, p.channelUuid, lastMsg.relPath)) {
|
|
509
228
|
log('dispatch_skipped_quarantined', {
|
|
510
229
|
actor: p.actorName,
|
|
511
230
|
channel: p.channelUuid.slice(0, 8),
|
|
@@ -514,6 +233,17 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
514
233
|
return false;
|
|
515
234
|
}
|
|
516
235
|
|
|
236
|
+
const preferredTier = typeof firstMsg.data['tier'] === 'string' ? (firstMsg.data['tier'] as string) : undefined;
|
|
237
|
+
let cli: string;
|
|
238
|
+
let profile;
|
|
239
|
+
try {
|
|
240
|
+
cli = pickTier(p.tiers, preferredTier).cli;
|
|
241
|
+
profile = loadActorProfile(transportRoot, p.actorName);
|
|
242
|
+
} catch (err) {
|
|
243
|
+
logConfigError(`actor:${p.actorName}`, (err as Error).message);
|
|
244
|
+
return false;
|
|
245
|
+
}
|
|
246
|
+
|
|
517
247
|
log('dispatch', {
|
|
518
248
|
actor: p.actorName,
|
|
519
249
|
channel: p.channelUuid.slice(0, 8),
|
|
@@ -522,42 +252,22 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
522
252
|
last_msg: lastMsg.relPath,
|
|
523
253
|
});
|
|
524
254
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
transportRoot,
|
|
538
|
-
'config',
|
|
539
|
-
p.actorName,
|
|
540
|
-
'(config)',
|
|
541
|
-
'(config)',
|
|
542
|
-
`actor profile load failed: ${(err as Error).message}`,
|
|
543
|
-
);
|
|
544
|
-
log('dispatch_config_error', {
|
|
545
|
-
actor: p.actorName,
|
|
546
|
-
dlq_id: r.id,
|
|
547
|
-
attempts: r.attempts,
|
|
548
|
-
quarantined: r.quarantined,
|
|
549
|
-
});
|
|
550
|
-
return false;
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
const systemPrompt = composeSystemPrompt(profile.systemPrompt);
|
|
554
|
-
const userMessage = formatBatchedUserMessage(p.msgs);
|
|
555
|
-
const result = await invokeCli(cli, systemPrompt, userMessage, p.actorName);
|
|
255
|
+
const result = await invokeCli(
|
|
256
|
+
cli,
|
|
257
|
+
composeSystemPrompt(profile.systemPrompt),
|
|
258
|
+
formatBatchedUserMessage(p.msgs),
|
|
259
|
+
{
|
|
260
|
+
CROSSTALK_DISPATCH_ACTOR: p.actorName,
|
|
261
|
+
CROSSTALK_DISPATCH_CHANNEL: p.channelUuid,
|
|
262
|
+
// Every relPath in the batch — `crosstalk send` records them all as
|
|
263
|
+
// the reply's re: list, so batching never loses an answered message.
|
|
264
|
+
CROSSTALK_DISPATCH_RE: p.msgs.map((m) => m.relPath).join(','),
|
|
265
|
+
},
|
|
266
|
+
);
|
|
556
267
|
|
|
557
268
|
if (result.status !== 0) {
|
|
558
269
|
const r = writeDlqEntry(
|
|
559
270
|
transportRoot,
|
|
560
|
-
'dispatch',
|
|
561
271
|
p.actorName,
|
|
562
272
|
p.channelUuid,
|
|
563
273
|
lastMsg.relPath,
|
|
@@ -577,45 +287,26 @@ async function dispatchOne(p: PendingDispatch): Promise<boolean> {
|
|
|
577
287
|
|
|
578
288
|
const reply = result.stdout.trim();
|
|
579
289
|
if (reply.length === 0) {
|
|
580
|
-
//
|
|
581
|
-
//
|
|
582
|
-
//
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
if (p.msgs.length > 1) {
|
|
586
|
-
log('dispatch_batch_silent_ok', {
|
|
587
|
-
actor: p.actorName,
|
|
588
|
-
channel: p.channelUuid.slice(0, 8),
|
|
589
|
-
batch_size: p.msgs.length,
|
|
590
|
-
});
|
|
591
|
-
return true;
|
|
592
|
-
}
|
|
593
|
-
const r = writeDlqEntry(
|
|
594
|
-
transportRoot,
|
|
595
|
-
'dispatch',
|
|
596
|
-
p.actorName,
|
|
597
|
-
p.channelUuid,
|
|
598
|
-
lastMsg.relPath,
|
|
599
|
-
'cli returned empty reply',
|
|
600
|
-
);
|
|
601
|
-
log('dispatch_empty_reply', {
|
|
602
|
-
actor: p.actorName,
|
|
603
|
-
channel: p.channelUuid.slice(0, 8),
|
|
604
|
-
dlq_id: r.id,
|
|
605
|
-
attempts: r.attempts,
|
|
606
|
-
quarantined: r.quarantined,
|
|
607
|
-
});
|
|
608
|
-
return false;
|
|
290
|
+
// Legitimate: the actor routed its answer via `crosstalk send` (which
|
|
291
|
+
// auto-links re:). If it truly did nothing, the asker's `crosstalk
|
|
292
|
+
// replies` stays PENDING — visible, not silently lost.
|
|
293
|
+
log('dispatch_silent', { actor: p.actorName, channel: p.channelUuid.slice(0, 8), batch_size: p.msgs.length });
|
|
294
|
+
return true;
|
|
609
295
|
}
|
|
610
296
|
|
|
611
|
-
//
|
|
612
|
-
//
|
|
613
|
-
//
|
|
614
|
-
const
|
|
615
|
-
const
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
297
|
+
// One reply per distinct sender, re:-linked to EVERY message that sender
|
|
298
|
+
// had in the batch — the asker's activation rule fires, and `crosstalk
|
|
299
|
+
// replies` sees each individual message as answered.
|
|
300
|
+
const bySender = new Map<string, string[]>();
|
|
301
|
+
for (const m of p.msgs) {
|
|
302
|
+
const sender = messageSender(m);
|
|
303
|
+
bySender.set(sender, [...(bySender.get(sender) ?? []), m.relPath]);
|
|
304
|
+
}
|
|
305
|
+
bySender.delete('unknown');
|
|
306
|
+
if (bySender.size === 0) bySender.set(messageSender(firstMsg), [firstMsg.relPath]);
|
|
307
|
+
for (const [sender, relPaths] of bySender) {
|
|
308
|
+
writeReply(p.channelUuid, p.actorName, sender, relPaths.length === 1 ? relPaths[0]! : relPaths, reply);
|
|
309
|
+
}
|
|
619
310
|
return true;
|
|
620
311
|
}
|
|
621
312
|
|
|
@@ -625,278 +316,192 @@ interface TickResult {
|
|
|
625
316
|
}
|
|
626
317
|
|
|
627
318
|
async function dispatchTick(): Promise<TickResult> {
|
|
628
|
-
writeHeartbeat();
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
// gives operators full diagnostic info via stdout/json logs.
|
|
641
|
-
log('git_pull_failed', { error: pullResult.error.slice(0, 200) });
|
|
642
|
-
infraOk = false;
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
let host: HostFile;
|
|
646
|
-
try {
|
|
647
|
-
host = findHostFile(transportRoot, hostOverride);
|
|
648
|
-
} catch (err) {
|
|
649
|
-
const r = writeDlqEntry(
|
|
650
|
-
transportRoot,
|
|
651
|
-
'config',
|
|
652
|
-
'(host)',
|
|
653
|
-
'(config)',
|
|
654
|
-
'(config)',
|
|
655
|
-
`host file load failed: ${(err as Error).message}`,
|
|
656
|
-
);
|
|
657
|
-
log('tick_config_error', {
|
|
658
|
-
scope: 'host',
|
|
659
|
-
dlq_id: r.id,
|
|
660
|
-
attempts: r.attempts,
|
|
661
|
-
quarantined: r.quarantined,
|
|
662
|
-
});
|
|
663
|
-
return { didWork: false, infraOk };
|
|
664
|
-
}
|
|
319
|
+
writeHeartbeat(transportRoot, RUNTIME_VERSION);
|
|
320
|
+
let infraOk = true;
|
|
321
|
+
|
|
322
|
+
const pullResult = gitPull(transportRoot);
|
|
323
|
+
if (!pullResult.ok) {
|
|
324
|
+
// Skip the whole tick: a failed pull can leave origin/HEAD (the cursor
|
|
325
|
+
// baseline) ahead of the working tree, and scanning against that would
|
|
326
|
+
// advance cursors past messages that never materialized.
|
|
327
|
+
logError(transportRoot, 'git_pull', pullResult.error ?? 'unknown');
|
|
328
|
+
log('git_pull_failed', { error: (pullResult.error ?? '').slice(0, 200) });
|
|
329
|
+
return { didWork: false, infraOk: false };
|
|
330
|
+
}
|
|
665
331
|
|
|
666
|
-
|
|
332
|
+
let host: HostFile;
|
|
333
|
+
try {
|
|
334
|
+
host = findHostFile(transportRoot, hostOverride);
|
|
335
|
+
} catch (err) {
|
|
336
|
+
logConfigError('host', (err as Error).message);
|
|
337
|
+
return { didWork: false, infraOk };
|
|
338
|
+
}
|
|
667
339
|
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
340
|
+
// Cursors are commit hashes, not relPaths: filenames order by sender
|
|
341
|
+
// timestamp but arrive in push order, so a relPath cursor can advance
|
|
342
|
+
// past a slower writer's earlier-stamped message and lose it forever.
|
|
343
|
+
// "New since cursor" is asked of git, which records arrival truthfully.
|
|
344
|
+
const head = cursorBaseline(transportRoot);
|
|
345
|
+
if (!head) {
|
|
346
|
+
logError(transportRoot, 'other', 'git rev-parse failed for origin/HEAD and HEAD — skipping tick');
|
|
347
|
+
return { didWork: false, infraOk: false };
|
|
348
|
+
}
|
|
349
|
+
// diff results keyed by cursor commit (shared across actors on the same
|
|
350
|
+
// cursor); null = commit unknown to this clone -> full re-scan.
|
|
351
|
+
const addedSince = new Map<string, Set<string> | null>();
|
|
352
|
+
|
|
353
|
+
let didWork = false;
|
|
354
|
+
const channels = discoverChannels(transportRoot);
|
|
355
|
+
|
|
356
|
+
for (const actorName of Object.keys(host.actors)) {
|
|
357
|
+
const tiers = host.actors[actorName]!;
|
|
358
|
+
const concurrency = actorConcurrency(tiers);
|
|
359
|
+
const pending: PendingDispatch[] = [];
|
|
360
|
+
|
|
361
|
+
for (const channelUuid of channels) {
|
|
362
|
+
const cursor = readCursor(transportRoot, actorName, channelUuid);
|
|
363
|
+
if (cursor === head) continue;
|
|
364
|
+
|
|
365
|
+
// First encounter: seed to HEAD so only future messages are dispatched.
|
|
366
|
+
// Without this, a null cursor falls through to `post = messages` and
|
|
367
|
+
// replays the full channel history on every fresh-state boot.
|
|
368
|
+
if (cursor === null) {
|
|
369
|
+
writeCursor(transportRoot, actorName, channelUuid, head);
|
|
671
370
|
continue;
|
|
672
371
|
}
|
|
673
372
|
|
|
674
|
-
const
|
|
675
|
-
const
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
const channels = discoverChannels(transportRoot);
|
|
686
|
-
for (const channelUuid of channels) {
|
|
687
|
-
const cursor = readCursor(transportRoot, actorName, channelUuid);
|
|
688
|
-
const messages = listChannelMessages(transportRoot, channelUuid);
|
|
689
|
-
const post = cursor ? messages.filter((m) => m.relPath > cursor) : messages;
|
|
690
|
-
|
|
691
|
-
log('tick_scan', {
|
|
692
|
-
actor: actorName,
|
|
693
|
-
channel: channelUuid.slice(0, 8),
|
|
694
|
-
cursor: cursor ?? '(none)',
|
|
695
|
-
total_msgs: messages.length,
|
|
696
|
-
post_cursor_msgs: post.length,
|
|
697
|
-
});
|
|
698
|
-
|
|
699
|
-
const channelBatch: ChannelMessage[] = [];
|
|
700
|
-
for (const msg of post) {
|
|
701
|
-
const to = recipients(msg.data['to']);
|
|
702
|
-
const from = typeof msg.data['from'] === 'string' ? msg.data['from'] : 'unknown';
|
|
703
|
-
const msgType = typeof msg.data['type'] === 'string' ? msg.data['type'] : 'text';
|
|
704
|
-
// Host-aware routing match. A recipient may target this actor
|
|
705
|
-
// either by bare name (`junior-developer` — broadcast to every
|
|
706
|
-
// host that declares the actor) or by `actor@host` (narrowed to
|
|
707
|
-
// a specific host). Bare-name match always succeeds when the
|
|
708
|
-
// actor name matches; @host match succeeds only when the host
|
|
709
|
-
// alias also matches this dispatcher's host. A recipient that
|
|
710
|
-
// names this actor but targets a different host is flagged as
|
|
711
|
-
// `host_routing_mismatch` so silent wrong-host routes are
|
|
712
|
-
// surfaced rather than dropped without trace. See concierge.md
|
|
713
|
-
// "Host-aware routing" + PROTOCOL.md.
|
|
714
|
-
const routing = matchHostRouting(to, actorName, host.alias);
|
|
715
|
-
if (!routing.addressed || from === actorName || msgType === 'read') {
|
|
716
|
-
if (routing.wrongHost) {
|
|
717
|
-
log('host_routing_mismatch', {
|
|
718
|
-
actor: actorName,
|
|
719
|
-
this_host: host.alias,
|
|
720
|
-
channel: channelUuid.slice(0, 8),
|
|
721
|
-
msg: msg.relPath,
|
|
722
|
-
to,
|
|
723
|
-
});
|
|
724
|
-
}
|
|
725
|
-
writeCursor(transportRoot, actorName, channelUuid, msg.relPath);
|
|
726
|
-
continue;
|
|
727
|
-
}
|
|
728
|
-
// Lifecycle activation rule. `work` always wakes. `result` wakes
|
|
729
|
-
// only if reply-causal — actor previously sent the sender a `work`
|
|
730
|
-
// in this channel. The kind used here is the runtime's INFERRED
|
|
731
|
-
// effective kind, not the actor's declared kind: a message that's
|
|
732
|
-
// causally a reply is treated as `result` even when an actor (or
|
|
733
|
-
// `crosstalk send`'s default) labelled it `work`, so a fan-in peer
|
|
734
|
-
// mislabeling its reply can't forge a wake-up loop. See PROTOCOL.md
|
|
735
|
-
// "Message kinds".
|
|
736
|
-
const kind = effectiveKind(messages, msg);
|
|
737
|
-
if (kind === 'result' && !hasPriorWork(messages, actorName, from, msg.relPath)) {
|
|
738
|
-
writeCursor(transportRoot, actorName, channelUuid, msg.relPath);
|
|
739
|
-
continue;
|
|
740
|
-
}
|
|
741
|
-
channelBatch.push(msg);
|
|
742
|
-
}
|
|
743
|
-
if (channelBatch.length > 0) {
|
|
744
|
-
const groups = splitForConcurrency(channelBatch, concurrency);
|
|
745
|
-
for (const g of groups) {
|
|
746
|
-
pending.push({ actorName, channelUuid, msgs: g, tiers });
|
|
747
|
-
}
|
|
373
|
+
const messages = listChannelMessages(transportRoot, channelUuid);
|
|
374
|
+
const senderByRelPath = new Map(messages.map((m) => [m.relPath, messageSender(m)]));
|
|
375
|
+
const senderOf = (relPath: string) => senderByRelPath.get(relPath);
|
|
376
|
+
|
|
377
|
+
let added = addedSince.get(cursor);
|
|
378
|
+
if (added === undefined) {
|
|
379
|
+
const files = newFilesSince(transportRoot, cursor);
|
|
380
|
+
added = files === null ? null : new Set(files);
|
|
381
|
+
addedSince.set(cursor, added);
|
|
382
|
+
if (added === null) {
|
|
383
|
+
logError(transportRoot, 'other', `cursor commit ${cursor.slice(0, 12)} unknown to this clone — full channel re-scan`);
|
|
748
384
|
}
|
|
749
385
|
}
|
|
386
|
+
let post = messages;
|
|
387
|
+
if (added !== null) {
|
|
388
|
+
const prefix = `data/channels/${channelUuid}/`;
|
|
389
|
+
post = messages.filter((m) => added.has(prefix + m.relPath));
|
|
390
|
+
}
|
|
391
|
+
if (post.length === 0) {
|
|
392
|
+
writeCursor(transportRoot, actorName, channelUuid, head);
|
|
393
|
+
continue;
|
|
394
|
+
}
|
|
750
395
|
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
396
|
+
const batch: ChannelMessage[] = [];
|
|
397
|
+
for (const msg of post) {
|
|
398
|
+
if (msg.data['type'] !== 'text') continue;
|
|
399
|
+
const decision = decideWake(
|
|
400
|
+
{
|
|
401
|
+
from: messageSender(msg),
|
|
402
|
+
to: recipients(msg.data['to']),
|
|
403
|
+
re: reList(msg.data['re']),
|
|
404
|
+
},
|
|
405
|
+
actorName,
|
|
406
|
+
host.alias,
|
|
407
|
+
senderOf,
|
|
408
|
+
);
|
|
409
|
+
if (decision === 'wake') {
|
|
410
|
+
batch.push(msg);
|
|
411
|
+
} else if (decision === 'wrong-host') {
|
|
412
|
+
log('host_routing_mismatch', {
|
|
413
|
+
actor: actorName,
|
|
414
|
+
this_host: host.alias,
|
|
415
|
+
channel: channelUuid.slice(0, 8),
|
|
416
|
+
msg: msg.relPath,
|
|
417
|
+
to: recipients(msg.data['to']),
|
|
418
|
+
});
|
|
764
419
|
}
|
|
765
420
|
}
|
|
421
|
+
|
|
422
|
+
if (batch.length === 0) {
|
|
423
|
+
writeCursor(transportRoot, actorName, channelUuid, head);
|
|
424
|
+
continue;
|
|
425
|
+
}
|
|
426
|
+
for (const g of splitForConcurrency(batch, concurrency)) {
|
|
427
|
+
pending.push({ actorName, channelUuid, msgs: g, tiers });
|
|
428
|
+
}
|
|
766
429
|
}
|
|
767
430
|
|
|
768
|
-
//
|
|
769
|
-
//
|
|
770
|
-
//
|
|
771
|
-
//
|
|
772
|
-
//
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
const
|
|
779
|
-
|
|
780
|
-
// Same rationale as the pull case above: no writeErrorLog.
|
|
781
|
-
// Repeated push failures shouldn't flood errors/ since that
|
|
782
|
-
// contributes to the same git-deadlock-feedback that pull does.
|
|
783
|
-
const kind = pushResult.committed ? 'git_push' : 'git_commit';
|
|
784
|
-
log('git_push_failed', {
|
|
785
|
-
kind,
|
|
786
|
-
committed_locally: pushResult.committed,
|
|
787
|
-
error: pushResult.error.slice(0, 200),
|
|
788
|
-
});
|
|
789
|
-
infraOk = false;
|
|
431
|
+
// Waves of `concurrency` parallel CLI invocations. The cursor advances
|
|
432
|
+
// to the scanned commit whether each batch succeeded or DLQ'd —
|
|
433
|
+
// at-least-once was attempted; `crosstalk dlq --retry` rewinds the
|
|
434
|
+
// cursor explicitly. A crash mid-wave leaves the cursor behind, so the
|
|
435
|
+
// whole span replays next tick (at-least-once, never lost).
|
|
436
|
+
for (let i = 0; i < pending.length; i += concurrency) {
|
|
437
|
+
const wave = pending.slice(i, i + concurrency);
|
|
438
|
+
const results = await Promise.all(wave.map((p) => dispatchOne(p)));
|
|
439
|
+
if (results.some(Boolean)) didWork = true;
|
|
440
|
+
}
|
|
441
|
+
for (const p of pending) {
|
|
442
|
+
writeCursor(transportRoot, p.actorName, p.channelUuid, head);
|
|
790
443
|
}
|
|
444
|
+
}
|
|
791
445
|
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
}
|
|
446
|
+
if (didWork) {
|
|
447
|
+
const pushResult = await withLock(transportRoot, 'git', async () =>
|
|
448
|
+
gitCommitAndPush(transportRoot, `dispatch: replies ${new Date().toISOString()}`),
|
|
449
|
+
);
|
|
450
|
+
if (!pushResult.ok && pushResult.error) {
|
|
451
|
+
logError(transportRoot, pushResult.committed ? 'git_push' : 'git_commit', pushResult.error);
|
|
452
|
+
log('git_push_failed', { committed_locally: pushResult.committed, error: pushResult.error.slice(0, 200) });
|
|
453
|
+
infraOk = false;
|
|
799
454
|
}
|
|
455
|
+
}
|
|
800
456
|
|
|
801
|
-
|
|
802
|
-
});
|
|
457
|
+
return { didWork, infraOk };
|
|
803
458
|
}
|
|
804
459
|
|
|
805
|
-
async function waitForWakeOrTimeout(ms: number): Promise<
|
|
806
|
-
const
|
|
807
|
-
mkdirSync(wakeDir, { recursive: true });
|
|
460
|
+
async function waitForWakeOrTimeout(ms: number): Promise<void> {
|
|
461
|
+
const dir = stateDir(transportRoot);
|
|
808
462
|
const ac = new AbortController();
|
|
809
463
|
const timer = setTimeout(() => ac.abort(), ms);
|
|
810
464
|
try {
|
|
811
|
-
const watcher = watch(
|
|
465
|
+
const watcher = watch(dir, { signal: ac.signal });
|
|
812
466
|
for await (const ev of watcher) {
|
|
813
|
-
if (ev.filename === 'wake.signal')
|
|
814
|
-
clearTimeout(timer);
|
|
815
|
-
return 'wake';
|
|
816
|
-
}
|
|
467
|
+
if (ev.filename === 'wake.signal') return;
|
|
817
468
|
}
|
|
818
|
-
return 'timeout';
|
|
819
469
|
} catch {
|
|
820
|
-
|
|
470
|
+
/* abort = timeout */
|
|
821
471
|
} finally {
|
|
822
472
|
clearTimeout(timer);
|
|
823
473
|
}
|
|
824
474
|
}
|
|
825
475
|
|
|
826
476
|
async function main(): Promise<void> {
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
});
|
|
477
|
+
writePidfile(transportRoot);
|
|
478
|
+
const cleanup = () => removePidfile(transportRoot);
|
|
479
|
+
process.on('exit', cleanup);
|
|
480
|
+
process.on('SIGTERM', () => { cleanup(); process.exit(0); });
|
|
481
|
+
process.on('SIGINT', () => { cleanup(); process.exit(0); });
|
|
482
|
+
|
|
483
|
+
log('dispatch_start', { transport: transportRoot, version: RUNTIME_VERSION, state_dir: stateDir(transportRoot) });
|
|
832
484
|
if (onceMode) {
|
|
833
485
|
await dispatchTick();
|
|
834
|
-
|
|
486
|
+
process.exit(0);
|
|
835
487
|
}
|
|
836
|
-
log('
|
|
488
|
+
log('dispatch_running', { quiet_poll_s: pollSeconds });
|
|
837
489
|
|
|
838
490
|
let consecutiveInfraFailures = 0;
|
|
839
|
-
|
|
840
491
|
while (true) {
|
|
841
492
|
try {
|
|
842
493
|
const r = await dispatchTick();
|
|
843
494
|
if (r.infraOk) {
|
|
844
|
-
if (consecutiveInfraFailures > 0) {
|
|
845
|
-
log('backoff_cleared', { previous_consecutive_failures: consecutiveInfraFailures });
|
|
846
|
-
}
|
|
495
|
+
if (consecutiveInfraFailures > 0) log('backoff_cleared', { previous_failures: consecutiveInfraFailures });
|
|
847
496
|
consecutiveInfraFailures = 0;
|
|
848
497
|
} else {
|
|
849
498
|
consecutiveInfraFailures++;
|
|
850
499
|
}
|
|
851
|
-
|
|
852
|
-
// Backoff kicks in only after a grace period of failures.
|
|
853
500
|
const beyondGrace = Math.max(0, consecutiveInfraFailures - BACKOFF_GRACE);
|
|
854
501
|
const backoffFactor = Math.min(MAX_BACKOFF_MULTIPLIER, 2 ** beyondGrace);
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
log('backoff_active', {
|
|
858
|
-
consecutive_failures: consecutiveInfraFailures,
|
|
859
|
-
factor: backoffFactor,
|
|
860
|
-
});
|
|
502
|
+
if (backoffFactor > 1) {
|
|
503
|
+
log('backoff_active', { consecutive_failures: consecutiveInfraFailures, factor: backoffFactor });
|
|
861
504
|
}
|
|
862
|
-
|
|
863
|
-
// Per-tick heal: deadlock-break when the dispatch loop has been
|
|
864
|
-
// failing for HEAL_THRESHOLD consecutive ticks AND we haven't healed
|
|
865
|
-
// recently. Hard-resets the working tree to origin/<current branch>.
|
|
866
|
-
// Trades any uncommitted local state for forward progress — acceptable
|
|
867
|
-
// because messages/cursors/dlq are pulled back from origin and
|
|
868
|
-
// .turnq/errors are regenerated.
|
|
869
|
-
if (
|
|
870
|
-
consecutiveInfraFailures >= HEAL_THRESHOLD &&
|
|
871
|
-
consecutiveInfraFailures - lastHealAtFailureCount >= HEAL_THRESHOLD
|
|
872
|
-
) {
|
|
873
|
-
try {
|
|
874
|
-
const branchProc = spawn('git', ['rev-parse', '--abbrev-ref', 'HEAD'], {
|
|
875
|
-
cwd: transportRoot,
|
|
876
|
-
stdio: ['ignore', 'pipe', 'ignore'],
|
|
877
|
-
});
|
|
878
|
-
let branchName = '';
|
|
879
|
-
branchProc.stdout.on('data', (d) => { branchName += d.toString(); });
|
|
880
|
-
await new Promise<void>((res) => branchProc.on('close', () => res()));
|
|
881
|
-
const branch = branchName.trim() || 'main';
|
|
882
|
-
log('per_tick_heal_start', {
|
|
883
|
-
consecutive_failures: consecutiveInfraFailures,
|
|
884
|
-
target: `origin/${branch}`,
|
|
885
|
-
});
|
|
886
|
-
await new Promise<void>((res) => {
|
|
887
|
-
const p = spawn('sh', [
|
|
888
|
-
'-c',
|
|
889
|
-
`git rebase --abort 2>/dev/null; git fetch --quiet origin '${branch}' && git reset --hard --quiet 'origin/${branch}' && git clean -fdq`,
|
|
890
|
-
], { cwd: transportRoot, stdio: 'inherit' });
|
|
891
|
-
p.on('close', () => res());
|
|
892
|
-
});
|
|
893
|
-
log('per_tick_heal_done', { target: `origin/${branch}` });
|
|
894
|
-
lastHealAtFailureCount = consecutiveInfraFailures;
|
|
895
|
-
} catch (err) {
|
|
896
|
-
log('per_tick_heal_failed', { error: (err as Error).message });
|
|
897
|
-
}
|
|
898
|
-
}
|
|
899
|
-
|
|
900
505
|
if (r.didWork) {
|
|
901
506
|
await new Promise((res) => setTimeout(res, 1_000 * backoffFactor));
|
|
902
507
|
} else {
|
|
@@ -904,7 +509,7 @@ async function main(): Promise<void> {
|
|
|
904
509
|
}
|
|
905
510
|
} catch (err) {
|
|
906
511
|
const msg = (err as Error).message;
|
|
907
|
-
|
|
512
|
+
logError(transportRoot, 'other', `tick error: ${msg}`);
|
|
908
513
|
log('tick_error', { message: msg });
|
|
909
514
|
consecutiveInfraFailures++;
|
|
910
515
|
await new Promise((res) => setTimeout(res, pollSeconds * 1_000));
|