@cordfuse/crosstalk 5.0.0-alpha.6 → 5.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cordfuse/crosstalk",
3
- "version": "5.0.0-alpha.6",
3
+ "version": "5.0.0-alpha.7",
4
4
  "description": "Crosstalk runtime — async messaging between agents over git. The crosstalk CLI plus dispatch, send, attach, chat, and supporting tools.",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/src/dispatch.ts CHANGED
@@ -124,6 +124,59 @@ function recipients(toField: unknown): string[] {
124
124
  return [];
125
125
  }
126
126
 
127
+ // A `to:` recipient is either a bare actor name (`junior-developer`) or
128
+ // an actor@host pair (`junior-developer@cachy`). Bare names broadcast to
129
+ // every host that declares the actor; @host narrows to one host.
130
+ //
131
+ // Documented in concierge.md "Host-aware routing"; honored by the runtime
132
+ // as of alpha.7 step 1. Prior to this, the recipient string was matched
133
+ // verbatim against the actor name, so `junior-developer@cachy` never
134
+ // matched the cachy dispatcher's `junior-developer` actor declaration —
135
+ // the harness's first cross-host bug.
136
+ function extractActor(recipient: string): string {
137
+ const at = recipient.indexOf('@');
138
+ return at === -1 ? recipient : recipient.slice(0, at);
139
+ }
140
+
141
+ function targetHost(recipient: string): string | null {
142
+ const at = recipient.indexOf('@');
143
+ return at === -1 ? null : recipient.slice(at + 1);
144
+ }
145
+
146
+ // Does `recipientList` address `actorName` on `thisHost`? Returns the match
147
+ // outcome plus a flag for "actor was named but every instance targeted a
148
+ // different host" — useful as a diagnostic so silent wrong-host routes are
149
+ // logged rather than dropped without trace.
150
+ function matchHostRouting(
151
+ recipientList: string[],
152
+ actorName: string,
153
+ thisHost: string,
154
+ ): { addressed: boolean; wrongHost: boolean } {
155
+ let addressed = false;
156
+ let actorNamedAtAll = false;
157
+ for (const r of recipientList) {
158
+ if (extractActor(r) !== actorName) continue;
159
+ actorNamedAtAll = true;
160
+ const host = targetHost(r);
161
+ if (host === null || host === thisHost) {
162
+ addressed = true;
163
+ break;
164
+ }
165
+ }
166
+ return { addressed, wrongHost: !addressed && actorNamedAtAll };
167
+ }
168
+
169
+ // Host-agnostic actor name check, used by causality scans (isCausalReply,
170
+ // hasPriorWork) where the question is "does this recipient list name actor
171
+ // X at all?" — host doesn't matter because the `from` field of replies
172
+ // doesn't carry a host suffix either.
173
+ function namesActor(recipientList: string[], actorName: string): boolean {
174
+ for (const r of recipientList) {
175
+ if (extractActor(r) === actorName) return true;
176
+ }
177
+ return false;
178
+ }
179
+
127
180
  // Declared lifecycle kind for a message. `work` (default for legacy messages
128
181
  // without the field) is the as-tagged intent. The runtime does NOT trust this
129
182
  // value directly for the activation decision — see effectiveKind() below.
@@ -153,10 +206,23 @@ function isCausalReply(channelMessages: ChannelMessage[], msg: ChannelMessage):
153
206
  const toList = recipients(msg.data['to']);
154
207
  for (const m of channelMessages) {
155
208
  if (m.relPath >= msg.relPath) break;
209
+ // Read receipts are bookkeeping, never causal evidence. The activation
210
+ // scan already filters them out before considering a message for
211
+ // dispatch — this filter is the same guard at the causality-helper
212
+ // level, so a receipt from one of msg's recipients to msg's sender
213
+ // can't forge a false causal-reply edge (which would then demote a
214
+ // legitimate `work` to `result` and silently skip it). This was the
215
+ // alpha.7 step 2 finding from the cross-host harness — receipts
216
+ // pre-existing in the channel from cachy's first dispatch burst
217
+ // misclassified mac's subsequent fan-out msgs as replies.
218
+ if (m.data['type'] === 'read') continue;
156
219
  const mFrom = typeof m.data['from'] === 'string' ? m.data['from'] : '';
157
- if (!toList.includes(mFrom)) continue;
220
+ // Host-agnostic actor name match: `from` fields are bare actor names,
221
+ // but `to` fields may include `@host` suffixes that don't change
222
+ // causal semantics.
223
+ if (!namesActor(toList, mFrom)) continue;
158
224
  if ((m.data['kind'] ?? 'work') === 'result') continue;
159
- if (recipients(m.data['to']).includes(sender)) return true;
225
+ if (namesActor(recipients(m.data['to']), sender)) return true;
160
226
  }
161
227
  return false;
162
228
  }
@@ -197,10 +263,15 @@ function hasPriorWork(
197
263
  ): boolean {
198
264
  for (const m of channelMessages) {
199
265
  if (m.relPath >= before) break;
266
+ // Same receipt filter as isCausalReply — a receipt from `addressee`
267
+ // to `sender` would otherwise look like a prior work outbound and
268
+ // forge a false causal edge here too. Defense against the same
269
+ // bug class at every causality-walking helper.
270
+ if (m.data['type'] === 'read') continue;
200
271
  if (typeof m.data['from'] !== 'string' || m.data['from'] !== addressee) continue;
201
272
  if (effectiveKind(channelMessages, m) !== 'work') continue;
202
273
  const toList = recipients(m.data['to']);
203
- if (toList.includes(sender)) return true;
274
+ if (namesActor(toList, sender)) return true;
204
275
  }
205
276
  return false;
206
277
  }
@@ -630,7 +701,27 @@ async function dispatchTick(): Promise<TickResult> {
630
701
  const to = recipients(msg.data['to']);
631
702
  const from = typeof msg.data['from'] === 'string' ? msg.data['from'] : 'unknown';
632
703
  const msgType = typeof msg.data['type'] === 'string' ? msg.data['type'] : 'text';
633
- if (!to.includes(actorName) || from === actorName || msgType === 'read') {
704
+ // Host-aware routing match. A recipient may target this actor
705
+ // either by bare name (`junior-developer` — broadcast to every
706
+ // host that declares the actor) or by `actor@host` (narrowed to
707
+ // a specific host). Bare-name match always succeeds when the
708
+ // actor name matches; @host match succeeds only when the host
709
+ // alias also matches this dispatcher's host. A recipient that
710
+ // names this actor but targets a different host is flagged as
711
+ // `host_routing_mismatch` so silent wrong-host routes are
712
+ // surfaced rather than dropped without trace. See concierge.md
713
+ // "Host-aware routing" + PROTOCOL.md.
714
+ const routing = matchHostRouting(to, actorName, host.alias);
715
+ if (!routing.addressed || from === actorName || msgType === 'read') {
716
+ if (routing.wrongHost) {
717
+ log('host_routing_mismatch', {
718
+ actor: actorName,
719
+ this_host: host.alias,
720
+ channel: channelUuid.slice(0, 8),
721
+ msg: msg.relPath,
722
+ to,
723
+ });
724
+ }
634
725
  writeCursor(transportRoot, actorName, channelUuid, msg.relPath);
635
726
  continue;
636
727
  }
@@ -66,6 +66,14 @@ If you are unsure, ask. Better to ask once than to pollute an aggregate with bia
66
66
 
67
67
  **Second UAT worked example (PRNG-quality).** A subsequent 10-junior fan-out without PRNG guidance got 5/10 valid: instance 1 used a 16-bit-truncated LCG (π≈3.032, badly broken); instances 2/5/8 picked the same `a=1103515245 / 0x7fffffff` LCG and produced **identical** inside-counts from adjacent seeds (correlated streams); instance 9 picked a third biased option. After moving the PRNG requirement into the spec, the same 10-junior fan-out hit 10/10 valid (every instance used the prescribed mulberry32 with the prescribed seed formula). This is why this section exists.
68
68
 
69
+ ### Echo assigned identifiers verbatim
70
+
71
+ If an orchestrator assigns you an identifier — instance number, seed value, task token, anything specific — **echo it back exactly as given.** Do not paraphrase, renumber, substitute, or pick your own. If you were asked to be instance 8 with seed 8000024, your reply names instance 8 with seed 8000024.
72
+
73
+ This rule is for *your* honesty about your own identity. **The orchestrator does not depend on it.** Concierge (and any other orchestrator) reconciles fan-in by the relPath of the dispatched work message, not by what you write in your body — so a lie about your identifier doesn't break the system; it just makes the log harder to read and you look like an unreliable peer. The runtime is robust to peer mislabeling by design (the alpha.7 multi-host harness verified this), but reliable peers cost less to debug.
74
+
75
+ If you genuinely cannot tell what your assigned identifier was (e.g. the orchestrator's prompt was ambiguous), say so explicitly rather than invent one. Inventing an identifier and hoping the orchestrator sorts it out is the worst case.
76
+
69
77
  ## Available tools
70
78
 
71
79
  You have shell access. You can invoke these tools any time you decide they help with your reply. All of them run from the transport root (the current working directory). The tools are documented here so you can pick the right one from natural-language intent — e.g. "check what the dispatch state looks like" → `crosstalk status`.
@@ -198,6 +206,19 @@ Because kind is runtime-inferred, getting the declared field "wrong" rarely hurt
198
206
 
199
207
  A host file can declare `count: N` under an actor's tier. That means the dispatch loop may spawn up to N concurrent CLI invocations of that actor per tick — useful for fan-out workloads (e.g. 10 junior-developer instances processing 10 separate messages in parallel). You behave the same regardless of which slot you occupy.
200
208
 
209
+ ## Host-aware routing
210
+
211
+ When the transport is shared by multiple dispatchers on different hosts (each running its own `hosts/<alias>.md` declaration), the `to:` field accepts two forms:
212
+
213
+ - **Bare actor name** — `to: junior-developer`. Broadcast to every host whose host file declares this actor. Every matching dispatcher will wake an instance on every such message.
214
+ - **Actor@host** — `to: junior-developer@cachy`. Narrowed to the named host only. Only the dispatcher whose host file's `alias:` equals `cachy` will wake an instance; others see the message addressed to a different host and skip it.
215
+
216
+ The runtime parses each recipient by splitting on the first `@` — the part before it is the actor name, and the part after it (if present) is the target host alias. Causality scans (the `effectiveKind` / `hasPriorWork` activation logic) ignore the host suffix; only the actual addressing decision honors it.
217
+
218
+ If a dispatcher sees a message that names its actor but targets only other hosts, it logs `host_routing_mismatch` with the actor name, this host's alias, the channel, the message path, and the recipient list — so silent wrong-host drops are surfaced rather than disappearing without a trace.
219
+
220
+ **When to use which.** Use bare names for stateless work-pool patterns where any matching host is fine. Use `@host` when the orchestration depends on which machine runs the work (resource locality, host-specific state, validating cross-host behavior). Profile authors orchestrating fan-out across hosts should prefer `@host` so the topology is explicit in the message frontmatter.
221
+
201
222
  ## Failure handling and where to look
202
223
 
203
224
  There are two persistent failure logs in the transport:
@@ -119,7 +119,12 @@ When the peers reply, the runtime dispatches you a NEW turn. Use THAT turn to re
119
119
 
120
120
  - **Same channel.** Dispatch all N peers in the SAME channel as the original request. The runtime routes by `to:` field; you do not need a subchannel for isolation. Subchannels are for the operator's narrative organization (e.g. "weekly planning", "incident review"), not for orchestration topology. Creating a fan-out subchannel makes the cursor space sprawl and complicates aggregation.
121
121
  - **Peers reply to YOU, not to downstream consumers.** When you dispatch peers, include explicit reply-to guidance in each message body (e.g. "reply to concierge with your result; do NOT send your result to any other actor"). You are the collection point. If peers also send copies to a downstream aggregator, that aggregator will be re-dispatched once per peer message — wasting calls and producing redundant aggregations.
122
- - **Aggregate exactly once.** Wait until you've seen all N peer replies (across however many subsequent dispatches that takes; you can count by scanning the channel for messages from each peer addressed to you). Only THEN dispatch the aggregator (e.g. senior-software-engineer) in a SINGLE message containing the collected results. Never N messages, never one per peer reply.
122
+ - **Aggregate exactly once — reconcile by dispatched message identity, never by what the peer says.** Wait until you've received N replies, then dispatch the aggregator (e.g. senior-software-engineer) in a SINGLE message containing the collected results. Never N messages, never one per peer reply. **How you count "N replies" is load-bearing:**
123
+ - **Track the relPaths of the N work messages YOU dispatched.** When you call `crosstalk send --to <peer> --kind work ...` the tool prints `Sent: <relPath>` — those relPaths (e.g. `2026/06/09/123802614Z-5a16ec07.md`) are your dispatched-identity set. Scan the channel directory for them so you have a precise list of what you asked for.
124
+ - **Count replies by causal predecessor, not by peer-reported content.** A "reply to dispatch X" is a `type: text` message (never a `type: read` receipt) from one of X's recipients (host-agnostic match — `junior-developer` matches `to: junior-developer@cachy`) addressed back to you, and landing AFTER X in `relPath` order. The runtime's reply-causality fix from alpha.6 enforces this same notion at the activation level — you're applying the same reasoning at the application level.
125
+ - **Do NOT count by peer-reported seed, instance number, content fingerprint, or any other identifier the peer wrote in its body.** LLM peers will lie — they will report seeds you didn't assign, claim to be instance 8 while computing for seed 7000021, or echo what they think you wanted to hear. *"Would this still work if every peer lied about what it is?"* Yes — when reconciliation is by your dispatched relPath, not by what the peer claims about itself.
126
+ - **A peer that sends multiple replies counts as one** if you only dispatched it one work message — pick its latest causally-paired reply and discard the rest. The runtime can't dedupe these for you; you must.
127
+ - When the dispatched-relPath set is fully covered (every dispatched relPath has at least one causally-paired reply), aggregate. Until then, this dispatch's job is "wait" — exit and let the runtime re-dispatch you when more replies land.
123
128
  - **Forward the aggregator's final reply to the operator — explicitly, via `crosstalk send`.** When the aggregator replies to you, your stdout auto-reply goes back to the *aggregator*, NOT to the operator — so a stdout-only response means the operator never sees the answer. On the dispatch turn where you read the aggregator's final reply you MUST run `crosstalk send --to <original-requester> --kind result "<the aggregator's final answer, quoted in full>"` so the operator actually receives it. The original requester is the `from:` of the kickoff message that started this orchestration (e.g. `steve`). Do this, then exit. Delivering the final only as a reply to the aggregator is an orchestration failure — the operator asked the question and must get the answer.
124
129
 
125
130
  If you find yourself dispatching the aggregator multiple times for a single orchestration task, you have the topology wrong — peers must reply to you, you must collect, and you must dispatch the aggregator exactly once.