@pimote/pimote 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/README.md +43 -16
  2. package/client/build/_app/immutable/assets/0.C7loWTOC.css +2 -0
  3. package/client/build/_app/immutable/assets/2.D9fiCd8W.css +1 -0
  4. package/client/build/_app/immutable/chunks/BNqgidwO.js +5 -0
  5. package/client/build/_app/immutable/chunks/D26i4pYm.js +1 -0
  6. package/client/build/_app/immutable/chunks/D_Fpgknp.js +1 -0
  7. package/client/build/_app/immutable/chunks/DoVhjU85.js +1 -0
  8. package/client/build/_app/immutable/chunks/DzqbY2XU.js +1 -0
  9. package/client/build/_app/immutable/entry/{app.CNzpBgAg.js → app.DO-zgzyy.js} +2 -2
  10. package/client/build/_app/immutable/entry/start.BZlrOH0-.js +1 -0
  11. package/client/build/_app/immutable/nodes/0.BEh4bPGQ.js +10 -0
  12. package/client/build/_app/immutable/nodes/{1.B8zmHMre.js → 1.B2l9JGRO.js} +1 -1
  13. package/client/build/_app/immutable/nodes/2.ph9M0S1U.js +54 -0
  14. package/client/build/_app/version.json +1 -1
  15. package/client/build/index.html +7 -7
  16. package/package.json +7 -3
  17. package/server/dist/auto-drain-on-abort.js +49 -0
  18. package/server/dist/config.js +21 -0
  19. package/server/dist/extension-ui-bridge.js +14 -1
  20. package/server/dist/index.js +31 -1
  21. package/server/dist/message-mapper.js +38 -6
  22. package/server/dist/server.js +2 -2
  23. package/server/dist/session-manager.js +64 -2
  24. package/server/dist/voice/fsm/actions.js +6 -0
  25. package/server/dist/voice/fsm/events.js +7 -0
  26. package/server/dist/voice/fsm/reducer.js +74 -0
  27. package/server/dist/voice/fsm/reducers/lifecycle.js +146 -0
  28. package/server/dist/voice/fsm/reducers/streaming.js +220 -0
  29. package/server/dist/voice/fsm/reducers/walkback.js +73 -0
  30. package/server/dist/voice/fsm/state.js +21 -0
  31. package/server/dist/voice/fsm/text-extractor.js +128 -0
  32. package/server/dist/voice/index.js +319 -0
  33. package/server/dist/voice/interpreter-prompt.js +115 -0
  34. package/server/dist/voice/speechmux-client.js +153 -0
  35. package/server/dist/voice/state-machine.js +7 -0
  36. package/server/dist/voice/wait-for-idle.js +67 -0
  37. package/server/dist/voice/walk-back.js +198 -0
  38. package/server/dist/voice-orchestrator-boot.js +90 -0
  39. package/server/dist/voice-orchestrator.js +91 -0
  40. package/server/dist/ws-handler.js +108 -5
  41. package/shared/dist/index.d.ts +1 -0
  42. package/shared/dist/index.js +2 -0
  43. package/shared/dist/protocol.d.ts +614 -0
  44. package/shared/dist/protocol.js +30 -0
  45. package/client/build/_app/immutable/assets/0.DBrr7n4n.css +0 -2
  46. package/client/build/_app/immutable/assets/2.DE6k3bQj.css +0 -1
  47. package/client/build/_app/immutable/chunks/5vSSf6qG.js +0 -5
  48. package/client/build/_app/immutable/chunks/CT6ckxpD.js +0 -1
  49. package/client/build/_app/immutable/chunks/DlJOVoUQ.js +0 -1
  50. package/client/build/_app/immutable/chunks/YxmLwfhj.js +0 -1
  51. package/client/build/_app/immutable/chunks/yWVx3W2o.js +0 -1
  52. package/client/build/_app/immutable/entry/start.DYkTAHh1.js +0 -1
  53. package/client/build/_app/immutable/nodes/0.DNlQhEb_.js +0 -10
  54. package/client/build/_app/immutable/nodes/2.W9yV4-x2.js +0 -54
@@ -0,0 +1,198 @@
1
+ // Walkback rewrite: surgical truncation of conversation history when
2
+ // speechmux reports the user heard only a prefix of an assistant
3
+ // utterance.
4
+ //
5
+ // **Identity-based design.** Walkback targets a specific `speak()` tool
6
+ // call by its `toolCallId`. That id is round-tripped through speechmux
7
+ // (every outgoing `token`/`end` frame carries it; speechmux echoes it
8
+ // back on `rollback`/`abort`) so we know exactly which utterance the
9
+ // `heardText` belongs to. The previous design used a captured snapshot
10
+ // of the in-flight assistant message and a string-prefix-matching
11
+ // algorithm — both of which broke whenever a turn contained more than
12
+ // one speak() or whenever the snapshot drifted out of sync with the
13
+ // real conversation.
14
+ //
15
+ // **Contract:** see `docs/plans/voice-mode.md` for the high-level
16
+ // behavioural spec. Briefly:
17
+ //
18
+ // 1. The trailing pi-synthetic empty-text aborted assistant (if any)
19
+ // is always stripped, even when no rollback is pending. This is
20
+ // pi's marker for "agent run was aborted"; we don't want it in
21
+ // the LLM context.
22
+ //
23
+ // 2. With a rollback pending, locate the speak block by
24
+ // `targetSpeakToolCallId`. If found:
25
+ // - If `heardText` is empty: drop the speak block entirely (and
26
+ // its paired tool_result if present).
27
+ // - If `heardText.length >= block.text.length`: keep block as-is
28
+ // (whole utterance was heard).
29
+ // - Otherwise: replace the block's text with `heardText` and
30
+ // drop the paired tool_result.
31
+ // Then drop blocks AFTER the target in the same message, and drop
32
+ // any subsequent assistant/tool_result messages — none of those
33
+ // could have been heard if the user interrupted at the target.
34
+ //
35
+ // 3. If the target is not found in messages (e.g. compacted away),
36
+ // walkback is a no-op beyond step 1.
37
+ //
38
+ // **Content-block shape compatibility.** The function handles both
39
+ // pi-agent-core's internal AgentMessage shape (`type:'toolCall'` +
40
+ // `arguments`) and the Anthropic API shape (`type:'tool_use'` +
41
+ // `input`). Earlier versions only matched the latter, which silently
42
+ // failed on every real captured message.
43
/**
 * Apply walkback against `messages`. Pure function.
 *
 * Trailing aborted-empty assistant messages are always stripped. When a
 * rollback is pending, the history is additionally rewritten around the
 * `speak()` call named by `rollback.targetSpeakToolCallId`.
 *
 * @param {{ messages: object[], rollback?: { heardText: string, targetSpeakToolCallId: string } | null }} input
 * @returns {object[]} A new array; the input is never mutated.
 */
export function walkBack(input) {
    const stripped = stripTrailingAbortedEmpty(input.messages);
    // `== null` deliberately matches both null and undefined, so an omitted
    // `rollback` means "no rollback pending" instead of a TypeError below.
    if (input.rollback == null)
        return stripped;
    const { heardText, targetSpeakToolCallId } = input.rollback;
    return rewriteByToolCallId(stripped, heardText, targetSpeakToolCallId);
}
54
+ // ---------------------------------------------------------------------------
55
/**
 * True for the synthetic assistant pi appends to state on abort: an
 * assistant message whose stop reason is `'aborted'` and whose content
 * carries no text.
 *
 * @param {object} msg A conversation message.
 * @returns {boolean}
 */
export function isAbortedEmptyAssistant(msg) {
    if (!isAssistantMessage(msg))
        return false;
    if (stopReason(msg) !== 'aborted')
        return false;
    // `contentOf` maps any non-array content to `[]`, which would make an
    // aborted assistant with real *string* content look empty and get it
    // stripped. Judge string content by its own text instead.
    if (typeof msg.content === 'string')
        return msg.content.trim() === '';
    return isEmptyText(contentOf(msg));
}
63
/**
 * Return a copy of `messages` without the run of aborted-empty assistant
 * messages at its tail (if any). Never mutates the input.
 *
 * @param {object[]} messages Conversation history.
 * @returns {object[]} A fresh array, even when nothing was stripped.
 */
function stripTrailingAbortedEmpty(messages) {
    let cut = messages.length;
    while (cut > 0 && isAbortedEmptyAssistant(messages[cut - 1]))
        cut -= 1;
    // slice(0, length) is already a full copy, so no special case is
    // needed for "nothing stripped".
    return messages.slice(0, cut);
}
69
/**
 * Rewrite `messages` so the history reflects only what the user heard of
 * the `speak()` tool call identified by `targetId`.
 *
 * - Target not found: `messages` is returned unchanged (same reference).
 * - `heardText` empty: the speak block is dropped.
 * - `heardText` at least as long as the spoken text: the block is kept.
 * - Otherwise: the block's text is replaced with `heardText`.
 *
 * In every found case, blocks after the target in the same message and all
 * messages after the target message are dropped, and the target message is
 * marked `stopReason: 'aborted'`. If the rewritten target message is left
 * with no blocks (or only blank text), it is dropped as well, so the result
 * never ends in an aborted-empty assistant message.
 *
 * @param {object[]} messages Conversation history (not mutated).
 * @param {string} heardText Prefix of the utterance the user heard.
 * @param {string} targetId Tool-call id of the interrupted `speak()`.
 * @returns {object[]}
 */
function rewriteByToolCallId(messages, heardText, targetId) {
    const isTarget = (block) => isSpeakToolCall(block) && getToolCallId(block) === targetId;
    // Search from the back: the target is normally in the recent tail.
    let targetMsgIdx = -1;
    let targetBlockIdx = -1;
    for (let i = messages.length - 1; i >= 0 && targetMsgIdx === -1; i--) {
        const msg = messages[i];
        if (!isAssistantMessage(msg))
            continue;
        const blockIdx = contentOf(msg).findIndex(isTarget);
        if (blockIdx !== -1) {
            targetMsgIdx = i;
            targetBlockIdx = blockIdx;
        }
    }
    if (targetMsgIdx === -1) {
        // Target gone (compacted, or never landed in messages).
        return messages;
    }
    const targetMsg = messages[targetMsgIdx];
    const targetContent = contentOf(targetMsg);
    const targetBlock = targetContent[targetBlockIdx];
    // Blocks before the target are always kept; blocks after it are dropped.
    const newBlocks = targetContent.slice(0, targetBlockIdx);
    if (heardText.length === 0) {
        // Nothing was heard of this speak: the block is simply not re-added.
    }
    else if (heardText.length >= getSpeakText(targetBlock).length) {
        // Entire utterance was heard. Keep the block intact.
        newBlocks.push(targetBlock);
    }
    else {
        // Partial. Keep a copy of the block carrying only the heard prefix.
        newBlocks.push(replaceSpeakText(targetBlock, heardText));
    }
    // Everything after the target message is dropped by this slice; that
    // covers paired tool results and any later assistant messages.
    const earlier = messages.slice(0, targetMsgIdx);
    if (isEmptyText(newBlocks)) {
        // An aborted assistant with no text is what the trailing-strip step
        // removes from history, so don't manufacture one here.
        return earlier;
    }
    return [...earlier, { ...targetMsg, content: newBlocks, stopReason: 'aborted' }];
}
135
+ // ---------------------------------------------------------------------------
136
+ // Shape-tolerant accessors. pi-agent-core's runtime AgentMessage uses
137
+ // `toolCall`/`arguments`; the Anthropic API shape uses `tool_use`/`input`.
138
+ // Tests / tooling may pass either; we accept both.
139
/** True when `msg.role` is exactly `'assistant'`. */
function isAssistantMessage(msg) {
    const { role } = msg;
    return role === 'assistant';
}
142
/** Read the message's `stopReason` field (undefined when absent). */
function stopReason(msg) {
    const { stopReason: reason } = msg;
    return reason;
}
145
/**
 * Return the message's content blocks. Any non-array `content` (missing,
 * string, etc.) is reported as an empty list.
 */
function contentOf(msg) {
    const { content } = msg;
    if (!Array.isArray(content))
        return [];
    return content;
}
149
/**
 * True when `blocks` holds no substance: it is empty, or every block is a
 * `text` block whose text is a string of only whitespace.
 */
function isEmptyText(blocks) {
    const hasSubstance = (block) => {
        if (block.type !== 'text')
            return true;
        const { text } = block;
        return typeof text !== 'string' || text.trim() !== '';
    };
    return !blocks.some(hasSubstance);
}
159
/**
 * True when `block` is a tool-call block (either the `toolCall` or the
 * `tool_use` spelling) whose tool name is `speak`.
 */
export function isSpeakToolCall(block) {
    const { type, name } = block;
    const isToolCallShape = type === 'toolCall' || type === 'tool_use';
    return isToolCallShape && name === 'speak';
}
164
/** Return the block's `id` when it is a string; otherwise `undefined`. */
function getToolCallId(block) {
    const { id } = block;
    if (typeof id !== 'string')
        return undefined;
    return id;
}
168
/**
 * Extract the spoken text from a speak block. `arguments.text` is tried
 * first, then `input.text`; the first one that is a string wins. Returns
 * `''` when neither holds a string.
 */
function getSpeakText(block) {
    for (const payload of [block.arguments, block.input]) {
        if (payload && typeof payload === 'object' && typeof payload.text === 'string')
            return payload.text;
    }
    return '';
}
184
/**
 * Return a copy of `block` whose speak text is `text`. The input block is
 * not mutated.
 *
 * Whichever payload shape is present (`arguments` and/or `input`) is
 * preserved with only its `text` field replaced; the block is not
 * normalised to a single shape. When both payloads are present, both are
 * updated, so no copy of the untruncated text survives on the block. When
 * neither is present, both are set defensively.
 *
 * @param {object} block A speak tool-call block.
 * @param {string} text Replacement text.
 * @returns {object} New block object.
 */
function replaceSpeakText(block, text) {
    const { arguments: args, input } = block;
    const hasArgs = Boolean(args) && typeof args === 'object';
    const hasInput = Boolean(input) && typeof input === 'object';
    if (!hasArgs && !hasInput) {
        // Neither shape present: set both defensively.
        return { ...block, arguments: { text }, input: { text } };
    }
    const updated = { ...block };
    if (hasArgs)
        updated.arguments = { ...args, text };
    if (hasInput)
        updated.input = { ...input, text };
    return updated;
}
@@ -0,0 +1,90 @@
1
+ // Wire the VoiceOrchestrator together with its runtime dependencies at
2
+ // server boot time. Kept separate from `index.ts` so the wiring is
3
+ // testable (no network / child_process side-effects at import time) and
4
+ // isolated from the plain HTTP/WS boot sequence.
5
+ import { spawn } from 'node:child_process';
6
+ import { VoiceOrchestrator } from './voice-orchestrator.js';
7
/**
 * Construct a VoiceOrchestrator backed by real seams:
 *   - speechmux sidecar via `child_process.spawn`
 *   - displacement = looks up current owner via clientRegistry and calls its
 *     `sendDisplacedEvent(sessionId)`
 *
 * Auth on `/signal` is handled by Cloudflare Access at the edge, and
 * per-session TURN credentials are minted by speechmux and returned to the
 * PWA in its `/signal` `session` response. Pimote's orchestrator only
 * hands out the signalling URL.
 *
 * @param {{ config: object, sessionManager: object, clientRegistry: object }} args
 * @returns {{ orchestrator: VoiceOrchestrator, shutdown: () => Promise<void> }}
 */
export function buildVoiceOrchestrator(args) {
    const { config, sessionManager, clientRegistry } = args;
    // The sidecar child process currently managed by this instance, if any.
    let speechmuxProc = null;
    const busResolver = {
        getSlot: (sessionId) => sessionManager.getSlot(sessionId),
        getEventBus: (sessionId) => sessionManager.getSlot(sessionId)?.eventBusRef.current ?? null,
    };
    const orchestrator = new VoiceOrchestrator({
        config,
        sessionManager,
        busResolver,
        // Spawns the sidecar when a binary is configured. Resolves once the
        // process has actually been spawned; rejects if it cannot be spawned
        // (e.g. missing binary).
        startSpeechmux: async () => {
            const bin = config.voice?.speechmuxBinary;
            if (!bin) {
                console.log('[voice] speechmuxBinary not configured; assuming speechmux is externally managed (systemd, container, remote host, etc.)');
                return;
            }
            if (speechmuxProc)
                return;
            const proc = spawn(bin, [], { stdio: ['ignore', 'inherit', 'inherit'] });
            speechmuxProc = proc;
            // Only clear the shared reference if it still points at THIS
            // child; a late event from an old child must not clobber a newer one.
            const forget = () => {
                if (speechmuxProc === proc)
                    speechmuxProc = null;
            };
            proc.on('exit', (code, signal) => {
                console.warn(`[voice] speechmux exited (code=${code}, signal=${signal})`);
                forget();
            });
            await new Promise((resolve, reject) => {
                let spawned = false;
                proc.once('spawn', () => {
                    spawned = true;
                    resolve();
                });
                // A ChildProcess 'error' with no listener is thrown as an
                // uncaught exception, so a listener must always be attached.
                proc.on('error', (err) => {
                    console.warn('[voice] speechmux process error', err);
                    // Before a successful spawn there is no process to track.
                    // After it, the error concerns a live child (e.g. a failed
                    // kill), so keep the reference.
                    if (!spawned)
                        forget();
                    reject(err); // no-op once the promise has settled
                });
            });
            // NB: we do not wait for a ready marker here — speechmux emits readiness
            // to its own logs. Callers should ensure startup ordering or implement a
            // readiness probe as part of the Step 14 smoke.
        },
        // Sends SIGTERM, escalating to SIGKILL after 2s. Resolves when the
        // child exits or the escalation fires. No-op without a managed child.
        stopSpeechmux: async () => {
            if (!speechmuxProc)
                return;
            const proc = speechmuxProc;
            speechmuxProc = null;
            await new Promise((resolve) => {
                const timer = setTimeout(() => {
                    try {
                        proc.kill('SIGKILL');
                    }
                    catch {
                        /* ignore */
                    }
                    resolve();
                }, 2000);
                proc.once('exit', () => {
                    clearTimeout(timer);
                    resolve();
                });
                try {
                    proc.kill('SIGTERM');
                }
                catch {
                    clearTimeout(timer);
                    resolve();
                }
            });
        },
        // Notifies the client currently connected to the session's slot
        // (if any) that it has been displaced.
        displaceOwner: async (sessionId, _newOwner) => {
            const slot = sessionManager.getSlot(sessionId);
            const existingClientId = slot?.connection?.connectedClientId;
            if (!existingClientId)
                return;
            const existing = clientRegistry.get(existingClientId);
            existing?.sendDisplacedEvent(sessionId);
        },
        isOwnedByVoiceCall: (sessionId) => orchestrator.isCallActive(sessionId),
    });
    return {
        orchestrator,
        shutdown: async () => {
            await orchestrator.stop();
        },
    };
}
@@ -0,0 +1,91 @@
1
+ // Voice orchestrator — owns the speechmux sidecar lifecycle and the per-call
2
+ // bind dispatch. See docs/plans/voice-mode.md → "Voice orchestrator".
3
+ //
4
+ // This file defines the orchestrator's interface surface and its
5
+ // implementation of start()/stop()/bindCall()/endCall().
6
/**
 * Typed error carrying the discriminable reason code used in
 * PimoteResponse.error. When no message is given, the code doubles as the
 * error message.
 */
export class CallBindError extends Error {
    code;
    constructor(code, message) {
        const text = message ?? code;
        super(text);
        this.name = 'CallBindError';
        this.code = code;
    }
}
15
/**
 * Owns the speechmux sidecar lifecycle (through the injected
 * `startSpeechmux` / `stopSpeechmux` seams) and tracks which sessions
 * currently have a voice call bound.
 */
export class VoiceOrchestrator {
    opts;
    started = false;
    // Session ids with an active voice call.
    activeCalls = new Set();
    constructor(opts) {
        this.opts = opts;
    }
    /** Starts the speechmux sidecar via `opts.startSpeechmux`. Idempotent; propagates its failure. */
    async start() {
        if (this.started)
            return;
        await this.opts.startSpeechmux();
        this.started = true;
    }
    /** Stops speechmux via `opts.stopSpeechmux` and forgets all active calls. Idempotent. */
    async stop() {
        if (!this.started)
            return;
        await this.opts.stopSpeechmux();
        this.started = false;
        this.activeCalls.clear();
    }
    /**
     * Called by ws-handler for CallBindCommand.
     *
     * Every precondition is checked BEFORE the current owner is displaced,
     * so a bind that is going to fail never tears down an existing call.
     *
     * @param {{ sessionId: string, clientConnection: object, force?: boolean }} args
     * @returns {Promise<{ sessionId: string, webrtcSignalUrl: string }>}
     * @throws {CallBindError} `call_bind_failed_session_not_found`,
     *   `call_bind_failed_owned`, or `call_bind_failed_internal`.
     */
    async bindCall(args) {
        const { sessionId } = args;
        const { busResolver, config } = this.opts;
        const slot = busResolver.getSlot(sessionId);
        if (!slot) {
            throw new CallBindError('call_bind_failed_session_not_found', `No session ${sessionId}`);
        }
        const alreadyOwned = this.opts.isOwnedByVoiceCall(sessionId);
        if (alreadyOwned && !args.force) {
            throw new CallBindError('call_bind_failed_owned', 'Session already bound to a voice call');
        }
        // Voice-disabled guard: if speechmux wiring isn't configured, fail the
        // bind here rather than handing the client empty URLs. Speechmux is
        // what mints the per-call TURN creds now (in the /signal `session`
        // response) and what authenticates peers (via Cloudflare Access at the
        // edge), so pimote no longer needs to mint anything.
        const signalUrl = config.voice?.speechmuxSignalUrl;
        const llmWsUrl = config.voice?.speechmuxLlmWsUrl;
        if (!signalUrl || !llmWsUrl) {
            throw new CallBindError('call_bind_failed_internal', 'voice_disabled: speechmux signal URL / llm WS URL not configured');
        }
        if (!busResolver.getEventBus(sessionId)) {
            throw new CallBindError('call_bind_failed_internal', 'Session has no EventBus');
        }
        // Only now, with the bind known to be viable, displace the old owner.
        if (alreadyOwned) {
            await this.opts.displaceOwner(sessionId, args.clientConnection);
        }
        // Re-resolve after displacement so the activate event goes to the
        // bus that is current at emit time.
        const bus = busResolver.getEventBus(sessionId);
        if (!bus) {
            throw new CallBindError('call_bind_failed_internal', 'Session has no EventBus');
        }
        const activate = {
            type: 'pimote:voice:activate',
            sessionId,
            speechmuxWsUrl: llmWsUrl,
        };
        bus.emit(activate.type, activate);
        this.activeCalls.add(sessionId);
        return {
            sessionId,
            webrtcSignalUrl: signalUrl,
        };
    }
    /**
     * Called by ws-handler for CallEndCommand, or internally on
     * displacement/error. Idempotent: a session without an active call is
     * ignored. Emits `pimote:voice:deactivate` on the session's EventBus
     * when one is available.
     */
    async endCall(args) {
        // Set#delete reports whether the id was present, which doubles as
        // the "was a call active?" check.
        if (!this.activeCalls.delete(args.sessionId))
            return;
        const bus = this.opts.busResolver.getEventBus(args.sessionId);
        if (bus) {
            const deactivate = { type: 'pimote:voice:deactivate', sessionId: args.sessionId };
            bus.emit(deactivate.type, deactivate);
        }
    }
    /** True if the given session currently has an active voice call bound. */
    isCallActive(sessionId) {
        return this.activeCalls.has(sessionId);
    }
}
@@ -8,6 +8,7 @@ import { createExtensionUIBridge } from './extension-ui-bridge.js';
8
8
  import { findExternalPiProcesses, killExternalPiProcesses } from './takeover.js';
9
9
  import { mapAgentMessages, extractMessageEntryIds, applyEntryIds } from './message-mapper.js';
10
10
  import { getGitBranch } from './git-branch.js';
11
+ import { CallBindError } from './voice-orchestrator.js';
11
12
  /** Parse data-URL encoded images into the shape the pi SDK expects. */
12
13
  function parseDataUrlImages(images) {
13
14
  if (!images || images.length === 0)
@@ -133,16 +134,18 @@ export class WsHandler {
133
134
  pushNotificationService;
134
135
  sessionMetadataStore;
135
136
  clientRegistry;
137
+ voiceOrchestrator;
136
138
  subscribedSessions = new Set();
137
139
  viewedSessionId = null;
138
140
  clientId;
139
- constructor(sessionManager, folderIndex, ws, pushNotificationService, sessionMetadataStore, clientId, clientRegistry) {
141
+ constructor(sessionManager, folderIndex, ws, pushNotificationService, sessionMetadataStore, clientId, clientRegistry, voiceOrchestrator) {
140
142
  this.sessionManager = sessionManager;
141
143
  this.folderIndex = folderIndex;
142
144
  this.ws = ws;
143
145
  this.pushNotificationService = pushNotificationService;
144
146
  this.sessionMetadataStore = sessionMetadataStore;
145
147
  this.clientRegistry = clientRegistry;
148
+ this.voiceOrchestrator = voiceOrchestrator;
146
149
  this.clientId = clientId;
147
150
  }
148
151
  getViewedSessionId() {
@@ -480,6 +483,65 @@ export class WsHandler {
480
483
  this.sendResponse(id, true, { sessionId: takeoverSessionId, killedProcesses: killedCount });
481
484
  break;
482
485
  }
486
+ // ---- Voice call control ----
487
+ case 'call_bind': {
488
+ if (!this.voiceOrchestrator) {
489
+ this.sendResponse(id, false, undefined, 'call_bind_failed_internal');
490
+ break;
491
+ }
492
+ const slot = this.sessionManager.getSlot(command.sessionId);
493
+ if (!slot) {
494
+ this.sendResponse(id, false, undefined, 'call_bind_failed_session_not_found');
495
+ break;
496
+ }
497
+ const connection = {
498
+ ws: this.ws,
499
+ connectedClientId: this.clientId,
500
+ onSessionReset: (s) => this.handleSessionReset(s),
501
+ };
502
+ try {
503
+ const data = await this.voiceOrchestrator.bindCall({
504
+ sessionId: command.sessionId,
505
+ clientConnection: connection,
506
+ force: command.force ?? false,
507
+ });
508
+ this.sendResponse(id, true, data);
509
+ this.sendEvent({ type: 'call_status', sessionId: command.sessionId, status: 'binding' });
510
+ }
511
+ catch (err) {
512
+ if (err instanceof CallBindError) {
513
+ this.sendResponse(id, false, undefined, err.code);
514
+ }
515
+ else {
516
+ console.warn('[voice] call_bind failed', err);
517
+ this.sendResponse(id, false, undefined, 'call_bind_failed_internal');
518
+ }
519
+ }
520
+ break;
521
+ }
522
+ case 'call_end': {
523
+ await this.voiceOrchestrator?.endCall({ sessionId: command.sessionId, reason: 'user_hangup' });
524
+ this.sendResponse(id, true);
525
+ this.sendEvent({ type: 'call_ended', sessionId: command.sessionId, reason: 'user_hangup' });
526
+ break;
527
+ }
528
+ // ---- Client diagnostic logs (voice/call tracing) ----
529
+ case 'client_log': {
530
+ // Forward to the server's logger so client-side traces interleave
531
+ // with the server-side voice extension logs in the same journal.
532
+ const clientWall = new Date(command.clientTimestampMs).toISOString();
533
+ const serverWall = new Date().toISOString();
534
+ const driftMs = Date.now() - command.clientTimestampMs;
535
+ const line = `[voice_trace][client/${command.tag}] ${command.message} ${JSON.stringify({ clientWall, serverWall, driftMs, ...(command.data ?? {}) })}`;
536
+ if (command.level === 'error')
537
+ console.error(line);
538
+ else if (command.level === 'warn')
539
+ console.warn(line);
540
+ else
541
+ console.log(line);
542
+ this.sendResponse(id, true);
543
+ break;
544
+ }
483
545
  // ---- Extension UI ----
484
546
  case 'extension_ui_response': {
485
547
  const uiSlot = command.sessionId ? this.sessionManager.getSession(command.sessionId) : undefined;
@@ -896,13 +958,26 @@ export class WsHandler {
896
958
  }
897
959
  }
898
960
  /** Notify the old owner that they've been displaced from a session.
899
- * No-op if the session is unowned or owned by this client. */
961
+ * No-op if the session is unowned or owned by this client.
962
+ *
963
+ * Voice-call tear-down on displacement lives in `sendDisplacedEvent` (the
964
+ * old-owner-side site that also emits `call_ended { displaced }`), so this
965
+ * method does not call `voiceOrchestrator.endCall` itself — see review
966
+ * finding 4.
967
+ */
900
968
  displaceOwner(sessionId, slot) {
901
969
  if (slot.connection?.connectedClientId && slot.connection.connectedClientId !== this.clientId) {
902
970
  const oldHandler = this.clientRegistry.get(slot.connection.connectedClientId);
903
971
  if (oldHandler) {
904
972
  oldHandler.sendDisplacedEvent(sessionId);
905
973
  }
974
+ else if (this.voiceOrchestrator?.isCallActive(sessionId)) {
975
+ // Stale owner id with no live handler — clean up orchestrator state
976
+ // so the new owner doesn't inherit a phantom active call.
977
+ this.voiceOrchestrator.endCall({ sessionId, reason: 'displaced' }).catch((err) => {
978
+ console.warn('[voice] endCall on displace (stale handler) failed', err);
979
+ });
980
+ }
906
981
  }
907
982
  }
908
983
  /** Bind a slot to this client — sets ownership, WebSocket routing,
@@ -920,7 +995,9 @@ export class WsHandler {
920
995
  // ManagedSlot — on reconnect we skip rebinding, but on session reset
921
996
  // we must rebind so the bridge points at the new session state.
922
997
  if (!slot.sessionState.extensionsBound) {
923
- const uiContext = createExtensionUIBridge(slot, this.pushNotificationService);
998
+ const uiContext = createExtensionUIBridge(slot, this.pushNotificationService, {
999
+ isVoiceModeActive: () => this.voiceOrchestrator?.isCallActive(sessionId) ?? false,
1000
+ });
924
1001
  const commandContextActions = createCommandContextActions(slot);
925
1002
  await slot.session.bindExtensions({ uiContext, commandContextActions });
926
1003
  slot.sessionState.extensionsBound = true;
@@ -939,9 +1016,12 @@ export class WsHandler {
939
1016
  return;
940
1017
  }
941
1018
  // Session ID changed — rebuild session state in-place on the same slot.
942
- const folderPath = slot.folderPath;
1019
+ // rebuildSessionState refreshes slot.folderPath from the new session's header cwd,
1020
+ // so capture folderPath AFTER the rebuild to pick up the new value (fork-from can
1021
+ // change cwd, e.g. the worktree extension).
943
1022
  // Rebuild session state (tears down old, creates new from runtime.session)
944
1023
  this.sessionManager.rebuildSessionState(slot);
1024
+ const folderPath = slot.folderPath;
945
1025
  // Re-key the session map
946
1026
  this.sessionManager.reKeySession(slot, oldSessionId, newSessionId);
947
1027
  // Update handler bookkeeping
@@ -951,7 +1031,9 @@ export class WsHandler {
951
1031
  this.viewedSessionId = newSessionId;
952
1032
  }
953
1033
  // Rebind extension UI bridge (new session state for dialog routing)
954
- const uiContext = createExtensionUIBridge(slot, this.pushNotificationService);
1034
+ const uiContext = createExtensionUIBridge(slot, this.pushNotificationService, {
1035
+ isVoiceModeActive: () => this.voiceOrchestrator?.isCallActive(newSessionId) ?? false,
1036
+ });
955
1037
  const commandContextActions = createCommandContextActions(slot);
956
1038
  await slot.session.bindExtensions({ uiContext, commandContextActions });
957
1039
  slot.sessionState.extensionsBound = true;
@@ -1064,6 +1146,27 @@ export class WsHandler {
1064
1146
  sessionId,
1065
1147
  reason: 'displaced',
1066
1148
  });
1149
+ // If the old owner had an active voice call on this session, tear down
1150
+ // orchestrator bookkeeping and surface `call_ended { reason: 'displaced' }`
1151
+ // so their VoiceCallStore tears down alongside the session_closed.
1152
+ if (this.voiceOrchestrator?.isCallActive(sessionId)) {
1153
+ void this.voiceOrchestrator.endCall({ sessionId, reason: 'displaced' });
1154
+ this.sendEvent({
1155
+ type: 'call_ended',
1156
+ sessionId,
1157
+ reason: 'displaced',
1158
+ });
1159
+ }
1160
+ }
1161
+ /** Broadcast a `call_ended` to this client (used by the session manager's
1162
+ * before-close hook so the orchestrator bookkeeping owner learns that a
1163
+ * server-initiated teardown happened). */
1164
+ sendCallEndedEvent(sessionId, reason) {
1165
+ this.sendEvent({
1166
+ type: 'call_ended',
1167
+ sessionId,
1168
+ reason,
1169
+ });
1067
1170
  }
1068
1171
  /** Send a session_closed event with reason 'killed' to this client's WebSocket.
1069
1172
  * Also removes the session from this handler's subscribedSessions so that
@@ -0,0 +1 @@
1
+ export * from './protocol.js';
@@ -0,0 +1,2 @@
1
+ // @pimote/shared barrel export
2
+ export * from './protocol.js';