@pimote/pimote 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/README.md +4 -1
  2. package/client/build/_app/immutable/assets/0.-er3OUWm.css +2 -0
  3. package/client/build/_app/immutable/assets/2.BtlPyuHL.css +1 -0
  4. package/client/build/_app/immutable/chunks/ATalJV7d.js +3 -0
  5. package/client/build/_app/immutable/chunks/{eHWBE-tD.js → B1ItOytB.js} +2 -2
  6. package/client/build/_app/immutable/chunks/BiEvVL3P.js +1 -0
  7. package/client/build/_app/immutable/chunks/D8SptH3Y.js +1 -0
  8. package/client/build/_app/immutable/chunks/S8e8sMop.js +1 -0
  9. package/client/build/_app/immutable/chunks/b9CWRTHL.js +1 -0
  10. package/client/build/_app/immutable/entry/{app.Di2WQBl6.js → app.agj-hcVA.js} +2 -2
  11. package/client/build/_app/immutable/entry/start.NVZAE6Px.js +1 -0
  12. package/client/build/_app/immutable/nodes/0.DweM6Pbc.js +10 -0
  13. package/client/build/_app/immutable/nodes/{1.DKkktqMe.js → 1.owr_UHNy.js} +1 -1
  14. package/client/build/_app/immutable/nodes/2.CQNU1AJj.js +55 -0
  15. package/client/build/_app/version.json +1 -1
  16. package/client/build/index.html +7 -7
  17. package/package.json +2 -2
  18. package/server/dist/config.js +5 -2
  19. package/server/dist/event-buffer.js +10 -1
  20. package/server/dist/extension-ui-bridge.js +26 -10
  21. package/server/dist/file-references.js +123 -0
  22. package/server/dist/git-branch.js +12 -9
  23. package/server/dist/login-orchestrator.js +105 -0
  24. package/server/dist/push-infrastructure.js +13 -2
  25. package/server/dist/push-notification.js +18 -11
  26. package/server/dist/server.js +25 -2
  27. package/server/dist/session-cost.js +26 -2
  28. package/server/dist/session-manager.js +116 -7
  29. package/server/dist/static-host/gc.js +13 -0
  30. package/server/dist/static-host/http-handler.js +27 -1
  31. package/server/dist/static-host/index.js +24 -12
  32. package/server/dist/static-host/store.js +10 -1
  33. package/server/dist/static-host/tools.js +5 -1
  34. package/server/dist/voice/fsm/reducer.js +14 -2
  35. package/server/dist/voice/fsm/reducers/lifecycle.js +10 -4
  36. package/server/dist/voice/fsm/reducers/streaming.js +39 -3
  37. package/server/dist/voice/fsm/reducers/walkback.js +13 -10
  38. package/server/dist/voice/fsm/state.js +1 -1
  39. package/server/dist/voice/index.js +97 -41
  40. package/server/dist/voice/walk-back.js +94 -26
  41. package/server/dist/voice-orchestrator-boot.js +22 -5
  42. package/server/dist/voice-orchestrator.js +38 -1
  43. package/server/dist/ws-handler.js +194 -64
  44. package/shared/dist/protocol.d.ts +97 -2
  45. package/client/build/_app/immutable/assets/0.KP1suSk9.css +0 -2
  46. package/client/build/_app/immutable/assets/2.BaqEkCa-.css +0 -1
  47. package/client/build/_app/immutable/chunks/0-bXzYW9.js +0 -1
  48. package/client/build/_app/immutable/chunks/BgJ-X-tf.js +0 -3
  49. package/client/build/_app/immutable/chunks/CnTTbAN2.js +0 -1
  50. package/client/build/_app/immutable/chunks/RbcwTVzu.js +0 -1
  51. package/client/build/_app/immutable/chunks/TV35yyBT.js +0 -1
  52. package/client/build/_app/immutable/chunks/gZLAQ0sf.js +0 -1
  53. package/client/build/_app/immutable/entry/start.ClOWBB7j.js +0 -1
  54. package/client/build/_app/immutable/nodes/0.DJUqUGM7.js +0 -10
  55. package/client/build/_app/immutable/nodes/2.BTjJ9cu5.js +0 -54
@@ -92,45 +92,97 @@ function rewriteByToolCallId(messages, heardText, targetId) {
92
92
  // do is honour step 1 (already done).
93
93
  return messages;
94
94
  }
95
+ // Walkback is about what the user *heard* (speech), not about undoing the
96
+ // agent's real work. So we surgically prune only speak() calls from the
97
+ // target onward — truncate/drop the target, drop later speaks — and keep
98
+ // every other tool call (and its result) and other content intact. Results
99
+ // of pruned speaks are dropped too (a tool_result with no tool_use is the
100
+ // riskier dangling direction). `stopReason` is preserved, not synthesised:
101
+ // the target is no longer necessarily the last message.
102
+ const droppedSpeakIds = new Set();
95
103
  const targetMsg = messages[targetMsgIdx];
96
104
  const targetContent = contentOf(targetMsg);
97
105
  const targetBlock = targetContent[targetBlockIdx];
98
106
  const originalText = getSpeakText(targetBlock);
107
+ // Blocks before the target were spoken/heard earlier in the turn — keep.
99
108
  const newBlocks = targetContent.slice(0, targetBlockIdx);
100
- const droppedToolUseIds = new Set();
101
109
  if (heardText.length === 0) {
102
- // Nothing was heard of this speak. Drop the block and its paired
103
- // tool_result (if any).
104
- droppedToolUseIds.add(targetId);
110
+ droppedSpeakIds.add(targetId); // nothing heard → drop the speak + its result
105
111
  }
106
112
  else if (heardText.length >= originalText.length) {
107
- // Entire utterance was heard. Keep block intact.
108
- newBlocks.push(targetBlock);
113
+ newBlocks.push(targetBlock); // whole utterance heard → keep intact (+ its result)
109
114
  }
110
115
  else {
111
- // Partial. Truncate text in-place and drop the paired tool_result
112
- // (per the contract — a truncated speak's result is no longer
113
- // grounded in what the user heard).
114
- newBlocks.push(replaceSpeakText(targetBlock, heardText));
115
- droppedToolUseIds.add(targetId);
116
+ newBlocks.push(replaceSpeakText(targetBlock, heardText)); // partial → truncate
117
+ droppedSpeakIds.add(targetId); // a truncated speak's result is no longer grounded
116
118
  }
117
- // Anything in this message AFTER the target block was emitted after
118
- // the heard prefix and so was not heard.
119
+ // Blocks after the target in the same message: drop later speaks, keep the rest.
119
120
  for (let j = targetBlockIdx + 1; j < targetContent.length; j++) {
120
- const id = getToolCallId(targetContent[j]);
121
- if (id)
122
- droppedToolUseIds.add(id);
121
+ const block = targetContent[j];
122
+ if (isSpeakToolCall(block)) {
123
+ const id = getToolCallId(block);
124
+ if (id)
125
+ droppedSpeakIds.add(id);
126
+ }
127
+ else {
128
+ newBlocks.push(block);
129
+ }
130
+ }
131
+ const out = messages.slice(0, targetMsgIdx);
132
+ if (newBlocks.length > 0) {
133
+ out.push({ ...targetMsg, content: newBlocks });
134
+ }
135
+ // Subsequent messages: keep them, but drop speak tool calls (and the
136
+ // tool_results of any dropped speak). Forward iteration guarantees a speak's
137
+ // id is recorded before its (later) result message is examined.
138
+ for (let i = targetMsgIdx + 1; i < messages.length; i++) {
139
+ const kept = filterPrunedSpeaks(messages[i], droppedSpeakIds);
140
+ if (kept)
141
+ out.push(kept);
142
+ }
143
+ return out;
144
+ }
145
+ /**
146
+ * Drop pruned-speak content from a trailing message: removes speak tool calls
147
+ * (recording their ids) and the tool_results of any dropped speak, keeping all
148
+ * other content. Returns the (possibly rewritten) message, or null if it ends
149
+ * up empty.
150
+ */
151
+ function filterPrunedSpeaks(msg, droppedSpeakIds) {
152
+ // pi's runtime shape: a tool result is its own message (role 'toolResult')
153
+ // referencing one toolCallId at the message level.
154
+ if (isToolResultMessage(msg)) {
155
+ const ref = toolResultMessageRefId(msg);
156
+ return ref && droppedSpeakIds.has(ref) ? null : msg;
157
+ }
158
+ const content = contentOf(msg);
159
+ if (content.length === 0)
160
+ return msg;
161
+ let changed = false;
162
+ const kept = [];
163
+ for (const block of content) {
164
+ if (isSpeakToolCall(block)) {
165
+ const id = getToolCallId(block);
166
+ if (id)
167
+ droppedSpeakIds.add(id);
168
+ changed = true;
169
+ continue;
170
+ }
171
+ // Block-level tool_result (Anthropic shape) for a dropped speak.
172
+ if (isToolResultBlock(block)) {
173
+ const ref = toolResultBlockRefId(block);
174
+ if (ref && droppedSpeakIds.has(ref)) {
175
+ changed = true;
176
+ continue;
177
+ }
178
+ }
179
+ kept.push(block);
123
180
  }
124
- const rewrittenTarget = {
125
- ...targetMsg,
126
- content: newBlocks,
127
- stopReason: 'aborted',
128
- };
129
- // Anything AFTER the target message in the array was emitted by the
130
- // agent after the interrupted speak — drop it. This includes any
131
- // tool_result messages whose paired speak we just truncated, plus
132
- // any subsequent assistant messages.
133
- return [...messages.slice(0, targetMsgIdx), rewrittenTarget];
181
+ if (!changed)
182
+ return msg;
183
+ if (kept.length === 0)
184
+ return null;
185
+ return { ...msg, content: kept };
134
186
  }
135
187
  // ---------------------------------------------------------------------------
136
188
  // Shape-tolerant accessors. pi-agent-core's runtime AgentMessage uses
@@ -165,6 +217,22 @@ function getToolCallId(block) {
165
217
  const id = block.id;
166
218
  return typeof id === 'string' ? id : undefined;
167
219
  }
220
+ /** pi runtime tool-result message (role 'toolResult', message-level toolCallId). */
221
+ function isToolResultMessage(msg) {
222
+ return msg.role === 'toolResult';
223
+ }
224
+ function toolResultMessageRefId(msg) {
225
+ const id = msg.toolCallId;
226
+ return typeof id === 'string' ? id : undefined;
227
+ }
228
+ /** Block-level tool result (Anthropic `tool_result` / pi `toolResult`). */
229
+ function isToolResultBlock(block) {
230
+ return block.type === 'tool_result' || block.type === 'toolResult';
231
+ }
232
+ function toolResultBlockRefId(block) {
233
+ const ref = block.tool_use_id ?? block.toolCallId;
234
+ return typeof ref === 'string' ? ref : undefined;
235
+ }
168
236
  function getSpeakText(block) {
169
237
  // Try both shapes; whichever holds a string wins.
170
238
  const args = block.arguments;
@@ -35,15 +35,32 @@ export function buildVoiceOrchestrator(args) {
35
35
  config,
36
36
  sessionManager,
37
37
  busResolver,
38
- displaceOwner: async (sessionId, _newOwner) => {
38
+ displaceOwner: async (sessionId, newOwner) => {
39
39
  const slot = sessionManager.getSlot(sessionId);
40
- const existingClientId = slot?.connection?.connectedClientId;
41
- if (!existingClientId)
40
+ if (!slot)
42
41
  return;
43
- const existing = clientRegistry.get(existingClientId);
44
- existing?.sendDisplacedEvent(sessionId);
42
+ // Notify the displaced owner (if a different client), then transfer
43
+ // ownership to the new caller through the SAME claim path open_session
44
+ // uses — sets slot.connection, subscribes, rebinds extensions, replays
45
+ // pending UI. Without the claim, slot.connection stayed the displaced
46
+ // client: events streamed to a dead socket and idle-reap never fired.
47
+ const existingClientId = slot.connection?.connectedClientId;
48
+ if (existingClientId && existingClientId !== newOwner.connectedClientId) {
49
+ clientRegistry.get(existingClientId)?.sendDisplacedEvent(sessionId);
50
+ }
51
+ await clientRegistry.get(newOwner.connectedClientId)?.claimSession(sessionId, slot);
45
52
  },
46
53
  isOwnedByVoiceCall: (sessionId) => orchestrator.isCallActive(sessionId),
54
+ notifyCallEnded: (sessionId) => {
55
+ // The voice extension self-deactivated (speechmux WS failed/dropped).
56
+ // Tell the owning client so its VoiceCallStore tears down instead of
57
+ // waiting for WebRTC to time out. (review finding H4)
58
+ const slot = sessionManager.getSlot(sessionId);
59
+ const ownerClientId = slot?.connection?.connectedClientId;
60
+ if (!ownerClientId)
61
+ return;
62
+ clientRegistry.get(ownerClientId)?.sendCallEndedEvent(sessionId, 'error');
63
+ },
47
64
  });
48
65
  return {
49
66
  orchestrator,
@@ -17,13 +17,41 @@ export class CallBindError extends Error {
17
17
  export class VoiceOrchestrator {
18
18
  opts;
19
19
  activeCalls = new Set();
20
+ /** Per-session unsubscribe fns for the `pimote:voice:deactivate` bus
21
+ * listener installed on bind (so the server learns of extension-initiated
22
+ * deactivations — speechmux drop / open failure). */
23
+ deactivateUnsubs = new Map();
20
24
  constructor(opts) {
21
25
  this.opts = opts;
22
26
  }
23
27
  /** Drop all active-call bookkeeping. Idempotent. Called on server shutdown. */
24
28
  async stop() {
29
+ for (const unsub of this.deactivateUnsubs.values()) {
30
+ try {
31
+ unsub();
32
+ }
33
+ catch {
34
+ /* ignore */
35
+ }
36
+ }
37
+ this.deactivateUnsubs.clear();
25
38
  this.activeCalls.clear();
26
39
  }
40
+ /**
41
+ * Handle a `pimote:voice:deactivate` the voice extension emitted on its own
42
+ * (speechmux WS failed/dropped mid-call). Our own `endCall` removes the
43
+ * session from `activeCalls` *before* emitting deactivate, so the
44
+ * `activeCalls.has` guard distinguishes an extension-initiated deactivate
45
+ * from our own — preventing a feedback loop and a duplicate `call_ended`.
46
+ */
47
+ handleExtensionDeactivated(sessionId) {
48
+ if (!this.activeCalls.has(sessionId))
49
+ return;
50
+ this.activeCalls.delete(sessionId);
51
+ this.deactivateUnsubs.get(sessionId)?.();
52
+ this.deactivateUnsubs.delete(sessionId);
53
+ this.opts.notifyCallEnded?.(sessionId);
54
+ }
27
55
  /** Called by ws-handler for CallBindCommand. */
28
56
  async bindCall(args) {
29
57
  const slot = this.opts.busResolver.getSlot(args.sessionId);
@@ -49,13 +77,18 @@ export class VoiceOrchestrator {
49
77
  if (!bus) {
50
78
  throw new CallBindError('call_bind_failed_internal', 'Session has no EventBus');
51
79
  }
80
+ // Subscribe to extension-initiated deactivate BEFORE activating so a
81
+ // self-deactivate during/after activation is never missed. Replace any
82
+ // prior subscription for this session (force-rebind).
83
+ this.deactivateUnsubs.get(args.sessionId)?.();
84
+ this.deactivateUnsubs.set(args.sessionId, bus.on('pimote:voice:deactivate', () => this.handleExtensionDeactivated(args.sessionId)));
85
+ this.activeCalls.add(args.sessionId);
52
86
  const activate = {
53
87
  type: 'pimote:voice:activate',
54
88
  sessionId: args.sessionId,
55
89
  speechmuxWsUrl: llmWsUrl,
56
90
  };
57
91
  bus.emit(activate.type, activate);
58
- this.activeCalls.add(args.sessionId);
59
92
  return {
60
93
  sessionId: args.sessionId,
61
94
  webrtcSignalUrl: signalUrl,
@@ -66,6 +99,10 @@ export class VoiceOrchestrator {
66
99
  if (!this.activeCalls.has(args.sessionId))
67
100
  return;
68
101
  this.activeCalls.delete(args.sessionId);
102
+ // Drop our deactivate subscription before emitting so our own emit can't
103
+ // loop back into handleExtensionDeactivated.
104
+ this.deactivateUnsubs.get(args.sessionId)?.();
105
+ this.deactivateUnsubs.delete(args.sessionId);
69
106
  const bus = this.opts.busResolver.getEventBus(args.sessionId);
70
107
  if (bus) {
71
108
  const deactivate = { type: 'pimote:voice:deactivate', sessionId: args.sessionId };