@pimote/pimote 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/client/build/_app/immutable/assets/0.-er3OUWm.css +2 -0
- package/client/build/_app/immutable/assets/{2.bfMycywk.css → 2.BtlPyuHL.css} +1 -1
- package/client/build/_app/immutable/chunks/{DNqQZw5U.js → B1ItOytB.js} +2 -2
- package/client/build/_app/immutable/chunks/BiEvVL3P.js +1 -0
- package/client/build/_app/immutable/chunks/D8SptH3Y.js +1 -0
- package/client/build/_app/immutable/chunks/S8e8sMop.js +1 -0
- package/client/build/_app/immutable/chunks/{DHiuV2ft.js → b9CWRTHL.js} +1 -1
- package/client/build/_app/immutable/entry/{app.DZYoujEP.js → app.agj-hcVA.js} +2 -2
- package/client/build/_app/immutable/entry/start.NVZAE6Px.js +1 -0
- package/client/build/_app/immutable/nodes/0.DweM6Pbc.js +10 -0
- package/client/build/_app/immutable/nodes/{1.B5qlqMFD.js → 1.owr_UHNy.js} +1 -1
- package/client/build/_app/immutable/nodes/2.CQNU1AJj.js +55 -0
- package/client/build/_app/version.json +1 -1
- package/client/build/index.html +7 -7
- package/package.json +2 -2
- package/server/dist/config.js +5 -2
- package/server/dist/event-buffer.js +9 -0
- package/server/dist/extension-ui-bridge.js +26 -10
- package/server/dist/file-references.js +123 -0
- package/server/dist/git-branch.js +12 -9
- package/server/dist/login-orchestrator.js +105 -0
- package/server/dist/push-infrastructure.js +13 -2
- package/server/dist/push-notification.js +18 -11
- package/server/dist/server.js +25 -2
- package/server/dist/session-cost.js +26 -2
- package/server/dist/session-manager.js +109 -6
- package/server/dist/static-host/gc.js +13 -0
- package/server/dist/static-host/http-handler.js +27 -1
- package/server/dist/static-host/index.js +24 -12
- package/server/dist/static-host/store.js +10 -1
- package/server/dist/static-host/tools.js +5 -1
- package/server/dist/voice/fsm/reducer.js +14 -2
- package/server/dist/voice/fsm/reducers/lifecycle.js +10 -4
- package/server/dist/voice/fsm/reducers/streaming.js +39 -3
- package/server/dist/voice/fsm/reducers/walkback.js +13 -10
- package/server/dist/voice/fsm/state.js +1 -1
- package/server/dist/voice/index.js +97 -41
- package/server/dist/voice/walk-back.js +94 -26
- package/server/dist/voice-orchestrator-boot.js +22 -5
- package/server/dist/voice-orchestrator.js +38 -1
- package/server/dist/ws-handler.js +190 -63
- package/shared/dist/protocol.d.ts +91 -2
- package/client/build/_app/immutable/assets/0.Dh2gYJ1J.css +0 -2
- package/client/build/_app/immutable/chunks/Czpnrh9t.js +0 -1
- package/client/build/_app/immutable/chunks/D1mCuOEu.js +0 -1
- package/client/build/_app/immutable/chunks/DegHYiTr.js +0 -1
- package/client/build/_app/immutable/entry/start.BNnDRfmt.js +0 -1
- package/client/build/_app/immutable/nodes/0.B20DMuGn.js +0 -10
- package/client/build/_app/immutable/nodes/2.CZjPJM-S.js +0 -55
|
@@ -92,45 +92,97 @@ function rewriteByToolCallId(messages, heardText, targetId) {
|
|
|
92
92
|
// do is honour step 1 (already done).
|
|
93
93
|
return messages;
|
|
94
94
|
}
|
|
95
|
+
// Walkback is about what the user *heard* (speech), not about undoing the
|
|
96
|
+
// agent's real work. So we surgically prune only speak() calls from the
|
|
97
|
+
// target onward — truncate/drop the target, drop later speaks — and keep
|
|
98
|
+
// every other tool call (and its result) and other content intact. Results
|
|
99
|
+
// of pruned speaks are dropped too (a tool_result with no tool_use is the
|
|
100
|
+
// riskier dangling direction). `stopReason` is preserved, not synthesised:
|
|
101
|
+
// the target is no longer necessarily the last message.
|
|
102
|
+
const droppedSpeakIds = new Set();
|
|
95
103
|
const targetMsg = messages[targetMsgIdx];
|
|
96
104
|
const targetContent = contentOf(targetMsg);
|
|
97
105
|
const targetBlock = targetContent[targetBlockIdx];
|
|
98
106
|
const originalText = getSpeakText(targetBlock);
|
|
107
|
+
// Blocks before the target were spoken/heard earlier in the turn — keep.
|
|
99
108
|
const newBlocks = targetContent.slice(0, targetBlockIdx);
|
|
100
|
-
const droppedToolUseIds = new Set();
|
|
101
109
|
if (heardText.length === 0) {
|
|
102
|
-
//
|
|
103
|
-
// tool_result (if any).
|
|
104
|
-
droppedToolUseIds.add(targetId);
|
|
110
|
+
droppedSpeakIds.add(targetId); // nothing heard — drop the speak + its result
|
|
105
111
|
}
|
|
106
112
|
else if (heardText.length >= originalText.length) {
|
|
107
|
-
//
|
|
108
|
-
newBlocks.push(targetBlock);
|
|
113
|
+
newBlocks.push(targetBlock); // whole utterance heard — keep intact (+ its result)
|
|
109
114
|
}
|
|
110
115
|
else {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
// grounded in what the user heard).
|
|
114
|
-
newBlocks.push(replaceSpeakText(targetBlock, heardText));
|
|
115
|
-
droppedToolUseIds.add(targetId);
|
|
116
|
+
newBlocks.push(replaceSpeakText(targetBlock, heardText)); // partial — truncate
|
|
117
|
+
droppedSpeakIds.add(targetId); // a truncated speak's result is no longer grounded
|
|
116
118
|
}
|
|
117
|
-
//
|
|
118
|
-
// the heard prefix and so was not heard.
|
|
119
|
+
// Blocks after the target in the same message: drop later speaks, keep the rest.
|
|
119
120
|
for (let j = targetBlockIdx + 1; j < targetContent.length; j++) {
|
|
120
|
-
const
|
|
121
|
-
if (
|
|
122
|
-
|
|
121
|
+
const block = targetContent[j];
|
|
122
|
+
if (isSpeakToolCall(block)) {
|
|
123
|
+
const id = getToolCallId(block);
|
|
124
|
+
if (id)
|
|
125
|
+
droppedSpeakIds.add(id);
|
|
126
|
+
}
|
|
127
|
+
else {
|
|
128
|
+
newBlocks.push(block);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
const out = messages.slice(0, targetMsgIdx);
|
|
132
|
+
if (newBlocks.length > 0) {
|
|
133
|
+
out.push({ ...targetMsg, content: newBlocks });
|
|
134
|
+
}
|
|
135
|
+
// Subsequent messages: keep them, but drop speak tool calls (and the
|
|
136
|
+
// tool_results of any dropped speak). Forward iteration guarantees a speak's
|
|
137
|
+
// id is recorded before its (later) result message is examined.
|
|
138
|
+
for (let i = targetMsgIdx + 1; i < messages.length; i++) {
|
|
139
|
+
const kept = filterPrunedSpeaks(messages[i], droppedSpeakIds);
|
|
140
|
+
if (kept)
|
|
141
|
+
out.push(kept);
|
|
142
|
+
}
|
|
143
|
+
return out;
|
|
144
|
+
}
|
|
145
|
+
/**
|
|
146
|
+
* Drop pruned-speak content from a trailing message: removes speak tool calls
|
|
147
|
+
* (recording their ids) and the tool_results of any dropped speak, keeping all
|
|
148
|
+
* other content. Returns the (possibly rewritten) message, or null if it ends
|
|
149
|
+
* up empty.
|
|
150
|
+
*/
|
|
151
|
+
function filterPrunedSpeaks(msg, droppedSpeakIds) {
|
|
152
|
+
// pi's runtime shape: a tool result is its own message (role 'toolResult')
|
|
153
|
+
// referencing one toolCallId at the message level.
|
|
154
|
+
if (isToolResultMessage(msg)) {
|
|
155
|
+
const ref = toolResultMessageRefId(msg);
|
|
156
|
+
return ref && droppedSpeakIds.has(ref) ? null : msg;
|
|
157
|
+
}
|
|
158
|
+
const content = contentOf(msg);
|
|
159
|
+
if (content.length === 0)
|
|
160
|
+
return msg;
|
|
161
|
+
let changed = false;
|
|
162
|
+
const kept = [];
|
|
163
|
+
for (const block of content) {
|
|
164
|
+
if (isSpeakToolCall(block)) {
|
|
165
|
+
const id = getToolCallId(block);
|
|
166
|
+
if (id)
|
|
167
|
+
droppedSpeakIds.add(id);
|
|
168
|
+
changed = true;
|
|
169
|
+
continue;
|
|
170
|
+
}
|
|
171
|
+
// Block-level tool_result (Anthropic shape) for a dropped speak.
|
|
172
|
+
if (isToolResultBlock(block)) {
|
|
173
|
+
const ref = toolResultBlockRefId(block);
|
|
174
|
+
if (ref && droppedSpeakIds.has(ref)) {
|
|
175
|
+
changed = true;
|
|
176
|
+
continue;
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
kept.push(block);
|
|
123
180
|
}
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
};
|
|
129
|
-
// Anything AFTER the target message in the array was emitted by the
|
|
130
|
-
// agent after the interrupted speak — drop it. This includes any
|
|
131
|
-
// tool_result messages whose paired speak we just truncated, plus
|
|
132
|
-
// any subsequent assistant messages.
|
|
133
|
-
return [...messages.slice(0, targetMsgIdx), rewrittenTarget];
|
|
181
|
+
if (!changed)
|
|
182
|
+
return msg;
|
|
183
|
+
if (kept.length === 0)
|
|
184
|
+
return null;
|
|
185
|
+
return { ...msg, content: kept };
|
|
134
186
|
}
|
|
135
187
|
// ---------------------------------------------------------------------------
|
|
136
188
|
// Shape-tolerant accessors. pi-agent-core's runtime AgentMessage uses
|
|
@@ -165,6 +217,22 @@ function getToolCallId(block) {
|
|
|
165
217
|
const id = block.id;
|
|
166
218
|
return typeof id === 'string' ? id : undefined;
|
|
167
219
|
}
|
|
220
|
+
/** pi runtime tool-result message (role 'toolResult', message-level toolCallId). */
|
|
221
|
+
function isToolResultMessage(msg) {
|
|
222
|
+
return msg.role === 'toolResult';
|
|
223
|
+
}
|
|
224
|
+
function toolResultMessageRefId(msg) {
|
|
225
|
+
const id = msg.toolCallId;
|
|
226
|
+
return typeof id === 'string' ? id : undefined;
|
|
227
|
+
}
|
|
228
|
+
/** Block-level tool result (Anthropic `tool_result` / pi `toolResult`). */
|
|
229
|
+
function isToolResultBlock(block) {
|
|
230
|
+
return block.type === 'tool_result' || block.type === 'toolResult';
|
|
231
|
+
}
|
|
232
|
+
function toolResultBlockRefId(block) {
|
|
233
|
+
const ref = block.tool_use_id ?? block.toolCallId;
|
|
234
|
+
return typeof ref === 'string' ? ref : undefined;
|
|
235
|
+
}
|
|
168
236
|
function getSpeakText(block) {
|
|
169
237
|
// Try both shapes; whichever holds a string wins.
|
|
170
238
|
const args = block.arguments;
|
|
@@ -35,15 +35,32 @@ export function buildVoiceOrchestrator(args) {
|
|
|
35
35
|
config,
|
|
36
36
|
sessionManager,
|
|
37
37
|
busResolver,
|
|
38
|
-
displaceOwner: async (sessionId,
|
|
38
|
+
displaceOwner: async (sessionId, newOwner) => {
|
|
39
39
|
const slot = sessionManager.getSlot(sessionId);
|
|
40
|
-
|
|
41
|
-
if (!existingClientId)
|
|
40
|
+
if (!slot)
|
|
42
41
|
return;
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
// Notify the displaced owner (if a different client), then transfer
|
|
43
|
+
// ownership to the new caller through the SAME claim path open_session
|
|
44
|
+
// uses — sets slot.connection, subscribes, rebinds extensions, replays
|
|
45
|
+
// pending UI. Without the claim, slot.connection stayed the displaced
|
|
46
|
+
// client: events streamed to a dead socket and idle-reap never fired.
|
|
47
|
+
const existingClientId = slot.connection?.connectedClientId;
|
|
48
|
+
if (existingClientId && existingClientId !== newOwner.connectedClientId) {
|
|
49
|
+
clientRegistry.get(existingClientId)?.sendDisplacedEvent(sessionId);
|
|
50
|
+
}
|
|
51
|
+
await clientRegistry.get(newOwner.connectedClientId)?.claimSession(sessionId, slot);
|
|
45
52
|
},
|
|
46
53
|
isOwnedByVoiceCall: (sessionId) => orchestrator.isCallActive(sessionId),
|
|
54
|
+
notifyCallEnded: (sessionId) => {
|
|
55
|
+
// The voice extension self-deactivated (speechmux WS failed/dropped).
|
|
56
|
+
// Tell the owning client so its VoiceCallStore tears down instead of
|
|
57
|
+
// waiting for WebRTC to time out. (review finding H4)
|
|
58
|
+
const slot = sessionManager.getSlot(sessionId);
|
|
59
|
+
const ownerClientId = slot?.connection?.connectedClientId;
|
|
60
|
+
if (!ownerClientId)
|
|
61
|
+
return;
|
|
62
|
+
clientRegistry.get(ownerClientId)?.sendCallEndedEvent(sessionId, 'error');
|
|
63
|
+
},
|
|
47
64
|
});
|
|
48
65
|
return {
|
|
49
66
|
orchestrator,
|
|
@@ -17,13 +17,41 @@ export class CallBindError extends Error {
|
|
|
17
17
|
export class VoiceOrchestrator {
|
|
18
18
|
opts;
|
|
19
19
|
activeCalls = new Set();
|
|
20
|
+
/** Per-session unsubscribe fns for the `pimote:voice:deactivate` bus
|
|
21
|
+
* listener installed on bind (so the server learns of extension-initiated
|
|
22
|
+
* deactivations — speechmux drop / open failure). */
|
|
23
|
+
deactivateUnsubs = new Map();
|
|
20
24
|
constructor(opts) {
|
|
21
25
|
this.opts = opts;
|
|
22
26
|
}
|
|
23
27
|
/** Drop all active-call bookkeeping. Idempotent. Called on server shutdown. */
|
|
24
28
|
async stop() {
|
|
29
|
+
for (const unsub of this.deactivateUnsubs.values()) {
|
|
30
|
+
try {
|
|
31
|
+
unsub();
|
|
32
|
+
}
|
|
33
|
+
catch {
|
|
34
|
+
/* ignore */
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
this.deactivateUnsubs.clear();
|
|
25
38
|
this.activeCalls.clear();
|
|
26
39
|
}
|
|
40
|
+
/**
|
|
41
|
+
* Handle a `pimote:voice:deactivate` the voice extension emitted on its own
|
|
42
|
+
* (speechmux WS failed/dropped mid-call). Our own `endCall` removes the
|
|
43
|
+
* session from `activeCalls` *before* emitting deactivate, so the
|
|
44
|
+
* `activeCalls.has` guard distinguishes an extension-initiated deactivate
|
|
45
|
+
* from our own — preventing a feedback loop and a duplicate `call_ended`.
|
|
46
|
+
*/
|
|
47
|
+
handleExtensionDeactivated(sessionId) {
|
|
48
|
+
if (!this.activeCalls.has(sessionId))
|
|
49
|
+
return;
|
|
50
|
+
this.activeCalls.delete(sessionId);
|
|
51
|
+
this.deactivateUnsubs.get(sessionId)?.();
|
|
52
|
+
this.deactivateUnsubs.delete(sessionId);
|
|
53
|
+
this.opts.notifyCallEnded?.(sessionId);
|
|
54
|
+
}
|
|
27
55
|
/** Called by ws-handler for CallBindCommand. */
|
|
28
56
|
async bindCall(args) {
|
|
29
57
|
const slot = this.opts.busResolver.getSlot(args.sessionId);
|
|
@@ -49,13 +77,18 @@ export class VoiceOrchestrator {
|
|
|
49
77
|
if (!bus) {
|
|
50
78
|
throw new CallBindError('call_bind_failed_internal', 'Session has no EventBus');
|
|
51
79
|
}
|
|
80
|
+
// Subscribe to extension-initiated deactivate BEFORE activating so a
|
|
81
|
+
// self-deactivate during/after activation is never missed. Replace any
|
|
82
|
+
// prior subscription for this session (force-rebind).
|
|
83
|
+
this.deactivateUnsubs.get(args.sessionId)?.();
|
|
84
|
+
this.deactivateUnsubs.set(args.sessionId, bus.on('pimote:voice:deactivate', () => this.handleExtensionDeactivated(args.sessionId)));
|
|
85
|
+
this.activeCalls.add(args.sessionId);
|
|
52
86
|
const activate = {
|
|
53
87
|
type: 'pimote:voice:activate',
|
|
54
88
|
sessionId: args.sessionId,
|
|
55
89
|
speechmuxWsUrl: llmWsUrl,
|
|
56
90
|
};
|
|
57
91
|
bus.emit(activate.type, activate);
|
|
58
|
-
this.activeCalls.add(args.sessionId);
|
|
59
92
|
return {
|
|
60
93
|
sessionId: args.sessionId,
|
|
61
94
|
webrtcSignalUrl: signalUrl,
|
|
@@ -66,6 +99,10 @@ export class VoiceOrchestrator {
|
|
|
66
99
|
if (!this.activeCalls.has(args.sessionId))
|
|
67
100
|
return;
|
|
68
101
|
this.activeCalls.delete(args.sessionId);
|
|
102
|
+
// Drop our deactivate subscription before emitting so our own emit can't
|
|
103
|
+
// loop back into handleExtensionDeactivated.
|
|
104
|
+
this.deactivateUnsubs.get(args.sessionId)?.();
|
|
105
|
+
this.deactivateUnsubs.delete(args.sessionId);
|
|
69
106
|
const bus = this.opts.busResolver.getEventBus(args.sessionId);
|
|
70
107
|
if (bus) {
|
|
71
108
|
const deactivate = { type: 'pimote:voice:deactivate', sessionId: args.sessionId };
|