@pimote/pimote 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/client/build/_app/immutable/assets/0.-er3OUWm.css +2 -0
- package/client/build/_app/immutable/assets/{2.bfMycywk.css → 2.BtlPyuHL.css} +1 -1
- package/client/build/_app/immutable/chunks/{DNqQZw5U.js → B1ItOytB.js} +2 -2
- package/client/build/_app/immutable/chunks/BiEvVL3P.js +1 -0
- package/client/build/_app/immutable/chunks/D8SptH3Y.js +1 -0
- package/client/build/_app/immutable/chunks/S8e8sMop.js +1 -0
- package/client/build/_app/immutable/chunks/{DHiuV2ft.js → b9CWRTHL.js} +1 -1
- package/client/build/_app/immutable/entry/{app.DZYoujEP.js → app.agj-hcVA.js} +2 -2
- package/client/build/_app/immutable/entry/start.NVZAE6Px.js +1 -0
- package/client/build/_app/immutable/nodes/0.DweM6Pbc.js +10 -0
- package/client/build/_app/immutable/nodes/{1.B5qlqMFD.js → 1.owr_UHNy.js} +1 -1
- package/client/build/_app/immutable/nodes/2.CQNU1AJj.js +55 -0
- package/client/build/_app/version.json +1 -1
- package/client/build/index.html +7 -7
- package/package.json +2 -2
- package/server/dist/config.js +5 -2
- package/server/dist/event-buffer.js +9 -0
- package/server/dist/extension-ui-bridge.js +26 -10
- package/server/dist/file-references.js +123 -0
- package/server/dist/git-branch.js +12 -9
- package/server/dist/login-orchestrator.js +105 -0
- package/server/dist/push-infrastructure.js +13 -2
- package/server/dist/push-notification.js +18 -11
- package/server/dist/server.js +25 -2
- package/server/dist/session-cost.js +26 -2
- package/server/dist/session-manager.js +109 -6
- package/server/dist/static-host/gc.js +13 -0
- package/server/dist/static-host/http-handler.js +27 -1
- package/server/dist/static-host/index.js +24 -12
- package/server/dist/static-host/store.js +10 -1
- package/server/dist/static-host/tools.js +5 -1
- package/server/dist/voice/fsm/reducer.js +14 -2
- package/server/dist/voice/fsm/reducers/lifecycle.js +10 -4
- package/server/dist/voice/fsm/reducers/streaming.js +39 -3
- package/server/dist/voice/fsm/reducers/walkback.js +13 -10
- package/server/dist/voice/fsm/state.js +1 -1
- package/server/dist/voice/index.js +97 -41
- package/server/dist/voice/walk-back.js +94 -26
- package/server/dist/voice-orchestrator-boot.js +22 -5
- package/server/dist/voice-orchestrator.js +38 -1
- package/server/dist/ws-handler.js +190 -63
- package/shared/dist/protocol.d.ts +91 -2
- package/client/build/_app/immutable/assets/0.Dh2gYJ1J.css +0 -2
- package/client/build/_app/immutable/chunks/Czpnrh9t.js +0 -1
- package/client/build/_app/immutable/chunks/D1mCuOEu.js +0 -1
- package/client/build/_app/immutable/chunks/DegHYiTr.js +0 -1
- package/client/build/_app/immutable/entry/start.BNnDRfmt.js +0 -1
- package/client/build/_app/immutable/nodes/0.B20DMuGn.js +0 -10
- package/client/build/_app/immutable/nodes/2.CZjPJM-S.js +0 -55
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createReadStream } from 'node:fs';
|
|
2
|
-
import { stat } from 'node:fs/promises';
|
|
2
|
+
import { stat, realpath } from 'node:fs/promises';
|
|
3
3
|
import path from 'node:path';
|
|
4
4
|
const MIME_TYPES = {
|
|
5
5
|
'.html': 'text/html; charset=utf-8',
|
|
@@ -126,11 +126,37 @@ export async function serveStaticHostRoute(req, res, registry) {
|
|
|
126
126
|
send404(res);
|
|
127
127
|
return true;
|
|
128
128
|
}
|
|
129
|
+
// Symlink containment: stat()/createReadStream() follow symlinks, so a symlink
|
|
130
|
+
// INSIDE the bundle could point outside it (the `..` and path.resolve checks
|
|
131
|
+
// above are lexical and cannot see this). Resolve symlinks on both the target
|
|
132
|
+
// and the registered folder and require the real target to stay within.
|
|
133
|
+
try {
|
|
134
|
+
const [realTarget, realFolder] = await Promise.all([realpath(resolved), realpath(folderPath)]);
|
|
135
|
+
if (realTarget !== realFolder && !realTarget.startsWith(realFolder + path.sep)) {
|
|
136
|
+
send404(res);
|
|
137
|
+
return true;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
catch {
|
|
141
|
+
send404(res);
|
|
142
|
+
return true;
|
|
143
|
+
}
|
|
129
144
|
const ext = path.extname(resolved).toLowerCase();
|
|
130
145
|
const mime = MIME_TYPES[ext] || 'application/octet-stream';
|
|
131
146
|
res.writeHead(200, {
|
|
132
147
|
'Content-Type': mime,
|
|
133
148
|
'Cache-Control': 'no-cache, no-store, must-revalidate',
|
|
149
|
+
// Agent-authored content served same-origin with the control PWA. Two
|
|
150
|
+
// hardening headers (review finding M4):
|
|
151
|
+
// - nosniff: never let the browser MIME-sniff a bundle file into a script.
|
|
152
|
+
// - CSP connect-src http:/https: blocks ws:/wss:, so a (prompt-injected)
|
|
153
|
+
// bundle cannot open pimote's authenticated WebSocket and drive sessions.
|
|
154
|
+
// We stay same-origin (not sandboxed) so bundles keep localStorage and
|
|
155
|
+
// same-origin asset fetches; same-origin HTTP fetch + storage reads remain
|
|
156
|
+
// possible but are contained by the single-user model + edge auth. Full
|
|
157
|
+
// isolation would require serving bundles from a separate origin.
|
|
158
|
+
'X-Content-Type-Options': 'nosniff',
|
|
159
|
+
'Content-Security-Policy': 'connect-src http: https:',
|
|
134
160
|
});
|
|
135
161
|
if (req.method === 'HEAD') {
|
|
136
162
|
res.end();
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Type } from 'typebox';
|
|
2
|
-
import { executeRegisterTool, executeRemoveTool } from './tools.js';
|
|
2
|
+
import { executeRegisterTool, executeRemoveTool, resolveSlugCollision } from './tools.js';
|
|
3
3
|
import { STATIC_HOST_TOOL_DESCRIPTION } from './prompt.js';
|
|
4
4
|
export { InMemoryStaticHostRegistry } from './registry.js';
|
|
5
5
|
export { FileStaticHostStore } from './store.js';
|
|
@@ -94,23 +94,35 @@ export function createStaticHostExtension(opts) {
|
|
|
94
94
|
const file = await store.read(sessionId);
|
|
95
95
|
if (!file)
|
|
96
96
|
return;
|
|
97
|
+
// Replay persisted entries, re-suffixing any slug already taken (another
|
|
98
|
+
// session persisted the same slug, or this session reloaded earlier this
|
|
99
|
+
// boot). Re-suffixing keeps the bundle reachable; the old behaviour left a
|
|
100
|
+
// phantom entry in the file that the remove tool could never match (its
|
|
101
|
+
// registry lookup failed) and that got re-appended on every future write.
|
|
102
|
+
const replayed = [];
|
|
103
|
+
let mutated = false;
|
|
97
104
|
for (const entry of file.entries) {
|
|
105
|
+
let slug = entry.slug;
|
|
106
|
+
if (registry.has(slug)) {
|
|
107
|
+
slug = resolveSlugCollision(slug, registry);
|
|
108
|
+
mutated = true;
|
|
109
|
+
}
|
|
98
110
|
try {
|
|
99
|
-
registry.register({
|
|
100
|
-
|
|
101
|
-
folderPath: entry.folderPath,
|
|
102
|
-
sessionId,
|
|
103
|
-
cardMetadata: entry.cardMetadata,
|
|
104
|
-
});
|
|
111
|
+
registry.register({ slug, folderPath: entry.folderPath, sessionId, cardMetadata: entry.cardMetadata });
|
|
112
|
+
replayed.push(slug === entry.slug ? entry : { ...entry, slug });
|
|
105
113
|
}
|
|
106
114
|
catch (err) {
|
|
107
|
-
//
|
|
108
|
-
//
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
console.warn(`[static-host] session_start: skipping persisted entry ${entry.slug} for session ${sessionId}`, err);
|
|
115
|
+
// Couldn't register even after re-suffixing — drop it from the file so
|
|
116
|
+
// it doesn't linger as a phantom on the next write.
|
|
117
|
+
mutated = true;
|
|
118
|
+
console.warn(`[static-host] session_start: dropping unregisterable entry ${entry.slug} for session ${sessionId}`, err);
|
|
112
119
|
}
|
|
113
120
|
}
|
|
121
|
+
// Persist the reconciled list only if something changed, so the common
|
|
122
|
+
// conflict-free replay performs no write.
|
|
123
|
+
if (mutated) {
|
|
124
|
+
await store.write(sessionId, { version: 1, entries: replayed });
|
|
125
|
+
}
|
|
114
126
|
emitPanelCards(pi, sessionId);
|
|
115
127
|
});
|
|
116
128
|
pi.on('session_shutdown', async (_ev, ctx) => {
|
|
@@ -22,7 +22,16 @@ export class FileStaticHostStore {
|
|
|
22
22
|
return undefined;
|
|
23
23
|
throw err;
|
|
24
24
|
}
|
|
25
|
-
|
|
25
|
+
try {
|
|
26
|
+
return JSON.parse(raw);
|
|
27
|
+
}
|
|
28
|
+
catch (err) {
|
|
29
|
+
// A truncated/corrupt file must not reject out of the async session_start
|
|
30
|
+
// handler (which could break session load). Treat it as "no state" — the
|
|
31
|
+
// next write overwrites it atomically.
|
|
32
|
+
console.warn(`[static-host] ignoring corrupt store file ${path}:`, err.message ?? err);
|
|
33
|
+
return undefined;
|
|
34
|
+
}
|
|
26
35
|
}
|
|
27
36
|
async write(sessionId, file) {
|
|
28
37
|
await mkdir(this.storeDir, { recursive: true });
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { stat } from 'node:fs/promises';
|
|
2
|
-
import { isAbsolute, join } from 'node:path';
|
|
2
|
+
import { isAbsolute, join, resolve } from 'node:path';
|
|
3
3
|
/**
|
|
4
4
|
* Validates and normalises a slug.
|
|
5
5
|
*
|
|
@@ -56,6 +56,10 @@ export async function executeRegisterTool(input, deps) {
|
|
|
56
56
|
if (typeof input.folder !== 'string' || !isAbsolute(input.folder)) {
|
|
57
57
|
throw new Error(`folder must be an absolute path: ${JSON.stringify(input.folder)}`);
|
|
58
58
|
}
|
|
59
|
+
// Normalize before stat/persist/register: a trailing slash or internal `..`
|
|
60
|
+
// segment would otherwise break the http-handler's containment check (which
|
|
61
|
+
// compares a resolved request path against `folderPath + path.sep`).
|
|
62
|
+
input.folder = resolve(input.folder);
|
|
59
63
|
let folderStat;
|
|
60
64
|
try {
|
|
61
65
|
folderStat = await stat(input.folder);
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
// place to handle them because it lives at the boundary between
|
|
20
20
|
// "do we even have a connection" and "what should the agent do".)
|
|
21
21
|
import { reduceLifecycle, applyLifecycleResult, bufferOrPassFrame } from './reducers/lifecycle.js';
|
|
22
|
-
import { reduceStreaming } from './reducers/streaming.js';
|
|
22
|
+
import { reduceStreaming, currentStreamingSpeakId } from './reducers/streaming.js';
|
|
23
23
|
import { reduceWalkback, applyWalkbackResult } from './reducers/walkback.js';
|
|
24
24
|
export function reduce(prev, event, reducers) {
|
|
25
25
|
let state = prev;
|
|
@@ -53,7 +53,19 @@ export function reduce(prev, event, reducers) {
|
|
|
53
53
|
}
|
|
54
54
|
}
|
|
55
55
|
// ---- Walkback ----------------------------------------------------------
|
|
56
|
-
|
|
56
|
+
// Pass:
|
|
57
|
+
// - lifecycle kind, so abort/rollback frames arriving when no call is active
|
|
58
|
+
// are dropped (e.g. in flight during teardown) — a stray abort would
|
|
59
|
+
// otherwise abort a text-mode turn. (H3)
|
|
60
|
+
// - the in-flight speak id, so an interrupt targeting a still-streaming
|
|
61
|
+
// speak resolves correctly when the frame omits a speak_id. (gap 2)
|
|
62
|
+
// `state.message` is post-streaming here, so its blocks still hold the
|
|
63
|
+
// in-flight speak (ws:incoming doesn't clear them).
|
|
64
|
+
const wb = reduceWalkback(state.walkback, event, {
|
|
65
|
+
lastEmittedSpeakId: state.lastEmittedSpeakId,
|
|
66
|
+
currentStreamingSpeakId: currentStreamingSpeakId(state.message),
|
|
67
|
+
lifecycleKind: state.lifecycle.kind,
|
|
68
|
+
});
|
|
57
69
|
state = applyWalkbackResult(state, wb);
|
|
58
70
|
actions.push(...wb.actions);
|
|
59
71
|
// Clear lastEmittedSpeakId on full deactivation so a subsequent call
|
|
@@ -38,7 +38,11 @@ export function reduceLifecycle(prev, event, ctx) {
|
|
|
38
38
|
modelId: ctx.config.defaultInterpreterModel.modelId,
|
|
39
39
|
});
|
|
40
40
|
}
|
|
41
|
-
|
|
41
|
+
// Steer the start sentinel rather than aborting: if the agent is mid-task
|
|
42
|
+
// when the call binds, preserve that work. The executor injects into the
|
|
43
|
+
// running turn when busy, and sends normally (triggering the greeting)
|
|
44
|
+
// when idle. (M7)
|
|
45
|
+
actions.push({ kind: 'send_user_message', text: VOICE_CALL_STARTED_SENTINEL, deliverAs: 'steer' });
|
|
42
46
|
actions.push({ kind: 'open_ws', url: event.msg.speechmuxWsUrl });
|
|
43
47
|
return {
|
|
44
48
|
next: {
|
|
@@ -98,21 +102,23 @@ export function reduceLifecycle(prev, event, ctx) {
|
|
|
98
102
|
return { next: prev, interpreterAppliedNow: false, actions: [] };
|
|
99
103
|
}
|
|
100
104
|
// Drop any buffered frames; the shell will rebuild from scratch
|
|
101
|
-
// on the next activate.
|
|
105
|
+
// on the next activate. Carry the sessionId on the action from the
|
|
106
|
+
// pre-transition state — after this we're dormant. (M1)
|
|
102
107
|
return {
|
|
103
108
|
next: { kind: 'dormant' },
|
|
104
109
|
interpreterAppliedNow: false,
|
|
105
|
-
actions: [{ kind: 'emit_deactivate_request' }],
|
|
110
|
+
actions: [{ kind: 'emit_deactivate_request', sessionId: prev.sessionId }],
|
|
106
111
|
};
|
|
107
112
|
}
|
|
108
113
|
case 'ws:disconnected': {
|
|
109
114
|
if (prev.kind === 'dormant') {
|
|
110
115
|
return { next: prev, interpreterAppliedNow: false, actions: [] };
|
|
111
116
|
}
|
|
117
|
+
// prev is activating|active here — both carry sessionId. (M1)
|
|
112
118
|
return {
|
|
113
119
|
next: { kind: 'dormant' },
|
|
114
120
|
interpreterAppliedNow: false,
|
|
115
|
-
actions: [{ kind: 'emit_deactivate_request' }],
|
|
121
|
+
actions: [{ kind: 'emit_deactivate_request', sessionId: prev.sessionId }],
|
|
116
122
|
};
|
|
117
123
|
}
|
|
118
124
|
default:
|
|
@@ -44,15 +44,41 @@ const noFrames = (next) => ({
|
|
|
44
44
|
export function reduceStreaming(prev, event) {
|
|
45
45
|
switch (event.type) {
|
|
46
46
|
case 'sdk:message_start':
|
|
47
|
-
// Assistant message starts → wipe per-block state
|
|
47
|
+
// Assistant message starts → wipe per-block state and clear the
|
|
48
|
+
// interrupt latch (a new turn can emit again). (Filtering on
|
|
48
49
|
// role==='assistant' happens at the dispatcher.)
|
|
49
|
-
return noFrames({ blocks: new Map() });
|
|
50
|
+
return noFrames({ blocks: new Map(), interrupted: false });
|
|
51
|
+
case 'ws:incoming':
|
|
52
|
+
// A barge-in latches `interrupted` so we stop feeding speechmux tokens
|
|
53
|
+
// for an utterance it already aborted. Reset on the next message_start.
|
|
54
|
+
if (event.frame.type === 'abort' || event.frame.type === 'rollback') {
|
|
55
|
+
return noFrames({ ...prev, interrupted: true });
|
|
56
|
+
}
|
|
57
|
+
return noFrames(prev);
|
|
50
58
|
case 'sdk:toolcall_start':
|
|
59
|
+
if (prev.interrupted)
|
|
60
|
+
return noFrames(prev);
|
|
51
61
|
return noFrames(setBlock(prev, event.contentIndex, blockFromPartial(event.contentIndex, event.partial)));
|
|
52
62
|
case 'sdk:toolcall_delta':
|
|
63
|
+
if (prev.interrupted)
|
|
64
|
+
return noFrames(prev);
|
|
53
65
|
return reduceDelta(prev, event.contentIndex, event.delta, event.partial);
|
|
54
66
|
case 'sdk:toolcall_end':
|
|
67
|
+
if (prev.interrupted)
|
|
68
|
+
return noFrames(prev);
|
|
55
69
|
return reduceEnd(prev, event.contentIndex, event.toolCall);
|
|
70
|
+
case 'sdk:turn_end':
|
|
71
|
+
// Release the floor for the turn's last spoken utterance. Routed as a
|
|
72
|
+
// frame so the lifecycle layer buffers it during `activating` and passes
|
|
73
|
+
// it during `active` — the same discipline as token/end frames. (M2)
|
|
74
|
+
return {
|
|
75
|
+
next: prev,
|
|
76
|
+
frames: [event.lastSpeakToolCallId ? { type: 'floor_released', speak_id: event.lastSpeakToolCallId } : { type: 'floor_released' }],
|
|
77
|
+
endedSpeakIds: [],
|
|
78
|
+
};
|
|
79
|
+
case 'sdk:agent_end':
|
|
80
|
+
// Surface a harness-side error to speechmux. (M2)
|
|
81
|
+
return event.error ? { next: prev, frames: [{ type: 'error', message: event.error }], endedSpeakIds: [] } : noFrames(prev);
|
|
56
82
|
default:
|
|
57
83
|
return noFrames(prev);
|
|
58
84
|
}
|
|
@@ -165,7 +191,17 @@ function setBlock(state, idx, block) {
|
|
|
165
191
|
return state;
|
|
166
192
|
const blocks = new Map(state.blocks);
|
|
167
193
|
blocks.set(idx, block);
|
|
168
|
-
return { blocks };
|
|
194
|
+
return { ...state, blocks };
|
|
195
|
+
}
|
|
196
|
+
/** Toolcall id of the speak() block currently mid-stream (if any). The most
|
|
197
|
+
* likely walkback target when speechmux's frame omits a speak_id: an in-flight
|
|
198
|
+
* speak hasn't emitted its `end`, so it isn't in `lastEmittedSpeakId` yet. */
|
|
199
|
+
export function currentStreamingSpeakId(message) {
|
|
200
|
+
for (const block of message.blocks.values()) {
|
|
201
|
+
if (block.kind === 'speak_streaming' && block.toolCallId)
|
|
202
|
+
return block.toolCallId;
|
|
203
|
+
}
|
|
204
|
+
return null;
|
|
169
205
|
}
|
|
170
206
|
function partialBlock(partial, idx) {
|
|
171
207
|
const c = partial?.content;
|
|
@@ -18,27 +18,30 @@
|
|
|
18
18
|
// such ambiguity.
|
|
19
19
|
import { VOICE_INTERRUPT_CUSTOM_TYPE } from '../../../../../shared/dist/index.js';
|
|
20
20
|
import { walkBack } from '../../walk-back.js';
|
|
21
|
-
/** Resolve which speak() id to walk back to. Prefers what speechmux
|
|
22
|
-
*
|
|
23
|
-
* is available (
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
if (frameSpeakId)
|
|
27
|
-
return frameSpeakId;
|
|
28
|
-
return lastEmittedSpeakId;
|
|
21
|
+
/** Resolve which speak() id to walk back to. Prefers what speechmux echoes;
|
|
22
|
+
* then the in-flight speak; then the last fully-emitted one. Returns null if
|
|
23
|
+
* none is available (degrade gracefully — abort the agent, skip the rewrite). */
|
|
24
|
+
function resolveTarget(frameSpeakId, ctx) {
|
|
25
|
+
return frameSpeakId ?? ctx.currentStreamingSpeakId ?? ctx.lastEmittedSpeakId;
|
|
29
26
|
}
|
|
30
|
-
export function reduceWalkback(prev,
|
|
27
|
+
export function reduceWalkback(prev, event, ctx) {
|
|
31
28
|
switch (event.type) {
|
|
32
29
|
case 'ws:incoming': {
|
|
33
30
|
const f = event.frame;
|
|
34
31
|
if (f.type === 'user')
|
|
35
32
|
return { next: prev, actions: [] };
|
|
33
|
+
// Only honour barge-in (abort/rollback) while a call is live. A frame
|
|
34
|
+
// arriving when dormant (e.g. in flight during teardown, or from a
|
|
35
|
+
// just-discarded client) must not abort an unrelated text-mode turn. (H3)
|
|
36
|
+
if (ctx.lifecycleKind !== 'active' && ctx.lifecycleKind !== 'activating') {
|
|
37
|
+
return { next: prev, actions: [] };
|
|
38
|
+
}
|
|
36
39
|
const heardText = f.type === 'rollback' ? f.heard_text : '';
|
|
37
40
|
const data = {
|
|
38
41
|
heard_text: heardText,
|
|
39
42
|
kind: f.type === 'rollback' ? 'rollback' : 'abort',
|
|
40
43
|
};
|
|
41
|
-
const target = resolveTarget(f.speak_id,
|
|
44
|
+
const target = resolveTarget(f.speak_id, ctx);
|
|
42
45
|
const actions = [{ kind: 'abort_agent' }, { kind: 'append_custom_entry', customType: VOICE_INTERRUPT_CUSTOM_TYPE, data }];
|
|
43
46
|
if (target === null) {
|
|
44
47
|
// No target available → can't rewrite. Just abort + record the
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
export function initialState() {
|
|
14
14
|
return {
|
|
15
15
|
lifecycle: { kind: 'dormant' },
|
|
16
|
-
message: { blocks: new Map() },
|
|
16
|
+
message: { blocks: new Map(), interrupted: false },
|
|
17
17
|
walkback: { kind: 'idle' },
|
|
18
18
|
interpreterApplied: false,
|
|
19
19
|
lastEmittedSpeakId: null,
|
|
@@ -83,10 +83,20 @@ export function createVoiceExtension(opts) {
|
|
|
83
83
|
let state = initialState();
|
|
84
84
|
let lastCtx = null;
|
|
85
85
|
let speechmuxClient = null;
|
|
86
|
-
|
|
87
|
-
|
|
86
|
+
// Monotonic generation tag for the speechmux client. Bumped on every
|
|
87
|
+
// open_ws and close_ws so a discarded client's late callbacks (frame /
|
|
88
|
+
// disconnect) and an in-flight connect that the call already abandoned
|
|
89
|
+
// can detect they are stale and no-op. See voice lifecycle review (H1/H2).
|
|
90
|
+
let clientGeneration = 0;
|
|
91
|
+
// Detaches the current client's onFrame/onDisconnect listeners. Captured
|
|
92
|
+
// on open so we can unsubscribe when discarding the client.
|
|
93
|
+
let detachClientListeners = null;
|
|
88
94
|
// ---- Reducer driver --------------------------------------------------
|
|
89
|
-
|
|
95
|
+
// Synchronous core: reduce + write back state + trace. Returns the actions
|
|
96
|
+
// to execute. Split out from `dispatch` so the `context` hook can run a
|
|
97
|
+
// reduction and read the resulting rewrite synchronously — pi's context
|
|
98
|
+
// hook must return the rewritten messages inline, and `dispatch` is async. (M5)
|
|
99
|
+
const reduceAndApply = (event) => {
|
|
90
100
|
const evtTrace = traceEvent(event);
|
|
91
101
|
const lifecycleBefore = state.lifecycle.kind;
|
|
92
102
|
const { next, actions } = reduce(state, event, {
|
|
@@ -101,6 +111,10 @@ export function createVoiceExtension(opts) {
|
|
|
101
111
|
blocks: blockSummary(state.message.blocks),
|
|
102
112
|
}));
|
|
103
113
|
}
|
|
114
|
+
return actions;
|
|
115
|
+
};
|
|
116
|
+
const dispatch = async (event) => {
|
|
117
|
+
const actions = reduceAndApply(event);
|
|
104
118
|
for (const action of actions) {
|
|
105
119
|
try {
|
|
106
120
|
await execute(action);
|
|
@@ -126,6 +140,19 @@ export function createVoiceExtension(opts) {
|
|
|
126
140
|
return;
|
|
127
141
|
}
|
|
128
142
|
case 'send_user_message': {
|
|
143
|
+
// Steer-on-activate (deliverAs:'steer'): preserve in-flight work
|
|
144
|
+
// rather than aborting it. When the agent is busy, inject the
|
|
145
|
+
// message into the running turn (no abort); when idle there's no
|
|
146
|
+
// turn to steer into, so send normally to trigger the turn. (M7)
|
|
147
|
+
if (action.deliverAs === 'steer') {
|
|
148
|
+
if (lastCtx && !lastCtx.isIdle()) {
|
|
149
|
+
pi.sendUserMessage(action.text, { deliverAs: 'steer' });
|
|
150
|
+
}
|
|
151
|
+
else {
|
|
152
|
+
pi.sendUserMessage(action.text);
|
|
153
|
+
}
|
|
154
|
+
return;
|
|
155
|
+
}
|
|
129
156
|
// Ensure the agent is idle before sending. If it isn't, fire
|
|
130
157
|
// a synthesized barge-in (ctx.abort()) and wait for teardown
|
|
131
158
|
// — covers the case where the user spoke while the worker
|
|
@@ -154,7 +181,11 @@ export function createVoiceExtension(opts) {
|
|
|
154
181
|
return;
|
|
155
182
|
}
|
|
156
183
|
case 'open_ws': {
|
|
157
|
-
//
|
|
184
|
+
// Invalidate any prior client: bump the generation so its in-flight
|
|
185
|
+
// callbacks no-op, detach its listeners, and close it.
|
|
186
|
+
clientGeneration++;
|
|
187
|
+
detachClientListeners?.();
|
|
188
|
+
detachClientListeners = null;
|
|
158
189
|
try {
|
|
159
190
|
speechmuxClient?.close();
|
|
160
191
|
}
|
|
@@ -162,24 +193,53 @@ export function createVoiceExtension(opts) {
|
|
|
162
193
|
/* ignore */
|
|
163
194
|
}
|
|
164
195
|
speechmuxClient = null;
|
|
196
|
+
const gen = clientGeneration;
|
|
165
197
|
try {
|
|
166
198
|
const client = await clientFactory({ wsUrl: action.url });
|
|
199
|
+
// If the call ended (or another open started) while we were
|
|
200
|
+
// connecting, this client is stale — close it and bail. (H2)
|
|
201
|
+
if (gen !== clientGeneration) {
|
|
202
|
+
try {
|
|
203
|
+
client.close();
|
|
204
|
+
}
|
|
205
|
+
catch {
|
|
206
|
+
/* ignore */
|
|
207
|
+
}
|
|
208
|
+
return;
|
|
209
|
+
}
|
|
167
210
|
speechmuxClient = client;
|
|
168
|
-
client.onFrame((frame) => {
|
|
211
|
+
const offFrame = client.onFrame((frame) => {
|
|
212
|
+
if (gen !== clientGeneration)
|
|
213
|
+
return;
|
|
169
214
|
void dispatch({ type: 'ws:incoming', frame });
|
|
170
215
|
});
|
|
171
|
-
client.onDisconnect(() => {
|
|
216
|
+
const offDisconnect = client.onDisconnect(() => {
|
|
217
|
+
if (gen !== clientGeneration)
|
|
218
|
+
return;
|
|
172
219
|
void dispatch({ type: 'ws:disconnected' });
|
|
173
220
|
});
|
|
221
|
+
detachClientListeners = () => {
|
|
222
|
+
offFrame();
|
|
223
|
+
offDisconnect();
|
|
224
|
+
};
|
|
174
225
|
await dispatch({ type: 'ws:opened' });
|
|
175
226
|
}
|
|
176
227
|
catch (err) {
|
|
228
|
+
// A stale open's failure is irrelevant — the FSM has moved on.
|
|
229
|
+
if (gen !== clientGeneration)
|
|
230
|
+
return;
|
|
177
231
|
console.warn('[voice] speechmux open failed', err);
|
|
178
232
|
await dispatch({ type: 'ws:open_failed', error: err });
|
|
179
233
|
}
|
|
180
234
|
return;
|
|
181
235
|
}
|
|
182
236
|
case 'close_ws': {
|
|
237
|
+
// Bump the generation so a client still mid-connect (and any late
|
|
238
|
+
// close/frame callbacks from the current one) is invalidated and
|
|
239
|
+
// auto-cleaned. (H1/H2)
|
|
240
|
+
clientGeneration++;
|
|
241
|
+
detachClientListeners?.();
|
|
242
|
+
detachClientListeners = null;
|
|
183
243
|
try {
|
|
184
244
|
speechmuxClient?.close();
|
|
185
245
|
}
|
|
@@ -213,19 +273,16 @@ export function createVoiceExtension(opts) {
|
|
|
213
273
|
return;
|
|
214
274
|
}
|
|
215
275
|
case 'emit_deactivate_request': {
|
|
216
|
-
|
|
276
|
+
// sessionId is carried on the action (populated by the reducer from
|
|
277
|
+
// the pre-transition state) — reading it from `state` here would be
|
|
278
|
+
// too late, since the lifecycle has already gone dormant. (M1)
|
|
217
279
|
const msg = {
|
|
218
280
|
type: 'pimote:voice:deactivate',
|
|
219
|
-
sessionId,
|
|
281
|
+
sessionId: action.sessionId,
|
|
220
282
|
};
|
|
221
283
|
pi.events.emit('pimote:voice:deactivate', msg);
|
|
222
284
|
return;
|
|
223
285
|
}
|
|
224
|
-
case 'rewrite_context': {
|
|
225
|
-
// Stash; the `context` hook below reads this on its return.
|
|
226
|
-
pendingContextRewrite = action.messages;
|
|
227
|
-
return;
|
|
228
|
-
}
|
|
229
286
|
}
|
|
230
287
|
};
|
|
231
288
|
// ---- EventBus listeners ---------------------------------------------
|
|
@@ -319,43 +376,42 @@ export function createVoiceExtension(opts) {
|
|
|
319
376
|
}
|
|
320
377
|
});
|
|
321
378
|
pi.on('turn_end', (event) => {
|
|
322
|
-
|
|
323
|
-
|
|
379
|
+
// Route through the FSM so the floor_released frame is buffered when the
|
|
380
|
+
// greeting turn ends before the WS handshake completes, instead of being
|
|
381
|
+
// dropped. Only dispatch when the turn actually spoke. (M2)
|
|
324
382
|
const lastSpeakResult = [...event.toolResults].reverse().find((result) => result.toolName === 'speak');
|
|
325
383
|
if (!lastSpeakResult)
|
|
326
384
|
return;
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
console.warn('[voice] speechmux send failed', 'floor_released', err);
|
|
332
|
-
}
|
|
385
|
+
void dispatch({
|
|
386
|
+
type: 'sdk:turn_end',
|
|
387
|
+
lastSpeakToolCallId: typeof lastSpeakResult.toolCallId === 'string' ? lastSpeakResult.toolCallId : null,
|
|
388
|
+
});
|
|
333
389
|
});
|
|
334
390
|
pi.on('agent_end', (event) => {
|
|
335
|
-
|
|
336
|
-
return;
|
|
391
|
+
// Route through the FSM (same buffering rationale as turn_end). (M2)
|
|
337
392
|
const error = event.error;
|
|
338
|
-
|
|
339
|
-
return;
|
|
340
|
-
try {
|
|
341
|
-
speechmuxClient.send({ type: 'error', message: error });
|
|
342
|
-
}
|
|
343
|
-
catch (err) {
|
|
344
|
-
console.warn('[voice] speechmux send failed', 'error', err);
|
|
345
|
-
}
|
|
393
|
+
void dispatch({ type: 'sdk:agent_end', error: typeof error === 'string' && error.length > 0 ? error : null });
|
|
346
394
|
});
|
|
347
395
|
pi.on('context', (event, ctx) => {
|
|
348
396
|
lastCtx = ctx;
|
|
349
|
-
// The walkback reducer always runs walkBack (even when no rewrite
|
|
350
|
-
//
|
|
351
|
-
//
|
|
352
|
-
//
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
397
|
+
// The walkback reducer always runs walkBack (even when no rewrite is
|
|
398
|
+
// pending — to strip aborted-empty assistants) and emits the result as a
|
|
399
|
+
// `rewrite_context` action. Drive the reduction synchronously and read
|
|
400
|
+
// the rewrite straight off the returned actions: no module-level slot,
|
|
401
|
+
// no dependence on action ordering or on `execute` staying side-effect
|
|
402
|
+
// free before the rewrite. (M5)
|
|
403
|
+
const actions = reduceAndApply({ type: 'sdk:context', messages: event.messages });
|
|
404
|
+
let rewritten;
|
|
405
|
+
for (const action of actions) {
|
|
406
|
+
if (action.kind === 'rewrite_context') {
|
|
407
|
+
rewritten = action.messages;
|
|
408
|
+
}
|
|
409
|
+
else {
|
|
410
|
+
// Any other side effects still run on the normal async path.
|
|
411
|
+
void execute(action);
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
return rewritten ? { messages: rewritten } : undefined;
|
|
359
415
|
});
|
|
360
416
|
};
|
|
361
417
|
}
|