@alexkroman1/aai 1.4.3 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -4
- package/CHANGELOG.md +6 -0
- package/dist/host/runtime-barrel.js +16 -4
- package/host/s2s.test.ts +17 -2
- package/host/s2s.ts +22 -4
- package/host/session.test.ts +17 -0
- package/host/session.ts +8 -0
- package/package.json +1 -1
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @alexkroman1/aai@1.4.3 build /home/runner/work/agent/agent/packages/aai
|
|
2
|
+
> @alexkroman1/aai@1.4.4 build /home/runner/work/agent/agent/packages/aai
|
|
3
3
|
> tsdown && tsc -p tsconfig.build.json
|
|
4
4
|
|
|
5
5
|
[34mℹ[39m [34mtsdown v0.21.7[39m powered by [38;2;255;126;23mrolldown v1.0.0-rc.12[39m
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
[34mℹ[39m target: [34mnode22[39m
|
|
9
9
|
[34mℹ[39m tsconfig: [34mtsconfig.json[39m
|
|
10
10
|
[34mℹ[39m Build start
|
|
11
|
-
[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m [
|
|
11
|
+
[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m [2m76.29 kB[22m [2m│ gzip: 22.68 kB[22m
|
|
12
12
|
[34mℹ[39m [2mdist/[22m[1msdk/protocol.js[22m [2m 4.75 kB[22m [2m│ gzip: 1.76 kB[22m
|
|
13
13
|
[34mℹ[39m [2mdist/[22m[1mindex.js[22m [2m 2.88 kB[22m [2m│ gzip: 1.24 kB[22m
|
|
14
14
|
[34mℹ[39m [2mdist/[22m[1msdk/manifest-barrel.js[22m [2m 0.36 kB[22m [2m│ gzip: 0.20 kB[22m
|
|
@@ -22,5 +22,5 @@
|
|
|
22
22
|
[34mℹ[39m [2mdist/[22massemblyai-Cxg9eobY.js [2m 0.53 kB[22m [2m│ gzip: 0.35 kB[22m
|
|
23
23
|
[34mℹ[39m [2mdist/[22manthropic-BrUCPKUc.js [2m 0.23 kB[22m [2m│ gzip: 0.18 kB[22m
|
|
24
24
|
[34mℹ[39m [2mdist/[22mcartesia-DwDk2tEu.js [2m 0.22 kB[22m [2m│ gzip: 0.17 kB[22m
|
|
25
|
-
[34mℹ[39m 14 files, total:
|
|
26
|
-
[32m✔[39m Build complete in [
|
|
25
|
+
[34mℹ[39m 14 files, total: 101.11 kB
|
|
26
|
+
[32m✔[39m Build complete in [32m41ms[39m
|
package/CHANGELOG.md
CHANGED
(diff content for this file is not shown)
package/dist/host/runtime-barrel.js
CHANGED
|
@@ -1326,17 +1326,23 @@ function parseS2sMessage(obj) {
|
|
|
1326
1326
|
const result = S2sMessageSchema.safeParse(obj);
|
|
1327
1327
|
return result.success ? result.data : void 0;
|
|
1328
1328
|
}
|
|
1329
|
-
function dispatchS2sMessage(emitter, msg) {
|
|
1329
|
+
function dispatchS2sMessage(emitter, msg, state) {
|
|
1330
1330
|
switch (msg.type) {
|
|
1331
1331
|
case "session.ready":
|
|
1332
1332
|
emitter.emit("ready", { sessionId: msg.session_id });
|
|
1333
1333
|
break;
|
|
1334
1334
|
case "session.updated": break;
|
|
1335
1335
|
case "input.speech.started":
|
|
1336
|
-
emitter.emit("event", { type: "speech_started" });
|
|
1336
|
+
if (!state.speechActive) {
|
|
1337
|
+
state.speechActive = true;
|
|
1338
|
+
emitter.emit("event", { type: "speech_started" });
|
|
1339
|
+
}
|
|
1337
1340
|
break;
|
|
1338
1341
|
case "input.speech.stopped":
|
|
1339
|
-
emitter.emit("event", { type: "speech_stopped" });
|
|
1342
|
+
if (state.speechActive) {
|
|
1343
|
+
state.speechActive = false;
|
|
1344
|
+
emitter.emit("event", { type: "speech_stopped" });
|
|
1345
|
+
}
|
|
1340
1346
|
break;
|
|
1341
1347
|
case "transcript.user":
|
|
1342
1348
|
emitter.emit("event", {
|
|
@@ -1382,6 +1388,7 @@ function connectS2s(opts) {
|
|
|
1382
1388
|
log.info("S2S connecting", { url: config.wssUrl });
|
|
1383
1389
|
const ws = createWebSocket(config.wssUrl, { headers: { Authorization: `Bearer ${apiKey}` } });
|
|
1384
1390
|
const emitter = createNanoEvents();
|
|
1391
|
+
const dispatchState = { speechActive: false };
|
|
1385
1392
|
let opened = false;
|
|
1386
1393
|
function send(msg) {
|
|
1387
1394
|
if (ws.readyState !== 1) {
|
|
@@ -1478,7 +1485,7 @@ function connectS2s(opts) {
|
|
|
1478
1485
|
log.warn(`S2S << unrecognised message type: ${obj.type ?? JSON.stringify(raw).slice(0, 200)}`);
|
|
1479
1486
|
return;
|
|
1480
1487
|
}
|
|
1481
|
-
dispatchS2sMessage(emitter, parsed);
|
|
1488
|
+
dispatchS2sMessage(emitter, parsed, dispatchState);
|
|
1482
1489
|
}
|
|
1483
1490
|
ws.addEventListener("message", handleS2sMessage);
|
|
1484
1491
|
ws.addEventListener("close", (ev) => {
|
|
@@ -1616,6 +1623,10 @@ function handleReplyCancelled(ctx) {
|
|
|
1616
1623
|
}
|
|
1617
1624
|
function handleReplyDone(ctx) {
|
|
1618
1625
|
const doneReplyId = ctx.reply.currentReplyId;
|
|
1626
|
+
if (doneReplyId === null) {
|
|
1627
|
+
ctx.log.debug("Dropping duplicate reply.done (no active reply)");
|
|
1628
|
+
return;
|
|
1629
|
+
}
|
|
1619
1630
|
const sendPending = () => {
|
|
1620
1631
|
if (ctx.reply.currentReplyId !== doneReplyId) {
|
|
1621
1632
|
ctx.reply.pendingTools = [];
|
|
@@ -1632,6 +1643,7 @@ function handleReplyDone(ctx) {
|
|
|
1632
1643
|
});
|
|
1633
1644
|
ctx.client.playAudioDone();
|
|
1634
1645
|
ctx.client.event({ type: "reply_done" });
|
|
1646
|
+
ctx.reply.currentReplyId = null;
|
|
1635
1647
|
}
|
|
1636
1648
|
};
|
|
1637
1649
|
if (ctx.turnPromise !== null) ctx.turnPromise.then(sendPending);
|
package/host/s2s.test.ts
CHANGED
|
@@ -219,10 +219,25 @@ describe("connectS2s", () => {
|
|
|
219
219
|
const handler = vi.fn();
|
|
220
220
|
handle.on("event", handler);
|
|
221
221
|
|
|
222
|
+
// Prime VAD state — speech_stopped is only forwarded after a speech_started.
|
|
223
|
+
raw.emit("message", Buffer.from(JSON.stringify({ type: "input.speech.started" })));
|
|
222
224
|
raw.emit("message", Buffer.from(JSON.stringify({ type: "input.speech.stopped" })));
|
|
223
225
|
|
|
224
|
-
expect(handler).toHaveBeenCalledOnce();
|
|
225
|
-
expect(handler.mock.calls[0]?.[0]).toEqual({ type: "speech_stopped" });
|
|
226
|
+
expect(handler).toHaveBeenCalledTimes(2);
|
|
227
|
+
expect(handler.mock.calls[0]?.[0]).toEqual({ type: "speech_started" });
|
|
228
|
+
expect(handler.mock.calls[1]?.[0]).toEqual({ type: "speech_stopped" });
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
test("duplicate input.speech.stopped is suppressed", async () => {
|
|
232
|
+
const { raw, handle } = await setupHandle();
|
|
233
|
+
const handler = vi.fn();
|
|
234
|
+
handle.on("event", handler);
|
|
235
|
+
|
|
236
|
+
raw.emit("message", Buffer.from(JSON.stringify({ type: "input.speech.started" })));
|
|
237
|
+
raw.emit("message", Buffer.from(JSON.stringify({ type: "input.speech.stopped" })));
|
|
238
|
+
raw.emit("message", Buffer.from(JSON.stringify({ type: "input.speech.stopped" })));
|
|
239
|
+
|
|
240
|
+
expect(handler.mock.calls.filter((c) => c[0].type === "speech_stopped")).toHaveLength(1);
|
|
226
241
|
});
|
|
227
242
|
|
|
228
243
|
test("transcript.user dispatches 'event' with user_transcript", async () => {
|
package/host/s2s.ts
CHANGED
|
@@ -79,7 +79,18 @@ function parseS2sMessage(obj: Record<string, unknown>): S2sServerMessage | undef
|
|
|
79
79
|
*/
|
|
80
80
|
export type S2sEvent = ClientEvent & { _interrupted?: boolean };
|
|
81
81
|
|
|
82
|
-
function dispatchS2sMessage(emitter: Emitter<S2sEvents>, msg: S2sServerMessage): void {
|
|
82
|
+
/**
|
|
83
|
+
* Per-connection dispatch state. Used to dedup events that the upstream S2S
|
|
84
|
+
* service may emit more than once for a single logical turn (e.g. repeated
|
|
85
|
+
* `input.speech.stopped` after the VAD flips).
|
|
86
|
+
*/
|
|
87
|
+
type DispatchState = { speechActive: boolean };
|
|
88
|
+
|
|
89
|
+
function dispatchS2sMessage(
|
|
90
|
+
emitter: Emitter<S2sEvents>,
|
|
91
|
+
msg: S2sServerMessage,
|
|
92
|
+
state: DispatchState,
|
|
93
|
+
): void {
|
|
83
94
|
switch (msg.type) {
|
|
84
95
|
case "session.ready":
|
|
85
96
|
emitter.emit("ready", { sessionId: msg.session_id });
|
|
@@ -87,10 +98,16 @@ function dispatchS2sMessage(emitter: Emitter<S2sEvents>, msg: S2sServerMessage):
|
|
|
87
98
|
case "session.updated":
|
|
88
99
|
break;
|
|
89
100
|
case "input.speech.started":
|
|
90
|
-
emitter.emit("event", { type: "speech_started" });
|
|
101
|
+
if (!state.speechActive) {
|
|
102
|
+
state.speechActive = true;
|
|
103
|
+
emitter.emit("event", { type: "speech_started" });
|
|
104
|
+
}
|
|
91
105
|
break;
|
|
92
106
|
case "input.speech.stopped":
|
|
93
|
-
emitter.emit("event", { type: "speech_stopped" });
|
|
107
|
+
if (state.speechActive) {
|
|
108
|
+
state.speechActive = false;
|
|
109
|
+
emitter.emit("event", { type: "speech_stopped" });
|
|
110
|
+
}
|
|
94
111
|
break;
|
|
95
112
|
case "transcript.user":
|
|
96
113
|
emitter.emit("event", { type: "user_transcript", text: msg.text });
|
|
@@ -188,6 +205,7 @@ export function connectS2s(opts: ConnectS2sOptions): Promise<S2sHandle> {
|
|
|
188
205
|
});
|
|
189
206
|
|
|
190
207
|
const emitter = createNanoEvents<S2sEvents>();
|
|
208
|
+
const dispatchState: DispatchState = { speechActive: false };
|
|
191
209
|
let opened = false;
|
|
192
210
|
|
|
193
211
|
function send(msg: { type: string; [key: string]: unknown }): void {
|
|
@@ -291,7 +309,7 @@ export function connectS2s(opts: ConnectS2sOptions): Promise<S2sHandle> {
|
|
|
291
309
|
);
|
|
292
310
|
return;
|
|
293
311
|
}
|
|
294
|
-
dispatchS2sMessage(emitter, parsed);
|
|
312
|
+
dispatchS2sMessage(emitter, parsed, dispatchState);
|
|
295
313
|
}
|
|
296
314
|
|
|
297
315
|
ws.addEventListener("message", handleS2sMessage);
|
package/host/session.test.ts
CHANGED
|
@@ -168,12 +168,29 @@ describe("createS2sSession", () => {
|
|
|
168
168
|
const { session, client, mockHandle } = setup();
|
|
169
169
|
await session.start();
|
|
170
170
|
|
|
171
|
+
mockHandle._fire("replyStarted", { replyId: "r1" });
|
|
171
172
|
mockHandle._fire("event", { type: "reply_done" });
|
|
172
173
|
|
|
173
174
|
expect(client.audioDoneCount).toBe(1);
|
|
174
175
|
expect(client.events).toContainEvent("reply_done");
|
|
175
176
|
});
|
|
176
177
|
|
|
178
|
+
test("duplicate reply_done is suppressed after reply completes", async () => {
|
|
179
|
+
const { session, client, mockHandle } = setup();
|
|
180
|
+
await session.start();
|
|
181
|
+
|
|
182
|
+
mockHandle._fire("replyStarted", { replyId: "r1" });
|
|
183
|
+
mockHandle._fire("event", { type: "reply_done" });
|
|
184
|
+
mockHandle._fire("event", { type: "reply_done" });
|
|
185
|
+
|
|
186
|
+
const replyDones = client.events.filter(
|
|
187
|
+
(e): e is { type: string } =>
|
|
188
|
+
typeof e === "object" && e !== null && "type" in e && e.type === "reply_done",
|
|
189
|
+
);
|
|
190
|
+
expect(replyDones).toHaveLength(1);
|
|
191
|
+
expect(client.audioDoneCount).toBe(1);
|
|
192
|
+
});
|
|
193
|
+
|
|
177
194
|
test("cancelled event emits cancelled", async () => {
|
|
178
195
|
const { session, client, mockHandle } = setup();
|
|
179
196
|
await session.start();
|
package/host/session.ts
CHANGED
|
@@ -181,6 +181,12 @@ function handleReplyCancelled(ctx: S2sSessionCtx): void {
|
|
|
181
181
|
|
|
182
182
|
function handleReplyDone(ctx: S2sSessionCtx): void {
|
|
183
183
|
const doneReplyId = ctx.reply.currentReplyId;
|
|
184
|
+
// Dedup duplicate reply.done events from the S2S service: once the reply
|
|
185
|
+
// has been fully dispatched (or was never started), currentReplyId is null.
|
|
186
|
+
if (doneReplyId === null) {
|
|
187
|
+
ctx.log.debug("Dropping duplicate reply.done (no active reply)");
|
|
188
|
+
return;
|
|
189
|
+
}
|
|
184
190
|
const sendPending = () => {
|
|
185
191
|
if (ctx.reply.currentReplyId !== doneReplyId) {
|
|
186
192
|
ctx.reply.pendingTools = [];
|
|
@@ -196,6 +202,8 @@ function handleReplyDone(ctx: S2sSessionCtx): void {
|
|
|
196
202
|
}
|
|
197
203
|
ctx.client.playAudioDone();
|
|
198
204
|
ctx.client.event({ type: "reply_done" });
|
|
205
|
+
// Mark reply as finished so any repeated reply.done is dropped above.
|
|
206
|
+
ctx.reply.currentReplyId = null;
|
|
199
207
|
}
|
|
200
208
|
};
|
|
201
209
|
if (ctx.turnPromise !== null) {
|