@drawdream/livespeech 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/index.d.mts +17 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +7 -0
- package/dist/index.mjs +7 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -154,6 +154,22 @@ client.audioStart();
|
|
|
154
154
|
|
|
155
155
|
> **Note:** All other SDK methods and events work identically in both modes. The only code change is adding `pipelineMode: 'composed'` to your session config.
|
|
156
156
|
|
|
157
|
+
### Event Correlation (`turnId`)
|
|
158
|
+
|
|
159
|
+
In Composed mode, the `userTranscript`, `response`, `audio`, and `turnComplete` events include a `turnId` field (monotonic counter starting from 0). Events sharing the same `turnId` belong to the same speech turn — use this to match those events together. In Live mode, `turnId` is not present (it is `undefined` on the event object).
|
|
160
|
+
|
|
161
|
+
```typescript
|
|
162
|
+
client.on('userTranscript', (e) => {
|
|
163
|
+
console.log(`Turn ${e.turnId}: User said '${e.text}'`);
|
|
164
|
+
});
|
|
165
|
+
client.on('response', (e) => {
|
|
166
|
+
if (e.isFinal) console.log(`Turn ${e.turnId}: AI responded '${e.text}'`);
|
|
167
|
+
});
|
|
168
|
+
client.on('turnComplete', (e) => {
|
|
169
|
+
console.log(`Turn ${e.turnId} complete`);
|
|
170
|
+
});
|
|
171
|
+
```
|
|
172
|
+
|
|
157
173
|
---
|
|
158
174
|
|
|
159
175
|
# Advanced API
|
|
@@ -304,6 +320,7 @@ client.audioStart(); // AI speaks immediately
|
|
|
304
320
|
|--------|---------|-------------|
|
|
305
321
|
| `prePrompt` | - | System prompt |
|
|
306
322
|
| `language` | `'en-US'` | Language code |
|
|
323
|
+
| `outputLanguage` | - | TTS voice language override (composed mode only) |
|
|
307
324
|
| `pipelineMode` | `'live'` | `'live'` (~300ms) or `'composed'` (~1-2s) |
|
|
308
325
|
| `aiSpeaksFirst` | `false` | AI initiates (live mode only) |
|
|
309
326
|
| `allowHarmCategory` | `false` | Disable safety filters |
|
package/dist/index.d.mts
CHANGED
|
@@ -222,6 +222,10 @@ interface SessionConfig {
|
|
|
222
222
|
* Session duration configuration (enables duration limits when set)
|
|
223
223
|
*/
|
|
224
224
|
sessionDuration?: SessionDurationConfig;
|
|
225
|
+
/**
|
|
226
|
+
* Enable raw audio archival for QA (default: false)
|
|
227
|
+
*/
|
|
228
|
+
archiveAudio?: boolean;
|
|
225
229
|
}
|
|
226
230
|
/**
|
|
227
231
|
* Internal resolved configuration with defaults applied
|
|
@@ -308,6 +312,8 @@ interface ReadyEvent {
|
|
|
308
312
|
interface UserTranscriptEvent {
|
|
309
313
|
type: 'userTranscript';
|
|
310
314
|
text: string;
|
|
315
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
316
|
+
turnId?: number;
|
|
311
317
|
timestamp: string;
|
|
312
318
|
}
|
|
313
319
|
/**
|
|
@@ -317,6 +323,8 @@ interface ResponseEvent {
|
|
|
317
323
|
type: 'response';
|
|
318
324
|
text: string;
|
|
319
325
|
isFinal: boolean;
|
|
326
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
327
|
+
turnId?: number;
|
|
320
328
|
timestamp: string;
|
|
321
329
|
}
|
|
322
330
|
/**
|
|
@@ -327,6 +335,8 @@ interface AudioEvent {
|
|
|
327
335
|
data: Uint8Array;
|
|
328
336
|
format: string;
|
|
329
337
|
sampleRate: number;
|
|
338
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
339
|
+
turnId?: number;
|
|
330
340
|
timestamp: string;
|
|
331
341
|
}
|
|
332
342
|
/**
|
|
@@ -359,6 +369,8 @@ interface ReconnectingEvent {
|
|
|
359
369
|
*/
|
|
360
370
|
interface TurnCompleteEvent {
|
|
361
371
|
type: 'turnComplete';
|
|
372
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
373
|
+
turnId?: number;
|
|
362
374
|
timestamp: string;
|
|
363
375
|
}
|
|
364
376
|
/**
|
|
@@ -473,6 +485,7 @@ interface StartSessionMessage extends BaseClientMessage {
|
|
|
473
485
|
sessionMaxDurationSeconds?: number;
|
|
474
486
|
enableSessionWarning?: boolean;
|
|
475
487
|
enableSessionGoodbye?: boolean;
|
|
488
|
+
archiveAudio?: boolean;
|
|
476
489
|
}
|
|
477
490
|
/**
|
|
478
491
|
* End session message
|
|
@@ -596,6 +609,7 @@ interface ServerSessionGoodbyeMessage extends BaseServerMessage {
|
|
|
596
609
|
interface ServerUserTranscriptMessage extends BaseServerMessage {
|
|
597
610
|
type: 'userTranscript';
|
|
598
611
|
text: string;
|
|
612
|
+
turnId?: number;
|
|
599
613
|
}
|
|
600
614
|
/**
|
|
601
615
|
* Response message from server
|
|
@@ -604,6 +618,7 @@ interface ServerResponseMessage extends BaseServerMessage {
|
|
|
604
618
|
type: 'response';
|
|
605
619
|
text: string;
|
|
606
620
|
isFinal: boolean;
|
|
621
|
+
turnId?: number;
|
|
607
622
|
}
|
|
608
623
|
/**
|
|
609
624
|
* Audio message from server
|
|
@@ -613,6 +628,7 @@ interface ServerAudioMessage extends BaseServerMessage {
|
|
|
613
628
|
data: string;
|
|
614
629
|
format: string;
|
|
615
630
|
sampleRate: number;
|
|
631
|
+
turnId?: number;
|
|
616
632
|
}
|
|
617
633
|
/**
|
|
618
634
|
* Error message from server
|
|
@@ -634,6 +650,7 @@ interface ServerPongMessage extends BaseServerMessage {
|
|
|
634
650
|
*/
|
|
635
651
|
interface ServerTurnCompleteMessage extends BaseServerMessage {
|
|
636
652
|
type: 'turnComplete';
|
|
653
|
+
turnId?: number;
|
|
637
654
|
}
|
|
638
655
|
/**
|
|
639
656
|
* Ready message from server
|
package/dist/index.d.ts
CHANGED
|
@@ -222,6 +222,10 @@ interface SessionConfig {
|
|
|
222
222
|
* Session duration configuration (enables duration limits when set)
|
|
223
223
|
*/
|
|
224
224
|
sessionDuration?: SessionDurationConfig;
|
|
225
|
+
/**
|
|
226
|
+
* Enable raw audio archival for QA (default: false)
|
|
227
|
+
*/
|
|
228
|
+
archiveAudio?: boolean;
|
|
225
229
|
}
|
|
226
230
|
/**
|
|
227
231
|
* Internal resolved configuration with defaults applied
|
|
@@ -308,6 +312,8 @@ interface ReadyEvent {
|
|
|
308
312
|
interface UserTranscriptEvent {
|
|
309
313
|
type: 'userTranscript';
|
|
310
314
|
text: string;
|
|
315
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
316
|
+
turnId?: number;
|
|
311
317
|
timestamp: string;
|
|
312
318
|
}
|
|
313
319
|
/**
|
|
@@ -317,6 +323,8 @@ interface ResponseEvent {
|
|
|
317
323
|
type: 'response';
|
|
318
324
|
text: string;
|
|
319
325
|
isFinal: boolean;
|
|
326
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
327
|
+
turnId?: number;
|
|
320
328
|
timestamp: string;
|
|
321
329
|
}
|
|
322
330
|
/**
|
|
@@ -327,6 +335,8 @@ interface AudioEvent {
|
|
|
327
335
|
data: Uint8Array;
|
|
328
336
|
format: string;
|
|
329
337
|
sampleRate: number;
|
|
338
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
339
|
+
turnId?: number;
|
|
330
340
|
timestamp: string;
|
|
331
341
|
}
|
|
332
342
|
/**
|
|
@@ -359,6 +369,8 @@ interface ReconnectingEvent {
|
|
|
359
369
|
*/
|
|
360
370
|
interface TurnCompleteEvent {
|
|
361
371
|
type: 'turnComplete';
|
|
372
|
+
/** Monotonic turn counter for event correlation. Only present in Composed mode; `undefined` in Live mode. */
|
|
373
|
+
turnId?: number;
|
|
362
374
|
timestamp: string;
|
|
363
375
|
}
|
|
364
376
|
/**
|
|
@@ -473,6 +485,7 @@ interface StartSessionMessage extends BaseClientMessage {
|
|
|
473
485
|
sessionMaxDurationSeconds?: number;
|
|
474
486
|
enableSessionWarning?: boolean;
|
|
475
487
|
enableSessionGoodbye?: boolean;
|
|
488
|
+
archiveAudio?: boolean;
|
|
476
489
|
}
|
|
477
490
|
/**
|
|
478
491
|
* End session message
|
|
@@ -596,6 +609,7 @@ interface ServerSessionGoodbyeMessage extends BaseServerMessage {
|
|
|
596
609
|
interface ServerUserTranscriptMessage extends BaseServerMessage {
|
|
597
610
|
type: 'userTranscript';
|
|
598
611
|
text: string;
|
|
612
|
+
turnId?: number;
|
|
599
613
|
}
|
|
600
614
|
/**
|
|
601
615
|
* Response message from server
|
|
@@ -604,6 +618,7 @@ interface ServerResponseMessage extends BaseServerMessage {
|
|
|
604
618
|
type: 'response';
|
|
605
619
|
text: string;
|
|
606
620
|
isFinal: boolean;
|
|
621
|
+
turnId?: number;
|
|
607
622
|
}
|
|
608
623
|
/**
|
|
609
624
|
* Audio message from server
|
|
@@ -613,6 +628,7 @@ interface ServerAudioMessage extends BaseServerMessage {
|
|
|
613
628
|
data: string;
|
|
614
629
|
format: string;
|
|
615
630
|
sampleRate: number;
|
|
631
|
+
turnId?: number;
|
|
616
632
|
}
|
|
617
633
|
/**
|
|
618
634
|
* Error message from server
|
|
@@ -634,6 +650,7 @@ interface ServerPongMessage extends BaseServerMessage {
|
|
|
634
650
|
*/
|
|
635
651
|
interface ServerTurnCompleteMessage extends BaseServerMessage {
|
|
636
652
|
type: 'turnComplete';
|
|
653
|
+
turnId?: number;
|
|
637
654
|
}
|
|
638
655
|
/**
|
|
639
656
|
* Ready message from server
|
package/dist/index.js
CHANGED
|
@@ -738,6 +738,7 @@ var LiveSpeechClient = class {
|
|
|
738
738
|
pipelineMode: config?.pipelineMode ?? "live",
|
|
739
739
|
...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
|
|
740
740
|
allowHarmCategory: config?.allowHarmCategory ?? false,
|
|
741
|
+
archiveAudio: config?.archiveAudio ?? false,
|
|
741
742
|
...config?.tools && config.tools.length > 0 && { tools: config.tools },
|
|
742
743
|
...hasSessionDuration && {
|
|
743
744
|
sessionMaxDurationSeconds: sessionDuration.maxSeconds,
|
|
@@ -1025,6 +1026,8 @@ var LiveSpeechClient = class {
|
|
|
1025
1026
|
}
|
|
1026
1027
|
}
|
|
1027
1028
|
handleConnected(connectionId) {
|
|
1029
|
+
this.sessionId = null;
|
|
1030
|
+
this.isStreaming = false;
|
|
1028
1031
|
const event = {
|
|
1029
1032
|
type: "connected",
|
|
1030
1033
|
connectionId,
|
|
@@ -1114,6 +1117,7 @@ var LiveSpeechClient = class {
|
|
|
1114
1117
|
type: "response",
|
|
1115
1118
|
text: message.text,
|
|
1116
1119
|
isFinal: message.isFinal,
|
|
1120
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1117
1121
|
timestamp: message.timestamp
|
|
1118
1122
|
};
|
|
1119
1123
|
this.emit("response", responseEvent);
|
|
@@ -1127,6 +1131,7 @@ var LiveSpeechClient = class {
|
|
|
1127
1131
|
data: audioData,
|
|
1128
1132
|
format: message.format,
|
|
1129
1133
|
sampleRate: message.sampleRate,
|
|
1134
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1130
1135
|
timestamp: message.timestamp
|
|
1131
1136
|
};
|
|
1132
1137
|
this.emit("audio", audioEvent);
|
|
@@ -1137,6 +1142,7 @@ var LiveSpeechClient = class {
|
|
|
1137
1142
|
const userTranscriptEvent = {
|
|
1138
1143
|
type: "userTranscript",
|
|
1139
1144
|
text: message.text,
|
|
1145
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1140
1146
|
timestamp: message.timestamp
|
|
1141
1147
|
};
|
|
1142
1148
|
this.emit("userTranscript", userTranscriptEvent);
|
|
@@ -1146,6 +1152,7 @@ var LiveSpeechClient = class {
|
|
|
1146
1152
|
case "turnComplete": {
|
|
1147
1153
|
const turnCompleteEvent = {
|
|
1148
1154
|
type: "turnComplete",
|
|
1155
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1149
1156
|
timestamp: message.timestamp
|
|
1150
1157
|
};
|
|
1151
1158
|
this.emit("turnComplete", turnCompleteEvent);
|
package/dist/index.mjs
CHANGED
|
@@ -699,6 +699,7 @@ var LiveSpeechClient = class {
|
|
|
699
699
|
pipelineMode: config?.pipelineMode ?? "live",
|
|
700
700
|
...config?.aiSpeaksFirst && { aiSpeaksFirst: config.aiSpeaksFirst },
|
|
701
701
|
allowHarmCategory: config?.allowHarmCategory ?? false,
|
|
702
|
+
archiveAudio: config?.archiveAudio ?? false,
|
|
702
703
|
...config?.tools && config.tools.length > 0 && { tools: config.tools },
|
|
703
704
|
...hasSessionDuration && {
|
|
704
705
|
sessionMaxDurationSeconds: sessionDuration.maxSeconds,
|
|
@@ -986,6 +987,8 @@ var LiveSpeechClient = class {
|
|
|
986
987
|
}
|
|
987
988
|
}
|
|
988
989
|
handleConnected(connectionId) {
|
|
990
|
+
this.sessionId = null;
|
|
991
|
+
this.isStreaming = false;
|
|
989
992
|
const event = {
|
|
990
993
|
type: "connected",
|
|
991
994
|
connectionId,
|
|
@@ -1075,6 +1078,7 @@ var LiveSpeechClient = class {
|
|
|
1075
1078
|
type: "response",
|
|
1076
1079
|
text: message.text,
|
|
1077
1080
|
isFinal: message.isFinal,
|
|
1081
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1078
1082
|
timestamp: message.timestamp
|
|
1079
1083
|
};
|
|
1080
1084
|
this.emit("response", responseEvent);
|
|
@@ -1088,6 +1092,7 @@ var LiveSpeechClient = class {
|
|
|
1088
1092
|
data: audioData,
|
|
1089
1093
|
format: message.format,
|
|
1090
1094
|
sampleRate: message.sampleRate,
|
|
1095
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1091
1096
|
timestamp: message.timestamp
|
|
1092
1097
|
};
|
|
1093
1098
|
this.emit("audio", audioEvent);
|
|
@@ -1098,6 +1103,7 @@ var LiveSpeechClient = class {
|
|
|
1098
1103
|
const userTranscriptEvent = {
|
|
1099
1104
|
type: "userTranscript",
|
|
1100
1105
|
text: message.text,
|
|
1106
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1101
1107
|
timestamp: message.timestamp
|
|
1102
1108
|
};
|
|
1103
1109
|
this.emit("userTranscript", userTranscriptEvent);
|
|
@@ -1107,6 +1113,7 @@ var LiveSpeechClient = class {
|
|
|
1107
1113
|
case "turnComplete": {
|
|
1108
1114
|
const turnCompleteEvent = {
|
|
1109
1115
|
type: "turnComplete",
|
|
1116
|
+
...message.turnId != null && { turnId: message.turnId },
|
|
1110
1117
|
timestamp: message.timestamp
|
|
1111
1118
|
};
|
|
1112
1119
|
this.emit("turnComplete", turnCompleteEvent);
|