@vortexm/vjt 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +163 -60
- package/dist/lib/voice-runtime.d.ts +12 -6
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -5869,8 +5869,9 @@ var MAX_RECORDING_MS = 10 * 60 * 1e3;
|
|
|
5869
5869
|
var MAX_LISTENING_SILENCE_MS = 60 * 60 * 1e3;
|
|
5870
5870
|
var SILENCE_STOP_MS = 2e3;
|
|
5871
5871
|
var DEFAULT_SPEECH_THRESHOLD = 0.035;
|
|
5872
|
-
var
|
|
5873
|
-
var
|
|
5872
|
+
var LISTENING_TARGET_PRE_ROLL_MS = 300;
|
|
5873
|
+
var LISTENING_SEGMENT_MS = 1e3;
|
|
5874
|
+
var LISTENING_SEGMENT_STEP_MS = 500;
|
|
5874
5875
|
var RECORDING_MIME_CANDIDATES = [
|
|
5875
5876
|
"audio/webm;codecs=opus",
|
|
5876
5877
|
"audio/webm",
|
|
@@ -5951,10 +5952,10 @@ var VoiceRuntime = class {
|
|
|
5951
5952
|
listenAnalyser = null;
|
|
5952
5953
|
listenSource = null;
|
|
5953
5954
|
listenFrameId = null;
|
|
5954
|
-
|
|
5955
|
-
|
|
5956
|
-
|
|
5957
|
-
|
|
5955
|
+
listeningIdleSessions = [];
|
|
5956
|
+
listeningLaneTimeoutIds = [];
|
|
5957
|
+
listeningLaneIntervalIds = [];
|
|
5958
|
+
listeningPromotedSession = null;
|
|
5958
5959
|
listeningCaptureActive = false;
|
|
5959
5960
|
lastSpeechAt = 0;
|
|
5960
5961
|
listeningStartedAt = 0;
|
|
@@ -6076,14 +6077,16 @@ var VoiceRuntime = class {
|
|
|
6076
6077
|
analyser.fftSize = 2048;
|
|
6077
6078
|
const source = context.createMediaStreamSource(stream);
|
|
6078
6079
|
source.connect(analyser);
|
|
6079
|
-
this.startListeningRecorder(stream);
|
|
6080
6080
|
this.listening = true;
|
|
6081
6081
|
this.listenContext = context;
|
|
6082
6082
|
this.listenAnalyser = analyser;
|
|
6083
6083
|
this.listenSource = source;
|
|
6084
|
+
this.listeningIdleSessions = [];
|
|
6085
|
+
this.listeningPromotedSession = null;
|
|
6084
6086
|
this.lastSpeechAt = 0;
|
|
6085
6087
|
this.listeningStartedAt = performance.now();
|
|
6086
6088
|
this.speechEventTriggered = false;
|
|
6089
|
+
this.startListeningRecorderSchedule(stream);
|
|
6087
6090
|
const sampleBuffer = new Uint8Array(analyser.fftSize);
|
|
6088
6091
|
const step = async () => {
|
|
6089
6092
|
if (!this.listening || !this.listenAnalyser) {
|
|
@@ -6147,9 +6150,8 @@ var VoiceRuntime = class {
|
|
|
6147
6150
|
if (this.listeningCaptureActive) {
|
|
6148
6151
|
await this.stopListeningCapture();
|
|
6149
6152
|
}
|
|
6150
|
-
this.
|
|
6151
|
-
this.
|
|
6152
|
-
this.listeningCaptureChunks = [];
|
|
6153
|
+
this.clearListeningRecorderSchedule();
|
|
6154
|
+
await this.discardListeningIdleSessions();
|
|
6153
6155
|
this.listeningCaptureActive = false;
|
|
6154
6156
|
this.recordingStartedFromListening = false;
|
|
6155
6157
|
this.releaseInputStreamIfIdle();
|
|
@@ -6231,7 +6233,6 @@ var VoiceRuntime = class {
|
|
|
6231
6233
|
async handleRecordingError(error) {
|
|
6232
6234
|
logRuntimeError("voice.recording", error);
|
|
6233
6235
|
this.listeningCaptureActive = false;
|
|
6234
|
-
this.listeningCaptureChunks = [];
|
|
6235
6236
|
this.recordingStartedFromListening = false;
|
|
6236
6237
|
await this.triggerSystemEvent("onRecordingError", this.normalizeErrorMessage(error));
|
|
6237
6238
|
}
|
|
@@ -6277,89 +6278,194 @@ var VoiceRuntime = class {
|
|
|
6277
6278
|
this.mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
6278
6279
|
return this.mediaStream;
|
|
6279
6280
|
}
|
|
6280
|
-
|
|
6281
|
-
|
|
6282
|
-
|
|
6281
|
+
startListeningRecorderSchedule(stream) {
|
|
6282
|
+
this.clearListeningRecorderSchedule();
|
|
6283
|
+
this.launchListeningRecorder(stream);
|
|
6284
|
+
const delayedStart = window.setTimeout(() => {
|
|
6285
|
+
if (!this.listening || this.listeningCaptureActive) {
|
|
6286
|
+
return;
|
|
6287
|
+
}
|
|
6288
|
+
this.launchListeningRecorder(stream);
|
|
6289
|
+
const laneB = window.setInterval(() => {
|
|
6290
|
+
if (!this.listening || this.listeningCaptureActive) {
|
|
6291
|
+
return;
|
|
6292
|
+
}
|
|
6293
|
+
this.launchListeningRecorder(stream);
|
|
6294
|
+
}, LISTENING_SEGMENT_MS);
|
|
6295
|
+
this.listeningLaneIntervalIds.push(laneB);
|
|
6296
|
+
}, LISTENING_SEGMENT_STEP_MS);
|
|
6297
|
+
this.listeningLaneTimeoutIds.push(delayedStart);
|
|
6298
|
+
const laneA = window.setInterval(() => {
|
|
6299
|
+
if (!this.listening || this.listeningCaptureActive) {
|
|
6300
|
+
return;
|
|
6301
|
+
}
|
|
6302
|
+
this.launchListeningRecorder(stream);
|
|
6303
|
+
}, LISTENING_SEGMENT_MS);
|
|
6304
|
+
this.listeningLaneIntervalIds.push(laneA);
|
|
6305
|
+
}
|
|
6306
|
+
clearListeningRecorderSchedule() {
|
|
6307
|
+
for (const timeoutId of this.listeningLaneTimeoutIds) {
|
|
6308
|
+
window.clearTimeout(timeoutId);
|
|
6309
|
+
}
|
|
6310
|
+
this.listeningLaneTimeoutIds = [];
|
|
6311
|
+
for (const intervalId of this.listeningLaneIntervalIds) {
|
|
6312
|
+
window.clearInterval(intervalId);
|
|
6283
6313
|
}
|
|
6314
|
+
this.listeningLaneIntervalIds = [];
|
|
6315
|
+
}
|
|
6316
|
+
launchListeningRecorder(stream) {
|
|
6317
|
+
const session = this.createListeningRecorderSession(stream);
|
|
6318
|
+
this.listeningIdleSessions.push(session);
|
|
6319
|
+
session.recorder.start();
|
|
6320
|
+
session.stopTimeoutId = window.setTimeout(() => {
|
|
6321
|
+
if (!session.promoted && session.recorder.state !== "inactive") {
|
|
6322
|
+
session.recorder.stop();
|
|
6323
|
+
}
|
|
6324
|
+
}, LISTENING_SEGMENT_MS);
|
|
6325
|
+
}
|
|
6326
|
+
createListeningRecorderSession(stream) {
|
|
6284
6327
|
if (typeof MediaRecorder === "undefined") {
|
|
6285
6328
|
throw new Error("MediaRecorder is not supported in this browser");
|
|
6286
6329
|
}
|
|
6287
6330
|
const preferredMimeType = this.getPreferredRecordingMimeType();
|
|
6288
6331
|
const options = preferredMimeType ? { mimeType: preferredMimeType } : void 0;
|
|
6289
6332
|
const recorder = options ? new MediaRecorder(stream, options) : new MediaRecorder(stream);
|
|
6290
|
-
|
|
6291
|
-
|
|
6292
|
-
|
|
6293
|
-
|
|
6333
|
+
let resolveStopped = () => {
|
|
6334
|
+
};
|
|
6335
|
+
const stopped = new Promise((resolve) => {
|
|
6336
|
+
resolveStopped = resolve;
|
|
6337
|
+
});
|
|
6338
|
+
const session = {
|
|
6339
|
+
recorder,
|
|
6340
|
+
mimeType: recorder.mimeType || preferredMimeType || "audio/webm",
|
|
6341
|
+
startedAt: performance.now(),
|
|
6342
|
+
chunks: [],
|
|
6343
|
+
stopTimeoutId: null,
|
|
6344
|
+
promoted: false,
|
|
6345
|
+
discard: false,
|
|
6346
|
+
stopped,
|
|
6347
|
+
resolveStopped
|
|
6348
|
+
};
|
|
6294
6349
|
recorder.ondataavailable = (event) => {
|
|
6295
|
-
if (
|
|
6296
|
-
|
|
6297
|
-
}
|
|
6298
|
-
const timestamp = performance.now();
|
|
6299
|
-
if (this.listeningCaptureActive) {
|
|
6300
|
-
this.listeningCaptureChunks.push(event.data);
|
|
6301
|
-
return;
|
|
6302
|
-
}
|
|
6303
|
-
this.listeningPreRollChunks.push({ data: event.data, timestamp });
|
|
6304
|
-
const cutoff = timestamp - LISTENING_PRE_ROLL_MS;
|
|
6305
|
-
while (this.listeningPreRollChunks.length > 0 && this.listeningPreRollChunks[0].timestamp < cutoff) {
|
|
6306
|
-
this.listeningPreRollChunks.shift();
|
|
6350
|
+
if (event.data && event.data.size > 0) {
|
|
6351
|
+
session.chunks.push(event.data);
|
|
6307
6352
|
}
|
|
6308
6353
|
};
|
|
6309
6354
|
recorder.onerror = (event) => {
|
|
6310
|
-
void this.handleRecordingError(event.error ?? new Error("Unknown listening
|
|
6355
|
+
void this.handleRecordingError(event.error ?? new Error("Unknown listening recording error"));
|
|
6311
6356
|
};
|
|
6312
|
-
recorder.
|
|
6357
|
+
recorder.onstop = () => {
|
|
6358
|
+
void this.handleListeningRecorderStop(session);
|
|
6359
|
+
};
|
|
6360
|
+
return session;
|
|
6313
6361
|
}
|
|
6314
|
-
|
|
6315
|
-
|
|
6362
|
+
beginListeningCapture() {
|
|
6363
|
+
const winner = this.selectListeningRecorderWinner();
|
|
6364
|
+
if (!winner) {
|
|
6316
6365
|
return;
|
|
6317
6366
|
}
|
|
6318
|
-
const recorder = this.listeningRecorder;
|
|
6319
|
-
this.listeningRecorder = null;
|
|
6320
|
-
recorder.ondataavailable = null;
|
|
6321
|
-
recorder.onerror = null;
|
|
6322
|
-
if (recorder.state !== "inactive") {
|
|
6323
|
-
recorder.stop();
|
|
6324
|
-
}
|
|
6325
|
-
}
|
|
6326
|
-
beginListeningCapture() {
|
|
6327
6367
|
this.listeningCaptureActive = true;
|
|
6328
6368
|
this.recordingStartedFromListening = true;
|
|
6329
|
-
this.
|
|
6330
|
-
|
|
6369
|
+
this.listeningPromotedSession = winner;
|
|
6370
|
+
winner.promoted = true;
|
|
6371
|
+
winner.discard = false;
|
|
6372
|
+
if (winner.stopTimeoutId !== null) {
|
|
6373
|
+
window.clearTimeout(winner.stopTimeoutId);
|
|
6374
|
+
winner.stopTimeoutId = null;
|
|
6375
|
+
}
|
|
6376
|
+
this.clearListeningRecorderSchedule();
|
|
6377
|
+
for (const session of this.listeningIdleSessions) {
|
|
6378
|
+
if (session === winner) {
|
|
6379
|
+
continue;
|
|
6380
|
+
}
|
|
6381
|
+
void this.discardListeningSession(session);
|
|
6382
|
+
}
|
|
6383
|
+
const preRollMs = Math.max(0, Math.round(performance.now() - winner.startedAt));
|
|
6331
6384
|
logRuntimeDebug(this.debugLogging, "voice-recording-started", {
|
|
6332
|
-
mimeType:
|
|
6385
|
+
mimeType: winner.mimeType,
|
|
6333
6386
|
fromListening: true,
|
|
6334
|
-
preRollMs
|
|
6335
|
-
preRollChunks: this.listeningCaptureChunks.length
|
|
6387
|
+
preRollMs
|
|
6336
6388
|
});
|
|
6337
6389
|
void this.triggerSystemEvent("onRecordingStarted", null);
|
|
6338
6390
|
}
|
|
6391
|
+
selectListeningRecorderWinner() {
|
|
6392
|
+
const activeSessions = this.listeningIdleSessions.filter((session) => session.recorder.state !== "inactive");
|
|
6393
|
+
if (activeSessions.length === 0) {
|
|
6394
|
+
return null;
|
|
6395
|
+
}
|
|
6396
|
+
const targetStartedAt = performance.now() - LISTENING_TARGET_PRE_ROLL_MS;
|
|
6397
|
+
const suitable = activeSessions.filter((session) => session.startedAt <= targetStartedAt).sort((left, right) => right.startedAt - left.startedAt);
|
|
6398
|
+
if (suitable.length > 0) {
|
|
6399
|
+
return suitable[0];
|
|
6400
|
+
}
|
|
6401
|
+
return activeSessions.sort((left, right) => left.startedAt - right.startedAt)[0] ?? null;
|
|
6402
|
+
}
|
|
6339
6403
|
async stopListeningCapture() {
|
|
6340
|
-
if (!this.listeningCaptureActive) {
|
|
6404
|
+
if (!this.listeningCaptureActive || !this.listeningPromotedSession) {
|
|
6341
6405
|
return;
|
|
6342
6406
|
}
|
|
6407
|
+
const session = this.listeningPromotedSession;
|
|
6343
6408
|
this.listeningCaptureActive = false;
|
|
6344
|
-
|
|
6345
|
-
|
|
6409
|
+
this.listeningPromotedSession = null;
|
|
6410
|
+
if (session.recorder.state !== "inactive") {
|
|
6411
|
+
session.recorder.stop();
|
|
6412
|
+
}
|
|
6413
|
+
await session.stopped;
|
|
6414
|
+
}
|
|
6415
|
+
async discardListeningSession(session) {
|
|
6416
|
+
session.discard = true;
|
|
6417
|
+
if (session.stopTimeoutId !== null) {
|
|
6418
|
+
window.clearTimeout(session.stopTimeoutId);
|
|
6419
|
+
session.stopTimeoutId = null;
|
|
6420
|
+
}
|
|
6421
|
+
if (session.recorder.state !== "inactive") {
|
|
6422
|
+
session.recorder.stop();
|
|
6423
|
+
}
|
|
6424
|
+
await session.stopped;
|
|
6425
|
+
}
|
|
6426
|
+
async discardListeningIdleSessions() {
|
|
6427
|
+
const sessions = [...this.listeningIdleSessions];
|
|
6428
|
+
for (const session of sessions) {
|
|
6429
|
+
if (session === this.listeningPromotedSession) {
|
|
6430
|
+
continue;
|
|
6431
|
+
}
|
|
6432
|
+
await this.discardListeningSession(session);
|
|
6433
|
+
}
|
|
6434
|
+
this.listeningIdleSessions = this.listeningPromotedSession ? [this.listeningPromotedSession] : [];
|
|
6435
|
+
}
|
|
6436
|
+
async handleListeningRecorderStop(session) {
|
|
6437
|
+
if (session.stopTimeoutId !== null) {
|
|
6438
|
+
window.clearTimeout(session.stopTimeoutId);
|
|
6439
|
+
session.stopTimeoutId = null;
|
|
6440
|
+
}
|
|
6441
|
+
this.listeningIdleSessions = this.listeningIdleSessions.filter((entry) => entry !== session);
|
|
6346
6442
|
try {
|
|
6347
|
-
|
|
6443
|
+
if (session.discard || !session.promoted) {
|
|
6444
|
+
return;
|
|
6445
|
+
}
|
|
6446
|
+
const blob = new Blob(session.chunks, { type: session.mimeType });
|
|
6348
6447
|
const audioData = await blobToBase64(blob);
|
|
6349
6448
|
logRuntimeDebug(this.debugLogging, "voice-recording-stopped", {
|
|
6350
|
-
mimeType:
|
|
6449
|
+
mimeType: session.mimeType,
|
|
6351
6450
|
size: blob.size,
|
|
6352
|
-
fromListening: true
|
|
6353
|
-
preRollMs: LISTENING_PRE_ROLL_MS
|
|
6451
|
+
fromListening: true
|
|
6354
6452
|
});
|
|
6355
6453
|
await this.triggerSystemEvent("onRecordingStopped", {
|
|
6356
6454
|
audioData,
|
|
6357
|
-
mimeType:
|
|
6455
|
+
mimeType: session.mimeType
|
|
6358
6456
|
});
|
|
6359
6457
|
} catch (error) {
|
|
6360
6458
|
await this.handleRecordingError(error);
|
|
6361
6459
|
} finally {
|
|
6362
|
-
|
|
6460
|
+
session.resolveStopped();
|
|
6461
|
+
if (session.promoted) {
|
|
6462
|
+
this.recordingStartedFromListening = false;
|
|
6463
|
+
if (this.listening && this.mediaStream?.active) {
|
|
6464
|
+
this.listeningStartedAt = performance.now();
|
|
6465
|
+
this.speechEventTriggered = false;
|
|
6466
|
+
this.startListeningRecorderSchedule(this.mediaStream);
|
|
6467
|
+
}
|
|
6468
|
+
}
|
|
6363
6469
|
}
|
|
6364
6470
|
}
|
|
6365
6471
|
clearRecordingTimeout() {
|
|
@@ -6372,9 +6478,6 @@ var VoiceRuntime = class {
|
|
|
6372
6478
|
if (this.listening) {
|
|
6373
6479
|
return;
|
|
6374
6480
|
}
|
|
6375
|
-
if (this.listeningRecorder && this.listeningRecorder.state !== "inactive") {
|
|
6376
|
-
return;
|
|
6377
|
-
}
|
|
6378
6481
|
if (this.mediaRecorder && this.mediaRecorder.state !== "inactive") {
|
|
6379
6482
|
return;
|
|
6380
6483
|
}
|
|
@@ -16,10 +16,10 @@ export declare class VoiceRuntime {
|
|
|
16
16
|
private listenAnalyser;
|
|
17
17
|
private listenSource;
|
|
18
18
|
private listenFrameId;
|
|
19
|
-
private
|
|
20
|
-
private
|
|
21
|
-
private
|
|
22
|
-
private
|
|
19
|
+
private listeningIdleSessions;
|
|
20
|
+
private listeningLaneTimeoutIds;
|
|
21
|
+
private listeningLaneIntervalIds;
|
|
22
|
+
private listeningPromotedSession;
|
|
23
23
|
private listeningCaptureActive;
|
|
24
24
|
private lastSpeechAt;
|
|
25
25
|
private listeningStartedAt;
|
|
@@ -44,10 +44,16 @@ export declare class VoiceRuntime {
|
|
|
44
44
|
private normalizePlayablePayload;
|
|
45
45
|
private getPreferredRecordingMimeType;
|
|
46
46
|
private ensureInputStream;
|
|
47
|
-
private
|
|
48
|
-
private
|
|
47
|
+
private startListeningRecorderSchedule;
|
|
48
|
+
private clearListeningRecorderSchedule;
|
|
49
|
+
private launchListeningRecorder;
|
|
50
|
+
private createListeningRecorderSession;
|
|
49
51
|
private beginListeningCapture;
|
|
52
|
+
private selectListeningRecorderWinner;
|
|
50
53
|
private stopListeningCapture;
|
|
54
|
+
private discardListeningSession;
|
|
55
|
+
private discardListeningIdleSessions;
|
|
56
|
+
private handleListeningRecorderStop;
|
|
51
57
|
private clearRecordingTimeout;
|
|
52
58
|
private releaseInputStreamIfIdle;
|
|
53
59
|
private stopStreamTracks;
|