@livekit/agents 1.0.42 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/index.cjs +8 -0
- package/dist/inference/index.cjs.map +1 -1
- package/dist/inference/index.d.cts +2 -2
- package/dist/inference/index.d.ts +2 -2
- package/dist/inference/index.d.ts.map +1 -1
- package/dist/inference/index.js +8 -0
- package/dist/inference/index.js.map +1 -1
- package/dist/inference/stt.cjs +70 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +34 -1
- package/dist/inference/stt.d.ts +34 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +67 -11
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +204 -0
- package/dist/inference/stt.test.cjs.map +1 -0
- package/dist/inference/stt.test.js +203 -0
- package/dist/inference/stt.test.js.map +1 -0
- package/dist/inference/tts.cjs +52 -10
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +22 -0
- package/dist/inference/tts.d.ts +22 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +49 -9
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +223 -0
- package/dist/inference/tts.test.cjs.map +1 -0
- package/dist/inference/tts.test.js +222 -0
- package/dist/inference/tts.test.js.map +1 -0
- package/dist/ipc/inference_proc_lazy_main.cjs +13 -1
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/inference_proc_lazy_main.js +13 -1
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +8 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +9 -2
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.cts +7 -0
- package/dist/ipc/supervised_proc.d.ts +7 -0
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/stt/stt.cjs +4 -0
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +7 -0
- package/dist/stt/stt.d.ts +7 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +4 -0
- package/dist/stt/stt.js.map +1 -1
- package/dist/transcription.cjs.map +1 -1
- package/dist/transcription.d.cts +6 -0
- package/dist/transcription.d.ts +6 -0
- package/dist/transcription.d.ts.map +1 -1
- package/dist/transcription.js.map +1 -1
- package/dist/utils.cjs +10 -2
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +10 -2
- package/dist/utils.js.map +1 -1
- package/dist/vad.cjs +1 -1
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +3 -2
- package/dist/vad.d.ts +3 -2
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +1 -1
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent_activity.cjs +1 -2
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.js +1 -2
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +14 -0
- package/dist/voice/audio_recognition.d.ts +14 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/index.ts +8 -0
- package/src/inference/stt.test.ts +236 -0
- package/src/inference/stt.ts +116 -20
- package/src/inference/tts.test.ts +255 -0
- package/src/inference/tts.ts +81 -15
- package/src/ipc/inference_proc_lazy_main.ts +13 -1
- package/src/ipc/job_proc_lazy_main.ts +18 -2
- package/src/ipc/supervised_proc.ts +7 -0
- package/src/stt/stt.ts +12 -0
- package/src/transcription.ts +6 -0
- package/src/utils.ts +10 -2
- package/src/vad.ts +4 -3
- package/src/voice/agent_activity.ts +1 -1
- package/src/voice/audio_recognition.ts +14 -0
|
@@ -3,12 +3,19 @@ import type { ChildProcess } from 'node:child_process';
|
|
|
3
3
|
import type { RunningJobInfo } from '../job.js';
|
|
4
4
|
import { Future } from '../utils.js';
|
|
5
5
|
export interface ProcOpts {
|
|
6
|
+
/** Timeout for process initialization in milliseconds. */
|
|
6
7
|
initializeTimeout: number;
|
|
8
|
+
/** Timeout for process shutdown in milliseconds. */
|
|
7
9
|
closeTimeout: number;
|
|
10
|
+
/** Memory usage warning threshold in megabytes. */
|
|
8
11
|
memoryWarnMB: number;
|
|
12
|
+
/** Memory usage limit in megabytes. */
|
|
9
13
|
memoryLimitMB: number;
|
|
14
|
+
/** Interval for health check pings in milliseconds. */
|
|
10
15
|
pingInterval: number;
|
|
16
|
+
/** Timeout waiting for pong response in milliseconds. */
|
|
11
17
|
pingTimeout: number;
|
|
18
|
+
/** Threshold for warning about unresponsive processes in milliseconds. */
|
|
12
19
|
highPingThreshold: number;
|
|
13
20
|
}
|
|
14
21
|
export declare abstract class SupervisedProc {
|
|
@@ -3,12 +3,19 @@ import type { ChildProcess } from 'node:child_process';
|
|
|
3
3
|
import type { RunningJobInfo } from '../job.js';
|
|
4
4
|
import { Future } from '../utils.js';
|
|
5
5
|
export interface ProcOpts {
|
|
6
|
+
/** Timeout for process initialization in milliseconds. */
|
|
6
7
|
initializeTimeout: number;
|
|
8
|
+
/** Timeout for process shutdown in milliseconds. */
|
|
7
9
|
closeTimeout: number;
|
|
10
|
+
/** Memory usage warning threshold in megabytes. */
|
|
8
11
|
memoryWarnMB: number;
|
|
12
|
+
/** Memory usage limit in megabytes. */
|
|
9
13
|
memoryLimitMB: number;
|
|
14
|
+
/** Interval for health check pings in milliseconds. */
|
|
10
15
|
pingInterval: number;
|
|
16
|
+
/** Timeout waiting for pong response in milliseconds. */
|
|
11
17
|
pingTimeout: number;
|
|
18
|
+
/** Threshold for warning about unresponsive processes in milliseconds. */
|
|
12
19
|
highPingThreshold: number;
|
|
13
20
|
}
|
|
14
21
|
export declare abstract class SupervisedProc {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"supervised_proc.d.ts","sourceRoot":"","sources":["../../src/ipc/supervised_proc.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAEhD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,QAAQ;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,8BAAsB,cAAc;;IAKlC,IAAI,CAAC,EAAE,YAAY,CAAC;IAIpB,SAAS,CAAC,IAAI,eAAgB;gBAK5B,iBAAiB,EAAE,MAAM,EACzB,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,MAAM,EACrB,YAAY,EAAE,MAAM,EACpB,WAAW,EAAE,MAAM,EACnB,iBAAiB,EAAE,MAAM;IAa3B,QAAQ,CAAC,aAAa,IAAI,YAAY;IACtC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAErD,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,UAAU,IAAI,cAAc,GAAG,SAAS,CAE3C;IAEK,KAAK;IAaL,GAAG;IA4EH,IAAI;IAQJ,UAAU;IA0BV,KAAK;IAoBL,SAAS,CAAC,IAAI,EAAE,cAAc;YAWtB,qBAAqB;IAiBnC,OAAO,CAAC,WAAW;CAKpB"}
|
|
1
|
+
{"version":3,"file":"supervised_proc.d.ts","sourceRoot":"","sources":["../../src/ipc/supervised_proc.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAGvD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,WAAW,CAAC;AAEhD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,QAAQ;IACvB,0DAA0D;IAC1D,iBAAiB,EAAE,MAAM,CAAC;IAC1B,oDAAoD;IACpD,YAAY,EAAE,MAAM,CAAC;IACrB,mDAAmD;IACnD,YAAY,EAAE,MAAM,CAAC;IACrB,uCAAuC;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,uDAAuD;IACvD,YAAY,EAAE,MAAM,CAAC;IACrB,yDAAyD;IACzD,WAAW,EAAE,MAAM,CAAC;IACpB,0EAA0E;IAC1E,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,8BAAsB,cAAc;;IAKlC,IAAI,CAAC,EAAE,YAAY,CAAC;IAIpB,SAAS,CAAC,IAAI,eAAgB;gBAK5B,iBAAiB,EAAE,MAAM,EACzB,YAAY,EAAE,MAAM,EACpB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,MAAM,EACrB,YAAY,EAAE,MAAM,EACpB,WAAW,EAAE,MAAM,EACnB,iBAAiB,EAAE,MAAM;IAa3B,QAAQ,CAAC,aAAa,IAAI,YAAY;IACtC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAErD,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,UAAU,IAAI,cAAc,GAAG,SAAS,CAE3C;IAEK,KAAK;IAaL,GAAG;IA4EH,IAAI;IAQJ,UAAU;IA0BV,KAAK;IAoBL,SAAS,CAAC,IAAI,EAAE,cAAc;YAWtB,qBAAqB;IAiBnC,OAAO,CAAC,WAAW;CAKpB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/ipc/supervised_proc.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ChildProcess } from 'node:child_process';\nimport { once } from 'node:events';\nimport pidusage from 'pidusage';\nimport type { RunningJobInfo } from '../job.js';\nimport { log, loggerOptions } from '../log.js';\nimport { Future } from '../utils.js';\nimport type { IPCMessage } from './message.js';\n\nexport interface ProcOpts {\n initializeTimeout: number;\n closeTimeout: number;\n memoryWarnMB: number;\n memoryLimitMB: number;\n pingInterval: number;\n pingTimeout: number;\n highPingThreshold: number;\n}\n\nexport abstract class SupervisedProc {\n #opts: ProcOpts;\n #started = false;\n #closing = false;\n #runningJob?: RunningJobInfo = undefined;\n proc?: ChildProcess;\n #pingInterval?: ReturnType<typeof setInterval>;\n #memoryMonitorInterval?: ReturnType<typeof setInterval>;\n #pongTimeout?: ReturnType<typeof setTimeout>;\n protected init = new Future();\n #join = new Future();\n #logger = log().child({ runningJob: this.#runningJob });\n\n constructor(\n initializeTimeout: number,\n closeTimeout: number,\n memoryWarnMB: number,\n memoryLimitMB: number,\n pingInterval: number,\n pingTimeout: number,\n highPingThreshold: number,\n ) {\n this.#opts = {\n initializeTimeout,\n closeTimeout,\n memoryWarnMB,\n memoryLimitMB,\n pingInterval,\n pingTimeout,\n highPingThreshold,\n };\n }\n\n abstract createProcess(): ChildProcess;\n abstract mainTask(child: ChildProcess): Promise<void>;\n\n get started(): boolean {\n return this.#started;\n }\n\n get isAlive(): boolean {\n return this.#started && !this.#closing && !!this.proc?.connected;\n }\n\n get runningJob(): RunningJobInfo | undefined {\n return this.#runningJob;\n }\n\n async start() {\n if (this.#started) {\n throw new Error('runner already started');\n } else if (this.#closing) {\n throw new Error('runner is closed');\n }\n\n this.proc = this.createProcess();\n\n this.#started = true;\n this.run();\n }\n\n async run() {\n await this.init.await;\n\n this.#pingInterval = setInterval(() => {\n if (this.proc?.connected) {\n this.proc.send({ case: 'pingRequest', value: { timestamp: Date.now() } });\n }\n }, this.#opts.pingInterval);\n\n this.#pongTimeout = setTimeout(() => {\n this.#logger.warn('job is unresponsive');\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n this.proc!.kill();\n this.#join.resolve();\n }, this.#opts.pingTimeout);\n\n this.#memoryMonitorInterval = setInterval(async () => {\n const memoryMB = await this.getChildMemoryUsageMB();\n if (this.#opts.memoryLimitMB > 0 && memoryMB > this.#opts.memoryLimitMB) {\n this.#logger\n .child({ memoryUsageMB: memoryMB, memoryLimitMB: this.#opts.memoryLimitMB })\n .error('process exceeded memory limit, killing process');\n this.close();\n } else if (this.#opts.memoryWarnMB > 0 && memoryMB > this.#opts.memoryWarnMB) {\n this.#logger\n .child({\n memoryUsageMB: memoryMB,\n memoryWarnMB: this.#opts.memoryWarnMB,\n memoryLimitMB: this.#opts.memoryLimitMB,\n })\n .warn('process memory usage is high');\n }\n }, 5000);\n\n const listener = (msg: IPCMessage) => {\n switch (msg.case) {\n case 'pongResponse': {\n const delay = Date.now() - msg.value.timestamp;\n if (delay > this.#opts.highPingThreshold) {\n this.#logger.child({ delay }).warn('job executor is unresponsive');\n }\n this.#pongTimeout?.refresh();\n break;\n }\n case 'exiting': {\n this.#logger.child({ reason: msg.value.reason }).debug('job exiting');\n break;\n }\n case 'done': {\n this.#closing = true;\n this.proc!.off('message', listener);\n break;\n }\n }\n };\n this.proc!.on('message', listener);\n this.proc!.on('error', (err) => {\n if (this.#closing) return;\n this.#logger\n .child({ err })\n .warn('job process exited unexpectedly; this likely means the error above caused a crash');\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.proc!.on('exit', () => {\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.mainTask(this.proc!);\n\n await this.#join.await;\n }\n\n async join() {\n if (!this.#started) {\n throw new Error('runner not started');\n }\n\n await this.#join.await;\n }\n\n async initialize() {\n const timer = setTimeout(() => {\n this.init.reject(new Error('runner initialization timed out'));\n }, this.#opts.initializeTimeout);\n if (!this.proc?.connected) {\n this.init.reject(new Error('process not connected'));\n return;\n }\n this.proc.send({\n case: 'initializeRequest',\n value: {\n loggerOptions,\n pingInterval: this.#opts.pingInterval,\n pingTimeout: this.#opts.pingTimeout,\n highPingThreshold: this.#opts.highPingThreshold,\n },\n });\n await once(this.proc!, 'message').then(([msg]: IPCMessage[]) => {\n clearTimeout(timer);\n if (msg!.case !== 'initializeResponse') {\n throw new Error('first message must be InitializeResponse');\n }\n });\n this.init.resolve();\n }\n\n async close() {\n if (!this.#started) {\n return;\n }\n this.#closing = true;\n\n if (this.proc?.connected) {\n this.proc.send({ case: 'shutdownRequest' });\n }\n\n const timer = setTimeout(() => {\n this.#logger.error('job shutdown is taking too much time');\n this.proc!.kill();\n }, this.#opts.closeTimeout);\n await this.#join.await.then(() => {\n clearTimeout(timer);\n this.clearTimers();\n });\n }\n\n async launchJob(info: RunningJobInfo) {\n if (this.#runningJob) {\n throw new Error('executor already has a running job');\n }\n if (!this.proc?.connected) {\n throw new Error('process not connected');\n }\n this.#runningJob = info;\n this.proc.send({ case: 'startJobRequest', value: { runningJob: info } });\n }\n\n private async getChildMemoryUsageMB(): Promise<number> {\n const pid = this.proc?.pid;\n if (!pid) {\n return 0;\n }\n try {\n const stats = await pidusage(pid);\n return stats.memory / (1024 * 1024);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === 'ENOENT' || code === 'ESRCH') {\n return 0;\n }\n throw err;\n }\n }\n\n private clearTimers() {\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n clearInterval(this.#memoryMonitorInterval);\n }\n}\n"],"mappings":"AAIA,SAAS,YAAY;AACrB,OAAO,cAAc;AAErB,SAAS,KAAK,qBAAqB;AACnC,SAAS,cAAc;AAahB,MAAe,eAAe;AAAA,EACnC;AAAA,EACA,WAAW;AAAA,EACX,WAAW;AAAA,EACX,cAA+B;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACU,OAAO,IAAI,OAAO;AAAA,EAC5B,QAAQ,IAAI,OAAO;AAAA,EACnB,UAAU,IAAI,EAAE,MAAM,EAAE,YAAY,KAAK,YAAY,CAAC;AAAA,EAEtD,YACE,mBACA,cACA,cACA,eACA,cACA,aACA,mBACA;AACA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAKA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAmB;AA7DzB;AA8DI,WAAO,KAAK,YAAY,CAAC,KAAK,YAAY,CAAC,GAAC,UAAK,SAAL,mBAAW;AAAA,EACzD;AAAA,EAEA,IAAI,aAAyC;AAC3C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C,WAAW,KAAK,UAAU;AACxB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,SAAK,OAAO,KAAK,cAAc;AAE/B,SAAK,WAAW;AAChB,SAAK,IAAI;AAAA,EACX;AAAA,EAEA,MAAM,MAAM;AACV,UAAM,KAAK,KAAK;AAEhB,SAAK,gBAAgB,YAAY,MAAM;AArF3C;AAsFM,WAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,aAAK,KAAK,KAAK,EAAE,MAAM,eAAe,OAAO,EAAE,WAAW,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,MAC1E;AAAA,IACF,GAAG,KAAK,MAAM,YAAY;AAE1B,SAAK,eAAe,WAAW,MAAM;AACnC,WAAK,QAAQ,KAAK,qBAAqB;AACvC,mBAAa,KAAK,YAAY;AAC9B,oBAAc,KAAK,aAAa;AAChC,WAAK,KAAM,KAAK;AAChB,WAAK,MAAM,QAAQ;AAAA,IACrB,GAAG,KAAK,MAAM,WAAW;AAEzB,SAAK,yBAAyB,YAAY,YAAY;AACpD,YAAM,WAAW,MAAM,KAAK,sBAAsB;AAClD,UAAI,KAAK,MAAM,gBAAgB,KAAK,WAAW,KAAK,MAAM,eAAe;AACvE,aAAK,QACF,MAAM,EAAE,eAAe,UAAU,eAAe,KAAK,MAAM,cAAc,CAAC,EAC1E,MAAM,gDAAgD;AACzD,aAAK,MAAM;AAAA,MACb,WAAW,KAAK,MAAM,eAAe,KAAK,WAAW,KAAK,MAAM,cAAc;AAC5E,aAAK,QACF,MAAM;AAAA,UACL,eAAe;AAAA,UACf,cAAc,KAAK,MAAM;AAAA,UACzB,eAAe,KAAK,MAAM;AAAA,QAC5B,CAAC,EACA,KAAK,8BAA8B;AAAA,MACxC;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,QAAoB;AArH1C;AAsHM,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK,gBAAgB;AACnB,gBAAM,QAAQ,KAAK,IAAI,IAAI,IAAI,MAAM;AACrC,cAAI,QAAQ,KAAK,MAAM,mBAAmB;AACxC,iBAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AAAA,UACnE;AACA,qBAAK,iBAAL,mBAAmB;AACnB;AAAA,QACF;AAAA,QACA,KAAK,WAAW;AACd,eAAK,QAAQ,MAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,CAAC,EAAE,MAAM,aAAa;AACpE;AAAA,QACF;AAAA,QACA,KAAK,QAAQ;AACX,eAAK,WAAW;AAChB,eAAK,KAAM,IAAI,WAAW,QAAQ;AAClC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AACA,SAAK,KAAM,GAAG,WAAW,QAAQ;AACjC,SAAK,KAAM,GAAG,SAAS,CAAC,QAAQ;AAC9B,UAAI,KAAK,SAAU;AACnB,WAAK,QACF,MAAM,EAAE,IAAI,CAAC,EACb,KAAK,mFAAmF;AAC3F,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,KAAM,GAAG,QAAQ,MAAM;AAC1B,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,SAAS,KAAK,IAAK;AAExB,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,OAAO;AACX,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAEA,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,aAAa;AAtKrB;AAuKI,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,KAAK,OAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,IAC/D,GAAG,KAAK,MAAM,iBAAiB;AAC/B,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,WAAK,KAAK,OAAO,IAAI,MAAM,uBAAuB,CAAC;AACnD;AAAA,IACF;AACA,SAAK,KAAK,KAAK;AAAA,MACb,MAAM;AAAA,MACN,OAAO;AAAA,QACL;AAAA,QACA,cAAc,KAAK,MAAM;AAAA,QACzB,aAAa,KAAK,MAAM;AAAA,QACxB,mBAAmB,KAAK,MAAM;AAAA,MAChC;AAAA,IACF,CAAC;AACD,UAAM,KAAK,KAAK,MAAO,SAAS,EAAE,KAAK,CAAC,CAAC,GAAG,MAAoB;AAC9D,mBAAa,KAAK;AAClB,UAAI,IAAK,SAAS,sBAAsB;AACtC,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AAAA,IACF,CAAC;AACD,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AAhMhB;AAiMI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AACA,SAAK,WAAW;AAEhB,SAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,WAAK,KAAK,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAAA,IAC5C;AAEA,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,QAAQ,MAAM,sCAAsC;AACzD,WAAK,KAAM,KAAK;AAAA,IAClB,GAAG,KAAK,MAAM,YAAY;AAC1B,UAAM,KAAK,MAAM,MAAM,KAAK,MAAM;AAChC,mBAAa,KAAK;AAClB,WAAK,YAAY;AAAA,IACnB,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAU,MAAsB;AApNxC;AAqNI,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,oCAAoC;AAAA,IACtD;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AACA,SAAK,cAAc;AACnB,SAAK,KAAK,KAAK,EAAE,MAAM,mBAAmB,OAAO,EAAE,YAAY,KAAK,EAAE,CAAC;AAAA,EACzE;AAAA,EAEA,MAAc,wBAAyC;AA/NzD;AAgOI,UAAM,OAAM,UAAK,SAAL,mBAAW;AACvB,QAAI,CAAC,KAAK;AACR,aAAO;AAAA,IACT;AACA,QAAI;AACF,YAAM,QAAQ,MAAM,SAAS,GAAG;AAChC,aAAO,MAAM,UAAU,OAAO;AAAA,IAChC,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,YAAY,SAAS,SAAS;AACzC,eAAO;AAAA,MACT;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,iBAAa,KAAK,YAAY;AAC9B,kBAAc,KAAK,aAAa;AAChC,kBAAc,KAAK,sBAAsB;AAAA,EAC3C;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/ipc/supervised_proc.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { ChildProcess } from 'node:child_process';\nimport { once } from 'node:events';\nimport pidusage from 'pidusage';\nimport type { RunningJobInfo } from '../job.js';\nimport { log, loggerOptions } from '../log.js';\nimport { Future } from '../utils.js';\nimport type { IPCMessage } from './message.js';\n\nexport interface ProcOpts {\n /** Timeout for process initialization in milliseconds. */\n initializeTimeout: number;\n /** Timeout for process shutdown in milliseconds. */\n closeTimeout: number;\n /** Memory usage warning threshold in megabytes. */\n memoryWarnMB: number;\n /** Memory usage limit in megabytes. */\n memoryLimitMB: number;\n /** Interval for health check pings in milliseconds. */\n pingInterval: number;\n /** Timeout waiting for pong response in milliseconds. */\n pingTimeout: number;\n /** Threshold for warning about unresponsive processes in milliseconds. */\n highPingThreshold: number;\n}\n\nexport abstract class SupervisedProc {\n #opts: ProcOpts;\n #started = false;\n #closing = false;\n #runningJob?: RunningJobInfo = undefined;\n proc?: ChildProcess;\n #pingInterval?: ReturnType<typeof setInterval>;\n #memoryMonitorInterval?: ReturnType<typeof setInterval>;\n #pongTimeout?: ReturnType<typeof setTimeout>;\n protected init = new Future();\n #join = new Future();\n #logger = log().child({ runningJob: this.#runningJob });\n\n constructor(\n initializeTimeout: number,\n closeTimeout: number,\n memoryWarnMB: number,\n memoryLimitMB: number,\n pingInterval: number,\n pingTimeout: number,\n highPingThreshold: number,\n ) {\n this.#opts = {\n initializeTimeout,\n closeTimeout,\n memoryWarnMB,\n memoryLimitMB,\n pingInterval,\n pingTimeout,\n highPingThreshold,\n };\n }\n\n abstract createProcess(): ChildProcess;\n abstract mainTask(child: ChildProcess): Promise<void>;\n\n get started(): boolean {\n return this.#started;\n }\n\n get isAlive(): boolean {\n return this.#started && !this.#closing && !!this.proc?.connected;\n }\n\n get runningJob(): RunningJobInfo | undefined {\n return this.#runningJob;\n }\n\n async start() {\n if (this.#started) {\n throw new Error('runner already started');\n } else if (this.#closing) {\n throw new Error('runner is closed');\n }\n\n this.proc = this.createProcess();\n\n this.#started = true;\n this.run();\n }\n\n async run() {\n await this.init.await;\n\n this.#pingInterval = setInterval(() => {\n if (this.proc?.connected) {\n this.proc.send({ case: 'pingRequest', value: { timestamp: Date.now() } });\n }\n }, this.#opts.pingInterval);\n\n this.#pongTimeout = setTimeout(() => {\n this.#logger.warn('job is unresponsive');\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n this.proc!.kill();\n this.#join.resolve();\n }, this.#opts.pingTimeout);\n\n this.#memoryMonitorInterval = setInterval(async () => {\n const memoryMB = await this.getChildMemoryUsageMB();\n if (this.#opts.memoryLimitMB > 0 && memoryMB > this.#opts.memoryLimitMB) {\n this.#logger\n .child({ memoryUsageMB: memoryMB, memoryLimitMB: this.#opts.memoryLimitMB })\n .error('process exceeded memory limit, killing process');\n this.close();\n } else if (this.#opts.memoryWarnMB > 0 && memoryMB > this.#opts.memoryWarnMB) {\n this.#logger\n .child({\n memoryUsageMB: memoryMB,\n memoryWarnMB: this.#opts.memoryWarnMB,\n memoryLimitMB: this.#opts.memoryLimitMB,\n })\n .warn('process memory usage is high');\n }\n }, 5000);\n\n const listener = (msg: IPCMessage) => {\n switch (msg.case) {\n case 'pongResponse': {\n const delay = Date.now() - msg.value.timestamp;\n if (delay > this.#opts.highPingThreshold) {\n this.#logger.child({ delay }).warn('job executor is unresponsive');\n }\n this.#pongTimeout?.refresh();\n break;\n }\n case 'exiting': {\n this.#logger.child({ reason: msg.value.reason }).debug('job exiting');\n break;\n }\n case 'done': {\n this.#closing = true;\n this.proc!.off('message', listener);\n break;\n }\n }\n };\n this.proc!.on('message', listener);\n this.proc!.on('error', (err) => {\n if (this.#closing) return;\n this.#logger\n .child({ err })\n .warn('job process exited unexpectedly; this likely means the error above caused a crash');\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.proc!.on('exit', () => {\n this.clearTimers();\n this.#join.resolve();\n });\n\n this.mainTask(this.proc!);\n\n await this.#join.await;\n }\n\n async join() {\n if (!this.#started) {\n throw new Error('runner not started');\n }\n\n await this.#join.await;\n }\n\n async initialize() {\n const timer = setTimeout(() => {\n this.init.reject(new Error('runner initialization timed out'));\n }, this.#opts.initializeTimeout);\n if (!this.proc?.connected) {\n this.init.reject(new Error('process not connected'));\n return;\n }\n this.proc.send({\n case: 'initializeRequest',\n value: {\n loggerOptions,\n pingInterval: this.#opts.pingInterval,\n pingTimeout: this.#opts.pingTimeout,\n highPingThreshold: this.#opts.highPingThreshold,\n },\n });\n await once(this.proc!, 'message').then(([msg]: IPCMessage[]) => {\n clearTimeout(timer);\n if (msg!.case !== 'initializeResponse') {\n throw new Error('first message must be InitializeResponse');\n }\n });\n this.init.resolve();\n }\n\n async close() {\n if (!this.#started) {\n return;\n }\n this.#closing = true;\n\n if (this.proc?.connected) {\n this.proc.send({ case: 'shutdownRequest' });\n }\n\n const timer = setTimeout(() => {\n this.#logger.error('job shutdown is taking too much time');\n this.proc!.kill();\n }, this.#opts.closeTimeout);\n await this.#join.await.then(() => {\n clearTimeout(timer);\n this.clearTimers();\n });\n }\n\n async launchJob(info: RunningJobInfo) {\n if (this.#runningJob) {\n throw new Error('executor already has a running job');\n }\n if (!this.proc?.connected) {\n throw new Error('process not connected');\n }\n this.#runningJob = info;\n this.proc.send({ case: 'startJobRequest', value: { runningJob: info } });\n }\n\n private async getChildMemoryUsageMB(): Promise<number> {\n const pid = this.proc?.pid;\n if (!pid) {\n return 0;\n }\n try {\n const stats = await pidusage(pid);\n return stats.memory / (1024 * 1024);\n } catch (err) {\n const code = (err as NodeJS.ErrnoException).code;\n if (code === 'ENOENT' || code === 'ESRCH') {\n return 0;\n }\n throw err;\n }\n }\n\n private clearTimers() {\n clearTimeout(this.#pongTimeout);\n clearInterval(this.#pingInterval);\n clearInterval(this.#memoryMonitorInterval);\n }\n}\n"],"mappings":"AAIA,SAAS,YAAY;AACrB,OAAO,cAAc;AAErB,SAAS,KAAK,qBAAqB;AACnC,SAAS,cAAc;AAoBhB,MAAe,eAAe;AAAA,EACnC;AAAA,EACA,WAAW;AAAA,EACX,WAAW;AAAA,EACX,cAA+B;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACU,OAAO,IAAI,OAAO;AAAA,EAC5B,QAAQ,IAAI,OAAO;AAAA,EACnB,UAAU,IAAI,EAAE,MAAM,EAAE,YAAY,KAAK,YAAY,CAAC;AAAA,EAEtD,YACE,mBACA,cACA,cACA,eACA,cACA,aACA,mBACA;AACA,SAAK,QAAQ;AAAA,MACX;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA,EAKA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAmB;AApEzB;AAqEI,WAAO,KAAK,YAAY,CAAC,KAAK,YAAY,CAAC,GAAC,UAAK,SAAL,mBAAW;AAAA,EACzD;AAAA,EAEA,IAAI,aAAyC;AAC3C,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,wBAAwB;AAAA,IAC1C,WAAW,KAAK,UAAU;AACxB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,SAAK,OAAO,KAAK,cAAc;AAE/B,SAAK,WAAW;AAChB,SAAK,IAAI;AAAA,EACX;AAAA,EAEA,MAAM,MAAM;AACV,UAAM,KAAK,KAAK;AAEhB,SAAK,gBAAgB,YAAY,MAAM;AA5F3C;AA6FM,WAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,aAAK,KAAK,KAAK,EAAE,MAAM,eAAe,OAAO,EAAE,WAAW,KAAK,IAAI,EAAE,EAAE,CAAC;AAAA,MAC1E;AAAA,IACF,GAAG,KAAK,MAAM,YAAY;AAE1B,SAAK,eAAe,WAAW,MAAM;AACnC,WAAK,QAAQ,KAAK,qBAAqB;AACvC,mBAAa,KAAK,YAAY;AAC9B,oBAAc,KAAK,aAAa;AAChC,WAAK,KAAM,KAAK;AAChB,WAAK,MAAM,QAAQ;AAAA,IACrB,GAAG,KAAK,MAAM,WAAW;AAEzB,SAAK,yBAAyB,YAAY,YAAY;AACpD,YAAM,WAAW,MAAM,KAAK,sBAAsB;AAClD,UAAI,KAAK,MAAM,gBAAgB,KAAK,WAAW,KAAK,MAAM,eAAe;AACvE,aAAK,QACF,MAAM,EAAE,eAAe,UAAU,eAAe,KAAK,MAAM,cAAc,CAAC,EAC1E,MAAM,gDAAgD;AACzD,aAAK,MAAM;AAAA,MACb,WAAW,KAAK,MAAM,eAAe,KAAK,WAAW,KAAK,MAAM,cAAc;AAC5E,aAAK,QACF,MAAM;AAAA,UACL,eAAe;AAAA,UACf,cAAc,KAAK,MAAM;AAAA,UACzB,eAAe,KAAK,MAAM;AAAA,QAC5B,CAAC,EACA,KAAK,8BAA8B;AAAA,MACxC;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,CAAC,QAAoB;AA5H1C;AA6HM,cAAQ,IAAI,MAAM;AAAA,QAChB,KAAK,gBAAgB;AACnB,gBAAM,QAAQ,KAAK,IAAI,IAAI,IAAI,MAAM;AACrC,cAAI,QAAQ,KAAK,MAAM,mBAAmB;AACxC,iBAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AAAA,UACnE;AACA,qBAAK,iBAAL,mBAAmB;AACnB;AAAA,QACF;AAAA,QACA,KAAK,WAAW;AACd,eAAK,QAAQ,MAAM,EAAE,QAAQ,IAAI,MAAM,OAAO,CAAC,EAAE,MAAM,aAAa;AACpE;AAAA,QACF;AAAA,QACA,KAAK,QAAQ;AACX,eAAK,WAAW;AAChB,eAAK,KAAM,IAAI,WAAW,QAAQ;AAClC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AACA,SAAK,KAAM,GAAG,WAAW,QAAQ;AACjC,SAAK,KAAM,GAAG,SAAS,CAAC,QAAQ;AAC9B,UAAI,KAAK,SAAU;AACnB,WAAK,QACF,MAAM,EAAE,IAAI,CAAC,EACb,KAAK,mFAAmF;AAC3F,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,KAAM,GAAG,QAAQ,MAAM;AAC1B,WAAK,YAAY;AACjB,WAAK,MAAM,QAAQ;AAAA,IACrB,CAAC;AAED,SAAK,SAAS,KAAK,IAAK;AAExB,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,OAAO;AACX,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,oBAAoB;AAAA,IACtC;AAEA,UAAM,KAAK,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,aAAa;AA7KrB;AA8KI,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,KAAK,OAAO,IAAI,MAAM,iCAAiC,CAAC;AAAA,IAC/D,GAAG,KAAK,MAAM,iBAAiB;AAC/B,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,WAAK,KAAK,OAAO,IAAI,MAAM,uBAAuB,CAAC;AACnD;AAAA,IACF;AACA,SAAK,KAAK,KAAK;AAAA,MACb,MAAM;AAAA,MACN,OAAO;AAAA,QACL;AAAA,QACA,cAAc,KAAK,MAAM;AAAA,QACzB,aAAa,KAAK,MAAM;AAAA,QACxB,mBAAmB,KAAK,MAAM;AAAA,MAChC;AAAA,IACF,CAAC;AACD,UAAM,KAAK,KAAK,MAAO,SAAS,EAAE,KAAK,CAAC,CAAC,GAAG,MAAoB;AAC9D,mBAAa,KAAK;AAClB,UAAI,IAAK,SAAS,sBAAsB;AACtC,cAAM,IAAI,MAAM,0CAA0C;AAAA,MAC5D;AAAA,IACF,CAAC;AACD,SAAK,KAAK,QAAQ;AAAA,EACpB;AAAA,EAEA,MAAM,QAAQ;AAvMhB;AAwMI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AACA,SAAK,WAAW;AAEhB,SAAI,UAAK,SAAL,mBAAW,WAAW;AACxB,WAAK,KAAK,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAAA,IAC5C;AAEA,UAAM,QAAQ,WAAW,MAAM;AAC7B,WAAK,QAAQ,MAAM,sCAAsC;AACzD,WAAK,KAAM,KAAK;AAAA,IAClB,GAAG,KAAK,MAAM,YAAY;AAC1B,UAAM,KAAK,MAAM,MAAM,KAAK,MAAM;AAChC,mBAAa,KAAK;AAClB,WAAK,YAAY;AAAA,IACnB,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,UAAU,MAAsB;AA3NxC;AA4NI,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,oCAAoC;AAAA,IACtD;AACA,QAAI,GAAC,UAAK,SAAL,mBAAW,YAAW;AACzB,YAAM,IAAI,MAAM,uBAAuB;AAAA,IACzC;AACA,SAAK,cAAc;AACnB,SAAK,KAAK,KAAK,EAAE,MAAM,mBAAmB,OAAO,EAAE,YAAY,KAAK,EAAE,CAAC;AAAA,EACzE;AAAA,EAEA,MAAc,wBAAyC;AAtOzD;AAuOI,UAAM,OAAM,UAAK,SAAL,mBAAW;AACvB,QAAI,CAAC,KAAK;AACR,aAAO;AAAA,IACT;AACA,QAAI;AACF,YAAM,QAAQ,MAAM,SAAS,GAAG;AAChC,aAAO,MAAM,UAAU,OAAO;AAAA,IAChC,SAAS,KAAK;AACZ,YAAM,OAAQ,IAA8B;AAC5C,UAAI,SAAS,YAAY,SAAS,SAAS;AACzC,eAAO;AAAA,MACT;AACA,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEQ,cAAc;AACpB,iBAAa,KAAK,YAAY;AAC9B,kBAAc,KAAK,aAAa;AAChC,kBAAc,KAAK,sBAAsB;AAAA,EAC3C;AACF;","names":[]}
|
package/dist/stt/stt.cjs
CHANGED
|
@@ -210,6 +210,10 @@ class SpeechStream {
|
|
|
210
210
|
this.resampler = new import_rtc_node.AudioResampler(frame.sampleRate, this.neededSampleRate);
|
|
211
211
|
}
|
|
212
212
|
}
|
|
213
|
+
if (frame.samplesPerChannel === 0) {
|
|
214
|
+
this.input.put(frame);
|
|
215
|
+
return;
|
|
216
|
+
}
|
|
213
217
|
if (this.resampler) {
|
|
214
218
|
const frames = this.resampler.push(frame);
|
|
215
219
|
for (const frame2 of frames) {
|
package/dist/stt/stt.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAsF;AAEtF,mBAA8D;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AA2FL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,oBAAgB,+BAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n /** Language code of the speech. */\n language: string;\n /** Transcribed text. */\n text: string;\n /** Start time of the speech segment in seconds. */\n startTime: number;\n /** End time of the speech segment in seconds. */\n endTime: number;\n /** Confidence score of the transcription (0-1). */\n confidence: number;\n /** Word-level timing information. */\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n /** Duration of the audio that was recognized in seconds. */\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (frame.samplesPerChannel === 0) {\n this.input.put(frame);\n return;\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAsF;AAEtF,mBAA8D;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAkGL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,oBAAgB,+BAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,MAAM,sBAAsB,GAAG;AACjC,WAAK,MAAM,IAAI,KAAK;AACpB;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
package/dist/stt/stt.d.cts
CHANGED
|
@@ -40,14 +40,21 @@ export declare enum SpeechEventType {
|
|
|
40
40
|
}
|
|
41
41
|
/** SpeechData contains metadata about this {@link SpeechEvent}. */
|
|
42
42
|
export interface SpeechData {
|
|
43
|
+
/** Language code of the speech. */
|
|
43
44
|
language: string;
|
|
45
|
+
/** Transcribed text. */
|
|
44
46
|
text: string;
|
|
47
|
+
/** Start time of the speech segment in seconds. */
|
|
45
48
|
startTime: number;
|
|
49
|
+
/** End time of the speech segment in seconds. */
|
|
46
50
|
endTime: number;
|
|
51
|
+
/** Confidence score of the transcription (0-1). */
|
|
47
52
|
confidence: number;
|
|
53
|
+
/** Word-level timing information. */
|
|
48
54
|
words?: TimedString[];
|
|
49
55
|
}
|
|
50
56
|
export interface RecognitionUsage {
|
|
57
|
+
/** Duration of the audio that was recognized in seconds. */
|
|
51
58
|
audioDuration: number;
|
|
52
59
|
}
|
|
53
60
|
/** SpeechEvent is a packet of speech-to-text data. */
|
package/dist/stt/stt.d.ts
CHANGED
|
@@ -40,14 +40,21 @@ export declare enum SpeechEventType {
|
|
|
40
40
|
}
|
|
41
41
|
/** SpeechData contains metadata about this {@link SpeechEvent}. */
|
|
42
42
|
export interface SpeechData {
|
|
43
|
+
/** Language code of the speech. */
|
|
43
44
|
language: string;
|
|
45
|
+
/** Transcribed text. */
|
|
44
46
|
text: string;
|
|
47
|
+
/** Start time of the speech segment in seconds. */
|
|
45
48
|
startTime: number;
|
|
49
|
+
/** End time of the speech segment in seconds. */
|
|
46
50
|
endTime: number;
|
|
51
|
+
/** Confidence score of the transcription (0-1). */
|
|
47
52
|
confidence: number;
|
|
53
|
+
/** Word-level timing information. */
|
|
48
54
|
words?: TimedString[];
|
|
49
55
|
}
|
|
50
56
|
export interface RecognitionUsage {
|
|
57
|
+
/** Duration of the audio that was recognized in seconds. */
|
|
51
58
|
audioDuration: number;
|
|
52
59
|
}
|
|
53
60
|
/** SpeechEvent is a packet of speech-to-text data. */
|
package/dist/stt/stt.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/stt/stt.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,KAAK,UAAU,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAItD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAAiD,MAAM,aAAa,CAAC;AACpG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAC5E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,2CAA2C;AAC3C,oBAAY,eAAe;IACzB;;;;OAIG;IACH,eAAe,IAAI;IACnB;;OAEG;IACH,kBAAkB,IAAI;IACtB;;;OAGG;IACH,gBAAgB,IAAI;IACpB;;;OAGG;IACH,aAAa,IAAI;IACjB,mEAAmE;IACnE,iBAAiB,IAAI;IACrB;;;;OAIG;IACH,oBAAoB,IAAI;CACzB;AAED,mEAAmE;AACnE,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,WAAW,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,CAAC;IACtB,YAAY,CAAC,EAAE,CAAC,UAAU,EAAE,GAAG,UAAU,EAAE,CAAC,CAAC;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;IACxB;;;;;OAKG;IACH,iBAAiB,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,KAAK,CAAC;CAC9C;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IACtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGX,YAAY,EAAE,eAAe;IAKzC,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,8FAA8F;IACxF,SAAS,CAAC,KAAK,EAAE,WAAW,EAAE,WAAW,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAgBpF,SAAS,CAAC,QAAQ,CAAC,UAAU,CAC3B,KAAK,EAAE,WAAW,EAClB,WAAW,CAAC,EAAE,WAAW,GACxB,OAAO,CAAC,WAAW,CAAC;IAEvB;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;IAEtE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;;;;;;;;GAeG;AACH,8BAAsB,YAAa,YAAW,qBAAqB,CAAC,WAAW,CAAC;;IAC9E,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,MAAM,kCAAyC;IACzD,SAAS,CAAC,KAAK,kCAAyC;IACxD,SAAS,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,SAAS,CAAC,EAAE,cAAc,CAAC;IACrC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,MAAM,UAAS;IAEzB,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,gBAAgB,CAAa;IAErC,SAAS,CAAC,eAAe,kBAAyB;gBAGhD,GAAG,EAAE,GAAG,EACR,UAAU,CAAC,EAAE,MAAM,EACnB,iBAAiB,GAAE,iBAA+C;YAgBtD,QAAQ;IAqCtB,OAAO,CAAC,SAAS;cAUD,SAAS;cAkBT,cAAc;IA+B9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,SAAS,KAAK,WAAW,IAAI,WAAW,CAEvC;IAED,IAAI,eAAe,IAAI,MAAM,CAE5B;IAED,IAAI,eAAe,CAAC,KAAK,EAAE,MAAM,EAKhC;IAED,iBAAiB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAIzD,iBAAiB;IAIjB,qCAAqC;IACrC,SAAS,CAAC,KAAK,EAAE,UAAU;
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/stt/stt.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,KAAK,UAAU,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAItD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAAiD,MAAM,aAAa,CAAC;AACpG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAC5E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,2CAA2C;AAC3C,oBAAY,eAAe;IACzB;;;;OAIG;IACH,eAAe,IAAI;IACnB;;OAEG;IACH,kBAAkB,IAAI;IACtB;;;OAGG;IACH,gBAAgB,IAAI;IACpB;;;OAGG;IACH,aAAa,IAAI;IACjB,mEAAmE;IACnE,iBAAiB,IAAI;IACrB;;;;OAIG;IACH,oBAAoB,IAAI;CACzB;AAED,mEAAmE;AACnE,MAAM,WAAW,UAAU;IACzB,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,iDAAiD;IACjD,OAAO,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,UAAU,EAAE,MAAM,CAAC;IACnB,qCAAqC;IACrC,KAAK,CAAC,EAAE,WAAW,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,gBAAgB;IAC/B,4DAA4D;IAC5D,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,CAAC;IACtB,YAAY,CAAC,EAAE,CAAC,UAAU,EAAE,GAAG,UAAU,EAAE,CAAC,CAAC;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;IACxB;;;;;OAKG;IACH,iBAAiB,CAAC,EAAE,MAAM,GAAG,OAAO,GAAG,KAAK,CAAC;CAC9C;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IACtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGX,YAAY,EAAE,eAAe;IAKzC,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,8FAA8F;IACxF,SAAS,CAAC,KAAK,EAAE,WAAW,EAAE,WAAW,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAgBpF,SAAS,CAAC,QAAQ,CAAC,UAAU,CAC3B,KAAK,EAAE,WAAW,EAClB,WAAW,CAAC,EAAE,WAAW,GACxB,OAAO,CAAC,WAAW,CAAC;IAEvB;;;;;OAKG;IACH,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;IAEtE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;;;;;;;;GAeG;AACH,8BAAsB,YAAa,YAAW,qBAAqB,CAAC,WAAW,CAAC;;IAC9E,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,MAAM,kCAAyC;IACzD,SAAS,CAAC,KAAK,kCAAyC;IACxD,SAAS,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,SAAS,CAAC,EAAE,cAAc,CAAC;IACrC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,MAAM,UAAS;IAEzB,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,gBAAgB,CAAa;IAErC,SAAS,CAAC,eAAe,kBAAyB;gBAGhD,GAAG,EAAE,GAAG,EACR,UAAU,CAAC,EAAE,MAAM,EACnB,iBAAiB,GAAE,iBAA+C;YAgBtD,QAAQ;IAqCtB,OAAO,CAAC,SAAS;cAUD,SAAS;cAkBT,cAAc;IA+B9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,SAAS,KAAK,WAAW,IAAI,WAAW,CAEvC;IAED,IAAI,eAAe,IAAI,MAAM,CAE5B;IAED,IAAI,eAAe,CAAC,KAAK,EAAE,MAAM,EAKhC;IAED,iBAAiB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAIzD,iBAAiB;IAIjB,qCAAqC;IACrC,SAAS,CAAC,KAAK,EAAE,UAAU;IA6B3B,4DAA4D;IAC5D,KAAK;IAUL,2DAA2D;IAC3D,QAAQ;IAUR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;IAI5C,wDAAwD;IACxD,KAAK;IAQL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,YAAY;CAGvC"}
|
package/dist/stt/stt.js
CHANGED
|
@@ -185,6 +185,10 @@ class SpeechStream {
|
|
|
185
185
|
this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);
|
|
186
186
|
}
|
|
187
187
|
}
|
|
188
|
+
if (frame.samplesPerChannel === 0) {
|
|
189
|
+
this.input.put(frame);
|
|
190
|
+
return;
|
|
191
|
+
}
|
|
188
192
|
if (this.resampler) {
|
|
189
193
|
const frames = this.resampler.push(frame);
|
|
190
194
|
for (const frame2 of frames) {
|
package/dist/stt/stt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,qCAAqC;AAC9C,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,6BAA6B,wBAAwB;AAEtF,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AA2FL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,MAAM,8BAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,iBAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n /** Language code of the speech. */\n language: string;\n /** Transcribed text. */\n text: string;\n /** Start time of the speech segment in seconds. */\n startTime: number;\n /** End time of the speech segment in seconds. */\n endTime: number;\n /** Confidence score of the transcription (0-1). */\n confidence: number;\n /** Word-level timing information. */\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n /** Duration of the audio that was recognized in seconds. */\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (frame.samplesPerChannel === 0) {\n this.input.put(frame);\n return;\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,qCAAqC;AAC9C,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,6BAA6B,wBAAwB;AAEtF,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAkGL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,MAAM,8BAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,iBAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,MAAM,sBAAsB,GAAG;AACjC,WAAK,MAAM,IAAI,KAAK;AACpB;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n language: string;\n speed: number;\n newSentenceDelay: number;\n sentenceTokenizer: SentenceTokenizer;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAqC;AACrC,sBAA2B;AAE3B,yBAA6B;AAC7B,sBAAsB;AAEtB,mBAAsD;AAGtD,MAAM,uBAAuB;AAWtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,sBAAM,kBAAkB;AAAA,EAC/C,eAAe,sBAAM;AAAA,EACrB,YAAY,sBAAM;AACpB;AAmBO,MAAM,8BAA+B,gCAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,oBAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,gCAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,gCAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,2BAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AAtHvB;AAuHI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,YAAQ,wBAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qCAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qCAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n /** Language code for transcription. */\n language: string;\n /** Speech speed multiplier. */\n speed: number;\n /** Delay between sentences in milliseconds. */\n newSentenceDelay: number;\n /** Tokenizer for splitting text into sentences. */\n sentenceTokenizer: SentenceTokenizer;\n /** Function to hyphenate words. */\n hyphenateWord: (word: string) => string[];\n /** Function to split text into words with positions. */\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAqC;AACrC,sBAA2B;AAE3B,yBAA6B;AAC7B,sBAAsB;AAEtB,mBAAsD;AAGtD,MAAM,uBAAuB;AAiBtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,sBAAM,kBAAkB;AAAA,EAC/C,eAAe,sBAAM;AAAA,EACrB,YAAY,sBAAM;AACpB;AAmBO,MAAM,8BAA+B,gCAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,oBAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,gCAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,gCAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,2BAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AA5HvB;AA6HI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,YAAQ,wBAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qCAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qCAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|
package/dist/transcription.d.cts
CHANGED
|
@@ -3,11 +3,17 @@ import { AudioFrame } from '@livekit/rtc-node';
|
|
|
3
3
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
4
4
|
import type { SentenceTokenizer } from './tokenize/tokenizer.js';
|
|
5
5
|
export interface TextSyncOptions {
|
|
6
|
+
/** Language code for transcription. */
|
|
6
7
|
language: string;
|
|
8
|
+
/** Speech speed multiplier. */
|
|
7
9
|
speed: number;
|
|
10
|
+
/** Delay between sentences in milliseconds. */
|
|
8
11
|
newSentenceDelay: number;
|
|
12
|
+
/** Tokenizer for splitting text into sentences. */
|
|
9
13
|
sentenceTokenizer: SentenceTokenizer;
|
|
14
|
+
/** Function to hyphenate words. */
|
|
10
15
|
hyphenateWord: (word: string) => string[];
|
|
16
|
+
/** Function to split text into words with positions. */
|
|
11
17
|
splitWords: (words: string) => [string, number, number][];
|
|
12
18
|
}
|
|
13
19
|
export declare const defaultTextSyncOptions: TextSyncOptions;
|
package/dist/transcription.d.ts
CHANGED
|
@@ -3,11 +3,17 @@ import { AudioFrame } from '@livekit/rtc-node';
|
|
|
3
3
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
4
4
|
import type { SentenceTokenizer } from './tokenize/tokenizer.js';
|
|
5
5
|
export interface TextSyncOptions {
|
|
6
|
+
/** Language code for transcription. */
|
|
6
7
|
language: string;
|
|
8
|
+
/** Speech speed multiplier. */
|
|
7
9
|
speed: number;
|
|
10
|
+
/** Delay between sentences in milliseconds. */
|
|
8
11
|
newSentenceDelay: number;
|
|
12
|
+
/** Tokenizer for splitting text into sentences. */
|
|
9
13
|
sentenceTokenizer: SentenceTokenizer;
|
|
14
|
+
/** Function to hyphenate words. */
|
|
10
15
|
hyphenateWord: (word: string) => string[];
|
|
16
|
+
/** Function to split text into words with positions. */
|
|
11
17
|
splitWords: (words: string) => [string, number, number][];
|
|
12
18
|
}
|
|
13
19
|
export declare const defaultTextSyncOptions: TextSyncOptions;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"transcription.d.ts","sourceRoot":"","sources":["../src/transcription.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAMjF,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAC1C,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;CAC3D;AAED,eAAO,MAAM,sBAAsB,EAAE,eAOpC,CAAC;AAeF,KAAK,aAAa,GAAG;IACnB,WAAW,EAAE,CAAC,IAAI,EAAE,oBAAoB,KAAK,IAAI,CAAC;CACnD,CAAC;oDAEoE,aAAa,aAAa,CAAC;AAAjG,qBAAa,qBAAsB,SAAQ,0BAAuD;;gBAsBpF,IAAI,EAAE,eAAe;IAOjC,SAAS,CAAC,KAAK,EAAE,UAAU;IAU3B,QAAQ,CAAC,IAAI,EAAE,MAAM;IAkBrB,mBAAmB;IAYnB,kBAAkB;IAalB,qBAAqB;IASrB,sBAAsB;IAKtB,IAAI,UAAU,IAAI,MAAM,CAEvB;IAEK,KAAK,CAAC,SAAS,EAAE,OAAO;CA6J/B"}
|
|
1
|
+
{"version":3,"file":"transcription.d.ts","sourceRoot":"","sources":["../src/transcription.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAMjF,MAAM,WAAW,eAAe;IAC9B,uCAAuC;IACvC,QAAQ,EAAE,MAAM,CAAC;IACjB,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,+CAA+C;IAC/C,gBAAgB,EAAE,MAAM,CAAC;IACzB,mDAAmD;IACnD,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,mCAAmC;IACnC,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAC1C,wDAAwD;IACxD,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;CAC3D;AAED,eAAO,MAAM,sBAAsB,EAAE,eAOpC,CAAC;AAeF,KAAK,aAAa,GAAG;IACnB,WAAW,EAAE,CAAC,IAAI,EAAE,oBAAoB,KAAK,IAAI,CAAC;CACnD,CAAC;oDAEoE,aAAa,aAAa,CAAC;AAAjG,qBAAa,qBAAsB,SAAQ,0BAAuD;;gBAsBpF,IAAI,EAAE,eAAe;IAOjC,SAAS,CAAC,KAAK,EAAE,UAAU;IAU3B,QAAQ,CAAC,IAAI,EAAE,MAAM;IAkBrB,mBAAmB;IAYnB,kBAAkB;IAalB,qBAAqB;IASrB,sBAAsB;IAKtB,IAAI,UAAU,IAAI,MAAM,CAEvB;IAEK,KAAK,CAAC,SAAS,EAAE,OAAO;CA6J/B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n language: string;\n speed: number;\n newSentenceDelay: number;\n sentenceTokenizer: SentenceTokenizer;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":"AAGA,SAAS,4BAA4B;AACrC,SAAS,kBAAkB;AAE3B,SAAS,oBAAoB;AAC7B,SAAS,aAAa;AAEtB,SAAS,oBAAoB,QAAQ,iBAAiB;AAGtD,MAAM,uBAAuB;AAWtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,MAAM,kBAAkB;AAAA,EAC/C,eAAe,MAAM;AAAA,EACrB,YAAY,MAAM;AACpB;AAmBO,MAAM,8BAA+B,aAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,OAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,mBAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,mBAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,WAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AAtHvB;AAuHI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,QAAQ,UAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qBAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qBAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future, shortuuid } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n /** Language code for transcription. */\n language: string;\n /** Speech speed multiplier. */\n speed: number;\n /** Delay between sentences in milliseconds. */\n newSentenceDelay: number;\n /** Tokenizer for splitting text into sentences. */\n sentenceTokenizer: SentenceTokenizer;\n /** Function to hyphenate words. */\n hyphenateWord: (word: string) => string[];\n /** Function to split text into words with positions. */\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n #task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = shortuuid('SG_');\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n }\n}\n"],"mappings":"AAGA,SAAS,4BAA4B;AACrC,SAAS,kBAAkB;AAE3B,SAAS,oBAAoB;AAC7B,SAAS,aAAa;AAEtB,SAAS,oBAAoB,QAAQ,iBAAiB;AAGtD,MAAM,uBAAuB;AAiBtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,MAAM,kBAAkB;AAAA,EAC/C,eAAe,MAAM;AAAA,EACrB,YAAY,MAAM;AACpB;AAmBO,MAAM,8BAA+B,aAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,OAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,mBAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,mBAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,WAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AA5HvB;AA6HI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,QAAQ,UAAU,KAAK;AAC7B,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qBAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qBAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
|
package/dist/utils.cjs
CHANGED
|
@@ -494,14 +494,22 @@ function resampleStream({
|
|
|
494
494
|
let resampler = null;
|
|
495
495
|
const transformStream = new import_web.TransformStream({
|
|
496
496
|
transform(chunk, controller) {
|
|
497
|
+
if (chunk.samplesPerChannel === 0) {
|
|
498
|
+
controller.enqueue(chunk);
|
|
499
|
+
return;
|
|
500
|
+
}
|
|
497
501
|
if (!resampler) {
|
|
498
502
|
resampler = new import_rtc_node.AudioResampler(chunk.sampleRate, outputRate);
|
|
499
503
|
}
|
|
500
504
|
for (const frame of resampler.push(chunk)) {
|
|
501
505
|
controller.enqueue(frame);
|
|
502
506
|
}
|
|
503
|
-
|
|
504
|
-
|
|
507
|
+
},
|
|
508
|
+
flush(controller) {
|
|
509
|
+
if (resampler) {
|
|
510
|
+
for (const frame of resampler.flush()) {
|
|
511
|
+
controller.enqueue(frame);
|
|
512
|
+
}
|
|
505
513
|
}
|
|
506
514
|
}
|
|
507
515
|
});
|