@livekit/agents 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/dist/index.cjs +6 -1
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.ts +3 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +3 -0
  6. package/dist/index.js.map +1 -1
  7. package/dist/inference_runner.cjs +38 -0
  8. package/dist/inference_runner.cjs.map +1 -0
  9. package/dist/inference_runner.d.ts +11 -0
  10. package/dist/inference_runner.d.ts.map +1 -0
  11. package/dist/inference_runner.js +14 -0
  12. package/dist/inference_runner.js.map +1 -0
  13. package/dist/ipc/index.cjs +23 -0
  14. package/dist/ipc/index.cjs.map +1 -0
  15. package/dist/ipc/index.d.ts +2 -0
  16. package/dist/ipc/index.d.ts.map +1 -0
  17. package/dist/ipc/index.js +2 -0
  18. package/dist/ipc/index.js.map +1 -0
  19. package/dist/ipc/inference_executor.cjs +17 -0
  20. package/dist/ipc/inference_executor.cjs.map +1 -0
  21. package/dist/ipc/inference_executor.d.ts +4 -0
  22. package/dist/ipc/inference_executor.d.ts.map +1 -0
  23. package/dist/ipc/inference_executor.js +1 -0
  24. package/dist/ipc/inference_executor.js.map +1 -0
  25. package/dist/ipc/inference_proc_executor.cjs +97 -0
  26. package/dist/ipc/inference_proc_executor.cjs.map +1 -0
  27. package/dist/ipc/inference_proc_executor.d.ts +23 -0
  28. package/dist/ipc/inference_proc_executor.d.ts.map +1 -0
  29. package/dist/ipc/inference_proc_executor.js +72 -0
  30. package/dist/ipc/inference_proc_executor.js.map +1 -0
  31. package/dist/ipc/inference_proc_lazy_main.cjs +90 -0
  32. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -0
  33. package/dist/ipc/inference_proc_lazy_main.d.ts +2 -0
  34. package/dist/ipc/inference_proc_lazy_main.d.ts.map +1 -0
  35. package/dist/ipc/inference_proc_lazy_main.js +67 -0
  36. package/dist/ipc/inference_proc_lazy_main.js.map +1 -0
  37. package/dist/ipc/job_executor.cjs +8 -7
  38. package/dist/ipc/job_executor.cjs.map +1 -1
  39. package/dist/ipc/job_executor.d.ts +14 -15
  40. package/dist/ipc/job_executor.d.ts.map +1 -1
  41. package/dist/ipc/job_executor.js +7 -6
  42. package/dist/ipc/job_executor.js.map +1 -1
  43. package/dist/ipc/job_proc_executor.cjs +108 -0
  44. package/dist/ipc/job_proc_executor.cjs.map +1 -0
  45. package/dist/ipc/job_proc_executor.d.ts +19 -0
  46. package/dist/ipc/job_proc_executor.d.ts.map +1 -0
  47. package/dist/ipc/job_proc_executor.js +83 -0
  48. package/dist/ipc/job_proc_executor.js.map +1 -0
  49. package/dist/ipc/{job_main.cjs → job_proc_lazy_main.cjs} +41 -36
  50. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -0
  51. package/dist/ipc/job_proc_lazy_main.d.ts +2 -0
  52. package/dist/ipc/job_proc_lazy_main.d.ts.map +1 -0
  53. package/dist/ipc/{job_main.js → job_proc_lazy_main.js} +41 -11
  54. package/dist/ipc/job_proc_lazy_main.js.map +1 -0
  55. package/dist/ipc/message.cjs.map +1 -1
  56. package/dist/ipc/message.d.ts +17 -0
  57. package/dist/ipc/message.d.ts.map +1 -1
  58. package/dist/ipc/proc_pool.cjs +30 -4
  59. package/dist/ipc/proc_pool.cjs.map +1 -1
  60. package/dist/ipc/proc_pool.d.ts +5 -1
  61. package/dist/ipc/proc_pool.d.ts.map +1 -1
  62. package/dist/ipc/proc_pool.js +30 -4
  63. package/dist/ipc/proc_pool.js.map +1 -1
  64. package/dist/ipc/{proc_job_executor.cjs → supervised_proc.cjs} +58 -46
  65. package/dist/ipc/supervised_proc.cjs.map +1 -0
  66. package/dist/ipc/supervised_proc.d.ts +30 -0
  67. package/dist/ipc/supervised_proc.d.ts.map +1 -0
  68. package/dist/ipc/{proc_job_executor.js → supervised_proc.js} +54 -32
  69. package/dist/ipc/supervised_proc.js.map +1 -0
  70. package/dist/job.cjs +18 -1
  71. package/dist/job.cjs.map +1 -1
  72. package/dist/job.d.ts +9 -1
  73. package/dist/job.d.ts.map +1 -1
  74. package/dist/job.js +17 -1
  75. package/dist/job.js.map +1 -1
  76. package/dist/metrics/base.cjs +2 -2
  77. package/dist/metrics/base.cjs.map +1 -1
  78. package/dist/metrics/base.d.ts +1 -1
  79. package/dist/metrics/base.d.ts.map +1 -1
  80. package/dist/metrics/base.js +2 -2
  81. package/dist/metrics/base.js.map +1 -1
  82. package/dist/multimodal/agent_playout.cjs +13 -14
  83. package/dist/multimodal/agent_playout.cjs.map +1 -1
  84. package/dist/multimodal/agent_playout.d.ts +4 -4
  85. package/dist/multimodal/agent_playout.d.ts.map +1 -1
  86. package/dist/multimodal/agent_playout.js +13 -14
  87. package/dist/multimodal/agent_playout.js.map +1 -1
  88. package/dist/multimodal/multimodal_agent.cjs +12 -8
  89. package/dist/multimodal/multimodal_agent.cjs.map +1 -1
  90. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  91. package/dist/multimodal/multimodal_agent.js +13 -9
  92. package/dist/multimodal/multimodal_agent.js.map +1 -1
  93. package/dist/pipeline/agent_output.cjs +20 -4
  94. package/dist/pipeline/agent_output.cjs.map +1 -1
  95. package/dist/pipeline/agent_output.d.ts +4 -2
  96. package/dist/pipeline/agent_output.d.ts.map +1 -1
  97. package/dist/pipeline/agent_output.js +20 -4
  98. package/dist/pipeline/agent_output.js.map +1 -1
  99. package/dist/pipeline/agent_playout.cjs +9 -3
  100. package/dist/pipeline/agent_playout.cjs.map +1 -1
  101. package/dist/pipeline/agent_playout.d.ts +4 -2
  102. package/dist/pipeline/agent_playout.d.ts.map +1 -1
  103. package/dist/pipeline/agent_playout.js +9 -3
  104. package/dist/pipeline/agent_playout.js.map +1 -1
  105. package/dist/pipeline/human_input.cjs +6 -0
  106. package/dist/pipeline/human_input.cjs.map +1 -1
  107. package/dist/pipeline/human_input.d.ts +3 -1
  108. package/dist/pipeline/human_input.d.ts.map +1 -1
  109. package/dist/pipeline/human_input.js +6 -0
  110. package/dist/pipeline/human_input.js.map +1 -1
  111. package/dist/pipeline/pipeline_agent.cjs +79 -12
  112. package/dist/pipeline/pipeline_agent.cjs.map +1 -1
  113. package/dist/pipeline/pipeline_agent.d.ts +8 -0
  114. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  115. package/dist/pipeline/pipeline_agent.js +79 -12
  116. package/dist/pipeline/pipeline_agent.js.map +1 -1
  117. package/dist/stt/stream_adapter.cjs +16 -4
  118. package/dist/stt/stream_adapter.cjs.map +1 -1
  119. package/dist/stt/stream_adapter.d.ts.map +1 -1
  120. package/dist/stt/stream_adapter.js +16 -4
  121. package/dist/stt/stream_adapter.js.map +1 -1
  122. package/dist/tokenize/basic/basic.cjs +2 -0
  123. package/dist/tokenize/basic/basic.cjs.map +1 -1
  124. package/dist/tokenize/basic/basic.d.ts +2 -0
  125. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  126. package/dist/tokenize/basic/basic.js +1 -0
  127. package/dist/tokenize/basic/basic.js.map +1 -1
  128. package/dist/tokenize/basic/index.cjs +2 -0
  129. package/dist/tokenize/basic/index.cjs.map +1 -1
  130. package/dist/tokenize/basic/index.d.ts +1 -1
  131. package/dist/tokenize/basic/index.d.ts.map +1 -1
  132. package/dist/tokenize/basic/index.js +8 -1
  133. package/dist/tokenize/basic/index.js.map +1 -1
  134. package/dist/tokenize/token_stream.cjs +5 -3
  135. package/dist/tokenize/token_stream.cjs.map +1 -1
  136. package/dist/tokenize/token_stream.d.ts.map +1 -1
  137. package/dist/tokenize/token_stream.js +5 -3
  138. package/dist/tokenize/token_stream.js.map +1 -1
  139. package/dist/transcription.cjs +203 -86
  140. package/dist/transcription.cjs.map +1 -1
  141. package/dist/transcription.d.ts +24 -17
  142. package/dist/transcription.d.ts.map +1 -1
  143. package/dist/transcription.js +201 -85
  144. package/dist/transcription.js.map +1 -1
  145. package/dist/worker.cjs +42 -9
  146. package/dist/worker.cjs.map +1 -1
  147. package/dist/worker.d.ts +5 -1
  148. package/dist/worker.d.ts.map +1 -1
  149. package/dist/worker.js +42 -9
  150. package/dist/worker.js.map +1 -1
  151. package/package.json +3 -3
  152. package/src/index.ts +3 -1
  153. package/src/inference_runner.ts +19 -0
  154. package/src/ipc/index.ts +5 -0
  155. package/src/ipc/inference_executor.ts +7 -0
  156. package/src/ipc/inference_proc_executor.ts +93 -0
  157. package/src/ipc/inference_proc_lazy_main.ts +86 -0
  158. package/src/ipc/job_executor.ts +15 -17
  159. package/src/ipc/job_proc_executor.ts +112 -0
  160. package/src/ipc/{job_main.ts → job_proc_lazy_main.ts} +44 -14
  161. package/src/ipc/message.ts +14 -1
  162. package/src/ipc/proc_pool.ts +33 -3
  163. package/src/ipc/{proc_job_executor.ts → supervised_proc.ts} +80 -30
  164. package/src/job.ts +21 -0
  165. package/src/metrics/base.ts +7 -10
  166. package/src/multimodal/agent_playout.ts +14 -16
  167. package/src/multimodal/multimodal_agent.ts +13 -9
  168. package/src/pipeline/agent_output.ts +34 -5
  169. package/src/pipeline/agent_playout.ts +10 -1
  170. package/src/pipeline/human_input.ts +8 -0
  171. package/src/pipeline/pipeline_agent.ts +96 -11
  172. package/src/stt/stream_adapter.ts +17 -5
  173. package/src/tokenize/basic/basic.ts +2 -0
  174. package/src/tokenize/basic/index.ts +7 -1
  175. package/src/tokenize/token_stream.ts +6 -3
  176. package/src/transcription.ts +270 -96
  177. package/src/worker.ts +42 -5
  178. package/dist/ipc/job_main.cjs.map +0 -1
  179. package/dist/ipc/job_main.d.ts +0 -8
  180. package/dist/ipc/job_main.d.ts.map +0 -1
  181. package/dist/ipc/job_main.js.map +0 -1
  182. package/dist/ipc/proc_job_executor.cjs.map +0 -1
  183. package/dist/ipc/proc_job_executor.d.ts +0 -15
  184. package/dist/ipc/proc_job_executor.d.ts.map +0 -1
  185. package/dist/ipc/proc_job_executor.js.map +0 -1
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, Room } from '@livekit/rtc-node';\nimport { log } from './log.js';\n\nexport interface TranscriptionForwarder {\n start(): void;\n pushAudio(frame: AudioFrame): void;\n pushText(text: string): void;\n markTextComplete(): void;\n markAudioComplete(): void;\n close(interrupt: boolean): Promise<void>;\n currentCharacterIndex: number;\n text: string;\n}\n\nexport class BasicTranscriptionForwarder implements TranscriptionForwarder {\n #room: Room;\n #participantIdentity: string;\n #trackSid: string;\n #currentText: string = '';\n #totalAudioDuration: number = 0;\n #currentPlayoutTime: number = 0;\n #DEFAULT_CHARS_PER_SECOND = 16;\n #charsPerSecond: number = this.#DEFAULT_CHARS_PER_SECOND;\n #messageId: string;\n #isRunning: boolean = false;\n #logger = log();\n currentCharacterIndex: number = 0;\n\n constructor(room: Room, participantIdentity: string, trackSid: string, messageId: string) {\n this.#room = room;\n this.#participantIdentity = participantIdentity;\n this.#trackSid = trackSid;\n this.#messageId = messageId;\n }\n\n get text(): string {\n return this.#currentText;\n }\n\n start(): void {\n if (!this.#isRunning) {\n this.#isRunning = true;\n this.#startPublishingLoop().catch((error) => {\n this.#logger.error('Error in publishing loop:', error);\n this.#isRunning = false;\n });\n }\n }\n\n pushAudio(frame: AudioFrame): void {\n this.#totalAudioDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string): void {\n this.#currentText += text;\n }\n\n #textIsComplete: boolean = false;\n #audioIsComplete: boolean = false;\n\n markTextComplete(): void {\n this.#textIsComplete = true;\n this.#adjustTimingIfBothFinished();\n }\n\n markAudioComplete(): void {\n this.#audioIsComplete = true;\n this.#adjustTimingIfBothFinished();\n }\n\n 
#adjustTimingIfBothFinished(): void {\n if (this.#textIsComplete && this.#audioIsComplete) {\n const actualDuration = this.#totalAudioDuration;\n if (actualDuration > 0 && this.#currentText.length > 0) {\n this.#charsPerSecond = this.#currentText.length / actualDuration;\n }\n }\n }\n\n #computeSleepInterval(): number {\n return Math.min(Math.max(1 / this.#charsPerSecond, 0.0625), 0.5);\n }\n\n async #startPublishingLoop(): Promise<void> {\n this.#isRunning = true;\n let sleepInterval = this.#computeSleepInterval();\n let isComplete = false;\n while (this.#isRunning && !isComplete) {\n this.#currentPlayoutTime += sleepInterval;\n this.currentCharacterIndex = Math.floor(this.#currentPlayoutTime * this.#charsPerSecond);\n isComplete = this.#textIsComplete && this.currentCharacterIndex >= this.#currentText.length;\n await this.#publishTranscription(false);\n if (this.#isRunning && !isComplete) {\n sleepInterval = this.#computeSleepInterval();\n await new Promise((resolve) => setTimeout(resolve, sleepInterval * 1000));\n }\n }\n\n if (this.#isRunning) {\n this.close(false);\n }\n }\n\n async #publishTranscription(final: boolean): Promise<void> {\n const textToPublish = this.#currentText.slice(0, this.currentCharacterIndex);\n await this.#room.localParticipant?.publishTranscription({\n participantIdentity: this.#participantIdentity,\n trackSid: this.#trackSid,\n segments: [\n {\n text: textToPublish,\n final: final,\n id: this.#messageId,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n async close(interrupt: boolean): Promise<void> {\n this.#isRunning = false;\n\n // Publish whatever we had as final\n if (!interrupt) {\n this.currentCharacterIndex = this.#currentText.length;\n }\n await this.#publishTranscription(true);\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAAoB;AAab,MAAM,4BAA8D;AAAA,EACzE;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAuB;AAAA,EACvB,sBAA8B;AAAA,EAC9B,sBAA8B;AAAA,EAC9B,4BAA4B;AAAA,EAC5B,kBAA0B,KAAK;AAAA,EAC/B;AAAA,EACA,aAAsB;AAAA,EACtB,cAAU,gBAAI;AAAA,EACd,wBAAgC;AAAA,EAEhC,YAAY,MAAY,qBAA6B,UAAkB,WAAmB;AACxF,SAAK,QAAQ;AACb,SAAK,uBAAuB;AAC5B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,OAAe;AACjB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,QAAc;AACZ,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa;AAClB,WAAK,qBAAqB,EAAE,MAAM,CAAC,UAAU;AAC3C,aAAK,QAAQ,MAAM,6BAA6B,KAAK;AACrD,aAAK,aAAa;AAAA,MACpB,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,UAAU,OAAyB;AACjC,SAAK,uBAAuB,MAAM,oBAAoB,MAAM;AAAA,EAC9D;AAAA,EAEA,SAAS,MAAoB;AAC3B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,kBAA2B;AAAA,EAC3B,mBAA4B;AAAA,EAE5B,mBAAyB;AACvB,SAAK,kBAAkB;AACvB,SAAK,4BAA4B;AAAA,EACnC;AAAA,EAEA,oBAA0B;AACxB,SAAK,mBAAmB;AACxB,SAAK,4BAA4B;AAAA,EACnC;AAAA,EAEA,8BAAoC;AAClC,QAAI,KAAK,mBAAmB,KAAK,kBAAkB;AACjD,YAAM,iBAAiB,KAAK;AAC5B,UAAI,iBAAiB,KAAK,KAAK,aAAa,SAAS,GAAG;AACtD,aAAK,kBAAkB,KAAK,aAAa,SAAS;AAAA,MACpD;AAAA,IACF;AAAA,EACF;AAAA,EAEA,wBAAgC;AAC9B,WAAO,KAAK,IAAI,KAAK,IAAI,IAAI,KAAK,iBAAiB,MAAM,GAAG,GAAG;AAAA,EACjE;AAAA,EAEA,MAAM,uBAAsC;AAC1C,SAAK,aAAa;AAClB,QAAI,gBAAgB,KAAK,sBAAsB;AAC/C,QAAI,aAAa;AACjB,WAAO,KAAK,cAAc,CAAC,YAAY;AACrC,WAAK,uBAAuB;AAC5B,WAAK,wBAAwB,KAAK,MAAM,KAAK,sBAAsB,KAAK,eAAe;AACvF,mBAAa,KAAK,mBAAmB,KAAK,yBAAyB,KAAK,aAAa;AACrF,YAAM,KAAK,sBAAsB,KAAK;AACtC,UAAI,KAAK,cAAc,CAAC,YAAY;AAClC,wBAAgB,KAAK,sBAAsB;AAC3C,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,gBAAgB,GAAI,CAAC;AAAA,MAC1E;AAAA,IACF;AAEA,QAAI,KAAK,YAAY;AACnB,WAAK,MAAM,KAAK;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAM,sBAAsB,OAA+B;AA1G7D;AA2GI,UAAM,gBAAgB,KAAK,aAAa,MAAM,GAAG,KAAK,qBAAqB;AAC3E,YAAM,UAAK,MAAM,qBAAX,mBAA6B,qBAAqB;AAAA,MACtD,qBAAqB,KAAK;AAAA,MAC1B,UAAU,KAAK;AAAA,MACf,UAAU;AAAA,QACR;AAAA,UACE,MAAM;AAAA,UACN;AAAA,UACA,IAAI,KAAK;AAAA,UACT,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,MAAM,W
AAmC;AAC7C,SAAK,aAAa;AAGlB,QAAI,CAAC,WAAW;AACd,WAAK,wBAAwB,KAAK,aAAa;AAAA,IACjD;AACA,UAAM,KAAK,sBAAsB,IAAI;AAAA,EACvC;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { randomUUID } from 'node:crypto';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n language: string;\n speed: number;\n newSentenceDelay: number;\n sentenceTokenizer: SentenceTokenizer;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n 
#task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n 
this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = 'SG_' + randomUUID();\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = 
estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAqC;AACrC,sBAA2B;AAE3B,yBAA2B;AAC3B,yBAA6B;AAC7B,sBAAsB;AAEtB,mBAA2C;AAG3C,MAAM,uBAAuB;AAWtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,sBAAM,kBAAkB;AAAA,EAC/C,eAAe,sBAAM;AAAA,EACrB,YAAY,sBAAM;AACpB;AAmBO,MAAM,8BAA+B,gCAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,oBAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,gCAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,gCAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,2BAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AAvHvB;AAwHI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;A
AAA,IAC3B;AAEA,SAAK,OAAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,QAAQ,YAAQ,+BAAW;AACjC,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qCAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qCAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACv
B,CAAC;AAAA,IACH;AAEA,SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
@@ -1,24 +1,31 @@
1
- import type { AudioFrame, Room } from '@livekit/rtc-node';
2
- export interface TranscriptionForwarder {
3
- start(): void;
4
- pushAudio(frame: AudioFrame): void;
5
- pushText(text: string): void;
6
- markTextComplete(): void;
7
- markAudioComplete(): void;
8
- close(interrupt: boolean): Promise<void>;
9
- currentCharacterIndex: number;
10
- text: string;
1
+ import { TranscriptionSegment } from '@livekit/protocol';
2
+ import { AudioFrame } from '@livekit/rtc-node';
3
+ import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
4
+ import type { SentenceTokenizer } from './tokenize/tokenizer.js';
5
+ export interface TextSyncOptions {
6
+ language: string;
7
+ speed: number;
8
+ newSentenceDelay: number;
9
+ sentenceTokenizer: SentenceTokenizer;
10
+ hyphenateWord: (word: string) => string[];
11
+ splitWords: (words: string) => [string, number, number][];
11
12
  }
12
- export declare class BasicTranscriptionForwarder implements TranscriptionForwarder {
13
+ export declare const defaultTextSyncOptions: TextSyncOptions;
14
+ type SyncCallbacks = {
15
+ textUpdated: (text: TranscriptionSegment) => void;
16
+ };
17
+ declare const TextAudioSynchronizer_base: new () => TypedEmitter<SyncCallbacks>;
18
+ export declare class TextAudioSynchronizer extends TextAudioSynchronizer_base {
13
19
  #private;
14
- currentCharacterIndex: number;
15
- constructor(room: Room, participantIdentity: string, trackSid: string, messageId: string);
16
- get text(): string;
17
- start(): void;
20
+ constructor(opts: TextSyncOptions);
18
21
  pushAudio(frame: AudioFrame): void;
19
22
  pushText(text: string): void;
20
- markTextComplete(): void;
21
- markAudioComplete(): void;
23
+ markAudioSegmentEnd(): void;
24
+ markTextSegmentEnd(): void;
25
+ segmentPlayoutStarted(): void;
26
+ segmentPlayoutFinished(): void;
27
+ get playedText(): string;
22
28
  close(interrupt: boolean): Promise<void>;
23
29
  }
30
+ export {};
24
31
  //# sourceMappingURL=transcription.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"transcription.d.ts","sourceRoot":"","sources":["../src/transcription.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAG1D,MAAM,WAAW,sBAAsB;IACrC,KAAK,IAAI,IAAI,CAAC;IACd,SAAS,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;IACnC,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,gBAAgB,IAAI,IAAI,CAAC;IACzB,iBAAiB,IAAI,IAAI,CAAC;IAC1B,KAAK,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,qBAAa,2BAA4B,YAAW,sBAAsB;;IAYxE,qBAAqB,EAAE,MAAM,CAAK;gBAEtB,IAAI,EAAE,IAAI,EAAE,mBAAmB,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;IAOxF,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,KAAK,IAAI,IAAI;IAUb,SAAS,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI;IAIlC,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAO5B,gBAAgB,IAAI,IAAI;IAKxB,iBAAiB,IAAI,IAAI;IAwDnB,KAAK,CAAC,SAAS,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;CAS/C"}
1
+ {"version":3,"file":"transcription.d.ts","sourceRoot":"","sources":["../src/transcription.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAIhF,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAMjF,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,gBAAgB,EAAE,MAAM,CAAC;IACzB,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAC1C,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;CAC3D;AAED,eAAO,MAAM,sBAAsB,EAAE,eAOpC,CAAC;AAeF,KAAK,aAAa,GAAG;IACnB,WAAW,EAAE,CAAC,IAAI,EAAE,oBAAoB,KAAK,IAAI,CAAC;CACnD,CAAC;oDAEoE,aAAa,aAAa,CAAC;AAAjG,qBAAa,qBAAsB,SAAQ,0BAAuD;;gBAsBpF,IAAI,EAAE,eAAe;IAOjC,SAAS,CAAC,KAAK,EAAE,UAAU;IAU3B,QAAQ,CAAC,IAAI,EAAE,MAAM;IAkBrB,mBAAmB;IAYnB,kBAAkB;IAalB,qBAAqB;IASrB,sBAAsB;IAKtB,IAAI,UAAU,IAAI,MAAM,CAEvB;IAEK,KAAK,CAAC,SAAS,EAAE,OAAO;CA6J/B"}
@@ -1,107 +1,223 @@
1
- import { log } from "./log.js";
2
- class BasicTranscriptionForwarder {
3
- #room;
4
- #participantIdentity;
5
- #trackSid;
6
- #currentText = "";
7
- #totalAudioDuration = 0;
8
- #currentPlayoutTime = 0;
9
- #DEFAULT_CHARS_PER_SECOND = 16;
10
- #charsPerSecond = this.#DEFAULT_CHARS_PER_SECOND;
11
- #messageId;
12
- #isRunning = false;
13
- #logger = log();
14
- currentCharacterIndex = 0;
15
- constructor(room, participantIdentity, trackSid, messageId) {
16
- this.#room = room;
17
- this.#participantIdentity = participantIdentity;
18
- this.#trackSid = trackSid;
19
- this.#messageId = messageId;
20
- }
21
- get text() {
22
- return this.#currentText;
23
- }
24
- start() {
25
- if (!this.#isRunning) {
26
- this.#isRunning = true;
27
- this.#startPublishingLoop().catch((error) => {
28
- this.#logger.error("Error in publishing loop:", error);
29
- this.#isRunning = false;
30
- });
31
- }
1
+ import { TranscriptionSegment } from "@livekit/protocol";
2
+ import { AudioFrame } from "@livekit/rtc-node";
3
+ import { randomUUID } from "node:crypto";
4
+ import { EventEmitter } from "node:events";
5
+ import { basic } from "./tokenize/index.js";
6
+ import { AsyncIterableQueue, Future } from "./utils.js";
7
+ const STANDARD_SPEECH_RATE = 3830;
8
+ const defaultTextSyncOptions = {
9
+ language: "",
10
+ speed: 1,
11
+ newSentenceDelay: 400,
12
+ sentenceTokenizer: new basic.SentenceTokenizer(),
13
+ hyphenateWord: basic.hyphenateWord,
14
+ splitWords: basic.splitWords
15
+ };
16
+ class TextAudioSynchronizer extends EventEmitter {
17
+ #opts;
18
+ #speed;
19
+ #closed = false;
20
+ #interrupted = false;
21
+ #closeFut = new Future();
22
+ #playingSegIndex = -1;
23
+ #finishedSegIndex = -1;
24
+ #textQChanged = new AsyncIterableQueue();
25
+ #textQ = [];
26
+ #audioQChanged = new AsyncIterableQueue();
27
+ #audioQ = [];
28
+ #playedText = "";
29
+ #task;
30
+ #audioData;
31
+ #textData;
32
+ constructor(opts) {
33
+ super();
34
+ this.#opts = opts;
35
+ this.#speed = opts.speed * STANDARD_SPEECH_RATE;
32
36
  }
33
37
  pushAudio(frame) {
34
- this.#totalAudioDuration += frame.samplesPerChannel / frame.sampleRate;
38
+ this.#checkNotClosed();
39
+ if (!this.#audioData) {
40
+ this.#audioData = { pushedDuration: 0, done: false };
41
+ this.#audioQ.push(this.#audioData);
42
+ this.#audioQChanged.put(1);
43
+ }
44
+ this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;
35
45
  }
36
46
  pushText(text) {
37
- this.#currentText += text;
47
+ this.#checkNotClosed();
48
+ if (!this.#textData) {
49
+ this.#textData = {
50
+ sentenceStream: this.#opts.sentenceTokenizer.stream(),
51
+ pushedText: "",
52
+ done: false,
53
+ forwardedHyphens: 0,
54
+ forwardedSentences: 0
55
+ };
56
+ this.#textQ.push(this.#textData);
57
+ this.#textQChanged.put(1);
58
+ }
59
+ this.#textData.pushedText += text;
60
+ this.#textData.sentenceStream.pushText(text);
38
61
  }
39
- #textIsComplete = false;
40
- #audioIsComplete = false;
41
- markTextComplete() {
42
- this.#textIsComplete = true;
43
- this.#adjustTimingIfBothFinished();
62
+ markAudioSegmentEnd() {
63
+ this.#checkNotClosed();
64
+ if (!this.#audioData) {
65
+ this.pushAudio(new AudioFrame(new Int16Array(), 24e3, 1, 0));
66
+ }
67
+ this.#audioData.done = true;
68
+ this.#audioData = void 0;
44
69
  }
45
- markAudioComplete() {
46
- this.#audioIsComplete = true;
47
- this.#adjustTimingIfBothFinished();
70
+ markTextSegmentEnd() {
71
+ var _a, _b;
72
+ this.#checkNotClosed();
73
+ if (!this.#textData) {
74
+ this.pushText("");
75
+ }
76
+ this.#textData.done = true;
77
+ (_a = this.#textData) == null ? void 0 : _a.sentenceStream.flush();
78
+ (_b = this.#textData) == null ? void 0 : _b.sentenceStream.close();
79
+ this.#textData = void 0;
48
80
  }
49
- #adjustTimingIfBothFinished() {
50
- if (this.#textIsComplete && this.#audioIsComplete) {
51
- const actualDuration = this.#totalAudioDuration;
52
- if (actualDuration > 0 && this.#currentText.length > 0) {
53
- this.#charsPerSecond = this.#currentText.length / actualDuration;
54
- }
81
+ segmentPlayoutStarted() {
82
+ this.#checkNotClosed();
83
+ this.#playingSegIndex++;
84
+ if (!this.#task) {
85
+ this.#task = this.#mainLoop();
55
86
  }
56
87
  }
57
- #computeSleepInterval() {
58
- return Math.min(Math.max(1 / this.#charsPerSecond, 0.0625), 0.5);
88
+ segmentPlayoutFinished() {
89
+ this.#checkNotClosed();
90
+ this.#finishedSegIndex++;
59
91
  }
60
- async #startPublishingLoop() {
61
- this.#isRunning = true;
62
- let sleepInterval = this.#computeSleepInterval();
63
- let isComplete = false;
64
- while (this.#isRunning && !isComplete) {
65
- this.#currentPlayoutTime += sleepInterval;
66
- this.currentCharacterIndex = Math.floor(this.#currentPlayoutTime * this.#charsPerSecond);
67
- isComplete = this.#textIsComplete && this.currentCharacterIndex >= this.#currentText.length;
68
- await this.#publishTranscription(false);
69
- if (this.#isRunning && !isComplete) {
70
- sleepInterval = this.#computeSleepInterval();
71
- await new Promise((resolve) => setTimeout(resolve, sleepInterval * 1e3));
72
- }
92
+ get playedText() {
93
+ return this.#playedText;
94
+ }
95
+ async close(interrupt) {
96
+ if (this.#closed) {
97
+ return;
98
+ }
99
+ this.#closed = true;
100
+ this.#interrupted = interrupt;
101
+ this.#closeFut.resolve();
102
+ for (const textData of this.#textQ) {
103
+ textData == null ? void 0 : textData.sentenceStream.close();
73
104
  }
74
- if (this.#isRunning) {
75
- this.close(false);
105
+ this.#textQ.push(void 0);
106
+ this.#audioQ.push(void 0);
107
+ this.#textQChanged.put(1);
108
+ this.#audioQChanged.put(1);
109
+ await this.#task;
110
+ }
111
+ async #mainLoop() {
112
+ let segIndex = 0;
113
+ let qDone = false;
114
+ while (!qDone) {
115
+ await this.#textQChanged.next();
116
+ await this.#audioQChanged.next();
117
+ while (this.#textQ.length && this.#audioQ.length) {
118
+ const textData = this.#textQ.pop();
119
+ const audioData = this.#audioQ.pop();
120
+ if (!(textData && audioData)) {
121
+ qDone = true;
122
+ break;
123
+ }
124
+ while (!this.#closed) {
125
+ if (this.#playingSegIndex >= segIndex) break;
126
+ await this.#sleepIfNotClosed(125);
127
+ }
128
+ const sentenceStream = textData.sentenceStream;
129
+ const forwardStartTime = Date.now();
130
+ for await (const ev of sentenceStream) {
131
+ await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);
132
+ }
133
+ segIndex++;
134
+ }
76
135
  }
77
136
  }
78
- async #publishTranscription(final) {
79
- var _a;
80
- const textToPublish = this.#currentText.slice(0, this.currentCharacterIndex);
81
- await ((_a = this.#room.localParticipant) == null ? void 0 : _a.publishTranscription({
82
- participantIdentity: this.#participantIdentity,
83
- trackSid: this.#trackSid,
84
- segments: [
85
- {
86
- text: textToPublish,
87
- final,
88
- id: this.#messageId,
137
+ async #syncSentence(segIndex, segStartTime, textData, audioData, sentence) {
138
+ let realSpeed;
139
+ if (audioData.pushedDuration > 0 && audioData.done) {
140
+ realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;
141
+ }
142
+ const segId = "SG_" + randomUUID();
143
+ const words = this.#opts.splitWords(sentence);
144
+ const processedWords = [];
145
+ const ogText = this.#playedText;
146
+ for (const [word, _, end] of words) {
147
+ if (segIndex <= this.#finishedSegIndex) break;
148
+ if (this.#interrupted) return;
149
+ const wordHyphens = this.#opts.hyphenateWord(word).length;
150
+ processedWords.push(word);
151
+ const elapsed = Date.now() - segStartTime;
152
+ const text = sentence.slice(0, end);
153
+ let speed = this.#speed;
154
+ let delay;
155
+ if (realSpeed) {
156
+ speed = realSpeed;
157
+ const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;
158
+ const hyphPauses = estimatedPausesMs * speed;
159
+ const targetHyphens = Math.round(speed * elapsed);
160
+ const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;
161
+ const toWaitHyphens = Math.max(0, wordHyphens - dt);
162
+ delay = toWaitHyphens / speed;
163
+ } else {
164
+ delay = wordHyphens / speed;
165
+ }
166
+ const firstDelay = Math.min(delay / 2, 2 / speed);
167
+ await this.#sleepIfNotClosed(firstDelay * 1e6);
168
+ this.emit(
169
+ "textUpdated",
170
+ new TranscriptionSegment({
171
+ id: segId,
172
+ text,
89
173
  startTime: BigInt(0),
90
174
  endTime: BigInt(0),
91
- language: ""
92
- }
93
- ]
94
- }));
175
+ final: false,
176
+ language: this.#opts.language
177
+ })
178
+ );
179
+ this.#playedText = `${ogText} ${text}`;
180
+ await this.#sleepIfNotClosed((delay - firstDelay) * 1e6);
181
+ textData.forwardedHyphens += wordHyphens;
182
+ }
183
+ this.emit(
184
+ "textUpdated",
185
+ new TranscriptionSegment({
186
+ id: segId,
187
+ text: sentence,
188
+ startTime: BigInt(0),
189
+ endTime: BigInt(0),
190
+ final: true,
191
+ language: this.#opts.language
192
+ })
193
+ );
194
+ this.#playedText = `${ogText} ${sentence}`;
195
+ await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);
196
+ textData.forwardedSentences++;
95
197
  }
96
- async close(interrupt) {
97
- this.#isRunning = false;
98
- if (!interrupt) {
99
- this.currentCharacterIndex = this.#currentText.length;
198
+ async #sleepIfNotClosed(delay) {
199
+ await Promise.race([
200
+ this.#closeFut.await,
201
+ new Promise((resolve) => setTimeout(resolve, delay))
202
+ ]);
203
+ }
204
+ #calcHyphens(text) {
205
+ const hyphens = [];
206
+ const words = this.#opts.splitWords(text);
207
+ for (const word of words) {
208
+ const n = this.#opts.hyphenateWord(word[0]);
209
+ hyphens.push(...n);
210
+ }
211
+ return hyphens;
212
+ }
213
+ #checkNotClosed() {
214
+ if (this.#closed) {
215
+ throw new Error("TextAudioSynchronizer is closed");
100
216
  }
101
- await this.#publishTranscription(true);
102
217
  }
103
218
  }
104
219
  export {
105
- BasicTranscriptionForwarder
220
+ TextAudioSynchronizer,
221
+ defaultTextSyncOptions
106
222
  };
107
223
  //# sourceMappingURL=transcription.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, Room } from '@livekit/rtc-node';\nimport { log } from './log.js';\n\nexport interface TranscriptionForwarder {\n start(): void;\n pushAudio(frame: AudioFrame): void;\n pushText(text: string): void;\n markTextComplete(): void;\n markAudioComplete(): void;\n close(interrupt: boolean): Promise<void>;\n currentCharacterIndex: number;\n text: string;\n}\n\nexport class BasicTranscriptionForwarder implements TranscriptionForwarder {\n #room: Room;\n #participantIdentity: string;\n #trackSid: string;\n #currentText: string = '';\n #totalAudioDuration: number = 0;\n #currentPlayoutTime: number = 0;\n #DEFAULT_CHARS_PER_SECOND = 16;\n #charsPerSecond: number = this.#DEFAULT_CHARS_PER_SECOND;\n #messageId: string;\n #isRunning: boolean = false;\n #logger = log();\n currentCharacterIndex: number = 0;\n\n constructor(room: Room, participantIdentity: string, trackSid: string, messageId: string) {\n this.#room = room;\n this.#participantIdentity = participantIdentity;\n this.#trackSid = trackSid;\n this.#messageId = messageId;\n }\n\n get text(): string {\n return this.#currentText;\n }\n\n start(): void {\n if (!this.#isRunning) {\n this.#isRunning = true;\n this.#startPublishingLoop().catch((error) => {\n this.#logger.error('Error in publishing loop:', error);\n this.#isRunning = false;\n });\n }\n }\n\n pushAudio(frame: AudioFrame): void {\n this.#totalAudioDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string): void {\n this.#currentText += text;\n }\n\n #textIsComplete: boolean = false;\n #audioIsComplete: boolean = false;\n\n markTextComplete(): void {\n this.#textIsComplete = true;\n this.#adjustTimingIfBothFinished();\n }\n\n markAudioComplete(): void {\n this.#audioIsComplete = true;\n this.#adjustTimingIfBothFinished();\n }\n\n 
#adjustTimingIfBothFinished(): void {\n if (this.#textIsComplete && this.#audioIsComplete) {\n const actualDuration = this.#totalAudioDuration;\n if (actualDuration > 0 && this.#currentText.length > 0) {\n this.#charsPerSecond = this.#currentText.length / actualDuration;\n }\n }\n }\n\n #computeSleepInterval(): number {\n return Math.min(Math.max(1 / this.#charsPerSecond, 0.0625), 0.5);\n }\n\n async #startPublishingLoop(): Promise<void> {\n this.#isRunning = true;\n let sleepInterval = this.#computeSleepInterval();\n let isComplete = false;\n while (this.#isRunning && !isComplete) {\n this.#currentPlayoutTime += sleepInterval;\n this.currentCharacterIndex = Math.floor(this.#currentPlayoutTime * this.#charsPerSecond);\n isComplete = this.#textIsComplete && this.currentCharacterIndex >= this.#currentText.length;\n await this.#publishTranscription(false);\n if (this.#isRunning && !isComplete) {\n sleepInterval = this.#computeSleepInterval();\n await new Promise((resolve) => setTimeout(resolve, sleepInterval * 1000));\n }\n }\n\n if (this.#isRunning) {\n this.close(false);\n }\n }\n\n async #publishTranscription(final: boolean): Promise<void> {\n const textToPublish = this.#currentText.slice(0, this.currentCharacterIndex);\n await this.#room.localParticipant?.publishTranscription({\n participantIdentity: this.#participantIdentity,\n trackSid: this.#trackSid,\n segments: [\n {\n text: textToPublish,\n final: final,\n id: this.#messageId,\n startTime: BigInt(0),\n endTime: BigInt(0),\n language: '',\n },\n ],\n });\n }\n\n async close(interrupt: boolean): Promise<void> {\n this.#isRunning = false;\n\n // Publish whatever we had as final\n if (!interrupt) {\n this.currentCharacterIndex = this.#currentText.length;\n }\n await this.#publishTranscription(true);\n 
}\n}\n"],"mappings":"AAIA,SAAS,WAAW;AAab,MAAM,4BAA8D;AAAA,EACzE;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAuB;AAAA,EACvB,sBAA8B;AAAA,EAC9B,sBAA8B;AAAA,EAC9B,4BAA4B;AAAA,EAC5B,kBAA0B,KAAK;AAAA,EAC/B;AAAA,EACA,aAAsB;AAAA,EACtB,UAAU,IAAI;AAAA,EACd,wBAAgC;AAAA,EAEhC,YAAY,MAAY,qBAA6B,UAAkB,WAAmB;AACxF,SAAK,QAAQ;AACb,SAAK,uBAAuB;AAC5B,SAAK,YAAY;AACjB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,IAAI,OAAe;AACjB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,QAAc;AACZ,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa;AAClB,WAAK,qBAAqB,EAAE,MAAM,CAAC,UAAU;AAC3C,aAAK,QAAQ,MAAM,6BAA6B,KAAK;AACrD,aAAK,aAAa;AAAA,MACpB,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,UAAU,OAAyB;AACjC,SAAK,uBAAuB,MAAM,oBAAoB,MAAM;AAAA,EAC9D;AAAA,EAEA,SAAS,MAAoB;AAC3B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,kBAA2B;AAAA,EAC3B,mBAA4B;AAAA,EAE5B,mBAAyB;AACvB,SAAK,kBAAkB;AACvB,SAAK,4BAA4B;AAAA,EACnC;AAAA,EAEA,oBAA0B;AACxB,SAAK,mBAAmB;AACxB,SAAK,4BAA4B;AAAA,EACnC;AAAA,EAEA,8BAAoC;AAClC,QAAI,KAAK,mBAAmB,KAAK,kBAAkB;AACjD,YAAM,iBAAiB,KAAK;AAC5B,UAAI,iBAAiB,KAAK,KAAK,aAAa,SAAS,GAAG;AACtD,aAAK,kBAAkB,KAAK,aAAa,SAAS;AAAA,MACpD;AAAA,IACF;AAAA,EACF;AAAA,EAEA,wBAAgC;AAC9B,WAAO,KAAK,IAAI,KAAK,IAAI,IAAI,KAAK,iBAAiB,MAAM,GAAG,GAAG;AAAA,EACjE;AAAA,EAEA,MAAM,uBAAsC;AAC1C,SAAK,aAAa;AAClB,QAAI,gBAAgB,KAAK,sBAAsB;AAC/C,QAAI,aAAa;AACjB,WAAO,KAAK,cAAc,CAAC,YAAY;AACrC,WAAK,uBAAuB;AAC5B,WAAK,wBAAwB,KAAK,MAAM,KAAK,sBAAsB,KAAK,eAAe;AACvF,mBAAa,KAAK,mBAAmB,KAAK,yBAAyB,KAAK,aAAa;AACrF,YAAM,KAAK,sBAAsB,KAAK;AACtC,UAAI,KAAK,cAAc,CAAC,YAAY;AAClC,wBAAgB,KAAK,sBAAsB;AAC3C,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,gBAAgB,GAAI,CAAC;AAAA,MAC1E;AAAA,IACF;AAEA,QAAI,KAAK,YAAY;AACnB,WAAK,MAAM,KAAK;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAM,sBAAsB,OAA+B;AA1G7D;AA2GI,UAAM,gBAAgB,KAAK,aAAa,MAAM,GAAG,KAAK,qBAAqB;AAC3E,YAAM,UAAK,MAAM,qBAAX,mBAA6B,qBAAqB;AAAA,MACtD,qBAAqB,KAAK;AAAA,MAC1B,UAAU,KAAK;AAAA,MACf,UAAU;AAAA,QACR;AAAA,UACE,MAAM;AAAA,UACN;AAAA,UACA,IAAI,KAAK;AAAA,UACT,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,UAAU;AAAA,QACZ;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,MAAM,WAAmC;AAC7C,SAAK,aAAa;AAGlB,QAAI,CAAC,WAAW
;AACd,WAAK,wBAAwB,KAAK,aAAa;AAAA,IACjD;AACA,UAAM,KAAK,sBAAsB,IAAI;AAAA,EACvC;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/transcription.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { TranscriptionSegment } from '@livekit/protocol';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { randomUUID } from 'node:crypto';\nimport { EventEmitter } from 'node:events';\nimport { basic } from './tokenize/index.js';\nimport type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';\nimport { AsyncIterableQueue, Future } from './utils.js';\n\n// standard speech rate in hyphens/ms\nconst STANDARD_SPEECH_RATE = 3830;\n\nexport interface TextSyncOptions {\n language: string;\n speed: number;\n newSentenceDelay: number;\n sentenceTokenizer: SentenceTokenizer;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n}\n\nexport const defaultTextSyncOptions: TextSyncOptions = {\n language: '',\n speed: 1,\n newSentenceDelay: 400,\n sentenceTokenizer: new basic.SentenceTokenizer(),\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n};\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedSentences: number;\n}\n\ntype SyncCallbacks = {\n textUpdated: (text: TranscriptionSegment) => void;\n};\n\nexport class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitter<SyncCallbacks>) {\n #opts: TextSyncOptions;\n #speed: number;\n\n #closed = false;\n #interrupted = false;\n #closeFut = new Future();\n\n #playingSegIndex = -1;\n #finishedSegIndex = -1;\n\n #textQChanged = new AsyncIterableQueue<number>();\n #textQ: (TextData | undefined)[] = [];\n #audioQChanged = new AsyncIterableQueue<number>();\n #audioQ: (AudioData | undefined)[] = [];\n\n #playedText = '';\n 
#task?: Promise<void>;\n\n #audioData?: AudioData;\n #textData?: TextData;\n\n constructor(opts: TextSyncOptions) {\n super();\n\n this.#opts = opts;\n this.#speed = opts.speed * STANDARD_SPEECH_RATE;\n }\n\n pushAudio(frame: AudioFrame) {\n this.#checkNotClosed();\n if (!this.#audioData) {\n this.#audioData = { pushedDuration: 0, done: false };\n this.#audioQ.push(this.#audioData);\n this.#audioQChanged.put(1);\n }\n this.#audioData.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n }\n\n pushText(text: string) {\n this.#checkNotClosed();\n if (!this.#textData) {\n this.#textData = {\n sentenceStream: this.#opts.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedSentences: 0,\n };\n this.#textQ.push(this.#textData);\n this.#textQChanged.put(1);\n }\n\n this.#textData.pushedText += text;\n this.#textData.sentenceStream.pushText(text);\n }\n\n markAudioSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#audioData) {\n // create empty audio data if none exists\n this.pushAudio(new AudioFrame(new Int16Array(), 24000, 1, 0));\n }\n\n this.#audioData!.done = true;\n this.#audioData = undefined;\n }\n\n markTextSegmentEnd() {\n this.#checkNotClosed();\n\n if (!this.#textData) {\n this.pushText('');\n }\n\n this.#textData!.done = true;\n this.#textData?.sentenceStream.flush();\n this.#textData?.sentenceStream.close();\n this.#textData = undefined;\n }\n\n segmentPlayoutStarted() {\n this.#checkNotClosed();\n this.#playingSegIndex++;\n\n if (!this.#task) {\n this.#task = this.#mainLoop();\n }\n }\n\n segmentPlayoutFinished() {\n this.#checkNotClosed();\n this.#finishedSegIndex++;\n }\n\n get playedText(): string {\n return this.#playedText;\n }\n\n async close(interrupt: boolean) {\n if (this.#closed) {\n return;\n }\n this.#closed = true;\n this.#interrupted = interrupt;\n this.#closeFut.resolve();\n\n for (const textData of this.#textQ) {\n textData?.sentenceStream.close();\n }\n\n 
this.#textQ.push(undefined);\n this.#audioQ.push(undefined);\n this.#textQChanged.put(1);\n this.#audioQChanged.put(1);\n\n await this.#task;\n }\n\n async #mainLoop() {\n let segIndex = 0;\n let qDone = false;\n\n while (!qDone) {\n await this.#textQChanged.next();\n await this.#audioQChanged.next();\n\n while (this.#textQ.length && this.#audioQ.length) {\n const textData = this.#textQ.pop();\n const audioData = this.#audioQ.pop();\n\n if (!(textData && audioData)) {\n qDone = true;\n break;\n }\n\n // wait for segment to start playing\n while (!this.#closed) {\n if (this.#playingSegIndex >= segIndex) break;\n await this.#sleepIfNotClosed(125);\n }\n\n const sentenceStream = textData.sentenceStream;\n const forwardStartTime = Date.now();\n\n for await (const ev of sentenceStream) {\n await this.#syncSentence(segIndex, forwardStartTime, textData, audioData, ev.token);\n }\n\n segIndex++;\n }\n }\n }\n\n async #syncSentence(\n segIndex: number,\n segStartTime: number,\n textData: TextData,\n audioData: AudioData,\n sentence: string,\n ) {\n let realSpeed: number | undefined;\n if (audioData.pushedDuration > 0 && audioData.done) {\n realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;\n }\n\n const segId = 'SG_' + randomUUID();\n const words = this.#opts.splitWords(sentence);\n const processedWords: string[] = [];\n\n const ogText = this.#playedText;\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n for (const [word, _, end] of words) {\n if (segIndex <= this.#finishedSegIndex) break;\n if (this.#interrupted) return;\n\n const wordHyphens = this.#opts.hyphenateWord(word).length;\n processedWords.push(word);\n\n const elapsed = Date.now() - segStartTime;\n const text = sentence.slice(0, end); // TODO: rstrip punctuations\n\n let speed = this.#speed;\n let delay: number;\n if (realSpeed) {\n speed = realSpeed;\n const estimatedPausesMs = textData.forwardedSentences * this.#opts.newSentenceDelay;\n const hyphPauses = 
estimatedPausesMs * speed;\n const targetHyphens = Math.round(speed * elapsed);\n const dt = targetHyphens - textData.forwardedHyphens - hyphPauses;\n const toWaitHyphens = Math.max(0, wordHyphens - dt);\n delay = toWaitHyphens / speed;\n } else {\n delay = wordHyphens / speed;\n }\n\n const firstDelay = Math.min(delay / 2, 2 / speed);\n await this.#sleepIfNotClosed(firstDelay * 1000000);\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: text,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: false,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${text}`;\n await this.#sleepIfNotClosed((delay - firstDelay) * 1000000);\n textData.forwardedHyphens += wordHyphens;\n }\n\n this.emit(\n 'textUpdated',\n new TranscriptionSegment({\n id: segId,\n text: sentence,\n startTime: BigInt(0),\n endTime: BigInt(0),\n final: true,\n language: this.#opts.language,\n }),\n );\n\n this.#playedText = `${ogText} ${sentence}`;\n\n await this.#sleepIfNotClosed(this.#opts.newSentenceDelay);\n textData.forwardedSentences++;\n }\n\n async #sleepIfNotClosed(delay: number) {\n await Promise.race([\n this.#closeFut.await,\n new Promise((resolve) => setTimeout(resolve, delay)),\n ]);\n }\n\n #calcHyphens(text: string): string[] {\n const hyphens: string[] = [];\n const words = this.#opts.splitWords(text);\n for (const word of words) {\n const n = this.#opts.hyphenateWord(word[0]);\n hyphens.push(...n);\n }\n return hyphens;\n }\n\n #checkNotClosed() {\n if (this.#closed) {\n throw new Error('TextAudioSynchronizer is closed');\n }\n 
}\n}\n"],"mappings":"AAGA,SAAS,4BAA4B;AACrC,SAAS,kBAAkB;AAE3B,SAAS,kBAAkB;AAC3B,SAAS,oBAAoB;AAC7B,SAAS,aAAa;AAEtB,SAAS,oBAAoB,cAAc;AAG3C,MAAM,uBAAuB;AAWtB,MAAM,yBAA0C;AAAA,EACrD,UAAU;AAAA,EACV,OAAO;AAAA,EACP,kBAAkB;AAAA,EAClB,mBAAmB,IAAI,MAAM,kBAAkB;AAAA,EAC/C,eAAe,MAAM;AAAA,EACrB,YAAY,MAAM;AACpB;AAmBO,MAAM,8BAA+B,aAAuD;AAAA,EACjG;AAAA,EACA;AAAA,EAEA,UAAU;AAAA,EACV,eAAe;AAAA,EACf,YAAY,IAAI,OAAO;AAAA,EAEvB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EAEpB,gBAAgB,IAAI,mBAA2B;AAAA,EAC/C,SAAmC,CAAC;AAAA,EACpC,iBAAiB,IAAI,mBAA2B;AAAA,EAChD,UAAqC,CAAC;AAAA,EAEtC,cAAc;AAAA,EACd;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAEN,SAAK,QAAQ;AACb,SAAK,SAAS,KAAK,QAAQ;AAAA,EAC7B;AAAA,EAEA,UAAU,OAAmB;AAC3B,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,YAAY;AACpB,WAAK,aAAa,EAAE,gBAAgB,GAAG,MAAM,MAAM;AACnD,WAAK,QAAQ,KAAK,KAAK,UAAU;AACjC,WAAK,eAAe,IAAI,CAAC;AAAA,IAC3B;AACA,SAAK,WAAW,kBAAkB,MAAM,oBAAoB,MAAM;AAAA,EACpE;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,gBAAgB;AACrB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,YAAY;AAAA,QACf,gBAAgB,KAAK,MAAM,kBAAkB,OAAO;AAAA,QACpD,YAAY;AAAA,QACZ,MAAM;AAAA,QACN,kBAAkB;AAAA,QAClB,oBAAoB;AAAA,MACtB;AACA,WAAK,OAAO,KAAK,KAAK,SAAS;AAC/B,WAAK,cAAc,IAAI,CAAC;AAAA,IAC1B;AAEA,SAAK,UAAU,cAAc;AAC7B,SAAK,UAAU,eAAe,SAAS,IAAI;AAAA,EAC7C;AAAA,EAEA,sBAAsB;AACpB,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,YAAY;AAEpB,WAAK,UAAU,IAAI,WAAW,IAAI,WAAW,GAAG,MAAO,GAAG,CAAC,CAAC;AAAA,IAC9D;AAEA,SAAK,WAAY,OAAO;AACxB,SAAK,aAAa;AAAA,EACpB;AAAA,EAEA,qBAAqB;AAvHvB;AAwHI,SAAK,gBAAgB;AAErB,QAAI,CAAC,KAAK,WAAW;AACnB,WAAK,SAAS,EAAE;AAAA,IAClB;AAEA,SAAK,UAAW,OAAO;AACvB,eAAK,cAAL,mBAAgB,eAAe;AAC/B,eAAK,cAAL,mBAAgB,eAAe;AAC/B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,wBAAwB;AACtB,SAAK,gBAAgB;AACrB,SAAK;AAEL,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,KAAK,UAAU;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,yBAAyB;AACvB,SAAK,gBAAgB;AACrB,SAAK;AAAA,EACP;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,MAAM,WAAoB;AAC9B,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AACA,SAAK,UAAU;AACf,SAAK,eAAe;AACpB,SAAK,UAAU,QAAQ;AAEvB,eAAW,YAAY,KAAK,QAAQ;AAClC,2CAAU,eAAe;AAAA,IAC3B;AAEA,SAAK,O
AAO,KAAK,MAAS;AAC1B,SAAK,QAAQ,KAAK,MAAS;AAC3B,SAAK,cAAc,IAAI,CAAC;AACxB,SAAK,eAAe,IAAI,CAAC;AAEzB,UAAM,KAAK;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,WAAW;AACf,QAAI,QAAQ;AAEZ,WAAO,CAAC,OAAO;AACb,YAAM,KAAK,cAAc,KAAK;AAC9B,YAAM,KAAK,eAAe,KAAK;AAE/B,aAAO,KAAK,OAAO,UAAU,KAAK,QAAQ,QAAQ;AAChD,cAAM,WAAW,KAAK,OAAO,IAAI;AACjC,cAAM,YAAY,KAAK,QAAQ,IAAI;AAEnC,YAAI,EAAE,YAAY,YAAY;AAC5B,kBAAQ;AACR;AAAA,QACF;AAGA,eAAO,CAAC,KAAK,SAAS;AACpB,cAAI,KAAK,oBAAoB,SAAU;AACvC,gBAAM,KAAK,kBAAkB,GAAG;AAAA,QAClC;AAEA,cAAM,iBAAiB,SAAS;AAChC,cAAM,mBAAmB,KAAK,IAAI;AAElC,yBAAiB,MAAM,gBAAgB;AACrC,gBAAM,KAAK,cAAc,UAAU,kBAAkB,UAAU,WAAW,GAAG,KAAK;AAAA,QACpF;AAEA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cACJ,UACA,cACA,UACA,WACA,UACA;AACA,QAAI;AACJ,QAAI,UAAU,iBAAiB,KAAK,UAAU,MAAM;AAClD,kBAAY,KAAK,aAAa,SAAS,UAAU,EAAE,SAAS,UAAU;AAAA,IACxE;AAEA,UAAM,QAAQ,QAAQ,WAAW;AACjC,UAAM,QAAQ,KAAK,MAAM,WAAW,QAAQ;AAC5C,UAAM,iBAA2B,CAAC;AAElC,UAAM,SAAS,KAAK;AAEpB,eAAW,CAAC,MAAM,GAAG,GAAG,KAAK,OAAO;AAClC,UAAI,YAAY,KAAK,kBAAmB;AACxC,UAAI,KAAK,aAAc;AAEvB,YAAM,cAAc,KAAK,MAAM,cAAc,IAAI,EAAE;AACnD,qBAAe,KAAK,IAAI;AAExB,YAAM,UAAU,KAAK,IAAI,IAAI;AAC7B,YAAM,OAAO,SAAS,MAAM,GAAG,GAAG;AAElC,UAAI,QAAQ,KAAK;AACjB,UAAI;AACJ,UAAI,WAAW;AACb,gBAAQ;AACR,cAAM,oBAAoB,SAAS,qBAAqB,KAAK,MAAM;AACnE,cAAM,aAAa,oBAAoB;AACvC,cAAM,gBAAgB,KAAK,MAAM,QAAQ,OAAO;AAChD,cAAM,KAAK,gBAAgB,SAAS,mBAAmB;AACvD,cAAM,gBAAgB,KAAK,IAAI,GAAG,cAAc,EAAE;AAClD,gBAAQ,gBAAgB;AAAA,MAC1B,OAAO;AACL,gBAAQ,cAAc;AAAA,MACxB;AAEA,YAAM,aAAa,KAAK,IAAI,QAAQ,GAAG,IAAI,KAAK;AAChD,YAAM,KAAK,kBAAkB,aAAa,GAAO;AAEjD,WAAK;AAAA,QACH;AAAA,QACA,IAAI,qBAAqB;AAAA,UACvB,IAAI;AAAA,UACJ;AAAA,UACA,WAAW,OAAO,CAAC;AAAA,UACnB,SAAS,OAAO,CAAC;AAAA,UACjB,OAAO;AAAA,UACP,UAAU,KAAK,MAAM;AAAA,QACvB,CAAC;AAAA,MACH;AAEA,WAAK,cAAc,GAAG,MAAM,IAAI,IAAI;AACpC,YAAM,KAAK,mBAAmB,QAAQ,cAAc,GAAO;AAC3D,eAAS,oBAAoB;AAAA,IAC/B;AAEA,SAAK;AAAA,MACH;AAAA,MACA,IAAI,qBAAqB;AAAA,QACvB,IAAI;AAAA,QACJ,MAAM;AAAA,QACN,WAAW,OAAO,CAAC;AAAA,QACnB,SAAS,OAAO,CAAC;AAAA,QACjB,OAAO;AAAA,QACP,UAAU,KAAK,MAAM;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,
SAAK,cAAc,GAAG,MAAM,IAAI,QAAQ;AAExC,UAAM,KAAK,kBAAkB,KAAK,MAAM,gBAAgB;AACxD,aAAS;AAAA,EACX;AAAA,EAEA,MAAM,kBAAkB,OAAe;AACrC,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,UAAU;AAAA,MACf,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,KAAK,CAAC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,aAAa,MAAwB;AACnC,UAAM,UAAoB,CAAC;AAC3B,UAAM,QAAQ,KAAK,MAAM,WAAW,IAAI;AACxC,eAAW,QAAQ,OAAO;AACxB,YAAM,IAAI,KAAK,MAAM,cAAc,KAAK,CAAC,CAAC;AAC1C,cAAQ,KAAK,GAAG,CAAC;AAAA,IACnB;AACA,WAAO;AAAA,EACT;AAAA,EAEA,kBAAkB;AAChB,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAAA,EACF;AACF;","names":[]}
package/dist/worker.cjs CHANGED
@@ -42,6 +42,8 @@ var import_node_events = require("node:events");
42
42
  var import_node_os = __toESM(require("node:os"), 1);
43
43
  var import_ws = require("ws");
44
44
  var import_http_server = require("./http_server.cjs");
45
+ var import_inference_runner = require("./inference_runner.cjs");
46
+ var import_inference_proc_executor = require("./ipc/inference_proc_executor.cjs");
45
47
  var import_proc_pool = require("./ipc/proc_pool.cjs");
46
48
  var import_job = require("./job.cjs");
47
49
  var import_log = require("./log.cjs");
@@ -143,6 +145,8 @@ class WorkerOptions {
143
145
  port;
144
146
  logLevel;
145
147
  production;
148
+ jobMemoryWarnMB;
149
+ jobMemoryLimitMB;
146
150
  /** @param options */
147
151
  constructor({
148
152
  agent,
@@ -162,7 +166,9 @@ class WorkerOptions {
162
166
  host = "localhost",
163
167
  port = void 0,
164
168
  logLevel = "info",
165
- production = false
169
+ production = false,
170
+ jobMemoryWarnMB = 300,
171
+ jobMemoryLimitMB = 0
166
172
  }) {
167
173
  this.agent = agent;
168
174
  if (!this.agent) {
@@ -185,6 +191,8 @@ class WorkerOptions {
185
191
  this.port = port || Default.port(production);
186
192
  this.logLevel = logLevel;
187
193
  this.production = production;
194
+ this.jobMemoryWarnMB = jobMemoryWarnMB;
195
+ this.jobMemoryLimitMB = jobMemoryLimitMB;
188
196
  }
189
197
  }
190
198
  class PendingAssignment {
@@ -209,6 +217,7 @@ class Worker {
209
217
  #session = void 0;
210
218
  #httpServer;
211
219
  #logger = (0, import_log.log)().child({ version: import_version.version });
220
+ #inferenceExecutor;
212
221
  /* @throws {@link MissingCredentialsError} if URL, API key or API secret are missing */
213
222
  constructor(opts) {
214
223
  opts.wsURL = opts.wsURL || process.env.LIVEKIT_URL || "";
@@ -226,11 +235,26 @@ class Worker {
226
235
  throw new MissingCredentialsError(
227
236
  "API Secret is required: Set LIVEKIT_API_SECRET, run with --api-secret, or pass apiSecret in WorkerOptions"
228
237
  );
238
+ if (Object.entries(import_inference_runner.InferenceRunner.registeredRunners).length) {
239
+ this.#inferenceExecutor = new import_inference_proc_executor.InferenceProcExecutor({
240
+ runners: import_inference_runner.InferenceRunner.registeredRunners,
241
+ initializeTimeout: 3e4,
242
+ closeTimeout: 5e3,
243
+ memoryWarnMB: 2e3,
244
+ memoryLimitMB: 0,
245
+ pingInterval: 5e3,
246
+ pingTimeout: 6e4,
247
+ highPingThreshold: 2500
248
+ });
249
+ }
229
250
  this.#procPool = new import_proc_pool.ProcPool(
230
251
  opts.agent,
231
252
  opts.numIdleProcesses,
232
253
  opts.initializeProcessTimeout,
233
- opts.shutdownProcessTimeout
254
+ opts.shutdownProcessTimeout,
255
+ this.#inferenceExecutor,
256
+ opts.jobMemoryWarnMB,
257
+ opts.jobMemoryLimitMB
234
258
  );
235
259
  this.#opts = opts;
236
260
  this.#httpServer = new import_http_server.HTTPServer(opts.host, opts.port);
@@ -240,6 +264,10 @@ class Worker {
240
264
  if (!this.#closed) {
241
265
  throw new WorkerError("worker is already running");
242
266
  }
267
+ if (this.#inferenceExecutor) {
268
+ await this.#inferenceExecutor.start();
269
+ await this.#inferenceExecutor.initialize();
270
+ }
243
271
  this.#logger.info("starting worker");
244
272
  this.#closed = false;
245
273
  this.#procPool.start();
@@ -321,12 +349,16 @@ class Worker {
321
349
  })
322
350
  );
323
351
  };
324
- const timer = setTimeout(() => {
325
- throw new WorkerError("timed out draining");
326
- }, timeout);
327
- if (timeout === void 0) clearTimeout(timer);
352
+ let timer;
353
+ if (timeout) {
354
+ timer = setTimeout(() => {
355
+ throw new WorkerError("timed out draining");
356
+ }, timeout);
357
+ }
328
358
  await joinJobs().then(() => {
329
- clearTimeout(timer);
359
+ if (timeout) {
360
+ clearTimeout(timer);
361
+ }
330
362
  });
331
363
  }
332
364
  async simulateJob(roomName, participantIdentity) {
@@ -554,17 +586,18 @@ class Worker {
554
586
  await proc.close();
555
587
  }
556
588
  async close() {
557
- var _a;
589
+ var _a, _b;
558
590
  if (this.#closed) {
559
591
  await this.#close.await;
560
592
  return;
561
593
  }
562
594
  this.#logger.info("shutting down worker");
563
595
  this.#closed = true;
596
+ await ((_a = this.#inferenceExecutor) == null ? void 0 : _a.close());
564
597
  await this.#procPool.close();
565
598
  await this.#httpServer.close();
566
599
  await Promise.allSettled(this.#tasks);
567
- (_a = this.#session) == null ? void 0 : _a.close();
600
+ (_b = this.#session) == null ? void 0 : _b.close();
568
601
  await this.#close.await;
569
602
  }
570
603
  }