@livekit/agents 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/dist/audio.cjs +3 -3
  2. package/dist/audio.cjs.map +1 -1
  3. package/dist/audio.d.cts +1 -1
  4. package/dist/audio.d.ts +1 -1
  5. package/dist/audio.d.ts.map +1 -1
  6. package/dist/audio.js +2 -2
  7. package/dist/audio.js.map +1 -1
  8. package/dist/llm/llm.cjs +7 -4
  9. package/dist/llm/llm.cjs.map +1 -1
  10. package/dist/llm/llm.d.ts.map +1 -1
  11. package/dist/llm/llm.js +7 -4
  12. package/dist/llm/llm.js.map +1 -1
  13. package/dist/metrics/base.cjs.map +1 -1
  14. package/dist/metrics/base.d.cts +23 -18
  15. package/dist/metrics/base.d.ts +23 -18
  16. package/dist/metrics/base.d.ts.map +1 -1
  17. package/dist/metrics/usage_collector.cjs +2 -2
  18. package/dist/metrics/usage_collector.cjs.map +1 -1
  19. package/dist/metrics/usage_collector.d.cts +1 -1
  20. package/dist/metrics/usage_collector.d.ts +1 -1
  21. package/dist/metrics/usage_collector.d.ts.map +1 -1
  22. package/dist/metrics/usage_collector.js +2 -2
  23. package/dist/metrics/usage_collector.js.map +1 -1
  24. package/dist/metrics/utils.cjs +14 -7
  25. package/dist/metrics/utils.cjs.map +1 -1
  26. package/dist/metrics/utils.d.ts.map +1 -1
  27. package/dist/metrics/utils.js +14 -7
  28. package/dist/metrics/utils.js.map +1 -1
  29. package/dist/stt/stt.cjs +5 -5
  30. package/dist/stt/stt.cjs.map +1 -1
  31. package/dist/stt/stt.js +6 -6
  32. package/dist/stt/stt.js.map +1 -1
  33. package/dist/tts/tts.cjs +11 -10
  34. package/dist/tts/tts.cjs.map +1 -1
  35. package/dist/tts/tts.d.ts.map +1 -1
  36. package/dist/tts/tts.js +11 -10
  37. package/dist/tts/tts.js.map +1 -1
  38. package/dist/vad.cjs +5 -5
  39. package/dist/vad.cjs.map +1 -1
  40. package/dist/vad.js +5 -5
  41. package/dist/vad.js.map +1 -1
  42. package/dist/voice/agent_activity.cjs +7 -4
  43. package/dist/voice/agent_activity.cjs.map +1 -1
  44. package/dist/voice/agent_activity.d.ts.map +1 -1
  45. package/dist/voice/agent_activity.js +7 -4
  46. package/dist/voice/agent_activity.js.map +1 -1
  47. package/dist/voice/generation_tools.test.cjs +236 -0
  48. package/dist/voice/generation_tools.test.cjs.map +1 -0
  49. package/dist/voice/generation_tools.test.js +235 -0
  50. package/dist/voice/generation_tools.test.js.map +1 -0
  51. package/dist/voice/index.cjs +3 -1
  52. package/dist/voice/index.cjs.map +1 -1
  53. package/dist/voice/index.d.cts +1 -0
  54. package/dist/voice/index.d.ts +1 -0
  55. package/dist/voice/index.d.ts.map +1 -1
  56. package/dist/voice/index.js +1 -0
  57. package/dist/voice/index.js.map +1 -1
  58. package/package.json +1 -1
  59. package/src/audio.ts +1 -1
  60. package/src/llm/llm.ts +7 -4
  61. package/src/metrics/base.ts +23 -18
  62. package/src/metrics/usage_collector.ts +3 -3
  63. package/src/metrics/utils.ts +16 -7
  64. package/src/stt/stt.ts +6 -6
  65. package/src/tts/tts.ts +11 -10
  66. package/src/vad.ts +5 -5
  67. package/src/voice/agent_activity.ts +8 -4
  68. package/src/voice/generation_tools.test.ts +268 -0
  69. package/src/voice/index.ts +1 -0
@@ -29,7 +29,7 @@ const logMetrics = (metrics) => {
29
29
  const logger = (0, import_log.log)();
30
30
  if (metrics.type === "llm_metrics") {
31
31
  logger.child({
32
- ttft: roundTwoDecimals(metrics.ttft),
32
+ ttftMs: roundTwoDecimals(metrics.ttftMs),
33
33
  inputTokens: metrics.promptTokens,
34
34
  promptCachedTokens: metrics.promptCachedTokens,
35
35
  outputTokens: metrics.completionTokens,
@@ -37,7 +37,7 @@ const logMetrics = (metrics) => {
37
37
  }).info("LLM metrics");
38
38
  } else if (metrics.type === "realtime_model_metrics") {
39
39
  logger.child({
40
- ttft: roundTwoDecimals(metrics.ttft),
40
+ ttftMs: roundTwoDecimals(metrics.ttftMs),
41
41
  input_tokens: metrics.inputTokens,
42
42
  cached_input_tokens: metrics.inputTokenDetails.cachedTokens,
43
43
  output_tokens: metrics.outputTokens,
@@ -46,17 +46,24 @@ const logMetrics = (metrics) => {
46
46
  }).info("RealtimeModel metrics");
47
47
  } else if (metrics.type === "tts_metrics") {
48
48
  logger.child({
49
- ttfb: roundTwoDecimals(metrics.ttfb),
50
- audioDuration: metrics.audioDuration
49
+ ttfbMs: roundTwoDecimals(metrics.ttfbMs),
50
+ audioDurationMs: Math.round(metrics.audioDurationMs)
51
51
  }).info("TTS metrics");
52
52
  } else if (metrics.type === "eou_metrics") {
53
53
  logger.child({
54
- end_of_utterance_delay: roundTwoDecimals(metrics.endOfUtteranceDelay),
55
- transcription_delay: roundTwoDecimals(metrics.transcriptionDelay)
54
+ endOfUtteranceDelayMs: roundTwoDecimals(metrics.endOfUtteranceDelayMs),
55
+ transcriptionDelayMs: roundTwoDecimals(metrics.transcriptionDelayMs),
56
+ onUserTurnCompletedDelayMs: roundTwoDecimals(metrics.onUserTurnCompletedDelayMs)
56
57
  }).info("EOU metrics");
58
+ } else if (metrics.type === "vad_metrics") {
59
+ logger.child({
60
+ idleTimeMs: Math.round(metrics.idleTimeMs),
61
+ inferenceDurationTotalMs: Math.round(metrics.inferenceDurationTotalMs),
62
+ inferenceCount: metrics.inferenceCount
63
+ }).info("VAD metrics");
57
64
  } else if (metrics.type === "stt_metrics") {
58
65
  logger.child({
59
- audioDuration: metrics.audioDuration
66
+ audioDurationMs: Math.round(metrics.audioDurationMs)
60
67
  }).info("STT metrics");
61
68
  }
62
69
  };
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/metrics/utils.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { log } from '../log.js';\nimport type { AgentMetrics } from './base.js';\n\nfunction roundTwoDecimals(value: number) {\n return Math.round(value * 100) / 100;\n}\n\nexport const logMetrics = (metrics: AgentMetrics) => {\n const logger = log();\n if (metrics.type === 'llm_metrics') {\n logger\n .child({\n ttft: roundTwoDecimals(metrics.ttft),\n inputTokens: metrics.promptTokens,\n promptCachedTokens: metrics.promptCachedTokens,\n outputTokens: metrics.completionTokens,\n tokensPerSecond: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('LLM metrics');\n } else if (metrics.type === 'realtime_model_metrics') {\n logger\n .child({\n ttft: roundTwoDecimals(metrics.ttft),\n input_tokens: metrics.inputTokens,\n cached_input_tokens: metrics.inputTokenDetails.cachedTokens,\n output_tokens: metrics.outputTokens,\n total_tokens: metrics.totalTokens,\n tokens_per_second: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('RealtimeModel metrics');\n } else if (metrics.type === 'tts_metrics') {\n logger\n .child({\n ttfb: roundTwoDecimals(metrics.ttfb),\n audioDuration: metrics.audioDuration,\n })\n .info('TTS metrics');\n } else if (metrics.type === 'eou_metrics') {\n logger\n .child({\n end_of_utterance_delay: roundTwoDecimals(metrics.endOfUtteranceDelay),\n transcription_delay: roundTwoDecimals(metrics.transcriptionDelay),\n })\n .info('EOU metrics');\n } else if (metrics.type === 'stt_metrics') {\n logger\n .child({\n audioDuration: metrics.audioDuration,\n })\n .info('STT metrics');\n }\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,iBAAoB;AAGpB,SAAS,iBAAiB,OAAe;AACvC,SAAO,KAAK,MAAM,QAAQ,GAAG,IAAI;AACnC;AAEO,MAAM,aAAa,CAAC,YAA0B;AACnD,QAAM,aAAS,gBAAI;AACnB,MAAI,QAAQ,SAAS,eAAe;AAClC,WACG,MAAM;AAAA,MACL,MAAM,iBAAiB,QAAQ,IAAI;AAAA,MACnC,aAAa,QAAQ;AAAA,MACrB,oBAAoB,QAAQ;AAAA,MAC5B,cAAc,QAAQ;AAAA,MACtB,iBAAiB,iBAAiB,QAAQ,eAAe;AAAA,IAC3D,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,0BAA0B;AACpD,WACG,MAAM;AAAA,MACL,MAAM,iBAAiB,QAAQ,IAAI;AAAA,MACnC,cAAc,QAAQ;AAAA,MACtB,qBAAqB,QAAQ,kBAAkB;AAAA,MAC/C,eAAe,QAAQ;AAAA,MACvB,cAAc,QAAQ;AAAA,MACtB,mBAAmB,iBAAiB,QAAQ,eAAe;AAAA,IAC7D,CAAC,EACA,KAAK,uBAAuB;AAAA,EACjC,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,MAAM,iBAAiB,QAAQ,IAAI;AAAA,MACnC,eAAe,QAAQ;AAAA,IACzB,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,wBAAwB,iBAAiB,QAAQ,mBAAmB;AAAA,MACpE,qBAAqB,iBAAiB,QAAQ,kBAAkB;AAAA,IAClE,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,eAAe,QAAQ;AAAA,IACzB,CAAC,EACA,KAAK,aAAa;AAAA,EACvB;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/metrics/utils.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { log } from '../log.js';\nimport type { AgentMetrics } from './base.js';\n\nfunction roundTwoDecimals(value: number) {\n return Math.round(value * 100) / 100;\n}\n\nexport const logMetrics = (metrics: AgentMetrics) => {\n const logger = log();\n if (metrics.type === 'llm_metrics') {\n logger\n .child({\n ttftMs: roundTwoDecimals(metrics.ttftMs),\n inputTokens: metrics.promptTokens,\n promptCachedTokens: metrics.promptCachedTokens,\n outputTokens: metrics.completionTokens,\n tokensPerSecond: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('LLM metrics');\n } else if (metrics.type === 'realtime_model_metrics') {\n logger\n .child({\n ttftMs: roundTwoDecimals(metrics.ttftMs),\n input_tokens: metrics.inputTokens,\n cached_input_tokens: metrics.inputTokenDetails.cachedTokens,\n output_tokens: metrics.outputTokens,\n total_tokens: metrics.totalTokens,\n tokens_per_second: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('RealtimeModel metrics');\n } else if (metrics.type === 'tts_metrics') {\n logger\n .child({\n ttfbMs: roundTwoDecimals(metrics.ttfbMs),\n audioDurationMs: Math.round(metrics.audioDurationMs),\n })\n .info('TTS metrics');\n } else if (metrics.type === 'eou_metrics') {\n logger\n .child({\n endOfUtteranceDelayMs: roundTwoDecimals(metrics.endOfUtteranceDelayMs),\n transcriptionDelayMs: roundTwoDecimals(metrics.transcriptionDelayMs),\n onUserTurnCompletedDelayMs: roundTwoDecimals(metrics.onUserTurnCompletedDelayMs),\n })\n .info('EOU metrics');\n } else if (metrics.type === 'vad_metrics') {\n logger\n .child({\n idleTimeMs: Math.round(metrics.idleTimeMs),\n inferenceDurationTotalMs: Math.round(metrics.inferenceDurationTotalMs),\n inferenceCount: metrics.inferenceCount,\n })\n .info('VAD metrics');\n } else if (metrics.type === 'stt_metrics') {\n logger\n .child({\n audioDurationMs: Math.round(metrics.audioDurationMs),\n })\n .info('STT metrics');\n }\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,iBAAoB;AAGpB,SAAS,iBAAiB,OAAe;AACvC,SAAO,KAAK,MAAM,QAAQ,GAAG,IAAI;AACnC;AAEO,MAAM,aAAa,CAAC,YAA0B;AACnD,QAAM,aAAS,gBAAI;AACnB,MAAI,QAAQ,SAAS,eAAe;AAClC,WACG,MAAM;AAAA,MACL,QAAQ,iBAAiB,QAAQ,MAAM;AAAA,MACvC,aAAa,QAAQ;AAAA,MACrB,oBAAoB,QAAQ;AAAA,MAC5B,cAAc,QAAQ;AAAA,MACtB,iBAAiB,iBAAiB,QAAQ,eAAe;AAAA,IAC3D,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,0BAA0B;AACpD,WACG,MAAM;AAAA,MACL,QAAQ,iBAAiB,QAAQ,MAAM;AAAA,MACvC,cAAc,QAAQ;AAAA,MACtB,qBAAqB,QAAQ,kBAAkB;AAAA,MAC/C,eAAe,QAAQ;AAAA,MACvB,cAAc,QAAQ;AAAA,MACtB,mBAAmB,iBAAiB,QAAQ,eAAe;AAAA,IAC7D,CAAC,EACA,KAAK,uBAAuB;AAAA,EACjC,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,QAAQ,iBAAiB,QAAQ,MAAM;AAAA,MACvC,iBAAiB,KAAK,MAAM,QAAQ,eAAe;AAAA,IACrD,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,uBAAuB,iBAAiB,QAAQ,qBAAqB;AAAA,MACrE,sBAAsB,iBAAiB,QAAQ,oBAAoB;AAAA,MACnE,4BAA4B,iBAAiB,QAAQ,0BAA0B;AAAA,IACjF,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,YAAY,KAAK,MAAM,QAAQ,UAAU;AAAA,MACzC,0BAA0B,KAAK,MAAM,QAAQ,wBAAwB;AAAA,MACrE,gBAAgB,QAAQ;AAAA,IAC1B,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,iBAAiB,KAAK,MAAM,QAAQ,eAAe;AAAA,IACrD,CAAC,EACA,KAAK,aAAa;AAAA,EACvB;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/metrics/utils.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAM9C,eAAO,MAAM,UAAU,YAAa,YAAY,SA4C/C,CAAC"}
1
+ {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../src/metrics/utils.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAM9C,eAAO,MAAM,UAAU,YAAa,YAAY,SAqD/C,CAAC"}
@@ -6,7 +6,7 @@ const logMetrics = (metrics) => {
6
6
  const logger = log();
7
7
  if (metrics.type === "llm_metrics") {
8
8
  logger.child({
9
- ttft: roundTwoDecimals(metrics.ttft),
9
+ ttftMs: roundTwoDecimals(metrics.ttftMs),
10
10
  inputTokens: metrics.promptTokens,
11
11
  promptCachedTokens: metrics.promptCachedTokens,
12
12
  outputTokens: metrics.completionTokens,
@@ -14,7 +14,7 @@ const logMetrics = (metrics) => {
14
14
  }).info("LLM metrics");
15
15
  } else if (metrics.type === "realtime_model_metrics") {
16
16
  logger.child({
17
- ttft: roundTwoDecimals(metrics.ttft),
17
+ ttftMs: roundTwoDecimals(metrics.ttftMs),
18
18
  input_tokens: metrics.inputTokens,
19
19
  cached_input_tokens: metrics.inputTokenDetails.cachedTokens,
20
20
  output_tokens: metrics.outputTokens,
@@ -23,17 +23,24 @@ const logMetrics = (metrics) => {
23
23
  }).info("RealtimeModel metrics");
24
24
  } else if (metrics.type === "tts_metrics") {
25
25
  logger.child({
26
- ttfb: roundTwoDecimals(metrics.ttfb),
27
- audioDuration: metrics.audioDuration
26
+ ttfbMs: roundTwoDecimals(metrics.ttfbMs),
27
+ audioDurationMs: Math.round(metrics.audioDurationMs)
28
28
  }).info("TTS metrics");
29
29
  } else if (metrics.type === "eou_metrics") {
30
30
  logger.child({
31
- end_of_utterance_delay: roundTwoDecimals(metrics.endOfUtteranceDelay),
32
- transcription_delay: roundTwoDecimals(metrics.transcriptionDelay)
31
+ endOfUtteranceDelayMs: roundTwoDecimals(metrics.endOfUtteranceDelayMs),
32
+ transcriptionDelayMs: roundTwoDecimals(metrics.transcriptionDelayMs),
33
+ onUserTurnCompletedDelayMs: roundTwoDecimals(metrics.onUserTurnCompletedDelayMs)
33
34
  }).info("EOU metrics");
35
+ } else if (metrics.type === "vad_metrics") {
36
+ logger.child({
37
+ idleTimeMs: Math.round(metrics.idleTimeMs),
38
+ inferenceDurationTotalMs: Math.round(metrics.inferenceDurationTotalMs),
39
+ inferenceCount: metrics.inferenceCount
40
+ }).info("VAD metrics");
34
41
  } else if (metrics.type === "stt_metrics") {
35
42
  logger.child({
36
- audioDuration: metrics.audioDuration
43
+ audioDurationMs: Math.round(metrics.audioDurationMs)
37
44
  }).info("STT metrics");
38
45
  }
39
46
  };
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/metrics/utils.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { log } from '../log.js';\nimport type { AgentMetrics } from './base.js';\n\nfunction roundTwoDecimals(value: number) {\n return Math.round(value * 100) / 100;\n}\n\nexport const logMetrics = (metrics: AgentMetrics) => {\n const logger = log();\n if (metrics.type === 'llm_metrics') {\n logger\n .child({\n ttft: roundTwoDecimals(metrics.ttft),\n inputTokens: metrics.promptTokens,\n promptCachedTokens: metrics.promptCachedTokens,\n outputTokens: metrics.completionTokens,\n tokensPerSecond: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('LLM metrics');\n } else if (metrics.type === 'realtime_model_metrics') {\n logger\n .child({\n ttft: roundTwoDecimals(metrics.ttft),\n input_tokens: metrics.inputTokens,\n cached_input_tokens: metrics.inputTokenDetails.cachedTokens,\n output_tokens: metrics.outputTokens,\n total_tokens: metrics.totalTokens,\n tokens_per_second: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('RealtimeModel metrics');\n } else if (metrics.type === 'tts_metrics') {\n logger\n .child({\n ttfb: roundTwoDecimals(metrics.ttfb),\n audioDuration: metrics.audioDuration,\n })\n .info('TTS metrics');\n } else if (metrics.type === 'eou_metrics') {\n logger\n .child({\n end_of_utterance_delay: roundTwoDecimals(metrics.endOfUtteranceDelay),\n transcription_delay: roundTwoDecimals(metrics.transcriptionDelay),\n })\n .info('EOU metrics');\n } else if (metrics.type === 'stt_metrics') {\n logger\n .child({\n audioDuration: metrics.audioDuration,\n })\n .info('STT metrics');\n }\n};\n"],"mappings":"AAGA,SAAS,WAAW;AAGpB,SAAS,iBAAiB,OAAe;AACvC,SAAO,KAAK,MAAM,QAAQ,GAAG,IAAI;AACnC;AAEO,MAAM,aAAa,CAAC,YAA0B;AACnD,QAAM,SAAS,IAAI;AACnB,MAAI,QAAQ,SAAS,eAAe;AAClC,WACG,MAAM;AAAA,MACL,MAAM,iBAAiB,QAAQ,IAAI;AAAA,MACnC,aAAa,QAAQ;AAAA,MACrB,oBAAoB,QAAQ;AAAA,MAC5B,cAAc,QAAQ;AAAA,MACtB,iBAAiB,iBAAiB,QAAQ,eAAe;AAAA,IAC3D,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,0BAA0B;AACpD,WACG,MAAM;AAAA,MACL,MAAM,iBAAiB,QAAQ,IAAI;AAAA,MACnC,cAAc,QAAQ;AAAA,MACtB,qBAAqB,QAAQ,kBAAkB;AAAA,MAC/C,eAAe,QAAQ;AAAA,MACvB,cAAc,QAAQ;AAAA,MACtB,mBAAmB,iBAAiB,QAAQ,eAAe;AAAA,IAC7D,CAAC,EACA,KAAK,uBAAuB;AAAA,EACjC,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,MAAM,iBAAiB,QAAQ,IAAI;AAAA,MACnC,eAAe,QAAQ;AAAA,IACzB,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,wBAAwB,iBAAiB,QAAQ,mBAAmB;AAAA,MACpE,qBAAqB,iBAAiB,QAAQ,kBAAkB;AAAA,IAClE,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,eAAe,QAAQ;AAAA,IACzB,CAAC,EACA,KAAK,aAAa;AAAA,EACvB;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/metrics/utils.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { log } from '../log.js';\nimport type { AgentMetrics } from './base.js';\n\nfunction roundTwoDecimals(value: number) {\n return Math.round(value * 100) / 100;\n}\n\nexport const logMetrics = (metrics: AgentMetrics) => {\n const logger = log();\n if (metrics.type === 'llm_metrics') {\n logger\n .child({\n ttftMs: roundTwoDecimals(metrics.ttftMs),\n inputTokens: metrics.promptTokens,\n promptCachedTokens: metrics.promptCachedTokens,\n outputTokens: metrics.completionTokens,\n tokensPerSecond: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('LLM metrics');\n } else if (metrics.type === 'realtime_model_metrics') {\n logger\n .child({\n ttftMs: roundTwoDecimals(metrics.ttftMs),\n input_tokens: metrics.inputTokens,\n cached_input_tokens: metrics.inputTokenDetails.cachedTokens,\n output_tokens: metrics.outputTokens,\n total_tokens: metrics.totalTokens,\n tokens_per_second: roundTwoDecimals(metrics.tokensPerSecond),\n })\n .info('RealtimeModel metrics');\n } else if (metrics.type === 'tts_metrics') {\n logger\n .child({\n ttfbMs: roundTwoDecimals(metrics.ttfbMs),\n audioDurationMs: Math.round(metrics.audioDurationMs),\n })\n .info('TTS metrics');\n } else if (metrics.type === 'eou_metrics') {\n logger\n .child({\n endOfUtteranceDelayMs: roundTwoDecimals(metrics.endOfUtteranceDelayMs),\n transcriptionDelayMs: roundTwoDecimals(metrics.transcriptionDelayMs),\n onUserTurnCompletedDelayMs: roundTwoDecimals(metrics.onUserTurnCompletedDelayMs),\n })\n .info('EOU metrics');\n } else if (metrics.type === 'vad_metrics') {\n logger\n .child({\n idleTimeMs: Math.round(metrics.idleTimeMs),\n inferenceDurationTotalMs: Math.round(metrics.inferenceDurationTotalMs),\n inferenceCount: metrics.inferenceCount,\n })\n .info('VAD metrics');\n } else if (metrics.type === 'stt_metrics') {\n logger\n .child({\n audioDurationMs: Math.round(metrics.audioDurationMs),\n })\n .info('STT metrics');\n }\n};\n"],"mappings":"AAGA,SAAS,WAAW;AAGpB,SAAS,iBAAiB,OAAe;AACvC,SAAO,KAAK,MAAM,QAAQ,GAAG,IAAI;AACnC;AAEO,MAAM,aAAa,CAAC,YAA0B;AACnD,QAAM,SAAS,IAAI;AACnB,MAAI,QAAQ,SAAS,eAAe;AAClC,WACG,MAAM;AAAA,MACL,QAAQ,iBAAiB,QAAQ,MAAM;AAAA,MACvC,aAAa,QAAQ;AAAA,MACrB,oBAAoB,QAAQ;AAAA,MAC5B,cAAc,QAAQ;AAAA,MACtB,iBAAiB,iBAAiB,QAAQ,eAAe;AAAA,IAC3D,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,0BAA0B;AACpD,WACG,MAAM;AAAA,MACL,QAAQ,iBAAiB,QAAQ,MAAM;AAAA,MACvC,cAAc,QAAQ;AAAA,MACtB,qBAAqB,QAAQ,kBAAkB;AAAA,MAC/C,eAAe,QAAQ;AAAA,MACvB,cAAc,QAAQ;AAAA,MACtB,mBAAmB,iBAAiB,QAAQ,eAAe;AAAA,IAC7D,CAAC,EACA,KAAK,uBAAuB;AAAA,EACjC,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,QAAQ,iBAAiB,QAAQ,MAAM;AAAA,MACvC,iBAAiB,KAAK,MAAM,QAAQ,eAAe;AAAA,IACrD,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,uBAAuB,iBAAiB,QAAQ,qBAAqB;AAAA,MACrE,sBAAsB,iBAAiB,QAAQ,oBAAoB;AAAA,MACnE,4BAA4B,iBAAiB,QAAQ,0BAA0B;AAAA,IACjF,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,YAAY,KAAK,MAAM,QAAQ,UAAU;AAAA,MACzC,0BAA0B,KAAK,MAAM,QAAQ,wBAAwB;AAAA,MACrE,gBAAgB,QAAQ;AAAA,IAC1B,CAAC,EACA,KAAK,aAAa;AAAA,EACvB,WAAW,QAAQ,SAAS,eAAe;AACzC,WACG,MAAM;AAAA,MACL,iBAAiB,KAAK,MAAM,QAAQ,eAAe;AAAA,IACrD,CAAC,EACA,KAAK,aAAa;AAAA,EACvB;AACF;","names":[]}
package/dist/stt/stt.cjs CHANGED
@@ -53,14 +53,14 @@ class STT extends import_node_events.EventEmitter {
53
53
  async recognize(frame) {
54
54
  const startTime = process.hrtime.bigint();
55
55
  const event = await this._recognize(frame);
56
- const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1e6));
56
+ const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1e6));
57
57
  this.emit("metrics_collected", {
58
58
  type: "stt_metrics",
59
59
  requestId: event.requestId ?? "",
60
60
  timestamp: Date.now(),
61
- duration,
61
+ durationMs,
62
62
  label: this.label,
63
- audioDuration: (0, import_audio.calculateAudioDuration)(frame),
63
+ audioDurationMs: Math.round((0, import_audio.calculateAudioDurationSeconds)(frame) * 1e3),
64
64
  streamed: false
65
65
  });
66
66
  return event;
@@ -152,9 +152,9 @@ class SpeechStream {
152
152
  type: "stt_metrics",
153
153
  timestamp: Date.now(),
154
154
  requestId: event.requestId,
155
- duration: 0,
155
+ durationMs: 0,
156
156
  label: this.#stt.label,
157
- audioDuration: event.recognitionUsage.audioDuration,
157
+ audioDurationMs: Math.round(event.recognitionUsage.audioDuration * 1e3),
158
158
  streamed: true
159
159
  };
160
160
  this.#stt.emit("metrics_collected", metrics);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDuration } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n duration,\n label: this.label,\n audioDuration: calculateAudioDuration(frame),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n duration: 0,\n label: this.#stt.label,\n audioDuration: event.recognitionUsage!.audioDuration,\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAAuC;AACvC,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AAEpE,mBAA8D;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAtBU,SAAAA;AAAA,GAAA;AA6EL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,WAAW,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AAC/E,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,mBAAe,qCAAuB,KAAK;AAAA,MAC3C,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAQF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,UAAU;AAAA,QACV,OAAO,KAAK,KAAK;AAAA,QACjB,eAAe,MAAM,iBAAkB;AAAA,QACvC,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
1
+ {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AAEpE,mBAA8D;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAtBU,SAAAA;AAAA,GAAA;AA6EL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAQF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
package/dist/stt/stt.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { AudioResampler } from "@livekit/rtc-node";
2
2
  import { EventEmitter } from "node:events";
3
3
  import { APIConnectionError, APIError } from "../_exceptions.js";
4
- import { calculateAudioDuration } from "../audio.js";
4
+ import { calculateAudioDurationSeconds } from "../audio.js";
5
5
  import { log } from "../log.js";
6
6
  import { DeferredReadableStream } from "../stream/deferred_stream.js";
7
7
  import { DEFAULT_API_CONNECT_OPTIONS } from "../types.js";
@@ -28,14 +28,14 @@ class STT extends EventEmitter {
28
28
  async recognize(frame) {
29
29
  const startTime = process.hrtime.bigint();
30
30
  const event = await this._recognize(frame);
31
- const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1e6));
31
+ const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1e6));
32
32
  this.emit("metrics_collected", {
33
33
  type: "stt_metrics",
34
34
  requestId: event.requestId ?? "",
35
35
  timestamp: Date.now(),
36
- duration,
36
+ durationMs,
37
37
  label: this.label,
38
- audioDuration: calculateAudioDuration(frame),
38
+ audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1e3),
39
39
  streamed: false
40
40
  });
41
41
  return event;
@@ -127,9 +127,9 @@ class SpeechStream {
127
127
  type: "stt_metrics",
128
128
  timestamp: Date.now(),
129
129
  requestId: event.requestId,
130
- duration: 0,
130
+ durationMs: 0,
131
131
  label: this.#stt.label,
132
- audioDuration: event.recognitionUsage.audioDuration,
132
+ audioDurationMs: Math.round(event.recognitionUsage.audioDuration * 1e3),
133
133
  streamed: true
134
134
  };
135
135
  this.#stt.emit("metrics_collected", metrics);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDuration } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n duration,\n label: this.label,\n audioDuration: calculateAudioDuration(frame),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n duration: 0,\n label: this.#stt.label,\n audioDuration: event.recognitionUsage!.audioDuration,\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,8BAA8B;AACvC,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,mCAAmC;AAEpE,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAtBU,SAAAA;AAAA,GAAA;AA6EL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,WAAW,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AAC/E,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,eAAe,uBAAuB,KAAK;AAAA,MAC3C,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAQF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,UAAU;AAAA,QACV,OAAO,KAAK,KAAK;AAAA,QACjB,eAAe,MAAM,iBAAkB;AAAA,QACvC,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
1
+ {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,qCAAqC;AAC9C,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,mCAAmC;AAEpE,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAtBU,SAAAA;AAAA,GAAA;AA6EL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,MAAM,8BAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAQF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
package/dist/tts/tts.cjs CHANGED
@@ -145,21 +145,22 @@ class SynthesizeStream {
145
145
  }
146
146
  async monitorMetrics() {
147
147
  const startTime = process.hrtime.bigint();
148
- let audioDuration = 0;
148
+ let audioDurationMs = 0;
149
149
  let ttfb = BigInt(-1);
150
150
  let requestId = "";
151
151
  const emit = () => {
152
152
  if (this.#metricsPendingTexts.length) {
153
153
  const text = this.#metricsPendingTexts.shift();
154
154
  const duration = process.hrtime.bigint() - startTime;
155
+ const roundedAudioDurationMs = Math.round(audioDurationMs);
155
156
  const metrics = {
156
157
  type: "tts_metrics",
157
158
  timestamp: Date.now(),
158
159
  requestId,
159
- ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
160
- duration: Math.trunc(Number(duration / BigInt(1e6))),
160
+ ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
161
+ durationMs: Math.trunc(Number(duration / BigInt(1e6))),
161
162
  charactersCount: text.length,
162
- audioDuration,
163
+ audioDurationMs: roundedAudioDurationMs,
163
164
  cancelled: this.abortController.signal.aborted,
164
165
  label: this.#tts.label,
165
166
  streamed: false
@@ -177,7 +178,7 @@ class SynthesizeStream {
177
178
  if (ttfb === BigInt(-1)) {
178
179
  ttfb = process.hrtime.bigint() - startTime;
179
180
  }
180
- audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
181
+ audioDurationMs += audio.frame.samplesPerChannel / audio.frame.sampleRate * 1e3;
181
182
  if (audio.final) {
182
183
  emit();
183
184
  }
@@ -303,7 +304,7 @@ class ChunkedStream {
303
304
  }
304
305
  async monitorMetrics() {
305
306
  const startTime = process.hrtime.bigint();
306
- let audioDuration = 0;
307
+ let audioDurationMs = 0;
307
308
  let ttfb = BigInt(-1);
308
309
  let requestId = "";
309
310
  for await (const audio of this.queue) {
@@ -312,7 +313,7 @@ class ChunkedStream {
312
313
  if (ttfb === BigInt(-1)) {
313
314
  ttfb = process.hrtime.bigint() - startTime;
314
315
  }
315
- audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
316
+ audioDurationMs += audio.frame.samplesPerChannel / audio.frame.sampleRate * 1e3;
316
317
  }
317
318
  this.output.close();
318
319
  const duration = process.hrtime.bigint() - startTime;
@@ -320,10 +321,10 @@ class ChunkedStream {
320
321
  type: "tts_metrics",
321
322
  timestamp: Date.now(),
322
323
  requestId,
323
- ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
324
- duration: Math.trunc(Number(duration / BigInt(1e6))),
324
+ ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
325
+ durationMs: Math.trunc(Number(duration / BigInt(1e6))),
325
326
  charactersCount: this.#text.length,
326
- audioDuration,
327
+ audioDurationMs: Math.round(audioDurationMs),
327
328
  cancelled: false,
328
329
  // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here
329
330
  label: this.#tts.label,
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/tts/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIStatusError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { TTSMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';\n\n/** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */\nexport interface SynthesizedAudio {\n /** Request ID (one segment could be made up of multiple requests) */\n requestId: string;\n /** Segment ID, each segment is separated by a flush */\n segmentId: string;\n /** Synthesized audio frame */\n frame: AudioFrame;\n /** Current segment of the synthesized audio */\n deltaText?: string;\n /** Whether this is the last frame of the segment (streaming only) */\n final: boolean;\n}\n\n/**\n * Describes the capabilities of the TTS provider.\n *\n * @remarks\n * At present, only `streaming` is supplied to this interface, and the framework only supports\n * providers that do have a streaming endpoint.\n */\nexport interface TTSCapabilities {\n streaming: boolean;\n}\n\nexport interface TTSError {\n type: 'tts_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type TTSCallbacks = {\n ['metrics_collected']: (metrics: TTSMetrics) => void;\n ['error']: (error: TTSError) => void;\n};\n\n/**\n * An instance of a text-to-speech adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child TTS class, which inherits this class's methods.\n */\nexport abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCallbacks>) {\n #capabilities: TTSCapabilities;\n #sampleRate: number;\n #numChannels: number;\n abstract label: string;\n\n constructor(sampleRate: number, numChannels: number, capabilities: TTSCapabilities) {\n super();\n this.#capabilities = capabilities;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n }\n\n /** Returns this TTS's capabilities */\n get capabilities(): TTSCapabilities {\n return this.#capabilities;\n }\n\n /** Returns the sample rate of audio frames returned by this TTS */\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n /** Returns the channel count of audio frames returned by this TTS */\n get numChannels(): number {\n return this.#numChannels;\n }\n\n /**\n * Receives text and returns synthesis in the form of a {@link ChunkedStream}\n */\n abstract synthesize(text: string): ChunkedStream;\n\n /**\n * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data\n */\n abstract stream(): SynthesizeStream;\n}\n\n/**\n * An instance of a text-to-speech stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SynthesizeStream class, which inherits this class's methods.\n */\nexport abstract class SynthesizeStream\n implements AsyncIterableIterator<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>\n{\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n static readonly END_OF_STREAM = Symbol('END_OF_STREAM');\n protected input = new AsyncIterableQueue<string | typeof SynthesizeStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected output = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected closed = false;\n abstract label: string;\n #tts: TTS;\n #metricsPendingTexts: string[] = [];\n #metricsText = '';\n #monitorMetricsTask?: Promise<void>;\n private _connOptions: APIConnectOptions;\n protected abortController = new AbortController();\n\n private deferredInputStream: DeferredReadableStream<\n string | typeof SynthesizeStream.FLUSH_SENTINEL\n >;\n private logger = log();\n\n constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {\n this.#tts = tts;\n this._connOptions = connOptions;\n this.deferredInputStream = new DeferredReadableStream();\n this.pumpInput();\n this.abortController.signal.addEventListener('abort', () => {\n this.deferredInputStream.detachSource();\n // TODO (AJS-36) clean this up when we refactor with streams\n this.input.close();\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to synthesize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n // TODO(AJS-37) Remove when refactoring TTS to use streams\n protected async pumpInput() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done || value === SynthesizeStream.FLUSH_SENTINEL) {\n break;\n }\n this.pushText(value);\n }\n this.endInput();\n } catch (error) {\n this.logger.error(error, 'Error reading deferred input stream');\n } finally {\n reader.releaseLock();\n // Ensure output is closed when the stream ends\n if (!this.#monitorMetricsTask) {\n // No text was received, close the output directly\n this.output.close();\n }\n }\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDuration = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n const emit = () => {\n if (this.#metricsPendingTexts.length) {\n const text = this.#metricsPendingTexts.shift()!;\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: text.length,\n audioDuration,\n cancelled: this.abortController.signal.aborted,\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n };\n\n for await (const audio of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(audio);\n if (audio === SynthesizeStream.END_OF_STREAM) continue;\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;\n if (audio.final) {\n emit();\n }\n }\n\n if (requestId) {\n emit();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(text: ReadableStream<string>) {\n this.deferredInputStream.setSource(text);\n }\n\n /** Push a string of text to the TTS */\n /** @deprecated Use `updateInputStream` instead */\n pushText(text: string) {\n if (!this.#monitorMetricsTask) {\n this.#monitorMetricsTask = this.monitorMetrics();\n // Close output when metrics task completes\n this.#monitorMetricsTask.finally(() => this.output.close());\n }\n this.#metricsText += text;\n\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(text);\n }\n\n /** Flush the TTS, causing it to process all pending text */\n flush() {\n if (this.#metricsText) {\n this.#metricsPendingTexts.push(this.#metricsText);\n this.#metricsText = '';\n }\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SynthesizeStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n this.flush();\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): SynthesizeStream {\n return this;\n }\n}\n\n/**\n * An instance of a text-to-speech response, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child ChunkedStream class, which inherits this class's methods.\n */\nexport abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {\n protected queue = new AsyncIterableQueue<SynthesizedAudio>();\n protected output = new AsyncIterableQueue<SynthesizedAudio>();\n protected closed = false;\n abstract label: string;\n #text: string;\n #tts: TTS;\n private _connOptions: APIConnectOptions;\n private logger = log();\n\n constructor(\n text: string,\n tts: TTS,\n connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#text = text;\n this.#tts = tts;\n this._connOptions = connOptions;\n\n this.monitorMetrics();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n Promise.resolve().then(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to generate TTS completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n protected abstract run(): Promise<void>;\n\n get inputText(): string {\n return this.#text;\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDuration = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n for await (const audio of this.queue) {\n this.output.put(audio);\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: this.#text.length,\n audioDuration,\n cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n\n /** Collect every frame into one in a single call */\n async collect(): Promise<AudioFrame> {\n const frames = [];\n for await (const event of this) {\n frames.push(event.frame);\n }\n return mergeFrames(frames);\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): ChunkedStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAE7B,wBAAmD;AACnD,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AACpE,mBAA2E;AA+CpE,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EACA;AAAA,EACA;AAAA,EAGA,YAAY,YAAoB,aAAqB,cAA+B;AAClF,UAAM;AACN,SAAK,gBAAgB;AACrB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAWF;AAgBO,MAAe,iBAEtB;AAAA,EACE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAClE,OAAgB,gBAAgB,OAAO,eAAe;AAAA,EAC5C,QAAQ,IAAI,gCAAoE;AAAA,EAChF,QAAQ,IAAI,gCAEpB;AAAA,EACQ,SAAS,IAAI,gCAErB;AAAA,EACQ,SAAS;AAAA,EAEnB;AAAA,EACA,uBAAiC,CAAC;AAAA,EAClC,eAAe;AAAA,EACf;AAAA,EACQ;AAAA,EACE,kBAAkB,IAAI,gBAAgB;AAAA,EAExC;AAAA,EAGA,aAAS,gBAAI;AAAA,EAErB,YAAY,KAAU,cAAiC,0CAA6B;AAClF,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAuB;AACtD,SAAK,UAAU;AACf,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,WAAK,oBAAoB,aAAa;AAEtC,WAAK,MAAM,MAAM;AACjB,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,kCAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,6CAA6C,aAAa;AAAA,YAC5D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA,EAGA,MAAgB,YAAY;AAC1B,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,QAAQ,UAAU,iBAAiB,gBAAgB;AACrD;AAAA,QACF;AACA,aAAK,SAAS,KAAK;AAAA,MACrB;AACA,WAAK,SAAS;AAAA,IAChB,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,OAAO,qCAAqC;AAAA,IAChE,UAAE;AACA,aAAO,YAAY;AAEnB,UAAI,CAAC,KAAK,qBAAqB;AAE7B,aAAK,OAAO,MAAM;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,gBAAgB;AACpB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,UAAM,OAAO,MAAM;AACjB,UAAI,KAAK,qBAAqB,QAAQ;AACpC,cAAM,OAAO,KAAK,qBAAqB,MAAM;AAC7C,cAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,cAAM,UAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,UACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,UAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,UACvD,iBAAiB,KAAK;AAAA,UACtB;AAAA,UACA,WAAW,KAAK,gBAAgB,OAAO;AAAA,UACvC,OAAO,KAAK,KAAK;AAAA,UACjB,UAAU;AAAA,QACZ;AACA,aAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,UAAU,iBAAiB,cAAe;AAC9C,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AAEA,uBAAiB,MAAM,MAAM,oBAAoB,MAAM,MAAM;AAC7D,UAAI,MAAM,OAAO;AACf,aAAK;AAAA,MACP;AAAA,IACF;AAEA,QAAI,WAAW;AACb,WAAK;AAAA,IACP;AAAA,EACF;AAAA,EAIA,kBAAkB,MAA8B;AAC9C,SAAK,oBAAoB,UAAU,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA,EAIA,SAAS,MAAc;AACrB,QAAI,CAAC,KAAK,qBAAqB;AAC7B,WAAK,sBAAsB,KAAK,eAAe;AAE/C,WAAK,oBAAoB,QAAQ,MAAM,KAAK,OAAO,MAAM,CAAC;AAAA,IAC5D;AACA,SAAK,gBAAgB;AAErB,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,cAAc;AACrB,WAAK,qBAAqB,KAAK,KAAK,YAAY;AAChD,WAAK,eAAe;AAAA,IACtB;AACA,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,iBAAiB,cAAc;AAAA,EAChD;AAAA;AAAA,EAGA,WAAW;AACT,SAAK,MAAM;AACX,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA0F;AACxF,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO;AAAA,EACT;AACF;AAgBO,MAAe,cAAiE;AAAA,EAC3E,QAAQ,IAAI,gCAAqC;AAAA,EACjD,SAAS,IAAI,gCAAqC;AAAA,EAClD,SAAS;AAAA,EAEnB;AAAA,EACA;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EAErB,YACE,MACA,KACA,cAAiC,0CACjC;AACA,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,eAAe;AAMpB,YAAQ,QAAQ,EAAE,KAAK,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAC7E;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,kCAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAIA,IAAI,YAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,gBAAgB;AACpB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,uBAAiB,MAAM,MAAM,oBAAoB,MAAM,MAAM;AAAA,IAC/D;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,iBAAiB,KAAK,MAAM;AAAA,MAC5B;AAAA,MACA,WAAW;AAAA;AAAA,MACX,OAAO,KAAK,KAAK;AAAA,MACjB,UAAU;AAAA,IACZ;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAGA,MAAM,UAA+B;AACnC,UAAM,SAAS,CAAC;AAChB,qBAAiB,SAAS,MAAM;AAC9B,aAAO,KAAK,MAAM,KAAK;AAAA,IACzB;AACA,eAAO,0BAAY,MAAM;AAAA,EAC3B;AAAA,EAEA,OAAkD;AAChD,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAmB;AACtC,WAAO;AAAA,EACT;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/tts/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIStatusError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { TTSMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';\n\n/** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */\nexport interface SynthesizedAudio {\n /** Request ID (one segment could be made up of multiple requests) */\n requestId: string;\n /** Segment ID, each segment is separated by a flush */\n segmentId: string;\n /** Synthesized audio frame */\n frame: AudioFrame;\n /** Current segment of the synthesized audio */\n deltaText?: string;\n /** Whether this is the last frame of the segment (streaming only) */\n final: boolean;\n}\n\n/**\n * Describes the capabilities of the TTS provider.\n *\n * @remarks\n * At present, only `streaming` is supplied to this interface, and the framework only supports\n * providers that do have a streaming endpoint.\n */\nexport interface TTSCapabilities {\n streaming: boolean;\n}\n\nexport interface TTSError {\n type: 'tts_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type TTSCallbacks = {\n ['metrics_collected']: (metrics: TTSMetrics) => void;\n ['error']: (error: TTSError) => void;\n};\n\n/**\n * An instance of a text-to-speech adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child TTS class, which inherits this class's methods.\n */\nexport abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCallbacks>) {\n #capabilities: TTSCapabilities;\n #sampleRate: number;\n #numChannels: number;\n abstract label: string;\n\n constructor(sampleRate: number, numChannels: number, capabilities: TTSCapabilities) {\n super();\n this.#capabilities = capabilities;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n }\n\n /** Returns this TTS's capabilities */\n get capabilities(): TTSCapabilities {\n return this.#capabilities;\n }\n\n /** Returns the sample rate of audio frames returned by this TTS */\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n /** Returns the channel count of audio frames returned by this TTS */\n get numChannels(): number {\n return this.#numChannels;\n }\n\n /**\n * Receives text and returns synthesis in the form of a {@link ChunkedStream}\n */\n abstract synthesize(text: string): ChunkedStream;\n\n /**\n * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data\n */\n abstract stream(): SynthesizeStream;\n}\n\n/**\n * An instance of a text-to-speech stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SynthesizeStream class, which inherits this class's methods.\n */\nexport abstract class SynthesizeStream\n implements AsyncIterableIterator<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>\n{\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n static readonly END_OF_STREAM = Symbol('END_OF_STREAM');\n protected input = new AsyncIterableQueue<string | typeof SynthesizeStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected output = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected closed = false;\n abstract label: string;\n #tts: TTS;\n #metricsPendingTexts: string[] = [];\n #metricsText = '';\n #monitorMetricsTask?: Promise<void>;\n private _connOptions: APIConnectOptions;\n protected abortController = new AbortController();\n\n private deferredInputStream: DeferredReadableStream<\n string | typeof SynthesizeStream.FLUSH_SENTINEL\n >;\n private logger = log();\n\n constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {\n this.#tts = tts;\n this._connOptions = connOptions;\n this.deferredInputStream = new DeferredReadableStream();\n this.pumpInput();\n this.abortController.signal.addEventListener('abort', () => {\n this.deferredInputStream.detachSource();\n // TODO (AJS-36) clean this up when we refactor with streams\n this.input.close();\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to synthesize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n // TODO(AJS-37) Remove when refactoring TTS to use streams\n protected async pumpInput() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done || value === SynthesizeStream.FLUSH_SENTINEL) {\n break;\n }\n this.pushText(value);\n }\n this.endInput();\n } catch (error) {\n this.logger.error(error, 'Error reading deferred input stream');\n } finally {\n reader.releaseLock();\n // Ensure output is closed when the stream ends\n if (!this.#monitorMetricsTask) {\n // No text was received, close the output directly\n this.output.close();\n }\n }\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n const emit = () => {\n if (this.#metricsPendingTexts.length) {\n const text = this.#metricsPendingTexts.shift()!;\n const duration = process.hrtime.bigint() - startTime;\n const roundedAudioDurationMs = Math.round(audioDurationMs);\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: text.length,\n audioDurationMs: roundedAudioDurationMs,\n cancelled: this.abortController.signal.aborted,\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n };\n\n for await (const audio of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(audio);\n if (audio === SynthesizeStream.END_OF_STREAM) continue;\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n if (audio.final) {\n emit();\n }\n }\n\n if (requestId) {\n emit();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(text: ReadableStream<string>) {\n this.deferredInputStream.setSource(text);\n }\n\n /** Push a string of text to the TTS */\n /** @deprecated Use `updateInputStream` instead */\n pushText(text: string) {\n if (!this.#monitorMetricsTask) {\n this.#monitorMetricsTask = this.monitorMetrics();\n // Close output when metrics task completes\n this.#monitorMetricsTask.finally(() => this.output.close());\n }\n this.#metricsText += text;\n\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(text);\n }\n\n /** Flush the TTS, causing it to process all pending text */\n flush() {\n if (this.#metricsText) {\n this.#metricsPendingTexts.push(this.#metricsText);\n this.#metricsText = '';\n }\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SynthesizeStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n this.flush();\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): SynthesizeStream {\n return this;\n }\n}\n\n/**\n * An instance of a text-to-speech response, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child ChunkedStream class, which inherits this class's methods.\n */\nexport abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {\n protected queue = new AsyncIterableQueue<SynthesizedAudio>();\n protected output = new AsyncIterableQueue<SynthesizedAudio>();\n protected closed = false;\n abstract label: string;\n #text: string;\n #tts: TTS;\n private _connOptions: APIConnectOptions;\n private logger = log();\n\n constructor(\n text: string,\n tts: TTS,\n connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#text = text;\n this.#tts = tts;\n this._connOptions = connOptions;\n\n this.monitorMetrics();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n Promise.resolve().then(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to generate TTS completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n protected abstract run(): Promise<void>;\n\n get inputText(): string {\n return this.#text;\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n for await (const audio of this.queue) {\n this.output.put(audio);\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: this.#text.length,\n audioDurationMs: Math.round(audioDurationMs),\n cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n\n /** Collect every frame into one in a single call */\n async collect(): Promise<AudioFrame> {\n const frames = [];\n for await (const event of this) {\n frames.push(event.frame);\n }\n return mergeFrames(frames);\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): ChunkedStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAE7B,wBAAmD;AACnD,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AACpE,mBAA2E;AA+CpE,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EACA;AAAA,EACA;AAAA,EAGA,YAAY,YAAoB,aAAqB,cAA+B;AAClF,UAAM;AACN,SAAK,gBAAgB;AACrB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAWF;AAgBO,MAAe,iBAEtB;AAAA,EACE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAClE,OAAgB,gBAAgB,OAAO,eAAe;AAAA,EAC5C,QAAQ,IAAI,gCAAoE;AAAA,EAChF,QAAQ,IAAI,gCAEpB;AAAA,EACQ,SAAS,IAAI,gCAErB;AAAA,EACQ,SAAS;AAAA,EAEnB;AAAA,EACA,uBAAiC,CAAC;AAAA,EAClC,eAAe;AAAA,EACf;AAAA,EACQ;AAAA,EACE,kBAAkB,IAAI,gBAAgB;AAAA,EAExC;AAAA,EAGA,aAAS,gBAAI;AAAA,EAErB,YAAY,KAAU,cAAiC,0CAA6B;AAClF,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAuB;AACtD,SAAK,UAAU;AACf,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,WAAK,oBAAoB,aAAa;AAEtC,WAAK,MAAM,MAAM;AACjB,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,kCAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,6CAA6C,aAAa;AAAA,YAC5D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA,EAGA,MAAgB,YAAY;AAC1B,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,QAAQ,UAAU,iBAAiB,gBAAgB;AACrD;AAAA,QACF;AACA,aAAK,SAAS,KAAK;AAAA,MACrB;AACA,WAAK,SAAS;AAAA,IAChB,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,OAAO,qCAAqC;AAAA,IAChE,UAAE;AACA,aAAO,YAAY;AAEnB,UAAI,CAAC,KAAK,qBAAqB;AAE7B,aAAK,OAAO,MAAM;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,UAAM,OAAO,MAAM;AACjB,UAAI,KAAK,qBAAqB,QAAQ;AACpC,cAAM,OAAO,KAAK,qBAAqB,MAAM;AAC7C,cAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,cAAM,yBAAyB,KAAK,MAAM,eAAe;AACzD,cAAM,UAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,UACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,UAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,UACzD,iBAAiB,KAAK;AAAA,UACtB,iBAAiB;AAAA,UACjB,WAAW,KAAK,gBAAgB,OAAO;AAAA,UACvC,OAAO,KAAK,KAAK;AAAA,UACjB,UAAU;AAAA,QACZ;AACA,aAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,UAAU,iBAAiB,cAAe;AAC9C,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AAEA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAC9E,UAAI,MAAM,OAAO;AACf,aAAK;AAAA,MACP;AAAA,IACF;AAEA,QAAI,WAAW;AACb,WAAK;AAAA,IACP;AAAA,EACF;AAAA,EAIA,kBAAkB,MAA8B;AAC9C,SAAK,oBAAoB,UAAU,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA,EAIA,SAAS,MAAc;AACrB,QAAI,CAAC,KAAK,qBAAqB;AAC7B,WAAK,sBAAsB,KAAK,eAAe;AAE/C,WAAK,oBAAoB,QAAQ,MAAM,KAAK,OAAO,MAAM,CAAC;AAAA,IAC5D;AACA,SAAK,gBAAgB;AAErB,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,cAAc;AACrB,WAAK,qBAAqB,KAAK,KAAK,YAAY;AAChD,WAAK,eAAe;AAAA,IACtB;AACA,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,iBAAiB,cAAc;AAAA,EAChD;AAAA;AAAA,EAGA,WAAW;AACT,SAAK,MAAM;AACX,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA0F;AACxF,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO;AAAA,EACT;AACF;AAgBO,MAAe,cAAiE;AAAA,EAC3E,QAAQ,IAAI,gCAAqC;AAAA,EACjD,SAAS,IAAI,gCAAqC;AAAA,EAClD,SAAS;AAAA,EAEnB;AAAA,EACA;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EAErB,YACE,MACA,KACA,cAAiC,0CACjC;AACA,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,eAAe;AAMpB,YAAQ,QAAQ,EAAE,KAAK,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAC7E;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,kCAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAIA,IAAI,YAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAAA,IAChF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACzD,iBAAiB,KAAK,MAAM;AAAA,MAC5B,iBAAiB,KAAK,MAAM,eAAe;AAAA,MAC3C,WAAW;AAAA;AAAA,MACX,OAAO,KAAK,KAAK;AAAA,MACjB,UAAU;AAAA,IACZ;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAGA,MAAM,UAA+B;AACnC,UAAM,SAAS,CAAC;AAChB,qBAAiB,SAAS,MAAM;AAC9B,aAAO,KAAK,MAAM,KAAK;AAAA,IACzB;AACA,eAAO,0BAAY,MAAM;AAAA,EAC3B;AAAA,EAEA,OAAkD;AAChD,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAmB;AACtC,WAAO;AAAA,EACT;AACF;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/tts/tts.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,EAAE,kBAAkB,EAA0C,MAAM,aAAa,CAAC;AAEzF,+EAA+E;AAC/E,MAAM,WAAW,gBAAgB;IAC/B,qEAAqE;IACrE,SAAS,EAAE,MAAM,CAAC;IAClB,uDAAuD;IACvD,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,KAAK,EAAE,UAAU,CAAC;IAClB,+CAA+C;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IAItF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE,eAAe;IAOlF,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,mEAAmE;IACnE,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,qEAAqE;IACrE,IAAI,WAAW,IAAI,MAAM,CAExB;IAED;;OAEG;IACH,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa;IAEhD;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,gBAAgB;CACpC;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,gBACpB,YAAW,qBAAqB,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC;;IAE1F,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,MAAM,CAAC,QAAQ,CAAC,aAAa,gBAA2B;IACxD,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,KAAK,+EAEX;IACJ,SAAS,CAAC,MAAM,+EAEZ;IACJ,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAKvB,OAAO,CAAC,YAAY,CAAoB;IACxC,SAAS,CAAC,eAAe,kBAAyB;IAElD,OAAO,CAAC,mBAAmB,CAEzB;IACF,OAAO,CAAC,MAAM,CAAS;gBAEX,GAAG,EAAE,GAAG,EAAE,WAAW,GAAE,iBAA+C;YAoBpE,QAAQ;IAoCtB,OAAO,CAAC,SAAS;cAWD,SAAS;cAuBT,cAAc;IAgD9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,iBAAiB,CAAC,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC;IAI9C,uCAAuC;IACvC,kDAAkD;IAClD,QAAQ,CAAC,IAAI,EAAE,MAAM;IAiBrB,4DAA4D;IAC5D,KAAK;IAcL,2DAA2D;IAC3D,QAAQ;IAWR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAIzF,wDAAwD;IACxD,KAAK;IAIL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,gBAAgB;CAG3C;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,aAAc,YAAW,qBAAqB,CAAC,gBAAgB,CAAC;;IACpF,SAAS,CAAC,KAAK,uCAA8C;IAC7D,SAAS,CAAC,MAAM,uCAA8C;IAC9D,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAGvB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,MAAM,CAAS;gBAGrB,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,GAAG,EACR,WAAW,GAAE,iBAA+C;YAehD,QAAQ;IAoCtB,OAAO,CAAC,SAAS;IAUjB,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,IAAI,SAAS,IAAI,MAAM,CAEtB;cAEe,cAAc;IAgC9B,oDAAoD;IAC9C,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC;IAQpC,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,CAAC,CAAC;IAIjD,wDAAwD;IACxD,KAAK;IAML,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa;CAGxC"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../src/tts/tts.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,EAAE,kBAAkB,EAA0C,MAAM,aAAa,CAAC;AAEzF,+EAA+E;AAC/E,MAAM,WAAW,gBAAgB;IAC/B,qEAAqE;IACrE,SAAS,EAAE,MAAM,CAAC;IAClB,uDAAuD;IACvD,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,KAAK,EAAE,UAAU,CAAC;IAClB,+CAA+C;IAC/C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qEAAqE;IACrE,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IAItF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,YAAY,EAAE,eAAe;IAOlF,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,mEAAmE;IACnE,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,qEAAqE;IACrE,IAAI,WAAW,IAAI,MAAM,CAExB;IAED;;OAEG;IACH,QAAQ,CAAC,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa;IAEhD;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,gBAAgB;CACpC;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,gBACpB,YAAW,qBAAqB,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC;;IAE1F,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,MAAM,CAAC,QAAQ,CAAC,aAAa,gBAA2B;IACxD,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,KAAK,+EAEX;IACJ,SAAS,CAAC,MAAM,+EAEZ;IACJ,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAKvB,OAAO,CAAC,YAAY,CAAoB;IACxC,SAAS,CAAC,eAAe,kBAAyB;IAElD,OAAO,CAAC,mBAAmB,CAEzB;IACF,OAAO,CAAC,MAAM,CAAS;gBAEX,GAAG,EAAE,GAAG,EAAE,WAAW,GAAE,iBAA+C;YAoBpE,QAAQ;IAoCtB,OAAO,CAAC,SAAS;cAWD,SAAS;cAuBT,cAAc;IAiD9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,iBAAiB,CAAC,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC;IAI9C,uCAAuC;IACvC,kDAAkD;IAClD,QAAQ,CAAC,IAAI,EAAE,MAAM;IAiBrB,4DAA4D;IAC5D,KAAK;IAcL,2DAA2D;IAC3D,QAAQ;IAWR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,GAAG,OAAO,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAIzF,wDAAwD;IACxD,KAAK;IAIL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,gBAAgB;CAG3C;AAED;;;;;;;;;;;;;GAaG;AACH,8BAAsB,aAAc,YAAW,qBAAqB,CAAC,gBAAgB,CAAC;;IACpF,SAAS,CAAC,KAAK,uCAA8C;IAC7D,SAAS,CAAC,MAAM,uCAA8C;IAC9D,SAAS,CAAC,MAAM,UAAS;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IAGvB,OAAO,CAAC,YAAY,CAAoB;IACxC,OAAO,CAAC,MAAM,CAAS;gBAGrB,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,GAAG,EACR,WAAW,GAAE,iBAA+C;YAehD,QAAQ;IAoCtB,OAAO,CAAC,SAAS;IAUjB,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,IAAI,SAAS,IAAI,MAAM,CAEtB;cAEe,cAAc;IAgC9B,oDAAoD;IAC9C,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC;IAQpC,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,gBAAgB,CAAC,CAAC;IAIjD,wDAAwD;IACxD,KAAK;IAML,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa;CAGxC"}