@livekit/agents 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209) hide show
  1. package/README.md +17 -0
  2. package/dist/audio.cjs +77 -0
  3. package/dist/audio.cjs.map +1 -0
  4. package/dist/audio.js +48 -37
  5. package/dist/audio.js.map +1 -1
  6. package/dist/cli.cjs +131 -0
  7. package/dist/cli.cjs.map +1 -0
  8. package/dist/cli.js +96 -122
  9. package/dist/cli.js.map +1 -1
  10. package/dist/generator.cjs +36 -0
  11. package/dist/generator.cjs.map +1 -0
  12. package/dist/generator.js +8 -22
  13. package/dist/generator.js.map +1 -1
  14. package/dist/http_server.cjs +72 -0
  15. package/dist/http_server.cjs.map +1 -0
  16. package/dist/http_server.d.ts +1 -1
  17. package/dist/http_server.js +44 -47
  18. package/dist/http_server.js.map +1 -1
  19. package/dist/index.cjs +78 -0
  20. package/dist/index.cjs.map +1 -0
  21. package/dist/index.js +26 -28
  22. package/dist/index.js.map +1 -1
  23. package/dist/ipc/job_executor.cjs +33 -0
  24. package/dist/ipc/job_executor.cjs.map +1 -0
  25. package/dist/ipc/job_executor.js +7 -4
  26. package/dist/ipc/job_executor.js.map +1 -1
  27. package/dist/ipc/job_main.cjs +147 -0
  28. package/dist/ipc/job_main.cjs.map +1 -0
  29. package/dist/ipc/job_main.d.ts +1 -1
  30. package/dist/ipc/job_main.js +103 -103
  31. package/dist/ipc/job_main.js.map +1 -1
  32. package/dist/ipc/message.cjs +17 -0
  33. package/dist/ipc/message.cjs.map +1 -0
  34. package/dist/ipc/message.js +0 -1
  35. package/dist/ipc/message.js.map +1 -1
  36. package/dist/ipc/proc_job_executor.cjs +174 -0
  37. package/dist/ipc/proc_job_executor.cjs.map +1 -0
  38. package/dist/ipc/proc_job_executor.js +130 -126
  39. package/dist/ipc/proc_job_executor.js.map +1 -1
  40. package/dist/ipc/proc_pool.cjs +126 -0
  41. package/dist/ipc/proc_pool.cjs.map +1 -0
  42. package/dist/ipc/proc_pool.js +93 -96
  43. package/dist/ipc/proc_pool.js.map +1 -1
  44. package/dist/job.cjs +230 -0
  45. package/dist/job.cjs.map +1 -0
  46. package/dist/job.js +195 -198
  47. package/dist/job.js.map +1 -1
  48. package/dist/llm/chat_context.cjs +131 -0
  49. package/dist/llm/chat_context.cjs.map +1 -0
  50. package/dist/llm/chat_context.js +98 -86
  51. package/dist/llm/chat_context.js.map +1 -1
  52. package/dist/llm/function_context.cjs +103 -0
  53. package/dist/llm/function_context.cjs.map +1 -0
  54. package/dist/llm/function_context.js +72 -81
  55. package/dist/llm/function_context.js.map +1 -1
  56. package/dist/llm/function_context.test.cjs +218 -0
  57. package/dist/llm/function_context.test.cjs.map +1 -0
  58. package/dist/llm/function_context.test.js +209 -210
  59. package/dist/llm/function_context.test.js.map +1 -1
  60. package/dist/llm/index.cjs +43 -0
  61. package/dist/llm/index.cjs.map +1 -0
  62. package/dist/llm/index.js +22 -6
  63. package/dist/llm/index.js.map +1 -1
  64. package/dist/llm/llm.cjs +76 -0
  65. package/dist/llm/llm.cjs.map +1 -0
  66. package/dist/llm/llm.js +48 -42
  67. package/dist/llm/llm.js.map +1 -1
  68. package/dist/log.cjs +57 -0
  69. package/dist/log.cjs.map +1 -0
  70. package/dist/log.js +27 -26
  71. package/dist/log.js.map +1 -1
  72. package/dist/multimodal/agent_playout.cjs +228 -0
  73. package/dist/multimodal/agent_playout.cjs.map +1 -0
  74. package/dist/multimodal/agent_playout.d.ts +1 -1
  75. package/dist/multimodal/agent_playout.js +193 -180
  76. package/dist/multimodal/agent_playout.js.map +1 -1
  77. package/dist/multimodal/index.cjs +25 -0
  78. package/dist/multimodal/index.cjs.map +1 -0
  79. package/dist/multimodal/index.js +2 -5
  80. package/dist/multimodal/index.js.map +1 -1
  81. package/dist/multimodal/multimodal_agent.cjs +404 -0
  82. package/dist/multimodal/multimodal_agent.cjs.map +1 -0
  83. package/dist/multimodal/multimodal_agent.d.ts +2 -2
  84. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  85. package/dist/multimodal/multimodal_agent.js +351 -303
  86. package/dist/multimodal/multimodal_agent.js.map +1 -1
  87. package/dist/pipeline/agent_output.cjs +172 -0
  88. package/dist/pipeline/agent_output.cjs.map +1 -0
  89. package/dist/pipeline/agent_output.js +136 -138
  90. package/dist/pipeline/agent_output.js.map +1 -1
  91. package/dist/pipeline/agent_playout.cjs +169 -0
  92. package/dist/pipeline/agent_playout.cjs.map +1 -0
  93. package/dist/pipeline/agent_playout.js +126 -136
  94. package/dist/pipeline/agent_playout.js.map +1 -1
  95. package/dist/pipeline/human_input.cjs +158 -0
  96. package/dist/pipeline/human_input.cjs.map +1 -0
  97. package/dist/pipeline/human_input.js +124 -125
  98. package/dist/pipeline/human_input.js.map +1 -1
  99. package/dist/pipeline/index.cjs +31 -0
  100. package/dist/pipeline/index.cjs.map +1 -0
  101. package/dist/pipeline/index.js +8 -4
  102. package/dist/pipeline/index.js.map +1 -1
  103. package/dist/pipeline/pipeline_agent.cjs +642 -0
  104. package/dist/pipeline/pipeline_agent.cjs.map +1 -0
  105. package/dist/pipeline/pipeline_agent.d.ts +1 -0
  106. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  107. package/dist/pipeline/pipeline_agent.js +595 -650
  108. package/dist/pipeline/pipeline_agent.js.map +1 -1
  109. package/dist/pipeline/speech_handle.cjs +128 -0
  110. package/dist/pipeline/speech_handle.cjs.map +1 -0
  111. package/dist/pipeline/speech_handle.js +102 -100
  112. package/dist/pipeline/speech_handle.js.map +1 -1
  113. package/dist/plugin.cjs +46 -0
  114. package/dist/plugin.cjs.map +1 -0
  115. package/dist/plugin.js +20 -20
  116. package/dist/plugin.js.map +1 -1
  117. package/dist/stt/index.cjs +38 -0
  118. package/dist/stt/index.cjs.map +1 -0
  119. package/dist/stt/index.js +13 -5
  120. package/dist/stt/index.js.map +1 -1
  121. package/dist/stt/stream_adapter.cjs +87 -0
  122. package/dist/stt/stream_adapter.cjs.map +1 -0
  123. package/dist/stt/stream_adapter.js +58 -55
  124. package/dist/stt/stream_adapter.js.map +1 -1
  125. package/dist/stt/stt.cjs +98 -0
  126. package/dist/stt/stt.cjs.map +1 -0
  127. package/dist/stt/stt.js +63 -98
  128. package/dist/stt/stt.js.map +1 -1
  129. package/dist/tokenize/basic/basic.cjs +98 -0
  130. package/dist/tokenize/basic/basic.cjs.map +1 -0
  131. package/dist/tokenize/basic/basic.js +56 -45
  132. package/dist/tokenize/basic/basic.js.map +1 -1
  133. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  134. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  135. package/dist/tokenize/basic/hyphenator.js +66 -82
  136. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  137. package/dist/tokenize/basic/index.cjs +35 -0
  138. package/dist/tokenize/basic/index.cjs.map +1 -0
  139. package/dist/tokenize/basic/index.js +7 -4
  140. package/dist/tokenize/basic/index.js.map +1 -1
  141. package/dist/tokenize/basic/paragraph.cjs +57 -0
  142. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  143. package/dist/tokenize/basic/paragraph.js +30 -35
  144. package/dist/tokenize/basic/paragraph.js.map +1 -1
  145. package/dist/tokenize/basic/sentence.cjs +83 -0
  146. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  147. package/dist/tokenize/basic/sentence.js +56 -57
  148. package/dist/tokenize/basic/sentence.js.map +1 -1
  149. package/dist/tokenize/basic/word.cjs +44 -0
  150. package/dist/tokenize/basic/word.cjs.map +1 -0
  151. package/dist/tokenize/basic/word.js +17 -20
  152. package/dist/tokenize/basic/word.js.map +1 -1
  153. package/dist/tokenize/index.cjs +55 -0
  154. package/dist/tokenize/index.cjs.map +1 -0
  155. package/dist/tokenize/index.js +18 -7
  156. package/dist/tokenize/index.js.map +1 -1
  157. package/dist/tokenize/token_stream.cjs +164 -0
  158. package/dist/tokenize/token_stream.cjs.map +1 -0
  159. package/dist/tokenize/token_stream.js +133 -139
  160. package/dist/tokenize/token_stream.js.map +1 -1
  161. package/dist/tokenize/tokenizer.cjs +184 -0
  162. package/dist/tokenize/tokenizer.cjs.map +1 -0
  163. package/dist/tokenize/tokenizer.js +138 -99
  164. package/dist/tokenize/tokenizer.js.map +1 -1
  165. package/dist/transcription.cjs +131 -0
  166. package/dist/transcription.cjs.map +1 -0
  167. package/dist/transcription.d.ts +2 -0
  168. package/dist/transcription.d.ts.map +1 -1
  169. package/dist/transcription.js +99 -93
  170. package/dist/transcription.js.map +1 -1
  171. package/dist/tts/index.cjs +38 -0
  172. package/dist/tts/index.cjs.map +1 -0
  173. package/dist/tts/index.js +13 -5
  174. package/dist/tts/index.js.map +1 -1
  175. package/dist/tts/stream_adapter.cjs +78 -0
  176. package/dist/tts/stream_adapter.cjs.map +1 -0
  177. package/dist/tts/stream_adapter.js +50 -47
  178. package/dist/tts/stream_adapter.js.map +1 -1
  179. package/dist/tts/tts.cjs +127 -0
  180. package/dist/tts/tts.cjs.map +1 -0
  181. package/dist/tts/tts.js +90 -120
  182. package/dist/tts/tts.js.map +1 -1
  183. package/dist/utils.cjs +284 -0
  184. package/dist/utils.cjs.map +1 -0
  185. package/dist/utils.js +242 -247
  186. package/dist/utils.js.map +1 -1
  187. package/dist/vad.cjs +92 -0
  188. package/dist/vad.cjs.map +1 -0
  189. package/dist/vad.js +57 -52
  190. package/dist/vad.js.map +1 -1
  191. package/dist/version.cjs +29 -0
  192. package/dist/version.cjs.map +1 -0
  193. package/dist/version.js +4 -4
  194. package/dist/version.js.map +1 -1
  195. package/dist/worker.cjs +576 -0
  196. package/dist/worker.cjs.map +1 -0
  197. package/dist/worker.d.ts +1 -1
  198. package/dist/worker.js +511 -484
  199. package/dist/worker.js.map +1 -1
  200. package/package.json +23 -7
  201. package/src/ipc/job_main.ts +66 -64
  202. package/src/multimodal/multimodal_agent.ts +29 -2
  203. package/src/pipeline/pipeline_agent.ts +25 -24
  204. package/src/transcription.ts +5 -0
  205. package/.turbo/turbo-build.log +0 -4
  206. package/CHANGELOG.md +0 -165
  207. package/api-extractor.json +0 -20
  208. package/tsconfig.json +0 -16
  209. package/tsconfig.tsbuildinfo +0 -1
@@ -1,144 +1,138 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- import { randomUUID } from 'node:crypto';
5
- import { AsyncIterableQueue } from '../utils.js';
6
- import { SentenceStream, WordStream } from './tokenizer.js';
7
- export class BufferedTokenStream {
8
- queue = new AsyncIterableQueue();
9
- closed = false;
10
- #func;
11
- #minTokenLength;
12
- #minContextLength;
13
- #bufTokens = [];
14
- #inBuf = '';
15
- #outBuf = '';
16
- #currentSegmentId;
17
- constructor(func, minTokenLength, minContextLength) {
18
- this.#func = func;
19
- this.#minTokenLength = minTokenLength;
20
- this.#minContextLength = minContextLength;
21
- this.#currentSegmentId = randomUUID();
22
- }
23
- /** Push a string of text into the token stream */
24
- pushText(text) {
25
- if (this.closed) {
26
- throw new Error('Stream is closed');
27
- }
28
- this.#inBuf += text;
29
- if (this.#inBuf.length < this.#minContextLength)
30
- return;
31
- while (true) {
32
- const tokens = this.#func(this.#inBuf);
33
- if (tokens.length <= 1)
34
- break;
35
- if (this.#outBuf)
36
- this.#outBuf += ' ';
37
- const tok = tokens.shift();
38
- let tokText = tok;
39
- if (tok.length > 1 && typeof tok[1] === 'number') {
40
- tokText = tok[0];
41
- }
42
- this.#outBuf += tokText;
43
- if (this.#outBuf.length >= this.#minTokenLength) {
44
- this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
45
- this.#outBuf = '';
46
- }
47
- if (typeof tok !== 'string') {
48
- this.#inBuf = this.#inBuf.slice(tok[2]);
49
- }
50
- else {
51
- this.#inBuf = this.#inBuf
52
- .slice(Math.max(0, this.#inBuf.indexOf(tok)) + tok.length)
53
- .trimStart();
54
- }
55
- }
56
- }
57
- /** Flush the stream, causing it to process all pending text */
58
- flush() {
59
- if (this.closed) {
60
- throw new Error('Stream is closed');
61
- }
62
- if (this.#inBuf || this.#outBuf) {
63
- const tokens = this.#func(this.#inBuf);
64
- if (tokens) {
65
- if (this.#outBuf)
66
- this.#outBuf += ' ';
67
- if (typeof tokens[0] !== 'string') {
68
- this.#outBuf += tokens.map((tok) => tok[0]).join(' ');
69
- }
70
- else {
71
- this.#outBuf += tokens.join(' ');
72
- }
73
- }
74
- if (this.#outBuf) {
75
- this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
76
- }
77
- this.#currentSegmentId = randomUUID();
1
+ import { randomUUID } from "node:crypto";
2
+ import { AsyncIterableQueue } from "../utils.js";
3
+ import { SentenceStream, WordStream } from "./tokenizer.js";
4
+ class BufferedTokenStream {
5
+ queue = new AsyncIterableQueue();
6
+ closed = false;
7
+ #func;
8
+ #minTokenLength;
9
+ #minContextLength;
10
+ #bufTokens = [];
11
+ #inBuf = "";
12
+ #outBuf = "";
13
+ #currentSegmentId;
14
+ constructor(func, minTokenLength, minContextLength) {
15
+ this.#func = func;
16
+ this.#minTokenLength = minTokenLength;
17
+ this.#minContextLength = minContextLength;
18
+ this.#currentSegmentId = randomUUID();
19
+ }
20
+ /** Push a string of text into the token stream */
21
+ pushText(text) {
22
+ if (this.closed) {
23
+ throw new Error("Stream is closed");
24
+ }
25
+ this.#inBuf += text;
26
+ if (this.#inBuf.length < this.#minContextLength) return;
27
+ while (true) {
28
+ const tokens = this.#func(this.#inBuf);
29
+ if (tokens.length <= 1) break;
30
+ if (this.#outBuf) this.#outBuf += " ";
31
+ const tok = tokens.shift();
32
+ let tokText = tok;
33
+ if (tok.length > 1 && typeof tok[1] === "number") {
34
+ tokText = tok[0];
35
+ }
36
+ this.#outBuf += tokText;
37
+ if (this.#outBuf.length >= this.#minTokenLength) {
38
+ this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
39
+ this.#outBuf = "";
40
+ }
41
+ if (typeof tok !== "string") {
42
+ this.#inBuf = this.#inBuf.slice(tok[2]);
43
+ } else {
44
+ this.#inBuf = this.#inBuf.slice(Math.max(0, this.#inBuf.indexOf(tok)) + tok.length).trimStart();
45
+ }
46
+ }
47
+ }
48
+ /** Flush the stream, causing it to process all pending text */
49
+ flush() {
50
+ if (this.closed) {
51
+ throw new Error("Stream is closed");
52
+ }
53
+ if (this.#inBuf || this.#outBuf) {
54
+ const tokens = this.#func(this.#inBuf);
55
+ if (tokens) {
56
+ if (this.#outBuf) this.#outBuf += " ";
57
+ if (typeof tokens[0] !== "string") {
58
+ this.#outBuf += tokens.map((tok) => tok[0]).join(" ");
59
+ } else {
60
+ this.#outBuf += tokens.join(" ");
78
61
  }
79
- this.#inBuf = '';
80
- this.#outBuf = '';
81
- }
82
- /** Mark the input as ended and forbid additional pushes */
83
- endInput() {
84
- if (this.closed) {
85
- throw new Error('Stream is closed');
86
- }
87
- this.flush();
88
- this.close();
89
- }
90
- next() {
91
- return this.queue.next();
92
- }
93
- /** Close both the input and output of the token stream */
94
- close() {
95
- this.queue.close();
96
- this.closed = true;
97
- }
98
- [Symbol.asyncIterator]() {
99
- return this;
100
- }
62
+ }
63
+ if (this.#outBuf) {
64
+ this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
65
+ }
66
+ this.#currentSegmentId = randomUUID();
67
+ }
68
+ this.#inBuf = "";
69
+ this.#outBuf = "";
70
+ }
71
+ /** Mark the input as ended and forbid additional pushes */
72
+ endInput() {
73
+ if (this.closed) {
74
+ throw new Error("Stream is closed");
75
+ }
76
+ this.flush();
77
+ this.close();
78
+ }
79
+ next() {
80
+ return this.queue.next();
81
+ }
82
+ /** Close both the input and output of the token stream */
83
+ close() {
84
+ this.queue.close();
85
+ this.closed = true;
86
+ }
87
+ [Symbol.asyncIterator]() {
88
+ return this;
89
+ }
101
90
  }
102
- export class BufferedSentenceStream extends SentenceStream {
103
- #stream;
104
- constructor(func, minTokenLength, minContextLength) {
105
- super();
106
- this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);
107
- }
108
- pushText(text) {
109
- this.#stream.pushText(text);
110
- }
111
- flush() {
112
- this.#stream.flush();
113
- }
114
- close() {
115
- super.close();
116
- this.#stream.close();
117
- }
118
- next() {
119
- return this.#stream.next();
120
- }
91
+ class BufferedSentenceStream extends SentenceStream {
92
+ #stream;
93
+ constructor(func, minTokenLength, minContextLength) {
94
+ super();
95
+ this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);
96
+ }
97
+ pushText(text) {
98
+ this.#stream.pushText(text);
99
+ }
100
+ flush() {
101
+ this.#stream.flush();
102
+ }
103
+ close() {
104
+ super.close();
105
+ this.#stream.close();
106
+ }
107
+ next() {
108
+ return this.#stream.next();
109
+ }
121
110
  }
122
- export class BufferedWordStream extends WordStream {
123
- #stream;
124
- constructor(func, minTokenLength, minContextLength) {
125
- super();
126
- this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);
127
- }
128
- pushText(text) {
129
- this.#stream.pushText(text);
130
- }
131
- flush() {
132
- this.#stream.flush();
133
- }
134
- endInput() {
135
- this.#stream.endInput();
136
- }
137
- close() {
138
- this.#stream.close();
139
- }
140
- next() {
141
- return this.#stream.next();
142
- }
111
+ class BufferedWordStream extends WordStream {
112
+ #stream;
113
+ constructor(func, minTokenLength, minContextLength) {
114
+ super();
115
+ this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);
116
+ }
117
+ pushText(text) {
118
+ this.#stream.pushText(text);
119
+ }
120
+ flush() {
121
+ this.#stream.flush();
122
+ }
123
+ endInput() {
124
+ this.#stream.endInput();
125
+ }
126
+ close() {
127
+ this.#stream.close();
128
+ }
129
+ next() {
130
+ return this.#stream.next();
131
+ }
143
132
  }
133
+ export {
134
+ BufferedSentenceStream,
135
+ BufferedTokenStream,
136
+ BufferedWordStream
137
+ };
144
138
  //# sourceMappingURL=token_stream.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"token_stream.js","sourceRoot":"","sources":["../../src/tokenize/token_stream.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEjD,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAI5D,MAAM,OAAO,mBAAmB;IACpB,KAAK,GAAG,IAAI,kBAAkB,EAAa,CAAC;IAC5C,MAAM,GAAG,KAAK,CAAC;IAEzB,KAAK,CAAe;IACpB,eAAe,CAAS;IACxB,iBAAiB,CAAS;IAC1B,UAAU,GAAa,EAAE,CAAC;IAC1B,MAAM,GAAG,EAAE,CAAC;IACZ,OAAO,GAAG,EAAE,CAAC;IACb,iBAAiB,CAAS;IAE1B,YAAY,IAAkB,EAAE,cAAsB,EAAE,gBAAwB;QAC9E,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,eAAe,GAAG,cAAc,CAAC;QACtC,IAAI,CAAC,iBAAiB,GAAG,gBAAgB,CAAC;QAE1C,IAAI,CAAC,iBAAiB,GAAG,UAAU,EAAE,CAAC;IACxC,CAAC;IAED,kDAAkD;IAClD,QAAQ,CAAC,IAAY;QACnB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC;QACpB,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,iBAAiB;YAAE,OAAO;QAExD,OAAO,IAAI,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;gBAAE,MAAM;YAE9B,IAAI,IAAI,CAAC,OAAO;gBAAE,IAAI,CAAC,OAAO,IAAI,GAAG,CAAC;YAEtC,MAAM,GAAG,GAAG,MAAM,CAAC,KAAK,EAAG,CAAC;YAC5B,IAAI,OAAO,GAAG,GAAa,CAAC;YAC5B,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;gBACjD,OAAO,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;YACnB,CAAC;YAED,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC;YACxB,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;gBAChD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC;gBAC3E,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;YACpB,CAAC;YAED,IAAI,OAAO,GAAI,KAAK,QAAQ,EAAE,CAAC;gBAC7B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAI,CAAC,CAAC,CAAC,CAAC,CAAC;YAC3C,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM;qBACtB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC;qBACzD,SAAS,EAAE,CAAC;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,KAAK;QACH,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACtC,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvC,IAAI,MAAM,EAAE,CAAC;gBACX,IAAI,IAAI,CAAC,OAAO;oBAAE,IAAI,CAAC,OAAO,IAAI,GAAG,CAAC;gBAEtC,IAAI,OAAO,MAAM,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;oBAClC,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACxD,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,OAAO,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACnC,CAAC;YACH,CAAC;YAED,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;gBACjB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,iBAAiB,EAAE,CAAC,CAAC;YAC7E,CAAC;YAED,IAAI,CAAC,iBAAiB,GAAG,UAAU,EAAE,CAAC;QACxC,CAAC;QAED,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;IACpB,CAAC;IAED,2DAA2D;IAC3D,QAAQ;QACN,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACtC,CAAC;QACD,IAAI,CAAC,KAAK,EAAE,CAAC;QACb,IAAI,CAAC,KAAK,EAAE,CAAC;IACf,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IAC3B,CAAC;IAED,0DAA0D;IAC1D,KAAK;QACH,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;IACrB,CAAC;IAED,CAAC,MAAM,CAAC,aAAa,CAAC;QACpB,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAED,MAAM,OAAO,sBAAuB,SAAQ,cAAc;IACxD,OAAO,CAAsB;IAE7B,YAAY,IAAkB,EAAE,cAAsB,EAAE,gBAAwB;QAC9E,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG,IAAI,mBAAmB,CAAC,IAAI,EAAE,cAAc,EAAE,gBAAgB,CAAC,CAAC;IACjF,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED,KAAK;QACH,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IAC7B,CAAC;CACF;AAED,MAAM,OAAO,kBAAmB,SAAQ,UAAU;IAChD,OAAO,CAAsB;IAE7B,YAAY,IAAkB,EAAE,cAAsB,EAAE,gBAAwB;QAC9E,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG,IAAI,mBAAmB,CAAC,IAAI,EAAE,cAAc,EAAE,gBAAgB,CAAC,CAAC;IACjF,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED,QAAQ;QACN,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC;IAC1B,CAAC;IAED,KAAK;QACH,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IAC7B,CAAC;CACF"}
1
+ {"version":3,"sources":["../../src/tokenize/token_stream.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { randomUUID } from 'node:crypto';\nimport { AsyncIterableQueue } from '../utils.js';\nimport type { TokenData } from './tokenizer.js';\nimport { SentenceStream, WordStream } from './tokenizer.js';\n\ntype TokenizeFunc = (x: string) => string[] | [string, number, number][];\n\nexport class BufferedTokenStream implements AsyncIterableIterator<TokenData> {\n protected queue = new AsyncIterableQueue<TokenData>();\n protected closed = false;\n\n #func: TokenizeFunc;\n #minTokenLength: number;\n #minContextLength: number;\n #bufTokens: string[] = [];\n #inBuf = '';\n #outBuf = '';\n #currentSegmentId: string;\n\n constructor(func: TokenizeFunc, minTokenLength: number, minContextLength: number) {\n this.#func = func;\n this.#minTokenLength = minTokenLength;\n this.#minContextLength = minContextLength;\n\n this.#currentSegmentId = randomUUID();\n }\n\n /** Push a string of text into the token stream */\n pushText(text: string) {\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n this.#inBuf += text;\n if (this.#inBuf.length < this.#minContextLength) return;\n\n while (true) {\n const tokens = this.#func(this.#inBuf);\n if (tokens.length <= 1) break;\n\n if (this.#outBuf) this.#outBuf += ' ';\n\n const tok = tokens.shift()!;\n let tokText = tok as string;\n if (tok.length > 1 && typeof tok[1] === 'number') {\n tokText = tok[0];\n }\n\n this.#outBuf += tokText;\n if (this.#outBuf.length >= this.#minTokenLength) {\n this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });\n this.#outBuf = '';\n }\n\n if (typeof tok! !== 'string') {\n this.#inBuf = this.#inBuf.slice(tok![2]);\n } else {\n this.#inBuf = this.#inBuf\n .slice(Math.max(0, this.#inBuf.indexOf(tok)) + tok.length)\n .trimStart();\n }\n }\n }\n\n /** Flush the stream, causing it to process all pending text */\n flush() {\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.#inBuf || this.#outBuf) {\n const tokens = this.#func(this.#inBuf);\n if (tokens) {\n if (this.#outBuf) this.#outBuf += ' ';\n\n if (typeof tokens[0] !== 'string') {\n this.#outBuf += tokens.map((tok) => tok[0]).join(' ');\n } else {\n this.#outBuf += tokens.join(' ');\n }\n }\n\n if (this.#outBuf) {\n this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });\n }\n\n this.#currentSegmentId = randomUUID();\n }\n\n this.#inBuf = '';\n this.#outBuf = '';\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.flush();\n this.close();\n }\n\n next(): Promise<IteratorResult<TokenData>> {\n return this.queue.next();\n }\n\n /** Close both the input and output of the token stream */\n close() {\n this.queue.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): BufferedTokenStream {\n return this;\n }\n}\n\nexport class BufferedSentenceStream extends SentenceStream {\n #stream: BufferedTokenStream;\n\n constructor(func: TokenizeFunc, minTokenLength: number, minContextLength: number) {\n super();\n this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);\n }\n\n pushText(text: string) {\n this.#stream.pushText(text);\n }\n\n flush() {\n this.#stream.flush();\n }\n\n close() {\n super.close();\n this.#stream.close();\n }\n\n next(): Promise<IteratorResult<TokenData>> {\n return this.#stream.next();\n }\n}\n\nexport class BufferedWordStream extends WordStream {\n #stream: BufferedTokenStream;\n\n constructor(func: TokenizeFunc, minTokenLength: number, minContextLength: number) {\n super();\n this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);\n }\n\n pushText(text: string) {\n this.#stream.pushText(text);\n }\n\n flush() {\n this.#stream.flush();\n }\n\n endInput() {\n this.#stream.endInput();\n }\n\n close() {\n this.#stream.close();\n }\n\n next(): Promise<IteratorResult<TokenData>> {\n return this.#stream.next();\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAC3B,SAAS,0BAA0B;AAEnC,SAAS,gBAAgB,kBAAkB;AAIpC,MAAM,oBAAgE;AAAA,EACjE,QAAQ,IAAI,mBAA8B;AAAA,EAC1C,SAAS;AAAA,EAEnB;AAAA,EACA;AAAA,EACA;AAAA,EACA,aAAuB,CAAC;AAAA,EACxB,SAAS;AAAA,EACT,UAAU;AAAA,EACV;AAAA,EAEA,YAAY,MAAoB,gBAAwB,kBAA0B;AAChF,SAAK,QAAQ;AACb,SAAK,kBAAkB;AACvB,SAAK,oBAAoB;AAEzB,SAAK,oBAAoB,WAAW;AAAA,EACtC;AAAA;AAAA,EAGA,SAAS,MAAc;AACrB,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,SAAK,UAAU;AACf,QAAI,KAAK,OAAO,SAAS,KAAK,kBAAmB;AAEjD,WAAO,MAAM;AACX,YAAM,SAAS,KAAK,MAAM,KAAK,MAAM;AACrC,UAAI,OAAO,UAAU,EAAG;AAExB,UAAI,KAAK,QAAS,MAAK,WAAW;AAElC,YAAM,MAAM,OAAO,MAAM;AACzB,UAAI,UAAU;AACd,UAAI,IAAI,SAAS,KAAK,OAAO,IAAI,CAAC,MAAM,UAAU;AAChD,kBAAU,IAAI,CAAC;AAAA,MACjB;AAEA,WAAK,WAAW;AAChB,UAAI,KAAK,QAAQ,UAAU,KAAK,iBAAiB;AAC/C,aAAK,MAAM,IAAI,EAAE,OAAO,KAAK,SAAS,WAAW,KAAK,kBAAkB,CAAC;AACzE,aAAK,UAAU;AAAA,MACjB;AAEA,UAAI,OAAO,QAAS,UAAU;AAC5B,aAAK,SAAS,KAAK,OAAO,MAAM,IAAK,CAAC,CAAC;AAAA,MACzC,OAAO;AACL,aAAK,SAAS,KAAK,OAChB,MAAM,KAAK,IAAI,GAAG,KAAK,OAAO,QAAQ,GAAG,CAAC,IAAI,IAAI,MAAM,EACxD,UAAU;AAAA,MACf;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,UAAU,KAAK,SAAS;AAC/B,YAAM,SAAS,KAAK,MAAM,KAAK,MAAM;AACrC,UAAI,QAAQ;AACV,YAAI,KAAK,QAAS,MAAK,WAAW;AAElC,YAAI,OAAO,OAAO,CAAC,MAAM,UAAU;AACjC,eAAK,WAAW,OAAO,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC,EAAE,KAAK,GAAG;AAAA,QACtD,OAAO;AACL,eAAK,WAAW,OAAO,KAAK,GAAG;AAAA,QACjC;AAAA,MACF;AAEA,UAAI,KAAK,SAAS;AAChB,aAAK,MAAM,IAAI,EAAE,OAAO,KAAK,SAAS,WAAW,KAAK,kBAAkB,CAAC;AAAA,MAC3E;AAEA,WAAK,oBAAoB,WAAW;AAAA,IACtC;AAEA,SAAK,SAAS;AACd,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM;AACX,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,MAAM,KAAK;AAAA,EACzB;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAyB;AAC5C,WAAO;AAAA,EACT;AACF;AAEO,MAAM,+BAA+B,eAAe;AAAA,EACzD;AAAA,EAEA,YAAY,MAAoB,gBAAwB,kBAA0B;AAChF,UAAM;AACN,SAAK,UAAU,IAAI,oBAAoB,MAAM,gBAAgB,gBAAgB;AAAA,EAC/E;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,QAAQ,SAAS,IAAI;AAAA,EAC5B;AAAA,EAEA,QAAQ;AACN,SAAK,QAAQ,MAAM;AAAA,EACrB;AAAA,EAEA,QAAQ;AACN,UAAM,MAAM;AACZ,SAAK,QAAQ,MAAM;AAAA,EACrB;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,QAAQ,KAAK;AAAA,EAC3B;AACF;AAEO,MAAM,2BAA2B,WAAW;AAAA,EACjD;AAAA,EAEA,YAAY,MAAoB,gBAAwB,kBAA0B;AAChF,UAAM;AACN,SAAK,UAAU,IAAI,oBAAoB,MAAM,gBAAgB,gBAAgB;AAAA,EAC/E;AAAA,EAEA,SAAS,MAAc;AACrB,SAAK,QAAQ,SAAS,IAAI;AAAA,EAC5B;AAAA,EAEA,QAAQ;AACN,SAAK,QAAQ,MAAM;AAAA,EACrB;AAAA,EAEA,WAAW;AACT,SAAK,QAAQ,SAAS;AAAA,EACxB;AAAA,EAEA,QAAQ;AACN,SAAK,QAAQ,MAAM;AAAA,EACrB;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,QAAQ,KAAK;AAAA,EAC3B;AACF;","names":[]}
@@ -0,0 +1,184 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+ var tokenizer_exports = {};
20
+ __export(tokenizer_exports, {
21
+ PUNCTUATIONS: () => PUNCTUATIONS,
22
+ SentenceStream: () => SentenceStream,
23
+ SentenceTokenizer: () => SentenceTokenizer,
24
+ WordStream: () => WordStream,
25
+ WordTokenizer: () => WordTokenizer
26
+ });
27
+ module.exports = __toCommonJS(tokenizer_exports);
28
+ var import_utils = require("../utils.cjs");
29
+ const PUNCTUATIONS = [
30
+ "!",
31
+ '"',
32
+ "#",
33
+ "$",
34
+ "%",
35
+ "&",
36
+ "'",
37
+ "(",
38
+ ")",
39
+ "*",
40
+ "+",
41
+ ",",
42
+ "-",
43
+ ".",
44
+ "/",
45
+ ":",
46
+ ";",
47
+ "<",
48
+ "=",
49
+ ">",
50
+ "?",
51
+ "@",
52
+ "[",
53
+ "\\",
54
+ "]",
55
+ "^",
56
+ "_",
57
+ "`",
58
+ "{",
59
+ "|",
60
+ "}",
61
+ "~",
62
+ "\xB1",
63
+ "\u2014",
64
+ "\u2018",
65
+ "\u2019",
66
+ "\u201C",
67
+ "\u201D",
68
+ "\u2026"
69
+ ];
70
+ class SentenceTokenizer {
71
+ }
72
+ class SentenceStream {
73
+ static FLUSH_SENTINEL = Symbol("FLUSH_SENTINEL");
74
+ input = new import_utils.AsyncIterableQueue();
75
+ queue = new import_utils.AsyncIterableQueue();
76
+ #closed = false;
77
+ get closed() {
78
+ return this.#closed;
79
+ }
80
+ /** Push a string of text to the tokenizer */
81
+ pushText(text) {
82
+ if (this.input.closed) {
83
+ throw new Error("Input is closed");
84
+ }
85
+ if (this.#closed) {
86
+ throw new Error("Stream is closed");
87
+ }
88
+ this.input.put(text);
89
+ }
90
+ /** Flush the tokenizer, causing it to process all pending text */
91
+ flush() {
92
+ if (this.input.closed) {
93
+ throw new Error("Input is closed");
94
+ }
95
+ if (this.#closed) {
96
+ throw new Error("Stream is closed");
97
+ }
98
+ this.input.put(SentenceStream.FLUSH_SENTINEL);
99
+ }
100
+ /** Mark the input as ended and forbid additional pushes */
101
+ endInput() {
102
+ if (this.input.closed) {
103
+ throw new Error("Input is closed");
104
+ }
105
+ if (this.#closed) {
106
+ throw new Error("Stream is closed");
107
+ }
108
+ this.input.close();
109
+ }
110
+ next() {
111
+ return this.queue.next();
112
+ }
113
+ /** Close both the input and output of the tokenizer stream */
114
+ close() {
115
+ this.input.close();
116
+ this.queue.close();
117
+ this.#closed = true;
118
+ }
119
+ [Symbol.asyncIterator]() {
120
+ return this;
121
+ }
122
+ }
123
+ class WordTokenizer {
124
+ }
125
+ class WordStream {
126
+ static FLUSH_SENTINEL = Symbol("FLUSH_SENTINEL");
127
+ input = new import_utils.AsyncIterableQueue();
128
+ queue = new import_utils.AsyncIterableQueue();
129
+ #closed = false;
130
+ get closed() {
131
+ return this.#closed;
132
+ }
133
+ /** Push a string of text to the tokenizer */
134
+ pushText(text) {
135
+ if (this.input.closed) {
136
+ throw new Error("Input is closed");
137
+ }
138
+ if (this.#closed) {
139
+ throw new Error("Stream is closed");
140
+ }
141
+ this.input.put(text);
142
+ }
143
+ /** Flush the tokenizer, causing it to process all pending text */
144
+ flush() {
145
+ if (this.input.closed) {
146
+ throw new Error("Input is closed");
147
+ }
148
+ if (this.#closed) {
149
+ throw new Error("Stream is closed");
150
+ }
151
+ this.input.put(WordStream.FLUSH_SENTINEL);
152
+ }
153
+ /** Mark the input as ended and forbid additional pushes */
154
+ endInput() {
155
+ if (this.input.closed) {
156
+ throw new Error("Input is closed");
157
+ }
158
+ if (this.#closed) {
159
+ throw new Error("Stream is closed");
160
+ }
161
+ this.input.close();
162
+ }
163
+ next() {
164
+ return this.queue.next();
165
+ }
166
+ /** Close both the input and output of the tokenizer stream */
167
+ close() {
168
+ this.input.close();
169
+ this.queue.close();
170
+ this.#closed = true;
171
+ }
172
+ [Symbol.asyncIterator]() {
173
+ return this;
174
+ }
175
+ }
176
+ // Annotate the CommonJS export names for ESM import in node:
177
+ 0 && (module.exports = {
178
+ PUNCTUATIONS,
179
+ SentenceStream,
180
+ SentenceTokenizer,
181
+ WordStream,
182
+ WordTokenizer
183
+ });
184
+ //# sourceMappingURL=tokenizer.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/tokenize/tokenizer.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AsyncIterableQueue } from '../utils.js';\n\n// prettier-ignore\nexport const PUNCTUATIONS = [\n '!', '\"', '#', '$', '%', '&', \"'\", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=',\n '>', '?', '@', '[', '\\\\', ']', '^', '_', '`', '{', '|', '}', '~', '±', '—', '‘', '’', '“', '”',\n '…',\n]\n\nexport interface TokenData {\n segmentId: string;\n token: string;\n}\n\nexport abstract class SentenceTokenizer {\n abstract tokenize(text: string, language?: string): string[];\n\n /**\n * Returns a {@link SentenceStream} that can be used to push strings and receive smaller segments.\n */\n abstract stream(): SentenceStream;\n}\n\nexport abstract class SentenceStream {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<string | typeof SentenceStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<TokenData>();\n #closed = false;\n\n get closed(): boolean {\n return this.#closed;\n }\n\n /** Push a string of text to the tokenizer */\n pushText(text: string) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.#closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(text);\n }\n\n /** Flush the tokenizer, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.#closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SentenceStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.#closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<TokenData>> {\n return this.queue.next();\n }\n\n /** Close both the input and output of the tokenizer stream */\n close() {\n this.input.close();\n this.queue.close();\n this.#closed = true;\n }\n\n [Symbol.asyncIterator](): SentenceStream {\n return this;\n }\n}\n\nexport abstract class WordTokenizer {\n abstract tokenize(text: string, language?: string): string[];\n\n /**\n * Returns a {@link WordStream} that can be used to push words and receive smaller segments.\n */\n abstract stream(): WordStream;\n}\n\nexport abstract class WordStream {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<string | typeof WordStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<TokenData>();\n #closed = false;\n\n get closed(): boolean {\n return this.#closed;\n }\n\n /** Push a string of text to the tokenizer */\n pushText(text: string) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.#closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(text);\n }\n\n /** Flush the tokenizer, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.#closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(WordStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.#closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<TokenData>> {\n return this.queue.next();\n }\n\n /** Close both the input and output of the tokenizer stream */\n close() {\n this.input.close();\n this.queue.close();\n this.#closed = true;\n }\n\n [Symbol.asyncIterator](): WordStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,mBAAmC;AAG5B,MAAM,eAAe;AAAA,EAC1B;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAC1F;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAM;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAAK;AAAA,EAC3F;AACF;AAOO,MAAe,kBAAkB;AAOxC;AAEO,MAAe,eAAe;AAAA,EACnC,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAkE;AAAA,EAC9E,QAAQ,IAAI,gCAA8B;AAAA,EACpD,UAAU;AAAA,EAEV,IAAI,SAAkB;AACpB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,SAAS,MAAc;AACrB,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,eAAe,cAAc;AAAA,EAC9C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,MAAM,KAAK;AAAA,EACzB;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAoB;AACvC,WAAO;AAAA,EACT;AACF;AAEO,MAAe,cAAc;AAOpC;AAEO,MAAe,WAAW;AAAA,EAC/B,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAA8D;AAAA,EAC1E,QAAQ,IAAI,gCAA8B;AAAA,EACpD,UAAU;AAAA,EAEV,IAAI,SAAkB;AACpB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,SAAS,MAAc;AACrB,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,WAAW,cAAc;AAAA,EAC1C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA2C;AACzC,WAAO,KAAK,MAAM,KAAK;AAAA,EACzB;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAgB;AACnC,WAAO;AAAA,EACT;AACF;","names":[]}