@livekit/agents 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/README.md +17 -0
  2. package/dist/audio.cjs +77 -0
  3. package/dist/audio.cjs.map +1 -0
  4. package/dist/audio.js +48 -37
  5. package/dist/audio.js.map +1 -1
  6. package/dist/cli.cjs +131 -0
  7. package/dist/cli.cjs.map +1 -0
  8. package/dist/cli.js +96 -122
  9. package/dist/cli.js.map +1 -1
  10. package/dist/generator.cjs +36 -0
  11. package/dist/generator.cjs.map +1 -0
  12. package/dist/generator.js +8 -22
  13. package/dist/generator.js.map +1 -1
  14. package/dist/http_server.cjs +72 -0
  15. package/dist/http_server.cjs.map +1 -0
  16. package/dist/http_server.d.ts +1 -1
  17. package/dist/http_server.js +44 -47
  18. package/dist/http_server.js.map +1 -1
  19. package/dist/index.cjs +78 -0
  20. package/dist/index.cjs.map +1 -0
  21. package/dist/index.js +26 -28
  22. package/dist/index.js.map +1 -1
  23. package/dist/ipc/job_executor.cjs +33 -0
  24. package/dist/ipc/job_executor.cjs.map +1 -0
  25. package/dist/ipc/job_executor.js +7 -4
  26. package/dist/ipc/job_executor.js.map +1 -1
  27. package/dist/ipc/job_main.cjs +147 -0
  28. package/dist/ipc/job_main.cjs.map +1 -0
  29. package/dist/ipc/job_main.d.ts +1 -1
  30. package/dist/ipc/job_main.js +103 -103
  31. package/dist/ipc/job_main.js.map +1 -1
  32. package/dist/ipc/message.cjs +17 -0
  33. package/dist/ipc/message.cjs.map +1 -0
  34. package/dist/ipc/message.js +0 -1
  35. package/dist/ipc/message.js.map +1 -1
  36. package/dist/ipc/proc_job_executor.cjs +174 -0
  37. package/dist/ipc/proc_job_executor.cjs.map +1 -0
  38. package/dist/ipc/proc_job_executor.js +130 -126
  39. package/dist/ipc/proc_job_executor.js.map +1 -1
  40. package/dist/ipc/proc_pool.cjs +126 -0
  41. package/dist/ipc/proc_pool.cjs.map +1 -0
  42. package/dist/ipc/proc_pool.js +93 -96
  43. package/dist/ipc/proc_pool.js.map +1 -1
  44. package/dist/job.cjs +230 -0
  45. package/dist/job.cjs.map +1 -0
  46. package/dist/job.js +195 -198
  47. package/dist/job.js.map +1 -1
  48. package/dist/llm/chat_context.cjs +131 -0
  49. package/dist/llm/chat_context.cjs.map +1 -0
  50. package/dist/llm/chat_context.js +98 -86
  51. package/dist/llm/chat_context.js.map +1 -1
  52. package/dist/llm/function_context.cjs +103 -0
  53. package/dist/llm/function_context.cjs.map +1 -0
  54. package/dist/llm/function_context.js +72 -81
  55. package/dist/llm/function_context.js.map +1 -1
  56. package/dist/llm/function_context.test.cjs +218 -0
  57. package/dist/llm/function_context.test.cjs.map +1 -0
  58. package/dist/llm/function_context.test.js +209 -210
  59. package/dist/llm/function_context.test.js.map +1 -1
  60. package/dist/llm/index.cjs +43 -0
  61. package/dist/llm/index.cjs.map +1 -0
  62. package/dist/llm/index.js +22 -6
  63. package/dist/llm/index.js.map +1 -1
  64. package/dist/llm/llm.cjs +76 -0
  65. package/dist/llm/llm.cjs.map +1 -0
  66. package/dist/llm/llm.js +48 -42
  67. package/dist/llm/llm.js.map +1 -1
  68. package/dist/log.cjs +57 -0
  69. package/dist/log.cjs.map +1 -0
  70. package/dist/log.js +27 -26
  71. package/dist/log.js.map +1 -1
  72. package/dist/multimodal/agent_playout.cjs +228 -0
  73. package/dist/multimodal/agent_playout.cjs.map +1 -0
  74. package/dist/multimodal/agent_playout.d.ts +1 -1
  75. package/dist/multimodal/agent_playout.js +193 -180
  76. package/dist/multimodal/agent_playout.js.map +1 -1
  77. package/dist/multimodal/index.cjs +25 -0
  78. package/dist/multimodal/index.cjs.map +1 -0
  79. package/dist/multimodal/index.js +2 -5
  80. package/dist/multimodal/index.js.map +1 -1
  81. package/dist/multimodal/multimodal_agent.cjs +404 -0
  82. package/dist/multimodal/multimodal_agent.cjs.map +1 -0
  83. package/dist/multimodal/multimodal_agent.d.ts +1 -1
  84. package/dist/multimodal/multimodal_agent.js +351 -330
  85. package/dist/multimodal/multimodal_agent.js.map +1 -1
  86. package/dist/pipeline/agent_output.cjs +172 -0
  87. package/dist/pipeline/agent_output.cjs.map +1 -0
  88. package/dist/pipeline/agent_output.js +136 -138
  89. package/dist/pipeline/agent_output.js.map +1 -1
  90. package/dist/pipeline/agent_playout.cjs +169 -0
  91. package/dist/pipeline/agent_playout.cjs.map +1 -0
  92. package/dist/pipeline/agent_playout.js +126 -136
  93. package/dist/pipeline/agent_playout.js.map +1 -1
  94. package/dist/pipeline/human_input.cjs +158 -0
  95. package/dist/pipeline/human_input.cjs.map +1 -0
  96. package/dist/pipeline/human_input.js +124 -125
  97. package/dist/pipeline/human_input.js.map +1 -1
  98. package/dist/pipeline/index.cjs +31 -0
  99. package/dist/pipeline/index.cjs.map +1 -0
  100. package/dist/pipeline/index.js +8 -4
  101. package/dist/pipeline/index.js.map +1 -1
  102. package/dist/pipeline/pipeline_agent.cjs +642 -0
  103. package/dist/pipeline/pipeline_agent.cjs.map +1 -0
  104. package/dist/pipeline/pipeline_agent.js +595 -651
  105. package/dist/pipeline/pipeline_agent.js.map +1 -1
  106. package/dist/pipeline/speech_handle.cjs +128 -0
  107. package/dist/pipeline/speech_handle.cjs.map +1 -0
  108. package/dist/pipeline/speech_handle.js +102 -100
  109. package/dist/pipeline/speech_handle.js.map +1 -1
  110. package/dist/plugin.cjs +46 -0
  111. package/dist/plugin.cjs.map +1 -0
  112. package/dist/plugin.js +20 -20
  113. package/dist/plugin.js.map +1 -1
  114. package/dist/stt/index.cjs +38 -0
  115. package/dist/stt/index.cjs.map +1 -0
  116. package/dist/stt/index.js +13 -5
  117. package/dist/stt/index.js.map +1 -1
  118. package/dist/stt/stream_adapter.cjs +87 -0
  119. package/dist/stt/stream_adapter.cjs.map +1 -0
  120. package/dist/stt/stream_adapter.js +58 -55
  121. package/dist/stt/stream_adapter.js.map +1 -1
  122. package/dist/stt/stt.cjs +98 -0
  123. package/dist/stt/stt.cjs.map +1 -0
  124. package/dist/stt/stt.js +63 -98
  125. package/dist/stt/stt.js.map +1 -1
  126. package/dist/tokenize/basic/basic.cjs +98 -0
  127. package/dist/tokenize/basic/basic.cjs.map +1 -0
  128. package/dist/tokenize/basic/basic.js +56 -45
  129. package/dist/tokenize/basic/basic.js.map +1 -1
  130. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  131. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  132. package/dist/tokenize/basic/hyphenator.js +66 -82
  133. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  134. package/dist/tokenize/basic/index.cjs +35 -0
  135. package/dist/tokenize/basic/index.cjs.map +1 -0
  136. package/dist/tokenize/basic/index.js +7 -4
  137. package/dist/tokenize/basic/index.js.map +1 -1
  138. package/dist/tokenize/basic/paragraph.cjs +57 -0
  139. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  140. package/dist/tokenize/basic/paragraph.js +30 -35
  141. package/dist/tokenize/basic/paragraph.js.map +1 -1
  142. package/dist/tokenize/basic/sentence.cjs +83 -0
  143. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  144. package/dist/tokenize/basic/sentence.js +56 -57
  145. package/dist/tokenize/basic/sentence.js.map +1 -1
  146. package/dist/tokenize/basic/word.cjs +44 -0
  147. package/dist/tokenize/basic/word.cjs.map +1 -0
  148. package/dist/tokenize/basic/word.js +17 -20
  149. package/dist/tokenize/basic/word.js.map +1 -1
  150. package/dist/tokenize/index.cjs +55 -0
  151. package/dist/tokenize/index.cjs.map +1 -0
  152. package/dist/tokenize/index.js +18 -7
  153. package/dist/tokenize/index.js.map +1 -1
  154. package/dist/tokenize/token_stream.cjs +164 -0
  155. package/dist/tokenize/token_stream.cjs.map +1 -0
  156. package/dist/tokenize/token_stream.js +133 -139
  157. package/dist/tokenize/token_stream.js.map +1 -1
  158. package/dist/tokenize/tokenizer.cjs +184 -0
  159. package/dist/tokenize/tokenizer.cjs.map +1 -0
  160. package/dist/tokenize/tokenizer.js +138 -99
  161. package/dist/tokenize/tokenizer.js.map +1 -1
  162. package/dist/transcription.cjs +131 -0
  163. package/dist/transcription.cjs.map +1 -0
  164. package/dist/transcription.js +99 -96
  165. package/dist/transcription.js.map +1 -1
  166. package/dist/tts/index.cjs +38 -0
  167. package/dist/tts/index.cjs.map +1 -0
  168. package/dist/tts/index.js +13 -5
  169. package/dist/tts/index.js.map +1 -1
  170. package/dist/tts/stream_adapter.cjs +78 -0
  171. package/dist/tts/stream_adapter.cjs.map +1 -0
  172. package/dist/tts/stream_adapter.js +50 -47
  173. package/dist/tts/stream_adapter.js.map +1 -1
  174. package/dist/tts/tts.cjs +127 -0
  175. package/dist/tts/tts.cjs.map +1 -0
  176. package/dist/tts/tts.js +90 -120
  177. package/dist/tts/tts.js.map +1 -1
  178. package/dist/utils.cjs +284 -0
  179. package/dist/utils.cjs.map +1 -0
  180. package/dist/utils.js +242 -247
  181. package/dist/utils.js.map +1 -1
  182. package/dist/vad.cjs +92 -0
  183. package/dist/vad.cjs.map +1 -0
  184. package/dist/vad.js +57 -52
  185. package/dist/vad.js.map +1 -1
  186. package/dist/version.cjs +29 -0
  187. package/dist/version.cjs.map +1 -0
  188. package/dist/version.js +4 -4
  189. package/dist/version.js.map +1 -1
  190. package/dist/worker.cjs +576 -0
  191. package/dist/worker.cjs.map +1 -0
  192. package/dist/worker.d.ts +1 -1
  193. package/dist/worker.js +511 -484
  194. package/dist/worker.js.map +1 -1
  195. package/package.json +18 -8
  196. package/src/ipc/job_main.ts +66 -64
  197. package/src/pipeline/pipeline_agent.ts +23 -23
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+ var basic_exports = {};
30
+ __export(basic_exports, {
31
+ SentenceTokenizer: () => SentenceTokenizer,
32
+ WordTokenizer: () => WordTokenizer,
33
+ hyphenateWord: () => hyphenateWord,
34
+ tokenizeParagraphs: () => tokenizeParagraphs
35
+ });
36
+ module.exports = __toCommonJS(basic_exports);
37
+ var tokenizer = __toESM(require("../index.cjs"), 1);
38
+ var import_token_stream = require("../token_stream.cjs");
39
+ var import_hyphenator = require("./hyphenator.cjs");
40
+ var import_paragraph = require("./paragraph.cjs");
41
+ var import_sentence = require("./sentence.cjs");
42
+ var import_word = require("./word.cjs");
43
+ class SentenceTokenizer extends tokenizer.SentenceTokenizer {
44
+ #config;
45
+ constructor(language = "en-US", minSentenceLength = 20, streamContextLength = 10) {
46
+ super();
47
+ this.#config = {
48
+ language,
49
+ minSentenceLength,
50
+ streamContextLength
51
+ };
52
+ }
53
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
54
+ tokenize(text, language) {
55
+ return (0, import_sentence.splitSentences)(text, this.#config.minSentenceLength).map((tok) => tok[0]);
56
+ }
57
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
58
+ stream(language) {
59
+ return new import_token_stream.BufferedSentenceStream(
60
+ (text) => (0, import_sentence.splitSentences)(text, this.#config.minSentenceLength),
61
+ this.#config.minSentenceLength,
62
+ this.#config.streamContextLength
63
+ );
64
+ }
65
+ }
66
+ class WordTokenizer extends tokenizer.WordTokenizer {
67
+ #ignorePunctuation;
68
+ constructor(ignorePunctuation = true) {
69
+ super();
70
+ this.#ignorePunctuation = ignorePunctuation;
71
+ }
72
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
73
+ tokenize(text, language) {
74
+ return (0, import_word.splitWords)(text, this.#ignorePunctuation).map((tok) => tok[0]);
75
+ }
76
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
77
+ stream(language) {
78
+ return new import_token_stream.BufferedWordStream(
79
+ (text) => (0, import_word.splitWords)(text, this.#ignorePunctuation),
80
+ 1,
81
+ 1
82
+ );
83
+ }
84
+ }
85
+ const hyphenateWord = (word) => {
86
+ return import_hyphenator.hyphenator.hyphenateWord(word);
87
+ };
88
+ const tokenizeParagraphs = (text) => {
89
+ return (0, import_paragraph.splitParagraphs)(text).map((tok) => tok[0]);
90
+ };
91
+ // Annotate the CommonJS export names for ESM import in node:
92
+ 0 && (module.exports = {
93
+ SentenceTokenizer,
94
+ WordTokenizer,
95
+ hyphenateWord,
96
+ tokenizeParagraphs
97
+ });
98
+ //# sourceMappingURL=basic.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport * as tokenizer from '../index.js';\nimport { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';\nimport { hyphenator } from './hyphenator.js';\nimport { splitParagraphs } from './paragraph.js';\nimport { splitSentences } from './sentence.js';\nimport { splitWords } from './word.js';\n\ninterface TokenizerOptions {\n language: string;\n minSentenceLength: number;\n streamContextLength: number;\n}\n\nexport class SentenceTokenizer extends tokenizer.SentenceTokenizer {\n #config: TokenizerOptions;\n\n constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {\n super();\n this.#config = {\n language,\n minSentenceLength,\n streamContextLength,\n };\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.SentenceStream {\n return new BufferedSentenceStream(\n (text: string) => splitSentences(text, this.#config.minSentenceLength),\n this.#config.minSentenceLength,\n this.#config.streamContextLength,\n );\n }\n}\n\nexport class WordTokenizer extends tokenizer.WordTokenizer {\n #ignorePunctuation: boolean;\n\n constructor(ignorePunctuation = true) {\n super();\n this.#ignorePunctuation = ignorePunctuation;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.WordStream {\n return new BufferedWordStream(\n (text: string) => splitWords(text, this.#ignorePunctuation),\n 1,\n 1,\n );\n }\n}\n\nexport const hyphenateWord = (word: string): string[] => {\n return hyphenator.hyphenateWord(word);\n};\n\nexport const tokenizeParagraphs = (text: string): string[] => {\n return splitParagraphs(text).map((tok) => tok[0]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,gBAA2B;AAC3B,0BAA2D;AAC3D,wBAA2B;AAC3B,uBAAgC;AAChC,sBAA+B;AAC/B,kBAA2B;AAQpB,MAAM,0BAA0B,UAAU,kBAAkB;AAAA,EACjE;AAAA,EAEA,YAAY,WAAW,SAAS,oBAAoB,IAAI,sBAAsB,IAAI;AAChF,UAAM;AACN,SAAK,UAAU;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,eAAO,gCAAe,MAAM,KAAK,QAAQ,iBAAiB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACjF;AAAA;AAAA,EAGA,OAAO,UAA6C;AAClD,WAAO,IAAI;AAAA,MACT,CAAC,aAAiB,gCAAe,MAAM,KAAK,QAAQ,iBAAiB;AAAA,MACrE,KAAK,QAAQ;AAAA,MACb,KAAK,QAAQ;AAAA,IACf;AAAA,EACF;AACF;AAEO,MAAM,sBAAsB,UAAU,cAAc;AAAA,EACzD;AAAA,EAEA,YAAY,oBAAoB,MAAM;AACpC,UAAM;AACN,SAAK,qBAAqB;AAAA,EAC5B;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,eAAO,wBAAW,MAAM,KAAK,kBAAkB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACtE;AAAA;AAAA,EAGA,OAAO,UAAyC;AAC9C,WAAO,IAAI;AAAA,MACT,CAAC,aAAiB,wBAAW,MAAM,KAAK,kBAAkB;AAAA,MAC1D;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,gBAAgB,CAAC,SAA2B;AACvD,SAAO,6BAAW,cAAc,IAAI;AACtC;AAEO,MAAM,qBAAqB,CAAC,SAA2B;AAC5D,aAAO,kCAAgB,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAClD;","names":[]}
@@ -1,50 +1,61 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- import * as tokenizer from '../index.js';
5
- import { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';
6
- import { hyphenator } from './hyphenator.js';
7
- import { splitParagraphs } from './paragraph.js';
8
- import { splitSentences } from './sentence.js';
9
- import { splitWords } from './word.js';
10
- export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
11
- #config;
12
- constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {
13
- super();
14
- this.#config = {
15
- language,
16
- minSentenceLength,
17
- streamContextLength,
18
- };
19
- }
20
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
21
- tokenize(text, language) {
22
- return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);
23
- }
24
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
25
- stream(language) {
26
- return new BufferedSentenceStream((text) => splitSentences(text, this.#config.minSentenceLength), this.#config.minSentenceLength, this.#config.streamContextLength);
27
- }
1
+ import * as tokenizer from "../index.js";
2
+ import { BufferedSentenceStream, BufferedWordStream } from "../token_stream.js";
3
+ import { hyphenator } from "./hyphenator.js";
4
+ import { splitParagraphs } from "./paragraph.js";
5
+ import { splitSentences } from "./sentence.js";
6
+ import { splitWords } from "./word.js";
7
+ class SentenceTokenizer extends tokenizer.SentenceTokenizer {
8
+ #config;
9
+ constructor(language = "en-US", minSentenceLength = 20, streamContextLength = 10) {
10
+ super();
11
+ this.#config = {
12
+ language,
13
+ minSentenceLength,
14
+ streamContextLength
15
+ };
16
+ }
17
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
18
+ tokenize(text, language) {
19
+ return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);
20
+ }
21
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
22
+ stream(language) {
23
+ return new BufferedSentenceStream(
24
+ (text) => splitSentences(text, this.#config.minSentenceLength),
25
+ this.#config.minSentenceLength,
26
+ this.#config.streamContextLength
27
+ );
28
+ }
28
29
  }
29
- export class WordTokenizer extends tokenizer.WordTokenizer {
30
- #ignorePunctuation;
31
- constructor(ignorePunctuation = true) {
32
- super();
33
- this.#ignorePunctuation = ignorePunctuation;
34
- }
35
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
36
- tokenize(text, language) {
37
- return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);
38
- }
39
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
40
- stream(language) {
41
- return new BufferedWordStream((text) => splitWords(text, this.#ignorePunctuation), 1, 1);
42
- }
30
+ class WordTokenizer extends tokenizer.WordTokenizer {
31
+ #ignorePunctuation;
32
+ constructor(ignorePunctuation = true) {
33
+ super();
34
+ this.#ignorePunctuation = ignorePunctuation;
35
+ }
36
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
37
+ tokenize(text, language) {
38
+ return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);
39
+ }
40
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
41
+ stream(language) {
42
+ return new BufferedWordStream(
43
+ (text) => splitWords(text, this.#ignorePunctuation),
44
+ 1,
45
+ 1
46
+ );
47
+ }
43
48
  }
44
- export const hyphenateWord = (word) => {
45
- return hyphenator.hyphenateWord(word);
49
+ const hyphenateWord = (word) => {
50
+ return hyphenator.hyphenateWord(word);
46
51
  };
47
- export const tokenizeParagraphs = (text) => {
48
- return splitParagraphs(text).map((tok) => tok[0]);
52
+ const tokenizeParagraphs = (text) => {
53
+ return splitParagraphs(text).map((tok) => tok[0]);
54
+ };
55
+ export {
56
+ SentenceTokenizer,
57
+ WordTokenizer,
58
+ hyphenateWord,
59
+ tokenizeParagraphs
49
60
  };
50
61
  //# sourceMappingURL=basic.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"basic.js","sourceRoot":"","sources":["../../../src/tokenize/basic/basic.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,KAAK,SAAS,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAChF,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAQvC,MAAM,OAAO,iBAAkB,SAAQ,SAAS,CAAC,iBAAiB;IAChE,OAAO,CAAmB;IAE1B,YAAY,QAAQ,GAAG,OAAO,EAAE,iBAAiB,GAAG,EAAE,EAAE,mBAAmB,GAAG,EAAE;QAC9E,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG;YACb,QAAQ;YACR,iBAAiB;YACjB,mBAAmB;SACpB,CAAC;IACJ,CAAC;IAED,6DAA6D;IAC7D,QAAQ,CAAC,IAAY,EAAE,QAAiB;QACtC,OAAO,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACnF,CAAC;IAED,6DAA6D;IAC7D,MAAM,CAAC,QAAiB;QACtB,OAAO,IAAI,sBAAsB,CAC/B,CAAC,IAAY,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC,EACtE,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAC9B,IAAI,CAAC,OAAO,CAAC,mBAAmB,CACjC,CAAC;IACJ,CAAC;CACF;AAED,MAAM,OAAO,aAAc,SAAQ,SAAS,CAAC,aAAa;IACxD,kBAAkB,CAAU;IAE5B,YAAY,iBAAiB,GAAG,IAAI;QAClC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,kBAAkB,GAAG,iBAAiB,CAAC;IAC9C,CAAC;IAED,6DAA6D;IAC7D,QAAQ,CAAC,IAAY,EAAE,QAAiB;QACtC,OAAO,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,6DAA6D;IAC7D,MAAM,CAAC,QAAiB;QACtB,OAAO,IAAI,kBAAkB,CAC3B,CAAC,IAAY,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,kBAAkB,CAAC,EAC3D,CAAC,EACD,CAAC,CACF,CAAC;IACJ,CAAC;CACF;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,IAAY,EAAY,EAAE;IACtD,OAAO,UAAU,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;AACxC,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,IAAY,EAAY,EAAE;IAC3D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACpD,CAAC,CAAC"}
1
+ {"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport * as tokenizer from '../index.js';\nimport { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';\nimport { hyphenator } from './hyphenator.js';\nimport { splitParagraphs } from './paragraph.js';\nimport { splitSentences } from './sentence.js';\nimport { splitWords } from './word.js';\n\ninterface TokenizerOptions {\n language: string;\n minSentenceLength: number;\n streamContextLength: number;\n}\n\nexport class SentenceTokenizer extends tokenizer.SentenceTokenizer {\n #config: TokenizerOptions;\n\n constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {\n super();\n this.#config = {\n language,\n minSentenceLength,\n streamContextLength,\n };\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.SentenceStream {\n return new BufferedSentenceStream(\n (text: string) => splitSentences(text, this.#config.minSentenceLength),\n this.#config.minSentenceLength,\n this.#config.streamContextLength,\n );\n }\n}\n\nexport class WordTokenizer extends tokenizer.WordTokenizer {\n #ignorePunctuation: boolean;\n\n constructor(ignorePunctuation = true) {\n super();\n this.#ignorePunctuation = ignorePunctuation;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.WordStream {\n return new BufferedWordStream(\n (text: string) => splitWords(text, this.#ignorePunctuation),\n 1,\n 1,\n );\n }\n}\n\nexport const hyphenateWord = (word: string): string[] => {\n return hyphenator.hyphenateWord(word);\n};\n\nexport const tokenizeParagraphs = (text: string): string[] => {\n return splitParagraphs(text).map((tok) => tok[0]);\n};\n"],"mappings":"AAGA,YAAY,eAAe;AAC3B,SAAS,wBAAwB,0BAA0B;AAC3D,SAAS,kBAAkB;AAC3B,SAAS,uBAAuB;AAChC,SAAS,sBAAsB;AAC/B,SAAS,kBAAkB;AAQpB,MAAM,0BAA0B,UAAU,kBAAkB;AAAA,EACjE;AAAA,EAEA,YAAY,WAAW,SAAS,oBAAoB,IAAI,sBAAsB,IAAI;AAChF,UAAM;AACN,SAAK,UAAU;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,WAAO,eAAe,MAAM,KAAK,QAAQ,iBAAiB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACjF;AAAA;AAAA,EAGA,OAAO,UAA6C;AAClD,WAAO,IAAI;AAAA,MACT,CAAC,SAAiB,eAAe,MAAM,KAAK,QAAQ,iBAAiB;AAAA,MACrE,KAAK,QAAQ;AAAA,MACb,KAAK,QAAQ;AAAA,IACf;AAAA,EACF;AACF;AAEO,MAAM,sBAAsB,UAAU,cAAc;AAAA,EACzD;AAAA,EAEA,YAAY,oBAAoB,MAAM;AACpC,UAAM;AACN,SAAK,qBAAqB;AAAA,EAC5B;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,WAAO,WAAW,MAAM,KAAK,kBAAkB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACtE;AAAA;AAAA,EAGA,OAAO,UAAyC;AAC9C,WAAO,IAAI;AAAA,MACT,CAAC,SAAiB,WAAW,MAAM,KAAK,kBAAkB;AAAA,MAC1D;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,gBAAgB,CAAC,SAA2B;AACvD,SAAO,WAAW,cAAc,IAAI;AACtC;AAEO,MAAM,qBAAqB,CAAC,SAA2B;AAC5D,SAAO,gBAAgB,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAClD;","names":[]}