@livekit/agents 0.4.6 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/README.md +17 -0
  2. package/dist/audio.cjs +77 -0
  3. package/dist/audio.cjs.map +1 -0
  4. package/dist/audio.js +48 -37
  5. package/dist/audio.js.map +1 -1
  6. package/dist/cli.cjs +131 -0
  7. package/dist/cli.cjs.map +1 -0
  8. package/dist/cli.js +96 -122
  9. package/dist/cli.js.map +1 -1
  10. package/dist/generator.cjs +36 -0
  11. package/dist/generator.cjs.map +1 -0
  12. package/dist/generator.js +8 -22
  13. package/dist/generator.js.map +1 -1
  14. package/dist/http_server.cjs +72 -0
  15. package/dist/http_server.cjs.map +1 -0
  16. package/dist/http_server.d.ts +1 -1
  17. package/dist/http_server.js +44 -47
  18. package/dist/http_server.js.map +1 -1
  19. package/dist/index.cjs +78 -0
  20. package/dist/index.cjs.map +1 -0
  21. package/dist/index.js +26 -28
  22. package/dist/index.js.map +1 -1
  23. package/dist/ipc/job_executor.cjs +33 -0
  24. package/dist/ipc/job_executor.cjs.map +1 -0
  25. package/dist/ipc/job_executor.js +7 -4
  26. package/dist/ipc/job_executor.js.map +1 -1
  27. package/dist/ipc/job_main.cjs +147 -0
  28. package/dist/ipc/job_main.cjs.map +1 -0
  29. package/dist/ipc/job_main.d.ts +1 -1
  30. package/dist/ipc/job_main.js +103 -103
  31. package/dist/ipc/job_main.js.map +1 -1
  32. package/dist/ipc/message.cjs +17 -0
  33. package/dist/ipc/message.cjs.map +1 -0
  34. package/dist/ipc/message.js +0 -1
  35. package/dist/ipc/message.js.map +1 -1
  36. package/dist/ipc/proc_job_executor.cjs +174 -0
  37. package/dist/ipc/proc_job_executor.cjs.map +1 -0
  38. package/dist/ipc/proc_job_executor.js +130 -126
  39. package/dist/ipc/proc_job_executor.js.map +1 -1
  40. package/dist/ipc/proc_pool.cjs +126 -0
  41. package/dist/ipc/proc_pool.cjs.map +1 -0
  42. package/dist/ipc/proc_pool.js +93 -96
  43. package/dist/ipc/proc_pool.js.map +1 -1
  44. package/dist/job.cjs +230 -0
  45. package/dist/job.cjs.map +1 -0
  46. package/dist/job.d.ts +6 -1
  47. package/dist/job.d.ts.map +1 -1
  48. package/dist/job.js +195 -198
  49. package/dist/job.js.map +1 -1
  50. package/dist/llm/chat_context.cjs +131 -0
  51. package/dist/llm/chat_context.cjs.map +1 -0
  52. package/dist/llm/chat_context.js +98 -86
  53. package/dist/llm/chat_context.js.map +1 -1
  54. package/dist/llm/function_context.cjs +103 -0
  55. package/dist/llm/function_context.cjs.map +1 -0
  56. package/dist/llm/function_context.js +72 -81
  57. package/dist/llm/function_context.js.map +1 -1
  58. package/dist/llm/function_context.test.cjs +218 -0
  59. package/dist/llm/function_context.test.cjs.map +1 -0
  60. package/dist/llm/function_context.test.js +209 -210
  61. package/dist/llm/function_context.test.js.map +1 -1
  62. package/dist/llm/index.cjs +43 -0
  63. package/dist/llm/index.cjs.map +1 -0
  64. package/dist/llm/index.js +22 -6
  65. package/dist/llm/index.js.map +1 -1
  66. package/dist/llm/llm.cjs +76 -0
  67. package/dist/llm/llm.cjs.map +1 -0
  68. package/dist/llm/llm.js +48 -42
  69. package/dist/llm/llm.js.map +1 -1
  70. package/dist/log.cjs +57 -0
  71. package/dist/log.cjs.map +1 -0
  72. package/dist/log.js +27 -26
  73. package/dist/log.js.map +1 -1
  74. package/dist/multimodal/agent_playout.cjs +228 -0
  75. package/dist/multimodal/agent_playout.cjs.map +1 -0
  76. package/dist/multimodal/agent_playout.d.ts +1 -1
  77. package/dist/multimodal/agent_playout.js +193 -180
  78. package/dist/multimodal/agent_playout.js.map +1 -1
  79. package/dist/multimodal/index.cjs +25 -0
  80. package/dist/multimodal/index.cjs.map +1 -0
  81. package/dist/multimodal/index.js +2 -5
  82. package/dist/multimodal/index.js.map +1 -1
  83. package/dist/multimodal/multimodal_agent.cjs +404 -0
  84. package/dist/multimodal/multimodal_agent.cjs.map +1 -0
  85. package/dist/multimodal/multimodal_agent.d.ts +1 -1
  86. package/dist/multimodal/multimodal_agent.js +351 -330
  87. package/dist/multimodal/multimodal_agent.js.map +1 -1
  88. package/dist/pipeline/agent_output.cjs +172 -0
  89. package/dist/pipeline/agent_output.cjs.map +1 -0
  90. package/dist/pipeline/agent_output.js +136 -138
  91. package/dist/pipeline/agent_output.js.map +1 -1
  92. package/dist/pipeline/agent_playout.cjs +169 -0
  93. package/dist/pipeline/agent_playout.cjs.map +1 -0
  94. package/dist/pipeline/agent_playout.js +126 -136
  95. package/dist/pipeline/agent_playout.js.map +1 -1
  96. package/dist/pipeline/human_input.cjs +158 -0
  97. package/dist/pipeline/human_input.cjs.map +1 -0
  98. package/dist/pipeline/human_input.js +124 -125
  99. package/dist/pipeline/human_input.js.map +1 -1
  100. package/dist/pipeline/index.cjs +31 -0
  101. package/dist/pipeline/index.cjs.map +1 -0
  102. package/dist/pipeline/index.js +8 -4
  103. package/dist/pipeline/index.js.map +1 -1
  104. package/dist/pipeline/pipeline_agent.cjs +642 -0
  105. package/dist/pipeline/pipeline_agent.cjs.map +1 -0
  106. package/dist/pipeline/pipeline_agent.js +595 -651
  107. package/dist/pipeline/pipeline_agent.js.map +1 -1
  108. package/dist/pipeline/speech_handle.cjs +128 -0
  109. package/dist/pipeline/speech_handle.cjs.map +1 -0
  110. package/dist/pipeline/speech_handle.js +102 -100
  111. package/dist/pipeline/speech_handle.js.map +1 -1
  112. package/dist/plugin.cjs +46 -0
  113. package/dist/plugin.cjs.map +1 -0
  114. package/dist/plugin.js +20 -20
  115. package/dist/plugin.js.map +1 -1
  116. package/dist/stt/index.cjs +38 -0
  117. package/dist/stt/index.cjs.map +1 -0
  118. package/dist/stt/index.js +13 -5
  119. package/dist/stt/index.js.map +1 -1
  120. package/dist/stt/stream_adapter.cjs +87 -0
  121. package/dist/stt/stream_adapter.cjs.map +1 -0
  122. package/dist/stt/stream_adapter.js +58 -55
  123. package/dist/stt/stream_adapter.js.map +1 -1
  124. package/dist/stt/stt.cjs +98 -0
  125. package/dist/stt/stt.cjs.map +1 -0
  126. package/dist/stt/stt.js +63 -98
  127. package/dist/stt/stt.js.map +1 -1
  128. package/dist/tokenize/basic/basic.cjs +98 -0
  129. package/dist/tokenize/basic/basic.cjs.map +1 -0
  130. package/dist/tokenize/basic/basic.d.ts +1 -1
  131. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  132. package/dist/tokenize/basic/basic.js +56 -45
  133. package/dist/tokenize/basic/basic.js.map +1 -1
  134. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  135. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  136. package/dist/tokenize/basic/hyphenator.js +66 -82
  137. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  138. package/dist/tokenize/basic/index.cjs +35 -0
  139. package/dist/tokenize/basic/index.cjs.map +1 -0
  140. package/dist/tokenize/basic/index.js +7 -4
  141. package/dist/tokenize/basic/index.js.map +1 -1
  142. package/dist/tokenize/basic/paragraph.cjs +57 -0
  143. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  144. package/dist/tokenize/basic/paragraph.js +30 -35
  145. package/dist/tokenize/basic/paragraph.js.map +1 -1
  146. package/dist/tokenize/basic/sentence.cjs +89 -0
  147. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  148. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  149. package/dist/tokenize/basic/sentence.js +62 -57
  150. package/dist/tokenize/basic/sentence.js.map +1 -1
  151. package/dist/tokenize/basic/word.cjs +44 -0
  152. package/dist/tokenize/basic/word.cjs.map +1 -0
  153. package/dist/tokenize/basic/word.js +17 -20
  154. package/dist/tokenize/basic/word.js.map +1 -1
  155. package/dist/tokenize/index.cjs +55 -0
  156. package/dist/tokenize/index.cjs.map +1 -0
  157. package/dist/tokenize/index.js +18 -7
  158. package/dist/tokenize/index.js.map +1 -1
  159. package/dist/tokenize/token_stream.cjs +164 -0
  160. package/dist/tokenize/token_stream.cjs.map +1 -0
  161. package/dist/tokenize/token_stream.js +133 -139
  162. package/dist/tokenize/token_stream.js.map +1 -1
  163. package/dist/tokenize/tokenizer.cjs +184 -0
  164. package/dist/tokenize/tokenizer.cjs.map +1 -0
  165. package/dist/tokenize/tokenizer.js +138 -99
  166. package/dist/tokenize/tokenizer.js.map +1 -1
  167. package/dist/tokenize/tokenizer.test.cjs +220 -0
  168. package/dist/tokenize/tokenizer.test.cjs.map +1 -0
  169. package/dist/tokenize/tokenizer.test.d.ts +2 -0
  170. package/dist/tokenize/tokenizer.test.d.ts.map +1 -0
  171. package/dist/tokenize/tokenizer.test.js +219 -0
  172. package/dist/tokenize/tokenizer.test.js.map +1 -0
  173. package/dist/transcription.cjs +131 -0
  174. package/dist/transcription.cjs.map +1 -0
  175. package/dist/transcription.js +99 -96
  176. package/dist/transcription.js.map +1 -1
  177. package/dist/tts/index.cjs +38 -0
  178. package/dist/tts/index.cjs.map +1 -0
  179. package/dist/tts/index.js +13 -5
  180. package/dist/tts/index.js.map +1 -1
  181. package/dist/tts/stream_adapter.cjs +78 -0
  182. package/dist/tts/stream_adapter.cjs.map +1 -0
  183. package/dist/tts/stream_adapter.js +50 -47
  184. package/dist/tts/stream_adapter.js.map +1 -1
  185. package/dist/tts/tts.cjs +127 -0
  186. package/dist/tts/tts.cjs.map +1 -0
  187. package/dist/tts/tts.js +90 -120
  188. package/dist/tts/tts.js.map +1 -1
  189. package/dist/utils.cjs +284 -0
  190. package/dist/utils.cjs.map +1 -0
  191. package/dist/utils.js +242 -247
  192. package/dist/utils.js.map +1 -1
  193. package/dist/vad.cjs +92 -0
  194. package/dist/vad.cjs.map +1 -0
  195. package/dist/vad.js +57 -52
  196. package/dist/vad.js.map +1 -1
  197. package/dist/version.cjs +29 -0
  198. package/dist/version.cjs.map +1 -0
  199. package/dist/version.js +4 -4
  200. package/dist/version.js.map +1 -1
  201. package/dist/worker.cjs +577 -0
  202. package/dist/worker.cjs.map +1 -0
  203. package/dist/worker.d.ts +1 -1
  204. package/dist/worker.d.ts.map +1 -1
  205. package/dist/worker.js +512 -484
  206. package/dist/worker.js.map +1 -1
  207. package/package.json +18 -8
  208. package/src/ipc/job_main.ts +66 -64
  209. package/src/job.ts +3 -2
  210. package/src/pipeline/pipeline_agent.ts +23 -23
  211. package/src/tokenize/basic/basic.ts +1 -1
  212. package/src/tokenize/basic/sentence.ts +14 -8
  213. package/src/tokenize/tokenizer.test.ts +255 -0
  214. package/src/worker.ts +1 -0
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+ var basic_exports = {};
30
+ __export(basic_exports, {
31
+ SentenceTokenizer: () => SentenceTokenizer,
32
+ WordTokenizer: () => WordTokenizer,
33
+ hyphenateWord: () => hyphenateWord,
34
+ tokenizeParagraphs: () => tokenizeParagraphs
35
+ });
36
+ module.exports = __toCommonJS(basic_exports);
37
+ var import_token_stream = require("../token_stream.cjs");
38
+ var tokenizer = __toESM(require("../tokenizer.cjs"), 1);
39
+ var import_hyphenator = require("./hyphenator.cjs");
40
+ var import_paragraph = require("./paragraph.cjs");
41
+ var import_sentence = require("./sentence.cjs");
42
+ var import_word = require("./word.cjs");
43
+ class SentenceTokenizer extends tokenizer.SentenceTokenizer {
44
+ #config;
45
+ constructor(language = "en-US", minSentenceLength = 20, streamContextLength = 10) {
46
+ super();
47
+ this.#config = {
48
+ language,
49
+ minSentenceLength,
50
+ streamContextLength
51
+ };
52
+ }
53
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
54
+ tokenize(text, language) {
55
+ return (0, import_sentence.splitSentences)(text, this.#config.minSentenceLength).map((tok) => tok[0]);
56
+ }
57
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
58
+ stream(language) {
59
+ return new import_token_stream.BufferedSentenceStream(
60
+ (text) => (0, import_sentence.splitSentences)(text, this.#config.minSentenceLength),
61
+ this.#config.minSentenceLength,
62
+ this.#config.streamContextLength
63
+ );
64
+ }
65
+ }
66
+ class WordTokenizer extends tokenizer.WordTokenizer {
67
+ #ignorePunctuation;
68
+ constructor(ignorePunctuation = true) {
69
+ super();
70
+ this.#ignorePunctuation = ignorePunctuation;
71
+ }
72
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
73
+ tokenize(text, language) {
74
+ return (0, import_word.splitWords)(text, this.#ignorePunctuation).map((tok) => tok[0]);
75
+ }
76
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
77
+ stream(language) {
78
+ return new import_token_stream.BufferedWordStream(
79
+ (text) => (0, import_word.splitWords)(text, this.#ignorePunctuation),
80
+ 1,
81
+ 1
82
+ );
83
+ }
84
+ }
85
+ const hyphenateWord = (word) => {
86
+ return import_hyphenator.hyphenator.hyphenateWord(word);
87
+ };
88
+ const tokenizeParagraphs = (text) => {
89
+ return (0, import_paragraph.splitParagraphs)(text).map((tok) => tok[0]);
90
+ };
91
+ // Annotate the CommonJS export names for ESM import in node:
92
+ 0 && (module.exports = {
93
+ SentenceTokenizer,
94
+ WordTokenizer,
95
+ hyphenateWord,
96
+ tokenizeParagraphs
97
+ });
98
+ //# sourceMappingURL=basic.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';\nimport * as tokenizer from '../tokenizer.js';\nimport { hyphenator } from './hyphenator.js';\nimport { splitParagraphs } from './paragraph.js';\nimport { splitSentences } from './sentence.js';\nimport { splitWords } from './word.js';\n\ninterface TokenizerOptions {\n language: string;\n minSentenceLength: number;\n streamContextLength: number;\n}\n\nexport class SentenceTokenizer extends tokenizer.SentenceTokenizer {\n #config: TokenizerOptions;\n\n constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {\n super();\n this.#config = {\n language,\n minSentenceLength,\n streamContextLength,\n };\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.SentenceStream {\n return new BufferedSentenceStream(\n (text: string) => splitSentences(text, this.#config.minSentenceLength),\n this.#config.minSentenceLength,\n this.#config.streamContextLength,\n );\n }\n}\n\nexport class WordTokenizer extends tokenizer.WordTokenizer {\n #ignorePunctuation: boolean;\n\n constructor(ignorePunctuation = true) {\n super();\n this.#ignorePunctuation = ignorePunctuation;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.WordStream {\n return new BufferedWordStream(\n (text: string) => splitWords(text, this.#ignorePunctuation),\n 1,\n 1,\n );\n }\n}\n\nexport const hyphenateWord = (word: string): string[] => {\n return hyphenator.hyphenateWord(word);\n};\n\nexport const tokenizeParagraphs = (text: string): string[] => {\n return splitParagraphs(text).map((tok) => tok[0]);\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,0BAA2D;AAC3D,gBAA2B;AAC3B,wBAA2B;AAC3B,uBAAgC;AAChC,sBAA+B;AAC/B,kBAA2B;AAQpB,MAAM,0BAA0B,UAAU,kBAAkB;AAAA,EACjE;AAAA,EAEA,YAAY,WAAW,SAAS,oBAAoB,IAAI,sBAAsB,IAAI;AAChF,UAAM;AACN,SAAK,UAAU;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,eAAO,gCAAe,MAAM,KAAK,QAAQ,iBAAiB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACjF;AAAA;AAAA,EAGA,OAAO,UAA6C;AAClD,WAAO,IAAI;AAAA,MACT,CAAC,aAAiB,gCAAe,MAAM,KAAK,QAAQ,iBAAiB;AAAA,MACrE,KAAK,QAAQ;AAAA,MACb,KAAK,QAAQ;AAAA,IACf;AAAA,EACF;AACF;AAEO,MAAM,sBAAsB,UAAU,cAAc;AAAA,EACzD;AAAA,EAEA,YAAY,oBAAoB,MAAM;AACpC,UAAM;AACN,SAAK,qBAAqB;AAAA,EAC5B;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,eAAO,wBAAW,MAAM,KAAK,kBAAkB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACtE;AAAA;AAAA,EAGA,OAAO,UAAyC;AAC9C,WAAO,IAAI;AAAA,MACT,CAAC,aAAiB,wBAAW,MAAM,KAAK,kBAAkB;AAAA,MAC1D;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,gBAAgB,CAAC,SAA2B;AACvD,SAAO,6BAAW,cAAc,IAAI;AACtC;AAEO,MAAM,qBAAqB,CAAC,SAA2B;AAC5D,aAAO,kCAAgB,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAClD;","names":[]}
@@ -1,4 +1,4 @@
1
- import * as tokenizer from '../index.js';
1
+ import * as tokenizer from '../tokenizer.js';
2
2
  export declare class SentenceTokenizer extends tokenizer.SentenceTokenizer {
3
3
  #private;
4
4
  constructor(language?: string, minSentenceLength?: number, streamContextLength?: number);
@@ -1 +1 @@
1
- {"version":3,"file":"basic.d.ts","sourceRoot":"","sources":["../../../src/tokenize/basic/basic.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,SAAS,MAAM,aAAa,CAAC;AAazC,qBAAa,iBAAkB,SAAQ,SAAS,CAAC,iBAAiB;;gBAGpD,QAAQ,SAAU,EAAE,iBAAiB,SAAK,EAAE,mBAAmB,SAAK;IAUhF,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE;IAKnD,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,cAAc;CAOpD;AAED,qBAAa,aAAc,SAAQ,SAAS,CAAC,aAAa;;gBAG5C,iBAAiB,UAAO;IAMpC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE;IAKnD,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,UAAU;CAOhD;AAED,eAAO,MAAM,aAAa,SAAU,MAAM,KAAG,MAAM,EAElD,CAAC;AAEF,eAAO,MAAM,kBAAkB,SAAU,MAAM,KAAG,MAAM,EAEvD,CAAC"}
1
+ {"version":3,"file":"basic.d.ts","sourceRoot":"","sources":["../../../src/tokenize/basic/basic.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,SAAS,MAAM,iBAAiB,CAAC;AAY7C,qBAAa,iBAAkB,SAAQ,SAAS,CAAC,iBAAiB;;gBAGpD,QAAQ,SAAU,EAAE,iBAAiB,SAAK,EAAE,mBAAmB,SAAK;IAUhF,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE;IAKnD,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,cAAc;CAOpD;AAED,qBAAa,aAAc,SAAQ,SAAS,CAAC,aAAa;;gBAG5C,iBAAiB,UAAO;IAMpC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,MAAM,EAAE;IAKnD,MAAM,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,UAAU;CAOhD;AAED,eAAO,MAAM,aAAa,SAAU,MAAM,KAAG,MAAM,EAElD,CAAC;AAEF,eAAO,MAAM,kBAAkB,SAAU,MAAM,KAAG,MAAM,EAEvD,CAAC"}
@@ -1,50 +1,61 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
- //
3
- // SPDX-License-Identifier: Apache-2.0
4
- import * as tokenizer from '../index.js';
5
- import { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';
6
- import { hyphenator } from './hyphenator.js';
7
- import { splitParagraphs } from './paragraph.js';
8
- import { splitSentences } from './sentence.js';
9
- import { splitWords } from './word.js';
10
- export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
11
- #config;
12
- constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {
13
- super();
14
- this.#config = {
15
- language,
16
- minSentenceLength,
17
- streamContextLength,
18
- };
19
- }
20
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
21
- tokenize(text, language) {
22
- return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);
23
- }
24
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
25
- stream(language) {
26
- return new BufferedSentenceStream((text) => splitSentences(text, this.#config.minSentenceLength), this.#config.minSentenceLength, this.#config.streamContextLength);
27
- }
1
+ import { BufferedSentenceStream, BufferedWordStream } from "../token_stream.js";
2
+ import * as tokenizer from "../tokenizer.js";
3
+ import { hyphenator } from "./hyphenator.js";
4
+ import { splitParagraphs } from "./paragraph.js";
5
+ import { splitSentences } from "./sentence.js";
6
+ import { splitWords } from "./word.js";
7
+ class SentenceTokenizer extends tokenizer.SentenceTokenizer {
8
+ #config;
9
+ constructor(language = "en-US", minSentenceLength = 20, streamContextLength = 10) {
10
+ super();
11
+ this.#config = {
12
+ language,
13
+ minSentenceLength,
14
+ streamContextLength
15
+ };
16
+ }
17
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
18
+ tokenize(text, language) {
19
+ return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);
20
+ }
21
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
22
+ stream(language) {
23
+ return new BufferedSentenceStream(
24
+ (text) => splitSentences(text, this.#config.minSentenceLength),
25
+ this.#config.minSentenceLength,
26
+ this.#config.streamContextLength
27
+ );
28
+ }
28
29
  }
29
- export class WordTokenizer extends tokenizer.WordTokenizer {
30
- #ignorePunctuation;
31
- constructor(ignorePunctuation = true) {
32
- super();
33
- this.#ignorePunctuation = ignorePunctuation;
34
- }
35
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
36
- tokenize(text, language) {
37
- return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);
38
- }
39
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
40
- stream(language) {
41
- return new BufferedWordStream((text) => splitWords(text, this.#ignorePunctuation), 1, 1);
42
- }
30
+ class WordTokenizer extends tokenizer.WordTokenizer {
31
+ #ignorePunctuation;
32
+ constructor(ignorePunctuation = true) {
33
+ super();
34
+ this.#ignorePunctuation = ignorePunctuation;
35
+ }
36
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
37
+ tokenize(text, language) {
38
+ return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);
39
+ }
40
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
41
+ stream(language) {
42
+ return new BufferedWordStream(
43
+ (text) => splitWords(text, this.#ignorePunctuation),
44
+ 1,
45
+ 1
46
+ );
47
+ }
43
48
  }
44
- export const hyphenateWord = (word) => {
45
- return hyphenator.hyphenateWord(word);
49
+ const hyphenateWord = (word) => {
50
+ return hyphenator.hyphenateWord(word);
46
51
  };
47
- export const tokenizeParagraphs = (text) => {
48
- return splitParagraphs(text).map((tok) => tok[0]);
52
+ const tokenizeParagraphs = (text) => {
53
+ return splitParagraphs(text).map((tok) => tok[0]);
54
+ };
55
+ export {
56
+ SentenceTokenizer,
57
+ WordTokenizer,
58
+ hyphenateWord,
59
+ tokenizeParagraphs
49
60
  };
50
61
  //# sourceMappingURL=basic.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"basic.js","sourceRoot":"","sources":["../../../src/tokenize/basic/basic.ts"],"names":[],"mappings":"AAAA,6CAA6C;AAC7C,EAAE;AACF,sCAAsC;AACtC,OAAO,KAAK,SAAS,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,sBAAsB,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAChF,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAQvC,MAAM,OAAO,iBAAkB,SAAQ,SAAS,CAAC,iBAAiB;IAChE,OAAO,CAAmB;IAE1B,YAAY,QAAQ,GAAG,OAAO,EAAE,iBAAiB,GAAG,EAAE,EAAE,mBAAmB,GAAG,EAAE;QAC9E,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,GAAG;YACb,QAAQ;YACR,iBAAiB;YACjB,mBAAmB;SACpB,CAAC;IACJ,CAAC;IAED,6DAA6D;IAC7D,QAAQ,CAAC,IAAY,EAAE,QAAiB;QACtC,OAAO,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACnF,CAAC;IAED,6DAA6D;IAC7D,MAAM,CAAC,QAAiB;QACtB,OAAO,IAAI,sBAAsB,CAC/B,CAAC,IAAY,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC,EACtE,IAAI,CAAC,OAAO,CAAC,iBAAiB,EAC9B,IAAI,CAAC,OAAO,CAAC,mBAAmB,CACjC,CAAC;IACJ,CAAC;CACF;AAED,MAAM,OAAO,aAAc,SAAQ,SAAS,CAAC,aAAa;IACxD,kBAAkB,CAAU;IAE5B,YAAY,iBAAiB,GAAG,IAAI;QAClC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,kBAAkB,GAAG,iBAAiB,CAAC;IAC9C,CAAC;IAED,6DAA6D;IAC7D,QAAQ,CAAC,IAAY,EAAE,QAAiB;QACtC,OAAO,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,6DAA6D;IAC7D,MAAM,CAAC,QAAiB;QACtB,OAAO,IAAI,kBAAkB,CAC3B,CAAC,IAAY,EAAE,EAAE,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,kBAAkB,CAAC,EAC3D,CAAC,EACD,CAAC,CACF,CAAC;IACJ,CAAC;CACF;AAED,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,IAAY,EAAY,EAAE;IACtD,OAAO,UAAU,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;AACxC,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,IAAY,EAAY,EAAE;IAC3D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACpD,CAAC,CAAC"}
1
+ {"version":3,"sources":["../../../src/tokenize/basic/basic.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';\nimport * as tokenizer from '../tokenizer.js';\nimport { hyphenator } from './hyphenator.js';\nimport { splitParagraphs } from './paragraph.js';\nimport { splitSentences } from './sentence.js';\nimport { splitWords } from './word.js';\n\ninterface TokenizerOptions {\n language: string;\n minSentenceLength: number;\n streamContextLength: number;\n}\n\nexport class SentenceTokenizer extends tokenizer.SentenceTokenizer {\n #config: TokenizerOptions;\n\n constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {\n super();\n this.#config = {\n language,\n minSentenceLength,\n streamContextLength,\n };\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.SentenceStream {\n return new BufferedSentenceStream(\n (text: string) => splitSentences(text, this.#config.minSentenceLength),\n this.#config.minSentenceLength,\n this.#config.streamContextLength,\n );\n }\n}\n\nexport class WordTokenizer extends tokenizer.WordTokenizer {\n #ignorePunctuation: boolean;\n\n constructor(ignorePunctuation = true) {\n super();\n this.#ignorePunctuation = ignorePunctuation;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n tokenize(text: string, language?: string): string[] {\n return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n stream(language?: string): tokenizer.WordStream {\n return new BufferedWordStream(\n (text: string) => splitWords(text, this.#ignorePunctuation),\n 1,\n 1,\n );\n }\n}\n\nexport const hyphenateWord = (word: string): string[] => {\n return hyphenator.hyphenateWord(word);\n};\n\nexport const tokenizeParagraphs = (text: string): string[] => {\n return splitParagraphs(text).map((tok) => tok[0]);\n};\n"],"mappings":"AAGA,SAAS,wBAAwB,0BAA0B;AAC3D,YAAY,eAAe;AAC3B,SAAS,kBAAkB;AAC3B,SAAS,uBAAuB;AAChC,SAAS,sBAAsB;AAC/B,SAAS,kBAAkB;AAQpB,MAAM,0BAA0B,UAAU,kBAAkB;AAAA,EACjE;AAAA,EAEA,YAAY,WAAW,SAAS,oBAAoB,IAAI,sBAAsB,IAAI;AAChF,UAAM;AACN,SAAK,UAAU;AAAA,MACb;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,WAAO,eAAe,MAAM,KAAK,QAAQ,iBAAiB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACjF;AAAA;AAAA,EAGA,OAAO,UAA6C;AAClD,WAAO,IAAI;AAAA,MACT,CAAC,SAAiB,eAAe,MAAM,KAAK,QAAQ,iBAAiB;AAAA,MACrE,KAAK,QAAQ;AAAA,MACb,KAAK,QAAQ;AAAA,IACf;AAAA,EACF;AACF;AAEO,MAAM,sBAAsB,UAAU,cAAc;AAAA,EACzD;AAAA,EAEA,YAAY,oBAAoB,MAAM;AACpC,UAAM;AACN,SAAK,qBAAqB;AAAA,EAC5B;AAAA;AAAA,EAGA,SAAS,MAAc,UAA6B;AAClD,WAAO,WAAW,MAAM,KAAK,kBAAkB,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAAA,EACtE;AAAA;AAAA,EAGA,OAAO,UAAyC;AAC9C,WAAO,IAAI;AAAA,MACT,CAAC,SAAiB,WAAW,MAAM,KAAK,kBAAkB;AAAA,MAC1D;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;AAEO,MAAM,gBAAgB,CAAC,SAA2B;AACvD,SAAO,WAAW,cAAc,IAAI;AACtC;AAEO,MAAM,qBAAqB,CAAC,SAA2B;AAC5D,SAAO,gBAAgB,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,CAAC,CAAC;AAClD;","names":[]}