@livekit/agents 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +40 -0
  3. package/dist/audio.js +17 -30
  4. package/dist/audio.js.map +1 -1
  5. package/dist/cli.js +3 -14
  6. package/dist/cli.js.map +1 -1
  7. package/dist/http_server.d.ts +1 -1
  8. package/dist/http_server.js +5 -9
  9. package/dist/http_server.js.map +1 -1
  10. package/dist/index.d.ts +3 -2
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +14 -2
  13. package/dist/index.js.map +1 -1
  14. package/dist/ipc/job_executor.js +3 -5
  15. package/dist/ipc/job_executor.js.map +1 -1
  16. package/dist/ipc/job_main.d.ts +1 -1
  17. package/dist/ipc/proc_job_executor.js +66 -80
  18. package/dist/ipc/proc_job_executor.js.map +1 -1
  19. package/dist/ipc/proc_pool.d.ts +3 -3
  20. package/dist/ipc/proc_pool.d.ts.map +1 -1
  21. package/dist/ipc/proc_pool.js +16 -11
  22. package/dist/ipc/proc_pool.js.map +1 -1
  23. package/dist/job.js +56 -73
  24. package/dist/job.js.map +1 -1
  25. package/dist/llm/chat_context.d.ts +66 -0
  26. package/dist/llm/chat_context.d.ts.map +1 -0
  27. package/dist/llm/chat_context.js +93 -0
  28. package/dist/llm/chat_context.js.map +1 -0
  29. package/dist/llm/function_context.d.ts +19 -1
  30. package/dist/llm/function_context.d.ts.map +1 -1
  31. package/dist/llm/function_context.js +54 -18
  32. package/dist/llm/function_context.js.map +1 -1
  33. package/dist/llm/function_context.test.d.ts +2 -0
  34. package/dist/llm/function_context.test.d.ts.map +1 -0
  35. package/dist/llm/function_context.test.js +218 -0
  36. package/dist/llm/function_context.test.js.map +1 -0
  37. package/dist/llm/index.d.ts +3 -2
  38. package/dist/llm/index.d.ts.map +1 -1
  39. package/dist/llm/index.js +3 -2
  40. package/dist/llm/index.js.map +1 -1
  41. package/dist/llm/llm.d.ts +53 -0
  42. package/dist/llm/llm.d.ts.map +1 -0
  43. package/dist/llm/llm.js +45 -0
  44. package/dist/llm/llm.js.map +1 -0
  45. package/dist/multimodal/agent_playout.d.ts +1 -1
  46. package/dist/multimodal/agent_playout.js +116 -153
  47. package/dist/multimodal/agent_playout.js.map +1 -1
  48. package/dist/multimodal/multimodal_agent.d.ts +4 -3
  49. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  50. package/dist/multimodal/multimodal_agent.js +214 -237
  51. package/dist/multimodal/multimodal_agent.js.map +1 -1
  52. package/dist/pipeline/agent_output.d.ts +30 -0
  53. package/dist/pipeline/agent_output.d.ts.map +1 -0
  54. package/dist/pipeline/agent_output.js +155 -0
  55. package/dist/pipeline/agent_output.js.map +1 -0
  56. package/dist/pipeline/agent_playout.d.ts +38 -0
  57. package/dist/pipeline/agent_playout.d.ts.map +1 -0
  58. package/dist/pipeline/agent_playout.js +142 -0
  59. package/dist/pipeline/agent_playout.js.map +1 -0
  60. package/dist/pipeline/human_input.d.ts +28 -0
  61. package/dist/pipeline/human_input.d.ts.map +1 -0
  62. package/dist/pipeline/human_input.js +134 -0
  63. package/dist/pipeline/human_input.js.map +1 -0
  64. package/dist/pipeline/index.d.ts +2 -0
  65. package/dist/pipeline/index.d.ts.map +1 -0
  66. package/dist/pipeline/index.js +5 -0
  67. package/dist/pipeline/index.js.map +1 -0
  68. package/dist/pipeline/pipeline_agent.d.ts +134 -0
  69. package/dist/pipeline/pipeline_agent.d.ts.map +1 -0
  70. package/dist/pipeline/pipeline_agent.js +661 -0
  71. package/dist/pipeline/pipeline_agent.js.map +1 -0
  72. package/dist/pipeline/speech_handle.d.ts +27 -0
  73. package/dist/pipeline/speech_handle.d.ts.map +1 -0
  74. package/dist/pipeline/speech_handle.js +102 -0
  75. package/dist/pipeline/speech_handle.js.map +1 -0
  76. package/dist/plugin.js +7 -20
  77. package/dist/plugin.js.map +1 -1
  78. package/dist/stt/index.d.ts +1 -2
  79. package/dist/stt/index.d.ts.map +1 -1
  80. package/dist/stt/index.js +1 -2
  81. package/dist/stt/index.js.map +1 -1
  82. package/dist/stt/stt.d.ts +62 -24
  83. package/dist/stt/stt.d.ts.map +1 -1
  84. package/dist/stt/stt.js +77 -27
  85. package/dist/stt/stt.js.map +1 -1
  86. package/dist/tokenize/basic/basic.d.ts +16 -0
  87. package/dist/tokenize/basic/basic.d.ts.map +1 -0
  88. package/dist/tokenize/basic/basic.js +50 -0
  89. package/dist/tokenize/basic/basic.js.map +1 -0
  90. package/dist/tokenize/basic/hyphenator.d.ts +17 -0
  91. package/dist/tokenize/basic/hyphenator.d.ts.map +1 -0
  92. package/dist/tokenize/basic/hyphenator.js +420 -0
  93. package/dist/tokenize/basic/hyphenator.js.map +1 -0
  94. package/dist/tokenize/basic/index.d.ts +2 -0
  95. package/dist/tokenize/basic/index.d.ts.map +1 -0
  96. package/dist/tokenize/basic/index.js +5 -0
  97. package/dist/tokenize/basic/index.js.map +1 -0
  98. package/dist/tokenize/basic/paragraph.d.ts +5 -0
  99. package/dist/tokenize/basic/paragraph.d.ts.map +1 -0
  100. package/dist/tokenize/basic/paragraph.js +38 -0
  101. package/dist/tokenize/basic/paragraph.js.map +1 -0
  102. package/dist/tokenize/basic/sentence.d.ts +5 -0
  103. package/dist/tokenize/basic/sentence.d.ts.map +1 -0
  104. package/dist/tokenize/basic/sentence.js +60 -0
  105. package/dist/tokenize/basic/sentence.js.map +1 -0
  106. package/dist/tokenize/basic/word.d.ts +5 -0
  107. package/dist/tokenize/basic/word.d.ts.map +1 -0
  108. package/dist/tokenize/basic/word.js +23 -0
  109. package/dist/tokenize/basic/word.js.map +1 -0
  110. package/dist/tokenize/index.d.ts +5 -0
  111. package/dist/tokenize/index.d.ts.map +1 -0
  112. package/dist/tokenize/index.js +8 -0
  113. package/dist/tokenize/index.js.map +1 -0
  114. package/dist/tokenize/token_stream.d.ts +36 -0
  115. package/dist/tokenize/token_stream.d.ts.map +1 -0
  116. package/dist/tokenize/token_stream.js +136 -0
  117. package/dist/tokenize/token_stream.js.map +1 -0
  118. package/dist/tokenize/tokenizer.d.ts +55 -0
  119. package/dist/tokenize/tokenizer.d.ts.map +1 -0
  120. package/dist/tokenize/tokenizer.js +117 -0
  121. package/dist/tokenize/tokenizer.js.map +1 -0
  122. package/dist/transcription.js +78 -89
  123. package/dist/transcription.js.map +1 -1
  124. package/dist/tts/index.d.ts +1 -3
  125. package/dist/tts/index.d.ts.map +1 -1
  126. package/dist/tts/index.js +1 -3
  127. package/dist/tts/index.js.map +1 -1
  128. package/dist/tts/tts.d.ts +66 -37
  129. package/dist/tts/tts.d.ts.map +1 -1
  130. package/dist/tts/tts.js +79 -74
  131. package/dist/tts/tts.js.map +1 -1
  132. package/dist/utils.d.ts +21 -6
  133. package/dist/utils.d.ts.map +1 -1
  134. package/dist/utils.js +120 -76
  135. package/dist/utils.js.map +1 -1
  136. package/dist/vad.d.ts +43 -39
  137. package/dist/vad.d.ts.map +1 -1
  138. package/dist/vad.js +51 -4
  139. package/dist/vad.js.map +1 -1
  140. package/dist/worker.d.ts +1 -1
  141. package/dist/worker.js +257 -247
  142. package/dist/worker.js.map +1 -1
  143. package/package.json +4 -3
  144. package/src/index.ts +16 -2
  145. package/src/ipc/proc_pool.ts +4 -4
  146. package/src/llm/chat_context.ts +147 -0
  147. package/src/llm/function_context.test.ts +248 -0
  148. package/src/llm/function_context.ts +77 -18
  149. package/src/llm/index.ts +21 -2
  150. package/src/llm/llm.ts +102 -0
  151. package/src/multimodal/multimodal_agent.ts +19 -6
  152. package/src/pipeline/agent_output.ts +185 -0
  153. package/src/pipeline/agent_playout.ts +187 -0
  154. package/src/pipeline/human_input.ts +166 -0
  155. package/src/pipeline/index.ts +15 -0
  156. package/src/pipeline/pipeline_agent.ts +917 -0
  157. package/src/pipeline/speech_handle.ts +136 -0
  158. package/src/stt/index.ts +8 -2
  159. package/src/stt/stt.ts +98 -31
  160. package/src/tokenize/basic/basic.ts +73 -0
  161. package/src/tokenize/basic/hyphenator.ts +436 -0
  162. package/src/tokenize/basic/index.ts +5 -0
  163. package/src/tokenize/basic/paragraph.ts +43 -0
  164. package/src/tokenize/basic/sentence.ts +69 -0
  165. package/src/tokenize/basic/word.ts +27 -0
  166. package/src/tokenize/index.ts +16 -0
  167. package/src/tokenize/token_stream.ts +163 -0
  168. package/src/tokenize/tokenizer.ts +152 -0
  169. package/src/tts/index.ts +1 -20
  170. package/src/tts/tts.ts +110 -57
  171. package/src/utils.ts +95 -25
  172. package/src/vad.ts +86 -45
  173. package/tsconfig.tsbuildinfo +1 -1
  174. package/dist/stt/stream_adapter.d.ts +0 -19
  175. package/dist/stt/stream_adapter.d.ts.map +0 -1
  176. package/dist/stt/stream_adapter.js +0 -96
  177. package/dist/stt/stream_adapter.js.map +0 -1
  178. package/dist/tokenize.d.ts +0 -15
  179. package/dist/tokenize.d.ts.map +0 -1
  180. package/dist/tokenize.js +0 -12
  181. package/dist/tokenize.js.map +0 -1
  182. package/dist/tts/stream_adapter.d.ts +0 -19
  183. package/dist/tts/stream_adapter.d.ts.map +0 -1
  184. package/dist/tts/stream_adapter.js +0 -111
  185. package/dist/tts/stream_adapter.js.map +0 -1
  186. package/src/stt/stream_adapter.ts +0 -104
  187. package/src/tokenize.ts +0 -22
  188. package/src/tts/stream_adapter.ts +0 -93
@@ -0,0 +1,136 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { randomUUID } from 'crypto';
5
+ import type { LLMStream } from '../llm/index.js';
6
+ import { Future } from '../utils.js';
7
+ import type { SynthesisHandle } from './agent_output.js';
8
+
9
/**
 * Tracks one piece of agent speech: its id, its interruption policy, the source it is produced
 * from, and the {@link SynthesisHandle} attached to it.
 *
 * @remarks
 * A handle is created uninitialized. {@link SpeechHandle.initialize} attaches the source and the
 * synthesis handle and resolves the future awaited by {@link SpeechHandle.waitForInitialization}.
 */
export class SpeechHandle {
  readonly #id: string;
  readonly #allowInterruptions: boolean;
  readonly #addToChatCtx: boolean;
  readonly #isReply: boolean;
  readonly #userQuestion: string;
  #userCommitted = false;
  readonly #initFut = new Future();
  #speechCommitted = false;
  #source?: string | LLMStream | AsyncIterable<string>;
  #synthesisHandle?: SynthesisHandle;
  #initialized = false;

  /**
   * @param id - Identifier of this speech
   * @param allowInterruptions - Whether {@link SpeechHandle.interrupt} is permitted
   * @param addToChatCtx - Flag exposed through {@link SpeechHandle.addToChatCtx}
   * @param isReply - Whether this speech is a reply to a user question
   * @param userQuestion - The user question being replied to (empty for non-replies)
   */
  constructor(
    id: string,
    allowInterruptions: boolean,
    addToChatCtx: boolean,
    isReply: boolean,
    userQuestion: string,
  ) {
    this.#id = id;
    this.#allowInterruptions = allowInterruptions;
    this.#addToChatCtx = addToChatCtx;
    this.#isReply = isReply;
    this.#userQuestion = userQuestion;
  }

  /** Creates a reply handle (`isReply` is true) with a freshly generated random UUID as id. */
  static createAssistantReply(
    allowInterruptions: boolean,
    addToChatCtx: boolean,
    userQuestion: string,
  ): SpeechHandle {
    return new SpeechHandle(randomUUID(), allowInterruptions, addToChatCtx, true, userQuestion);
  }

  /**
   * Creates a non-reply handle (`isReply` is false, `userQuestion` is the empty string) with a
   * freshly generated random UUID as id.
   */
  static createAssistantSpeech(allowInterruptions: boolean, addToChatCtx: boolean): SpeechHandle {
    return new SpeechHandle(randomUUID(), allowInterruptions, addToChatCtx, false, '');
  }

  /**
   * Waits on the initialization future, which {@link SpeechHandle.initialize} resolves and
   * {@link SpeechHandle.cancel} rejects.
   */
  async waitForInitialization(): Promise<void> {
    await this.#initFut.await;
  }

  /**
   * Attaches the speech source and its synthesis handle, marks the handle as initialized and
   * resolves the initialization future.
   *
   * @throws Error if the currently attached synthesis handle reports it was interrupted
   */
  initialize(
    source: string | LLMStream | AsyncIterable<string>,
    synthesisHandle: SynthesisHandle,
  ): void {
    if (this.interrupted) {
      throw new Error('speech was interrupted');
    }

    this.#source = source;
    this.#synthesisHandle = synthesisHandle;
    this.#initialized = true;
    this.#initFut.resolve();
  }

  /** Sets the `userCommitted` flag. */
  markUserCommitted(): void {
    this.#userCommitted = true;
  }

  /** Sets the `speechCommitted` flag. */
  markSpeechCommitted(): void {
    this.#speechCommitted = true;
  }

  /** Whether {@link SpeechHandle.markUserCommitted} has been called. */
  get userCommitted(): boolean {
    return this.#userCommitted;
  }

  /** Whether {@link SpeechHandle.markSpeechCommitted} has been called. */
  get speechCommitted(): boolean {
    return this.#speechCommitted;
  }

  get id(): string {
    return this.#id;
  }

  get allowInterruptions(): boolean {
    return this.#allowInterruptions;
  }

  get addToChatCtx(): boolean {
    return this.#addToChatCtx;
  }

  /**
   * The source attached by {@link SpeechHandle.initialize}.
   *
   * @throws Error if no source has been attached yet
   */
  get source(): string | LLMStream | AsyncIterable<string> {
    // Compare against undefined rather than testing truthiness: the source may be a string, and
    // an empty string is a valid (falsy) source that must not be reported as uninitialized.
    if (this.#source === undefined) {
      throw new Error('speech not initialized');
    }
    return this.#source;
  }

  /**
   * The synthesis handle attached by {@link SpeechHandle.initialize} or assigned via the setter.
   *
   * @throws Error if no synthesis handle has been attached yet
   */
  get synthesisHandle(): SynthesisHandle {
    if (this.#synthesisHandle === undefined) {
      throw new Error('speech not initialized');
    }
    return this.#synthesisHandle;
  }

  /** Replaces the synthesis handle; does not touch the `initialized` flag or the source. */
  set synthesisHandle(handle: SynthesisHandle) {
    this.#synthesisHandle = handle;
  }

  /** Whether {@link SpeechHandle.initialize} has completed. */
  get initialized(): boolean {
    return this.#initialized;
  }

  get isReply(): boolean {
    return this.#isReply;
  }

  get userQuestion(): string {
    return this.#userQuestion;
  }

  /** True only when a synthesis handle is attached and it reports `interrupted`. */
  get interrupted(): boolean {
    return !!this.#synthesisHandle?.interrupted;
  }

  /**
   * Cancels the speech, provided interruptions are allowed for this handle.
   *
   * @throws Error if the handle was created with `allowInterruptions` set to false
   */
  interrupt(): void {
    if (!this.#allowInterruptions) {
      throw new Error('interruptions are not allowed');
    }
    this.cancel();
  }

  /**
   * Rejects the initialization future and interrupts the synthesis handle if one is attached.
   * Unlike {@link SpeechHandle.interrupt}, this does not check `allowInterruptions`.
   */
  cancel(): void {
    this.#initFut.reject(new Error());
    this.#synthesisHandle?.interrupt();
  }
}
package/src/stt/index.ts CHANGED
@@ -2,5 +2,11 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
 
5
- export { STT, SpeechEvent, SpeechEventType, SpeechStream, type SpeechData } from './stt.js';
6
- export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
5
+ export {
6
+ type SpeechEvent,
7
+ type SpeechData,
8
+ type STTCapabilities,
9
+ SpeechEventType,
10
+ STT,
11
+ SpeechStream,
12
+ } from './stt.js';
package/src/stt/stt.ts CHANGED
@@ -2,13 +2,14 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { AudioFrame } from '@livekit/rtc-node';
5
- import type { AudioBuffer } from '../utils.js';
5
+ import { AsyncIterableQueue } from '../utils.js';
6
6
 
7
+ /** Indicates start/middle/end of speech */
7
8
  export enum SpeechEventType {
8
9
  /**
9
10
  * Indicate the start of speech.
10
11
  * If the STT doesn't support this event, this will be emitted at the same time
11
- * as the first INTERMIN_TRANSCRIPT.
12
+ * as the first INTERIM_TRANSCRIPT.
12
13
  */
13
14
  START_OF_SPEECH = 0,
14
15
  /**
@@ -27,6 +28,7 @@ export enum SpeechEventType {
27
28
  END_OF_SPEECH = 3,
28
29
  }
29
30
 
31
+ /** SpeechData contains metadata about this {@link SpeechEvent}. */
30
32
  export interface SpeechData {
31
33
  language: string;
32
34
  text: string;
@@ -35,51 +37,116 @@ export interface SpeechData {
35
37
  confidence: number;
36
38
  }
37
39
 
38
- export class SpeechEvent {
40
+ /** SpeechEvent is a packet of speech-to-text data. */
41
+ export interface SpeechEvent {
39
42
  type: SpeechEventType;
40
43
  alternatives: SpeechData[];
44
+ }
41
45
 
42
- constructor(type: SpeechEventType, alternatives: SpeechData[] = []) {
43
- this.type = type;
44
- this.alternatives = alternatives;
45
- }
46
+ /**
47
+ * Describes the capabilities of the STT provider.
48
+ *
49
+ * @remarks
50
+ * At present, the framework only supports providers that have a streaming endpoint.
51
+ */
52
+ export interface STTCapabilities {
53
+ streaming: boolean;
54
+ interimResults: boolean;
46
55
  }
47
56
 
48
- export abstract class SpeechStream implements IterableIterator<SpeechEvent> {
49
- /**
50
- * Push a frame to be recognised.
51
- * It is recommended to push frames as soon as they are available.
52
- */
53
- abstract pushFrame(token: AudioFrame): void;
57
+ /**
58
+ * An instance of a speech-to-text adapter.
59
+ *
60
+ * @remarks
61
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
62
+ * exports its own child STT class, which inherits this class's methods.
63
+ */
64
+ export abstract class STT {
65
+ #capabilities: STTCapabilities;
66
+
67
+ constructor(capabilities: STTCapabilities) {
68
+ this.#capabilities = capabilities;
69
+ }
70
+
71
+ /** Returns this STT's capabilities */
72
+ get capabilities(): STTCapabilities {
73
+ return this.#capabilities;
74
+ }
54
75
 
55
76
  /**
56
- * Close the stream.
57
- *
58
- * @param wait
59
- * Whether to wait for the STT to finish processing the remaining
60
- * frames before closing
77
+ * Returns a {@link SpeechStream} that can be used to push audio frames and receive
78
+ * transcriptions
61
79
  */
62
- abstract close(wait: boolean): Promise<void>;
80
+ abstract stream(): SpeechStream;
81
+ }
63
82
 
64
- abstract next(): IteratorResult<SpeechEvent>;
83
+ /**
84
+ * An instance of a speech-to-text stream, as an asynchronous iterable iterator.
85
+ *
86
+ * @example Looping through events
87
+ * ```ts
88
+ * for await (const event of stream) {
89
+ * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {
90
+ * console.log(event.alternatives[0].text)
91
+ * }
92
+ * }
93
+ * ```
94
+ *
95
+ * @remarks
96
+ * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
97
+ * exports its own child SpeechStream class, which inherits this class's methods.
98
+ */
99
+ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {
100
+ protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
101
+ protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();
102
+ protected queue = new AsyncIterableQueue<SpeechEvent>();
103
+ protected closed = false;
65
104
 
66
- [Symbol.iterator](): SpeechStream {
67
- return this;
105
+ /** Push an audio frame to the STT */
106
+ pushFrame(frame: AudioFrame) {
107
+ if (this.input.closed) {
108
+ throw new Error('Input is closed');
109
+ }
110
+ if (this.closed) {
111
+ throw new Error('Stream is closed');
112
+ }
113
+ this.input.put(frame);
68
114
  }
69
- }
70
115
 
71
- export abstract class STT {
72
- #streamingSupported: boolean;
116
+ /** Flush the STT, causing it to process all pending audio */
117
+ flush() {
118
+ if (this.input.closed) {
119
+ throw new Error('Input is closed');
120
+ }
121
+ if (this.closed) {
122
+ throw new Error('Stream is closed');
123
+ }
124
+ this.input.put(SpeechStream.FLUSH_SENTINEL);
125
+ }
73
126
 
74
- constructor(streamingSupported: boolean) {
75
- this.#streamingSupported = streamingSupported;
127
+ /** Mark the input as ended and forbid additional pushes */
128
+ endInput() {
129
+ if (this.input.closed) {
130
+ throw new Error('Input is closed');
131
+ }
132
+ if (this.closed) {
133
+ throw new Error('Stream is closed');
134
+ }
135
+ this.input.close();
76
136
  }
77
137
 
78
- abstract recognize(buffer: AudioBuffer, language?: string): Promise<SpeechEvent>;
138
+ next(): Promise<IteratorResult<SpeechEvent>> {
139
+ return this.queue.next();
140
+ }
79
141
 
80
- abstract stream(language: string | undefined): SpeechStream;
142
+ /** Close both the input and output of the STT stream */
143
+ close() {
144
+ this.input.close();
145
+ this.queue.close();
146
+ this.closed = true;
147
+ }
81
148
 
82
- get streamingSupported(): boolean {
83
- return this.#streamingSupported;
149
+ [Symbol.asyncIterator](): SpeechStream {
150
+ return this;
84
151
  }
85
152
  }
@@ -0,0 +1,73 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import * as tokenizer from '../index.js';
5
+ import { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';
6
+ import { hyphenator } from './hyphenator.js';
7
+ import { splitParagraphs } from './paragraph.js';
8
+ import { splitSentences } from './sentence.js';
9
+ import { splitWords } from './word.js';
10
+
11
/** Settings a `SentenceTokenizer` captures from its constructor arguments. */
type TokenizerOptions = {
  /** Language tag given to the constructor. */
  language: string;
  /** Minimum sentence length handed to `splitSentences` and to the buffered stream. */
  minSentenceLength: number;
  /** Context length handed to the buffered stream. */
  streamContextLength: number;
};
16
+
17
/**
 * Sentence tokenizer backed by `splitSentences`.
 *
 * The constructor arguments are stored once and reused by both the one-shot and the streaming
 * entry points.
 */
export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
  #config: TokenizerOptions;

  /**
   * @param language - Language tag stored in the configuration
   * @param minSentenceLength - Forwarded to `splitSentences` and to the buffered stream
   * @param streamContextLength - Forwarded to the buffered stream
   */
  constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {
    super();
    const options: TokenizerOptions = { language, minSentenceLength, streamContextLength };
    this.#config = options;
  }

  /**
   * Splits `text` with `splitSentences` and returns the first element of every token it yields.
   * The `language` argument is accepted but not used.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  tokenize(text: string, language?: string): string[] {
    const minLength = this.#config.minSentenceLength;
    const pieces = splitSentences(text, minLength);
    return pieces.map((piece) => piece[0]);
  }

  /**
   * Builds a {@link BufferedSentenceStream} that splits with `splitSentences` using the stored
   * minimum sentence length and stream context length. The `language` argument is accepted but
   * not used.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  stream(language?: string): tokenizer.SentenceStream {
    const { minSentenceLength, streamContextLength } = this.#config;
    const split = (text: string) => splitSentences(text, minSentenceLength);
    return new BufferedSentenceStream(split, minSentenceLength, streamContextLength);
  }
}
43
+
44
/** Word tokenizer backed by `splitWords`. */
export class WordTokenizer extends tokenizer.WordTokenizer {
  #ignorePunctuation: boolean;

  /** @param ignorePunctuation - Forwarded as the second argument of every `splitWords` call */
  constructor(ignorePunctuation = true) {
    super();
    this.#ignorePunctuation = ignorePunctuation;
  }

  /**
   * Splits `text` with `splitWords` and returns the first element of every token it yields.
   * The `language` argument is accepted but not used.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  tokenize(text: string, language?: string): string[] {
    const pieces = splitWords(text, this.#ignorePunctuation);
    return pieces.map((piece) => piece[0]);
  }

  /**
   * Builds a {@link BufferedWordStream} that splits with `splitWords`; both numeric stream
   * arguments are fixed at 1. The `language` argument is accepted but not used.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  stream(language?: string): tokenizer.WordStream {
    const ignorePunctuation = this.#ignorePunctuation;
    const split = (text: string) => splitWords(text, ignorePunctuation);
    return new BufferedWordStream(split, 1, 1);
  }
}
66
+
67
/** Delegates to `hyphenator.hyphenateWord` and returns its result unchanged. */
export const hyphenateWord = (word: string): string[] => hyphenator.hyphenateWord(word);
70
+
71
/** Runs `splitParagraphs` on `text` and returns the first element of every token it yields. */
export const tokenizeParagraphs = (text: string): string[] => {
  const pieces = splitParagraphs(text);
  return pieces.map((piece) => piece[0]);
};