@livekit/agents 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +40 -0
- package/dist/audio.js +17 -30
- package/dist/audio.js.map +1 -1
- package/dist/cli.js +3 -14
- package/dist/cli.js.map +1 -1
- package/dist/http_server.d.ts +1 -1
- package/dist/http_server.js +5 -9
- package/dist/http_server.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -2
- package/dist/index.js.map +1 -1
- package/dist/ipc/job_executor.js +3 -5
- package/dist/ipc/job_executor.js.map +1 -1
- package/dist/ipc/job_main.d.ts +1 -1
- package/dist/ipc/proc_job_executor.js +66 -80
- package/dist/ipc/proc_job_executor.js.map +1 -1
- package/dist/ipc/proc_pool.d.ts +3 -3
- package/dist/ipc/proc_pool.d.ts.map +1 -1
- package/dist/ipc/proc_pool.js +16 -11
- package/dist/ipc/proc_pool.js.map +1 -1
- package/dist/job.js +56 -73
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.d.ts +66 -0
- package/dist/llm/chat_context.d.ts.map +1 -0
- package/dist/llm/chat_context.js +93 -0
- package/dist/llm/chat_context.js.map +1 -0
- package/dist/llm/function_context.d.ts +19 -1
- package/dist/llm/function_context.d.ts.map +1 -1
- package/dist/llm/function_context.js +54 -18
- package/dist/llm/function_context.js.map +1 -1
- package/dist/llm/function_context.test.d.ts +2 -0
- package/dist/llm/function_context.test.d.ts.map +1 -0
- package/dist/llm/function_context.test.js +218 -0
- package/dist/llm/function_context.test.js.map +1 -0
- package/dist/llm/index.d.ts +3 -2
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +3 -2
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.d.ts +53 -0
- package/dist/llm/llm.d.ts.map +1 -0
- package/dist/llm/llm.js +45 -0
- package/dist/llm/llm.js.map +1 -0
- package/dist/multimodal/agent_playout.d.ts +1 -1
- package/dist/multimodal/agent_playout.js +116 -153
- package/dist/multimodal/agent_playout.js.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts +4 -3
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +214 -237
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_output.d.ts +30 -0
- package/dist/pipeline/agent_output.d.ts.map +1 -0
- package/dist/pipeline/agent_output.js +155 -0
- package/dist/pipeline/agent_output.js.map +1 -0
- package/dist/pipeline/agent_playout.d.ts +38 -0
- package/dist/pipeline/agent_playout.d.ts.map +1 -0
- package/dist/pipeline/agent_playout.js +142 -0
- package/dist/pipeline/agent_playout.js.map +1 -0
- package/dist/pipeline/human_input.d.ts +28 -0
- package/dist/pipeline/human_input.d.ts.map +1 -0
- package/dist/pipeline/human_input.js +134 -0
- package/dist/pipeline/human_input.js.map +1 -0
- package/dist/pipeline/index.d.ts +2 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +5 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/pipeline_agent.d.ts +134 -0
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -0
- package/dist/pipeline/pipeline_agent.js +661 -0
- package/dist/pipeline/pipeline_agent.js.map +1 -0
- package/dist/pipeline/speech_handle.d.ts +27 -0
- package/dist/pipeline/speech_handle.d.ts.map +1 -0
- package/dist/pipeline/speech_handle.js +102 -0
- package/dist/pipeline/speech_handle.js.map +1 -0
- package/dist/plugin.js +7 -20
- package/dist/plugin.js.map +1 -1
- package/dist/stt/index.d.ts +1 -2
- package/dist/stt/index.d.ts.map +1 -1
- package/dist/stt/index.js +1 -2
- package/dist/stt/index.js.map +1 -1
- package/dist/stt/stt.d.ts +62 -24
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +77 -27
- package/dist/stt/stt.js.map +1 -1
- package/dist/tokenize/basic/basic.d.ts +16 -0
- package/dist/tokenize/basic/basic.d.ts.map +1 -0
- package/dist/tokenize/basic/basic.js +50 -0
- package/dist/tokenize/basic/basic.js.map +1 -0
- package/dist/tokenize/basic/hyphenator.d.ts +17 -0
- package/dist/tokenize/basic/hyphenator.d.ts.map +1 -0
- package/dist/tokenize/basic/hyphenator.js +420 -0
- package/dist/tokenize/basic/hyphenator.js.map +1 -0
- package/dist/tokenize/basic/index.d.ts +2 -0
- package/dist/tokenize/basic/index.d.ts.map +1 -0
- package/dist/tokenize/basic/index.js +5 -0
- package/dist/tokenize/basic/index.js.map +1 -0
- package/dist/tokenize/basic/paragraph.d.ts +5 -0
- package/dist/tokenize/basic/paragraph.d.ts.map +1 -0
- package/dist/tokenize/basic/paragraph.js +38 -0
- package/dist/tokenize/basic/paragraph.js.map +1 -0
- package/dist/tokenize/basic/sentence.d.ts +5 -0
- package/dist/tokenize/basic/sentence.d.ts.map +1 -0
- package/dist/tokenize/basic/sentence.js +60 -0
- package/dist/tokenize/basic/sentence.js.map +1 -0
- package/dist/tokenize/basic/word.d.ts +5 -0
- package/dist/tokenize/basic/word.d.ts.map +1 -0
- package/dist/tokenize/basic/word.js +23 -0
- package/dist/tokenize/basic/word.js.map +1 -0
- package/dist/tokenize/index.d.ts +5 -0
- package/dist/tokenize/index.d.ts.map +1 -0
- package/dist/tokenize/index.js +8 -0
- package/dist/tokenize/index.js.map +1 -0
- package/dist/tokenize/token_stream.d.ts +36 -0
- package/dist/tokenize/token_stream.d.ts.map +1 -0
- package/dist/tokenize/token_stream.js +136 -0
- package/dist/tokenize/token_stream.js.map +1 -0
- package/dist/tokenize/tokenizer.d.ts +55 -0
- package/dist/tokenize/tokenizer.d.ts.map +1 -0
- package/dist/tokenize/tokenizer.js +117 -0
- package/dist/tokenize/tokenizer.js.map +1 -0
- package/dist/transcription.js +78 -89
- package/dist/transcription.js.map +1 -1
- package/dist/tts/index.d.ts +1 -3
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +1 -3
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/tts.d.ts +66 -37
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +79 -74
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.d.ts +21 -6
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +120 -76
- package/dist/utils.js.map +1 -1
- package/dist/vad.d.ts +43 -39
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +51 -4
- package/dist/vad.js.map +1 -1
- package/dist/worker.d.ts +1 -1
- package/dist/worker.js +257 -247
- package/dist/worker.js.map +1 -1
- package/package.json +4 -3
- package/src/index.ts +16 -2
- package/src/ipc/proc_pool.ts +4 -4
- package/src/llm/chat_context.ts +147 -0
- package/src/llm/function_context.test.ts +248 -0
- package/src/llm/function_context.ts +77 -18
- package/src/llm/index.ts +21 -2
- package/src/llm/llm.ts +102 -0
- package/src/multimodal/multimodal_agent.ts +19 -6
- package/src/pipeline/agent_output.ts +185 -0
- package/src/pipeline/agent_playout.ts +187 -0
- package/src/pipeline/human_input.ts +166 -0
- package/src/pipeline/index.ts +15 -0
- package/src/pipeline/pipeline_agent.ts +917 -0
- package/src/pipeline/speech_handle.ts +136 -0
- package/src/stt/index.ts +8 -2
- package/src/stt/stt.ts +98 -31
- package/src/tokenize/basic/basic.ts +73 -0
- package/src/tokenize/basic/hyphenator.ts +436 -0
- package/src/tokenize/basic/index.ts +5 -0
- package/src/tokenize/basic/paragraph.ts +43 -0
- package/src/tokenize/basic/sentence.ts +69 -0
- package/src/tokenize/basic/word.ts +27 -0
- package/src/tokenize/index.ts +16 -0
- package/src/tokenize/token_stream.ts +163 -0
- package/src/tokenize/tokenizer.ts +152 -0
- package/src/tts/index.ts +1 -20
- package/src/tts/tts.ts +110 -57
- package/src/utils.ts +95 -25
- package/src/vad.ts +86 -45
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/stt/stream_adapter.d.ts +0 -19
- package/dist/stt/stream_adapter.d.ts.map +0 -1
- package/dist/stt/stream_adapter.js +0 -96
- package/dist/stt/stream_adapter.js.map +0 -1
- package/dist/tokenize.d.ts +0 -15
- package/dist/tokenize.d.ts.map +0 -1
- package/dist/tokenize.js +0 -12
- package/dist/tokenize.js.map +0 -1
- package/dist/tts/stream_adapter.d.ts +0 -19
- package/dist/tts/stream_adapter.d.ts.map +0 -1
- package/dist/tts/stream_adapter.js +0 -111
- package/dist/tts/stream_adapter.js.map +0 -1
- package/src/stt/stream_adapter.ts +0 -104
- package/src/tokenize.ts +0 -22
- package/src/tts/stream_adapter.ts +0 -93
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { randomUUID } from 'crypto';
|
|
5
|
+
import type { LLMStream } from '../llm/index.js';
|
|
6
|
+
import { Future } from '../utils.js';
|
|
7
|
+
import type { SynthesisHandle } from './agent_output.js';
|
|
8
|
+
|
|
9
|
+
/**
 * Handle for one piece of agent speech.
 *
 * A handle is created up front with its configuration flags, and later bound to a text source
 * and a {@link SynthesisHandle} through {@link SpeechHandle.initialize}. Callers can wait for
 * that binding with {@link SpeechHandle.waitForInitialization}.
 */
export class SpeechHandle {
  readonly #id: string;
  readonly #allowInterruptions: boolean;
  readonly #addToChatCtx: boolean;
  readonly #isReply: boolean;
  readonly #userQuestion: string;
  #userCommitted = false;
  /** Resolved by `initialize()`, rejected by `cancel()`. */
  readonly #initFut = new Future();
  #speechCommitted = false;
  #source?: string | LLMStream | AsyncIterable<string>;
  #synthesisHandle?: SynthesisHandle;
  #initialized = false;

  /**
   * @param id - identifier exposed through the `id` getter
   * @param allowInterruptions - when `false`, `interrupt()` throws
   * @param addToChatCtx - flag exposed through the `addToChatCtx` getter
   * @param isReply - whether this speech was created as a reply to a user question
   * @param userQuestion - the question being replied to (empty for non-reply speech)
   */
  constructor(
    id: string,
    allowInterruptions: boolean,
    addToChatCtx: boolean,
    isReply: boolean,
    userQuestion: string,
  ) {
    this.#id = id;
    this.#allowInterruptions = allowInterruptions;
    this.#addToChatCtx = addToChatCtx;
    this.#isReply = isReply;
    this.#userQuestion = userQuestion;
  }

  /** Creates a handle with a random UUID, marked as a reply to `userQuestion`. */
  static createAssistantReply(
    allowInterruptions: boolean,
    addToChatCtx: boolean,
    userQuestion: string,
  ): SpeechHandle {
    return new SpeechHandle(randomUUID(), allowInterruptions, addToChatCtx, true, userQuestion);
  }

  /** Creates a handle with a random UUID that is not a reply (empty user question). */
  static createAssistantSpeech(allowInterruptions: boolean, addToChatCtx: boolean): SpeechHandle {
    return new SpeechHandle(randomUUID(), allowInterruptions, addToChatCtx, false, '');
  }

  /** Waits until `initialize()` has been called; rejects if the handle is cancelled first. */
  async waitForInitialization() {
    await this.#initFut.await;
  }

  /**
   * Binds the text source and synthesis handle, then releases anyone waiting on initialization.
   *
   * @throws Error if the speech is already reported as interrupted
   */
  initialize(source: string | LLMStream | AsyncIterable<string>, synthesisHandle: SynthesisHandle) {
    if (this.interrupted) {
      throw new Error('speech was interrupted');
    }

    this.#source = source;
    this.#synthesisHandle = synthesisHandle;
    this.#initialized = true;
    this.#initFut.resolve();
  }

  markUserCommitted() {
    this.#userCommitted = true;
  }

  markSpeechCommitted() {
    this.#speechCommitted = true;
  }

  get userCommitted(): boolean {
    return this.#userCommitted;
  }

  get speechCommitted(): boolean {
    return this.#speechCommitted;
  }

  get id(): string {
    return this.#id;
  }

  get allowInterruptions(): boolean {
    return this.#allowInterruptions;
  }

  get addToChatCtx(): boolean {
    return this.#addToChatCtx;
  }

  /**
   * The text source bound by `initialize()`.
   *
   * @throws Error if the handle has not been initialized
   */
  get source(): string | LLMStream | AsyncIterable<string> {
    // Compare against undefined rather than using truthiness: an empty string is a
    // legitimate source and must not be reported as "not initialized".
    if (this.#source === undefined) {
      throw new Error('speech not initialized');
    }
    return this.#source;
  }

  /**
   * The synthesis handle bound by `initialize()` or assigned through the setter.
   *
   * @throws Error if no synthesis handle has been set
   */
  get synthesisHandle(): SynthesisHandle {
    if (this.#synthesisHandle === undefined) {
      throw new Error('speech not initialized');
    }
    return this.#synthesisHandle;
  }

  set synthesisHandle(handle: SynthesisHandle) {
    this.#synthesisHandle = handle;
  }

  get initialized(): boolean {
    return this.#initialized;
  }

  get isReply(): boolean {
    return this.#isReply;
  }

  get userQuestion(): string {
    return this.#userQuestion;
  }

  /**
   * Whether the bound synthesis handle reports an interruption.
   *
   * NOTE(review): this is `false` while no synthesis handle is set, even if `cancel()` was
   * already called — confirm that callers do not rely on it before initialization.
   */
  get interrupted(): boolean {
    return !!this.#synthesisHandle?.interrupted;
  }

  /**
   * Cancels the speech if interruptions are allowed.
   *
   * @throws Error if this handle was created with `allowInterruptions = false`
   */
  interrupt() {
    if (!this.#allowInterruptions) {
      throw new Error('interruptions are not allowed');
    }
    this.cancel();
  }

  /**
   * Rejects the initialization future and interrupts the synthesis handle, if one is set.
   * Unlike `interrupt()`, this does not check `allowInterruptions`.
   */
  cancel() {
    this.#initFut.reject(new Error());
    this.#synthesisHandle?.interrupt();
  }
}
|
package/src/stt/index.ts
CHANGED
|
@@ -2,5 +2,11 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
export {
|
|
6
|
-
|
|
5
|
+
export {
|
|
6
|
+
type SpeechEvent,
|
|
7
|
+
type SpeechData,
|
|
8
|
+
type STTCapabilities,
|
|
9
|
+
SpeechEventType,
|
|
10
|
+
STT,
|
|
11
|
+
SpeechStream,
|
|
12
|
+
} from './stt.js';
|
package/src/stt/stt.ts
CHANGED
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
5
|
-
import
|
|
5
|
+
import { AsyncIterableQueue } from '../utils.js';
|
|
6
6
|
|
|
7
|
+
/** Indicates start/middle/end of speech */
|
|
7
8
|
export enum SpeechEventType {
|
|
8
9
|
/**
|
|
9
10
|
* Indicate the start of speech.
|
|
10
11
|
* If the STT doesn't support this event, this will be emitted at the same time
|
|
11
|
-
* as the first
|
|
12
|
+
* as the first INTERIM_TRANSCRIPT.
|
|
12
13
|
*/
|
|
13
14
|
START_OF_SPEECH = 0,
|
|
14
15
|
/**
|
|
@@ -27,6 +28,7 @@ export enum SpeechEventType {
|
|
|
27
28
|
END_OF_SPEECH = 3,
|
|
28
29
|
}
|
|
29
30
|
|
|
31
|
+
/** SpeechData contains metadata about this {@link SpeechEvent}. */
|
|
30
32
|
export interface SpeechData {
|
|
31
33
|
language: string;
|
|
32
34
|
text: string;
|
|
@@ -35,51 +37,116 @@ export interface SpeechData {
|
|
|
35
37
|
confidence: number;
|
|
36
38
|
}
|
|
37
39
|
|
|
38
|
-
|
|
40
|
+
/** SpeechEvent is a packet of speech-to-text data. */
|
|
41
|
+
export interface SpeechEvent {
  /** Which kind of event this is, as a {@link SpeechEventType} value. */
  type: SpeechEventType;
  /** Transcription candidates for this event. NOTE(review): ordering is not visible here — confirm. */
  alternatives: SpeechData[];
}
|
|
41
45
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
/**
|
|
47
|
+
* Describes the capabilities of the STT provider.
|
|
48
|
+
*
|
|
49
|
+
* @remarks
|
|
50
|
+
* At present, the framework only supports providers that have a streaming endpoint.
|
|
51
|
+
*/
|
|
52
|
+
export interface STTCapabilities {
  /** Whether the provider exposes a streaming endpoint. */
  streaming: boolean;
  /** Presumably whether the provider emits interim (non-final) transcripts — TODO confirm. */
  interimResults: boolean;
}
|
|
47
56
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
57
|
+
/**
|
|
58
|
+
* An instance of a speech-to-text adapter.
|
|
59
|
+
*
|
|
60
|
+
* @remarks
|
|
61
|
+
* This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
|
|
62
|
+
* exports its own child STT class, which inherits this class's methods.
|
|
63
|
+
*/
|
|
64
|
+
export abstract class STT {
  /** Capabilities supplied by the concrete provider at construction time. */
  #capabilities: STTCapabilities;

  /** @param capabilities - what the concrete provider supports */
  constructor(capabilities: STTCapabilities) {
    this.#capabilities = capabilities;
  }

  /** The capabilities this STT was constructed with. */
  get capabilities(): STTCapabilities {
    return this.#capabilities;
  }

  /**
   * Creates a {@link SpeechStream} into which audio frames can be pushed and from which
   * transcription events can be read.
   */
  abstract stream(): SpeechStream;
}
|
|
63
82
|
|
|
64
|
-
|
|
83
|
+
/**
|
|
84
|
+
* An instance of a speech-to-text stream, as an asynchronous iterable iterator.
|
|
85
|
+
*
|
|
86
|
+
* @example Looping through transcription events
|
|
87
|
+
* ```ts
|
|
88
|
+
* for await (const event of stream) {
|
|
89
|
+
* if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {
|
|
90
|
+
* console.log(event.alternatives[0].text)
|
|
91
|
+
* }
|
|
92
|
+
* }
|
|
93
|
+
* ```
|
|
94
|
+
*
|
|
95
|
+
* @remarks
|
|
96
|
+
* This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
|
|
97
|
+
* exports its own child SpeechStream class, which inherits this class's methods.
|
|
98
|
+
*/
|
|
99
|
+
export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {
  /** Marker placed on the input queue by `flush()`. */
  protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
  /** Incoming audio frames, interleaved with flush markers. */
  protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();
  /** Outgoing speech events returned by `next()`. */
  protected queue = new AsyncIterableQueue<SpeechEvent>();
  /** Set once `close()` has been called. */
  protected closed = false;

  /**
   * Queues an audio frame for transcription.
   *
   * @throws Error if the input has been ended or the stream has been closed
   */
  pushFrame(frame: AudioFrame): void {
    if (this.input.closed) throw new Error('Input is closed');
    if (this.closed) throw new Error('Stream is closed');
    this.input.put(frame);
  }

  /**
   * Queues a flush marker, signalling that the audio pushed so far should be processed.
   *
   * @throws Error if the input has been ended or the stream has been closed
   */
  flush(): void {
    if (this.input.closed) throw new Error('Input is closed');
    if (this.closed) throw new Error('Stream is closed');
    this.input.put(SpeechStream.FLUSH_SENTINEL);
  }

  /**
   * Ends the input side of the stream; further pushes and flushes will throw.
   *
   * @throws Error if the input has already been ended or the stream has been closed
   */
  endInput(): void {
    if (this.input.closed) throw new Error('Input is closed');
    if (this.closed) throw new Error('Stream is closed');
    this.input.close();
  }

  /** Returns the next speech event from the output queue. */
  next(): Promise<IteratorResult<SpeechEvent>> {
    return this.queue.next();
  }

  /** Closes the input queue and the output queue, then marks the stream as closed. */
  close(): void {
    this.input.close();
    this.queue.close();
    this.closed = true;
  }

  /** The stream is its own async iterator. */
  [Symbol.asyncIterator](): SpeechStream {
    return this;
  }
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import * as tokenizer from '../index.js';
|
|
5
|
+
import { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js';
|
|
6
|
+
import { hyphenator } from './hyphenator.js';
|
|
7
|
+
import { splitParagraphs } from './paragraph.js';
|
|
8
|
+
import { splitSentences } from './sentence.js';
|
|
9
|
+
import { splitWords } from './word.js';
|
|
10
|
+
|
|
11
|
+
/** Settings captured by {@link SentenceTokenizer} at construction time. */
interface TokenizerOptions {
  /** Language tag; stored but not read by the code in this file. */
  language: string;
  /** Passed to `splitSentences` and to the buffered sentence stream. */
  minSentenceLength: number;
  /** Passed to the buffered sentence stream as its third argument. */
  streamContextLength: number;
}
|
|
16
|
+
|
|
17
|
+
/** Sentence tokenizer backed by the basic `splitSentences` implementation. */
export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
  #config: TokenizerOptions;

  /**
   * @param language - language tag stored in the configuration
   * @param minSentenceLength - forwarded to `splitSentences` and the buffered stream
   * @param streamContextLength - forwarded to the buffered stream
   */
  constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {
    super();
    this.#config = { language, minSentenceLength, streamContextLength };
  }

  /** Splits `text` into sentences and returns the first element of each resulting token. */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  tokenize(text: string, language?: string): string[] {
    const { minSentenceLength } = this.#config;
    const tokens = splitSentences(text, minSentenceLength);
    return tokens.map((tok) => tok[0]);
  }

  /** Creates a buffered sentence stream that uses the same splitting settings. */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  stream(language?: string): tokenizer.SentenceStream {
    const { minSentenceLength, streamContextLength } = this.#config;
    const split = (text: string) => splitSentences(text, minSentenceLength);
    return new BufferedSentenceStream(split, minSentenceLength, streamContextLength);
  }
}
|
|
43
|
+
|
|
44
|
+
/** Word tokenizer backed by the basic `splitWords` implementation. */
export class WordTokenizer extends tokenizer.WordTokenizer {
  #ignorePunctuation: boolean;

  /** @param ignorePunctuation - forwarded to `splitWords` on every call */
  constructor(ignorePunctuation = true) {
    super();
    this.#ignorePunctuation = ignorePunctuation;
  }

  /** Splits `text` into words and returns the first element of each resulting token. */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  tokenize(text: string, language?: string): string[] {
    const tokens = splitWords(text, this.#ignorePunctuation);
    return tokens.map((tok) => tok[0]);
  }

  /** Creates a buffered word stream, passing 1 for both numeric stream arguments. */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  stream(language?: string): tokenizer.WordStream {
    const ignorePunctuation = this.#ignorePunctuation;
    const split = (text: string) => splitWords(text, ignorePunctuation);
    return new BufferedWordStream(split, 1, 1);
  }
}
|
|
66
|
+
|
|
67
|
+
/** Hyphenates `word` by delegating to the shared `hyphenator` instance. */
export const hyphenateWord = (word: string): string[] => hyphenator.hyphenateWord(word);
|
|
70
|
+
|
|
71
|
+
/** Splits `text` into paragraphs and returns the first element of each resulting token. */
export const tokenizeParagraphs = (text: string): string[] => {
  const tokens = splitParagraphs(text);
  return tokens.map((tok) => tok[0]);
};
|