@livekit/agents-plugin-livekit 1.0.50 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.js +1 -1
- package/dist/turn_detector/base.cjs +7 -2
- package/dist/turn_detector/base.cjs.map +1 -1
- package/dist/turn_detector/base.d.cts +2 -0
- package/dist/turn_detector/base.d.ts +2 -0
- package/dist/turn_detector/base.d.ts.map +1 -1
- package/dist/turn_detector/base.js +8 -3
- package/dist/turn_detector/base.js.map +1 -1
- package/dist/turn_detector/multilingual.cjs +1 -0
- package/dist/turn_detector/multilingual.cjs.map +1 -1
- package/dist/turn_detector/multilingual.d.ts.map +1 -1
- package/dist/turn_detector/multilingual.js +2 -1
- package/dist/turn_detector/multilingual.js.map +1 -1
- package/package.json +3 -3
- package/src/turn_detector/base.ts +10 -3
- package/src/turn_detector/multilingual.ts +3 -1
package/dist/index.cjs
CHANGED
package/dist/index.js
CHANGED
|
@@ -146,6 +146,12 @@ class EOUModel {
|
|
|
146
146
|
loadLanguages;
|
|
147
147
|
languagesFuture = new import_agents.Future();
|
|
148
148
|
#logger = (0, import_agents.log)();
|
|
149
|
+
get model() {
|
|
150
|
+
return import_constants.MODEL_REVISIONS[this.modelType];
|
|
151
|
+
}
|
|
152
|
+
get provider() {
|
|
153
|
+
return "livekit";
|
|
154
|
+
}
|
|
149
155
|
constructor(opts) {
|
|
150
156
|
const {
|
|
151
157
|
modelType = "en",
|
|
@@ -176,8 +182,7 @@ class EOUModel {
|
|
|
176
182
|
const languages = await this.languagesFuture.await;
|
|
177
183
|
let langData = languages[lang];
|
|
178
184
|
if (langData === void 0 && lang.includes("-")) {
|
|
179
|
-
|
|
180
|
-
langData = languages[baseLang];
|
|
185
|
+
langData = languages[(0, import_agents.getBaseLanguage)(lang)];
|
|
181
186
|
}
|
|
182
187
|
if (langData === void 0) {
|
|
183
188
|
this.#logger.warn(`Language ${language} not supported by EOU model`);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/turn_detector/base.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type PreTrainedTokenizer } from '@huggingface/transformers';\nimport type { ipc, llm } from '@livekit/agents';\nimport { Future, InferenceRunner, getJobContext, log } from '@livekit/agents';\nimport { readFileSync } from 'node:fs';\nimport os from 'node:os';\nimport { InferenceSession, Tensor } from 'onnxruntime-node';\nimport { downloadFileToCacheDir } from '../hf_utils.js';\nimport {\n type EOUModelType,\n HG_MODEL_REPO,\n MAX_HISTORY_TURNS,\n MODEL_REVISIONS,\n ONNX_FILEPATH,\n} from './constants.js';\nimport { normalizeText } from './utils.js';\n\ntype RawChatItem = { role: string; content: string };\n\ntype EOUOutput = { eouProbability: number; input: string; duration: number };\n\nexport abstract class EOURunnerBase extends InferenceRunner<RawChatItem[], EOUOutput> {\n private modelType: EOUModelType;\n private modelRevision: string;\n\n private session?: InferenceSession;\n private tokenizer?: PreTrainedTokenizer;\n\n #logger = log();\n\n constructor(modelType: EOUModelType) {\n super();\n this.modelType = modelType;\n this.modelRevision = MODEL_REVISIONS[modelType];\n }\n\n async initialize() {\n const { AutoTokenizer } = await import('@huggingface/transformers');\n\n try {\n const onnxModelPath = await downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: ONNX_FILEPATH,\n revision: this.modelRevision,\n localFileOnly: true,\n });\n\n // TODO(brian): support session config once onnxruntime-node supports it\n const sessOptions: InferenceSession.SessionOptions = {\n intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)),\n interOpNumThreads: 1,\n executionProviders: [{ name: 'cpu' }],\n };\n\n this.session = await InferenceSession.create(onnxModelPath, sessOptions);\n\n this.tokenizer = await AutoTokenizer.from_pretrained('livekit/turn-detector', {\n revision: 
this.modelRevision,\n local_files_only: true,\n });\n } catch (e) {\n const errorMessage = String(e);\n\n // Check if the error is related to missing local files\n if (\n errorMessage.includes('local_files_only=true') ||\n errorMessage.includes('file was not found locally') ||\n errorMessage.includes('File not found in cache')\n ) {\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: Required model files not found locally.\\n\\n` +\n `This usually means you need to download the model files first. Please run one of these commands:\\n\\n` +\n ` If using Node.js starter template:\\n` +\n ` pnpm download-files\\n\\n` +\n ` If using the agent directly:\\n` +\n ` node ./your_agent.ts download-files\\n\\n` +\n `Then try running your application again.\\n\\n` +\n `Original error: ${e}`,\n );\n }\n\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: ${e}`,\n );\n }\n }\n\n async run(data: RawChatItem[]) {\n const startTime = Date.now();\n\n const text = this.formatChatCtx(data);\n\n const inputs = this.tokenizer!.encode(text, { add_special_tokens: false });\n this.#logger.debug({ inputs: JSON.stringify(inputs), text }, 'EOU inputs');\n\n const outputs = await this.session!.run(\n { input_ids: new Tensor('int64', inputs, [1, inputs.length]) },\n ['prob'],\n );\n\n const probData = outputs.prob!.data;\n // should be the logits of the last token\n const eouProbability = probData[probData.length - 1] as number;\n const endTime = Date.now();\n\n const result = {\n eouProbability,\n input: text,\n duration: (endTime - startTime) / 1000,\n };\n\n this.#logger.child({ result }).debug('eou prediction');\n return result;\n }\n\n async close() {\n await this.session?.release();\n }\n\n private formatChatCtx(chatCtx: RawChatItem[]): string {\n const newChatCtx: RawChatItem[] = [];\n let lastMsg: RawChatItem | undefined = undefined;\n\n for (const msg of chatCtx) {\n const content = 
msg.content;\n if (!content) continue;\n\n const norm = normalizeText(content);\n\n // need to combine adjacent turns together to match training data\n if (lastMsg !== undefined && lastMsg.role === msg.role) {\n lastMsg.content += ` ${norm}`;\n } else {\n newChatCtx.push({ role: msg.role, content: norm });\n lastMsg = newChatCtx[newChatCtx.length - 1]!;\n }\n }\n\n // TODO(brian): investigate add_special_tokens options\n const convoText = this.tokenizer!.apply_chat_template(newChatCtx, {\n add_generation_prompt: false,\n tokenize: false,\n }) as string;\n\n // remove the EOU token from current utterance\n return convoText.slice(0, convoText.lastIndexOf('<|im_end|>'));\n }\n}\n\nexport interface EOUModelOptions {\n modelType: EOUModelType;\n executor?: ipc.InferenceExecutor;\n unlikelyThreshold?: number;\n loadLanguages?: boolean;\n}\n\ntype LanguageData = {\n threshold: number;\n};\n\nexport abstract class EOUModel {\n private modelType: EOUModelType;\n private executor: ipc.InferenceExecutor;\n private threshold: number | undefined;\n private loadLanguages: boolean;\n\n protected languagesFuture: Future<Record<string, LanguageData>> = new Future();\n\n #logger = log();\n\n constructor(opts: EOUModelOptions) {\n const {\n modelType = 'en',\n executor = getJobContext().inferenceExecutor,\n unlikelyThreshold,\n loadLanguages = true,\n } = opts;\n\n this.modelType = modelType;\n this.executor = executor;\n this.threshold = unlikelyThreshold;\n this.loadLanguages = loadLanguages;\n\n if (loadLanguages) {\n downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: 'languages.json',\n revision: MODEL_REVISIONS[modelType],\n localFileOnly: true,\n }).then((path) => {\n this.languagesFuture.resolve(JSON.parse(readFileSync(path, 'utf8')));\n });\n }\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if (language === undefined) {\n return this.threshold;\n }\n\n const lang = language.toLowerCase();\n const languages = await 
this.languagesFuture.await;\n\n // try the full language code first\n let langData = languages[lang];\n\n if (langData === undefined && lang.includes('-')) {\n const baseLang = lang.split('-')[0]!;\n langData = languages[baseLang];\n }\n\n if (langData === undefined) {\n this.#logger.warn(`Language ${language} not supported by EOU model`);\n return undefined;\n }\n\n // if a custom threshold is provided, use it\n return this.threshold !== undefined ? this.threshold : langData.threshold;\n }\n\n async supportsLanguage(language?: string): Promise<boolean> {\n return (await this.unlikelyThreshold(language)) !== undefined;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n let messages: RawChatItem[] = [];\n\n for (const message of chatCtx.items) {\n // skip system and developer messages or tool call messages\n if (message.type !== 'message' || message.role in ['system', 'developer']) {\n continue;\n }\n\n for (const content of message.content) {\n if (typeof content === 'string') {\n messages.push({\n role: message.role === 'assistant' ? 
'assistant' : 'user',\n content: content,\n });\n }\n }\n }\n\n messages = messages.slice(-MAX_HISTORY_TURNS);\n\n const result = await this.executor.doInference(this.inferenceMethod(), messages);\n if (result === undefined) {\n throw new Error('EOU inference should always returns a result');\n }\n\n return (result as EOUOutput).eouProbability;\n }\n\n abstract inferenceMethod(): string;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,0BAAyC;AAEzC,oBAA4D;AAC5D,qBAA6B;AAC7B,qBAAe;AACf,8BAAyC;AACzC,sBAAuC;AACvC,uBAMO;AACP,mBAA8B;AAMvB,MAAe,sBAAsB,8BAA0C;AAAA,EAC5E;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EAER,cAAU,mBAAI;AAAA,EAEd,YAAY,WAAyB;AACnC,UAAM;AACN,SAAK,YAAY;AACjB,SAAK,gBAAgB,iCAAgB,SAAS;AAAA,EAChD;AAAA,EAEA,MAAM,aAAa;AACjB,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,2BAA2B;AAElE,QAAI;AACF,YAAM,gBAAgB,UAAM,wCAAuB;AAAA,QACjD,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,KAAK;AAAA,QACf,eAAe;AAAA,MACjB,CAAC;AAGD,YAAM,cAA+C;AAAA,QACnD,mBAAmB,KAAK,IAAI,GAAG,KAAK,MAAM,eAAAA,QAAG,KAAK,EAAE,SAAS,CAAC,CAAC;AAAA,QAC/D,mBAAmB;AAAA,QACnB,oBAAoB,CAAC,EAAE,MAAM,MAAM,CAAC;AAAA,MACtC;AAEA,WAAK,UAAU,MAAM,yCAAiB,OAAO,eAAe,WAAW;AAEvE,WAAK,YAAY,MAAM,cAAc,gBAAgB,yBAAyB;AAAA,QAC5E,UAAU,KAAK;AAAA,QACf,kBAAkB;AAAA,MACpB,CAAC;AAAA,IACH,SAAS,GAAG;AACV,YAAM,eAAe,OAAO,CAAC;AAG7B,UACE,aAAa,SAAS,uBAAuB,KAC7C,aAAa,SAAS,4BAA4B,KAClD,aAAa,SAAS,yBAAyB,GAC/C;AACA,cAAM,IAAI;AAAA,UACR,+CAA+C,KAAK,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAOxC,CAAC;AAAA,QACxB;AAAA,MACF;AAEA,YAAM,IAAI;AAAA,QACR,+CAA+C,KAAK,SAAS,uBAAuB,CAAC;AAAA,MACvF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,IAAI,MAAqB;AAC7B,UAAM,YAAY,KAAK,IAAI;AAE3B,UAAM,OAAO,KAAK,cAAc,IAAI;AAEpC,UAAM,SAAS,KAAK,UAAW,OAAO,MAAM,EAAE,oBAAoB,MAAM,CAAC;AACzE,SAAK,QAAQ,MAAM,EAAE,QAAQ,KAAK,UAAU,MAAM,GAAG,KAAK,GAAG,YAAY;AAEzE,UAAM,UAAU,MAAM,KAAK,QAAS;AAAA,MAClC,EAAE,WAAW,IAAI,+BAAO,SAAS,QAAQ,CAAC,GAAG,OAAO,MAAM,CAAC,EAAE;AAAA,MAC7D,CAAC,MAAM;AAAA,IACT;AAEA,UAAM,WAAW,QAAQ,KAAM;AAE/B,UAAM,iBAAiB,SAAS,SAAS,SAAS,CAAC;AACnD,UAAM,UAAU,KAAK,
IAAI;AAEzB,UAAM,SAAS;AAAA,MACb;AAAA,MACA,OAAO;AAAA,MACP,WAAW,UAAU,aAAa;AAAA,IACpC;AAEA,SAAK,QAAQ,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,gBAAgB;AACrD,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ;AArHhB;AAsHI,YAAM,UAAK,YAAL,mBAAc;AAAA,EACtB;AAAA,EAEQ,cAAc,SAAgC;AACpD,UAAM,aAA4B,CAAC;AACnC,QAAI,UAAmC;AAEvC,eAAW,OAAO,SAAS;AACzB,YAAM,UAAU,IAAI;AACpB,UAAI,CAAC,QAAS;AAEd,YAAM,WAAO,4BAAc,OAAO;AAGlC,UAAI,YAAY,UAAa,QAAQ,SAAS,IAAI,MAAM;AACtD,gBAAQ,WAAW,IAAI,IAAI;AAAA,MAC7B,OAAO;AACL,mBAAW,KAAK,EAAE,MAAM,IAAI,MAAM,SAAS,KAAK,CAAC;AACjD,kBAAU,WAAW,WAAW,SAAS,CAAC;AAAA,MAC5C;AAAA,IACF;AAGA,UAAM,YAAY,KAAK,UAAW,oBAAoB,YAAY;AAAA,MAChE,uBAAuB;AAAA,MACvB,UAAU;AAAA,IACZ,CAAC;AAGD,WAAO,UAAU,MAAM,GAAG,UAAU,YAAY,YAAY,CAAC;AAAA,EAC/D;AACF;AAaO,MAAe,SAAS;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEE,kBAAwD,IAAI,qBAAO;AAAA,EAE7E,cAAU,mBAAI;AAAA,EAEd,YAAY,MAAuB;AACjC,UAAM;AAAA,MACJ,YAAY;AAAA,MACZ,eAAW,6BAAc,EAAE;AAAA,MAC3B;AAAA,MACA,gBAAgB;AAAA,IAClB,IAAI;AAEJ,SAAK,YAAY;AACjB,SAAK,WAAW;AAChB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AAErB,QAAI,eAAe;AACjB,kDAAuB;AAAA,QACrB,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,iCAAgB,SAAS;AAAA,QACnC,eAAe;AAAA,MACjB,CAAC,EAAE,KAAK,CAAC,SAAS;AAChB,aAAK,gBAAgB,QAAQ,KAAK,UAAM,6BAAa,MAAM,MAAM,CAAC,CAAC;AAAA,MACrE,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,kBAAkB,UAAgD;AACtE,QAAI,aAAa,QAAW;AAC1B,aAAO,KAAK;AAAA,IACd;AAEA,UAAM,OAAO,SAAS,YAAY;AAClC,UAAM,YAAY,MAAM,KAAK,gBAAgB;AAG7C,QAAI,WAAW,UAAU,IAAI;AAE7B,QAAI,aAAa,UAAa,KAAK,SAAS,GAAG,GAAG;AAChD,YAAM,WAAW,KAAK,MAAM,GAAG,EAAE,CAAC;AAClC,iBAAW,UAAU,QAAQ;AAAA,IAC/B;AAEA,QAAI,aAAa,QAAW;AAC1B,WAAK,QAAQ,KAAK,YAAY,QAAQ,6BAA6B;AACnE,aAAO;AAAA,IACT;AAGA,WAAO,KAAK,cAAc,SAAY,KAAK,YAAY,SAAS;AAAA,EAClE;AAAA,EAEA,MAAM,iBAAiB,UAAqC;AAC1D,WAAQ,MAAM,KAAK,kBAAkB,QAAQ,MAAO;AAAA,EACtD;AAAA;AAAA,EAGA,MAAM,iBAAiB,SAA0B,UAAkB,GAAoB;AACrF,QAAI,WAA0B,CAAC;AAE/B,eAAW,WAAW,QAAQ,OAAO;AAEnC,UAAI,QAAQ,SAAS,aAAa,QAAQ,QAAQ,CAAC,UAAU,WAAW,GAAG;AACzE;AAAA,MACF;AAEA,iBAAW,WAAW,QAAQ,SAAS;AACrC,YAAI,OAAO,YAAY,UAAU;AAC/B,mBAAS,KAAK;AAAA,YACZ,MAAM,QAAQ,SAAS,cAAc,cAAc;AAAA,YACnD;AAAA,UACF,CAA
C;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,MAAM,CAAC,kCAAiB;AAE5C,UAAM,SAAS,MAAM,KAAK,SAAS,YAAY,KAAK,gBAAgB,GAAG,QAAQ;AAC/E,QAAI,WAAW,QAAW;AACxB,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AAEA,WAAQ,OAAqB;AAAA,EAC/B;AAGF;","names":["os"]}
|
|
1
|
+
{"version":3,"sources":["../../src/turn_detector/base.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type PreTrainedTokenizer } from '@huggingface/transformers';\nimport type { ipc, llm } from '@livekit/agents';\nimport { Future, InferenceRunner, getBaseLanguage, getJobContext, log } from '@livekit/agents';\nimport { readFileSync } from 'node:fs';\nimport os from 'node:os';\nimport { InferenceSession, Tensor } from 'onnxruntime-node';\nimport { downloadFileToCacheDir } from '../hf_utils.js';\nimport {\n type EOUModelType,\n HG_MODEL_REPO,\n MAX_HISTORY_TURNS,\n MODEL_REVISIONS,\n ONNX_FILEPATH,\n} from './constants.js';\nimport { normalizeText } from './utils.js';\n\ntype RawChatItem = { role: string; content: string };\n\ntype EOUOutput = { eouProbability: number; input: string; duration: number };\n\nexport abstract class EOURunnerBase extends InferenceRunner<RawChatItem[], EOUOutput> {\n private modelType: EOUModelType;\n private modelRevision: string;\n\n private session?: InferenceSession;\n private tokenizer?: PreTrainedTokenizer;\n\n #logger = log();\n\n constructor(modelType: EOUModelType) {\n super();\n this.modelType = modelType;\n this.modelRevision = MODEL_REVISIONS[modelType];\n }\n\n async initialize() {\n const { AutoTokenizer } = await import('@huggingface/transformers');\n\n try {\n const onnxModelPath = await downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: ONNX_FILEPATH,\n revision: this.modelRevision,\n localFileOnly: true,\n });\n\n // TODO(brian): support session config once onnxruntime-node supports it\n const sessOptions: InferenceSession.SessionOptions = {\n intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)),\n interOpNumThreads: 1,\n executionProviders: [{ name: 'cpu' }],\n };\n\n this.session = await InferenceSession.create(onnxModelPath, sessOptions);\n\n this.tokenizer = await AutoTokenizer.from_pretrained('livekit/turn-detector', {\n 
revision: this.modelRevision,\n local_files_only: true,\n });\n } catch (e) {\n const errorMessage = String(e);\n\n // Check if the error is related to missing local files\n if (\n errorMessage.includes('local_files_only=true') ||\n errorMessage.includes('file was not found locally') ||\n errorMessage.includes('File not found in cache')\n ) {\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: Required model files not found locally.\\n\\n` +\n `This usually means you need to download the model files first. Please run one of these commands:\\n\\n` +\n ` If using Node.js starter template:\\n` +\n ` pnpm download-files\\n\\n` +\n ` If using the agent directly:\\n` +\n ` node ./your_agent.ts download-files\\n\\n` +\n `Then try running your application again.\\n\\n` +\n `Original error: ${e}`,\n );\n }\n\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: ${e}`,\n );\n }\n }\n\n async run(data: RawChatItem[]) {\n const startTime = Date.now();\n\n const text = this.formatChatCtx(data);\n\n const inputs = this.tokenizer!.encode(text, { add_special_tokens: false });\n this.#logger.debug({ inputs: JSON.stringify(inputs), text }, 'EOU inputs');\n\n const outputs = await this.session!.run(\n { input_ids: new Tensor('int64', inputs, [1, inputs.length]) },\n ['prob'],\n );\n\n const probData = outputs.prob!.data;\n // should be the logits of the last token\n const eouProbability = probData[probData.length - 1] as number;\n const endTime = Date.now();\n\n const result = {\n eouProbability,\n input: text,\n duration: (endTime - startTime) / 1000,\n };\n\n this.#logger.child({ result }).debug('eou prediction');\n return result;\n }\n\n async close() {\n await this.session?.release();\n }\n\n private formatChatCtx(chatCtx: RawChatItem[]): string {\n const newChatCtx: RawChatItem[] = [];\n let lastMsg: RawChatItem | undefined = undefined;\n\n for (const msg of chatCtx) {\n const 
content = msg.content;\n if (!content) continue;\n\n const norm = normalizeText(content);\n\n // need to combine adjacent turns together to match training data\n if (lastMsg !== undefined && lastMsg.role === msg.role) {\n lastMsg.content += ` ${norm}`;\n } else {\n newChatCtx.push({ role: msg.role, content: norm });\n lastMsg = newChatCtx[newChatCtx.length - 1]!;\n }\n }\n\n // TODO(brian): investigate add_special_tokens options\n const convoText = this.tokenizer!.apply_chat_template(newChatCtx, {\n add_generation_prompt: false,\n tokenize: false,\n }) as string;\n\n // remove the EOU token from current utterance\n return convoText.slice(0, convoText.lastIndexOf('<|im_end|>'));\n }\n}\n\nexport interface EOUModelOptions {\n modelType: EOUModelType;\n executor?: ipc.InferenceExecutor;\n unlikelyThreshold?: number;\n loadLanguages?: boolean;\n}\n\ntype LanguageData = {\n threshold: number;\n};\n\nexport abstract class EOUModel {\n private modelType: EOUModelType;\n private executor: ipc.InferenceExecutor;\n private threshold: number | undefined;\n private loadLanguages: boolean;\n\n protected languagesFuture: Future<Record<string, LanguageData>> = new Future();\n\n #logger = log();\n\n get model(): string {\n return MODEL_REVISIONS[this.modelType];\n }\n\n get provider(): string {\n return 'livekit';\n }\n\n constructor(opts: EOUModelOptions) {\n const {\n modelType = 'en',\n executor = getJobContext().inferenceExecutor,\n unlikelyThreshold,\n loadLanguages = true,\n } = opts;\n\n this.modelType = modelType;\n this.executor = executor;\n this.threshold = unlikelyThreshold;\n this.loadLanguages = loadLanguages;\n\n if (loadLanguages) {\n downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: 'languages.json',\n revision: MODEL_REVISIONS[modelType],\n localFileOnly: true,\n }).then((path) => {\n this.languagesFuture.resolve(JSON.parse(readFileSync(path, 'utf8')));\n });\n }\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if 
(language === undefined) {\n return this.threshold;\n }\n\n const lang = language.toLowerCase();\n const languages = await this.languagesFuture.await;\n\n // try the full language code first\n let langData = languages[lang];\n\n if (langData === undefined && lang.includes('-')) {\n langData = languages[getBaseLanguage(lang)];\n }\n\n if (langData === undefined) {\n this.#logger.warn(`Language ${language} not supported by EOU model`);\n return undefined;\n }\n\n // if a custom threshold is provided, use it\n return this.threshold !== undefined ? this.threshold : langData.threshold;\n }\n\n async supportsLanguage(language?: string): Promise<boolean> {\n return (await this.unlikelyThreshold(language)) !== undefined;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n let messages: RawChatItem[] = [];\n\n for (const message of chatCtx.items) {\n // skip system and developer messages or tool call messages\n if (message.type !== 'message' || message.role in ['system', 'developer']) {\n continue;\n }\n\n for (const content of message.content) {\n if (typeof content === 'string') {\n messages.push({\n role: message.role === 'assistant' ? 
'assistant' : 'user',\n content: content,\n });\n }\n }\n }\n\n messages = messages.slice(-MAX_HISTORY_TURNS);\n\n const result = await this.executor.doInference(this.inferenceMethod(), messages);\n if (result === undefined) {\n throw new Error('EOU inference should always returns a result');\n }\n\n return (result as EOUOutput).eouProbability;\n }\n\n abstract inferenceMethod(): string;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,0BAAyC;AAEzC,oBAA6E;AAC7E,qBAA6B;AAC7B,qBAAe;AACf,8BAAyC;AACzC,sBAAuC;AACvC,uBAMO;AACP,mBAA8B;AAMvB,MAAe,sBAAsB,8BAA0C;AAAA,EAC5E;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EAER,cAAU,mBAAI;AAAA,EAEd,YAAY,WAAyB;AACnC,UAAM;AACN,SAAK,YAAY;AACjB,SAAK,gBAAgB,iCAAgB,SAAS;AAAA,EAChD;AAAA,EAEA,MAAM,aAAa;AACjB,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,2BAA2B;AAElE,QAAI;AACF,YAAM,gBAAgB,UAAM,wCAAuB;AAAA,QACjD,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,KAAK;AAAA,QACf,eAAe;AAAA,MACjB,CAAC;AAGD,YAAM,cAA+C;AAAA,QACnD,mBAAmB,KAAK,IAAI,GAAG,KAAK,MAAM,eAAAA,QAAG,KAAK,EAAE,SAAS,CAAC,CAAC;AAAA,QAC/D,mBAAmB;AAAA,QACnB,oBAAoB,CAAC,EAAE,MAAM,MAAM,CAAC;AAAA,MACtC;AAEA,WAAK,UAAU,MAAM,yCAAiB,OAAO,eAAe,WAAW;AAEvE,WAAK,YAAY,MAAM,cAAc,gBAAgB,yBAAyB;AAAA,QAC5E,UAAU,KAAK;AAAA,QACf,kBAAkB;AAAA,MACpB,CAAC;AAAA,IACH,SAAS,GAAG;AACV,YAAM,eAAe,OAAO,CAAC;AAG7B,UACE,aAAa,SAAS,uBAAuB,KAC7C,aAAa,SAAS,4BAA4B,KAClD,aAAa,SAAS,yBAAyB,GAC/C;AACA,cAAM,IAAI;AAAA,UACR,+CAA+C,KAAK,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAOxC,CAAC;AAAA,QACxB;AAAA,MACF;AAEA,YAAM,IAAI;AAAA,QACR,+CAA+C,KAAK,SAAS,uBAAuB,CAAC;AAAA,MACvF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,IAAI,MAAqB;AAC7B,UAAM,YAAY,KAAK,IAAI;AAE3B,UAAM,OAAO,KAAK,cAAc,IAAI;AAEpC,UAAM,SAAS,KAAK,UAAW,OAAO,MAAM,EAAE,oBAAoB,MAAM,CAAC;AACzE,SAAK,QAAQ,MAAM,EAAE,QAAQ,KAAK,UAAU,MAAM,GAAG,KAAK,GAAG,YAAY;AAEzE,UAAM,UAAU,MAAM,KAAK,QAAS;AAAA,MAClC,EAAE,WAAW,IAAI,+BAAO,SAAS,QAAQ,CAAC,GAAG,OAAO,MAAM,CAAC,EAAE;AAAA,MAC7D,CAAC,MAAM;AAAA,IACT;AAEA,UAAM,WAAW,QAAQ,KAAM;AAE/B,UAAM,iBAAiB,SAAS,SAAS,SAAS,CAAC;AACnD,UAAM,UAAU,KAAK,
IAAI;AAEzB,UAAM,SAAS;AAAA,MACb;AAAA,MACA,OAAO;AAAA,MACP,WAAW,UAAU,aAAa;AAAA,IACpC;AAEA,SAAK,QAAQ,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,gBAAgB;AACrD,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ;AArHhB;AAsHI,YAAM,UAAK,YAAL,mBAAc;AAAA,EACtB;AAAA,EAEQ,cAAc,SAAgC;AACpD,UAAM,aAA4B,CAAC;AACnC,QAAI,UAAmC;AAEvC,eAAW,OAAO,SAAS;AACzB,YAAM,UAAU,IAAI;AACpB,UAAI,CAAC,QAAS;AAEd,YAAM,WAAO,4BAAc,OAAO;AAGlC,UAAI,YAAY,UAAa,QAAQ,SAAS,IAAI,MAAM;AACtD,gBAAQ,WAAW,IAAI,IAAI;AAAA,MAC7B,OAAO;AACL,mBAAW,KAAK,EAAE,MAAM,IAAI,MAAM,SAAS,KAAK,CAAC;AACjD,kBAAU,WAAW,WAAW,SAAS,CAAC;AAAA,MAC5C;AAAA,IACF;AAGA,UAAM,YAAY,KAAK,UAAW,oBAAoB,YAAY;AAAA,MAChE,uBAAuB;AAAA,MACvB,UAAU;AAAA,IACZ,CAAC;AAGD,WAAO,UAAU,MAAM,GAAG,UAAU,YAAY,YAAY,CAAC;AAAA,EAC/D;AACF;AAaO,MAAe,SAAS;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEE,kBAAwD,IAAI,qBAAO;AAAA,EAE7E,cAAU,mBAAI;AAAA,EAEd,IAAI,QAAgB;AAClB,WAAO,iCAAgB,KAAK,SAAS;AAAA,EACvC;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAAA,MACJ,YAAY;AAAA,MACZ,eAAW,6BAAc,EAAE;AAAA,MAC3B;AAAA,MACA,gBAAgB;AAAA,IAClB,IAAI;AAEJ,SAAK,YAAY;AACjB,SAAK,WAAW;AAChB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AAErB,QAAI,eAAe;AACjB,kDAAuB;AAAA,QACrB,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,iCAAgB,SAAS;AAAA,QACnC,eAAe;AAAA,MACjB,CAAC,EAAE,KAAK,CAAC,SAAS;AAChB,aAAK,gBAAgB,QAAQ,KAAK,UAAM,6BAAa,MAAM,MAAM,CAAC,CAAC;AAAA,MACrE,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,kBAAkB,UAAgD;AACtE,QAAI,aAAa,QAAW;AAC1B,aAAO,KAAK;AAAA,IACd;AAEA,UAAM,OAAO,SAAS,YAAY;AAClC,UAAM,YAAY,MAAM,KAAK,gBAAgB;AAG7C,QAAI,WAAW,UAAU,IAAI;AAE7B,QAAI,aAAa,UAAa,KAAK,SAAS,GAAG,GAAG;AAChD,iBAAW,cAAU,+BAAgB,IAAI,CAAC;AAAA,IAC5C;AAEA,QAAI,aAAa,QAAW;AAC1B,WAAK,QAAQ,KAAK,YAAY,QAAQ,6BAA6B;AACnE,aAAO;AAAA,IACT;AAGA,WAAO,KAAK,cAAc,SAAY,KAAK,YAAY,SAAS;AAAA,EAClE;AAAA,EAEA,MAAM,iBAAiB,UAAqC;AAC1D,WAAQ,MAAM,KAAK,kBAAkB,QAAQ,MAAO;AAAA,EACtD;AAAA;AAAA,EAGA,MAAM,iBAAiB,SAA0B,UAAkB,GAAoB;AACrF,QAAI,WAA0B,CAAC;AAE/B,eAAW,WAAW,QAAQ,OAAO;AAEnC,UAAI,QAAQ,SAAS,aAAa,QAAQ,QAAQ,CAAC,UAAU,WAAW,GAAG;AACzE;AAAA,MACF;AAEA,iBAAW,WAAW,QAAQ,SAAS;AACrC,YAAI,OAAO,YAAY,UAAU;AAC
/B,mBAAS,KAAK;AAAA,YACZ,MAAM,QAAQ,SAAS,cAAc,cAAc;AAAA,YACnD;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,MAAM,CAAC,kCAAiB;AAE5C,UAAM,SAAS,MAAM,KAAK,SAAS,YAAY,KAAK,gBAAgB,GAAG,QAAQ;AAC/E,QAAI,WAAW,QAAW;AACxB,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AAEA,WAAQ,OAAqB;AAAA,EAC/B;AAGF;","names":["os"]}
|
|
@@ -42,6 +42,8 @@ export declare abstract class EOUModel {
|
|
|
42
42
|
private threshold;
|
|
43
43
|
private loadLanguages;
|
|
44
44
|
protected languagesFuture: Future<Record<string, LanguageData>>;
|
|
45
|
+
get model(): string;
|
|
46
|
+
get provider(): string;
|
|
45
47
|
constructor(opts: EOUModelOptions);
|
|
46
48
|
unlikelyThreshold(language?: string): Promise<number | undefined>;
|
|
47
49
|
supportsLanguage(language?: string): Promise<boolean>;
|
|
@@ -42,6 +42,8 @@ export declare abstract class EOUModel {
|
|
|
42
42
|
private threshold;
|
|
43
43
|
private loadLanguages;
|
|
44
44
|
protected languagesFuture: Future<Record<string, LanguageData>>;
|
|
45
|
+
get model(): string;
|
|
46
|
+
get provider(): string;
|
|
45
47
|
constructor(opts: EOUModelOptions);
|
|
46
48
|
unlikelyThreshold(language?: string): Promise<number | undefined>;
|
|
47
49
|
supportsLanguage(language?: string): Promise<boolean>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/turn_detector/base.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,MAAM,EAAE,eAAe,
|
|
1
|
+
{"version":3,"file":"base.d.ts","sourceRoot":"","sources":["../../src/turn_detector/base.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAChD,OAAO,EAAE,MAAM,EAAE,eAAe,EAAuC,MAAM,iBAAiB,CAAC;AAK/F,OAAO,EACL,KAAK,YAAY,EAKlB,MAAM,gBAAgB,CAAC;AAGxB,KAAK,WAAW,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAErD,KAAK,SAAS,GAAG;IAAE,cAAc,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC;AAE7E,8BAAsB,aAAc,SAAQ,eAAe,CAAC,WAAW,EAAE,EAAE,SAAS,CAAC;;IACnF,OAAO,CAAC,SAAS,CAAe;IAChC,OAAO,CAAC,aAAa,CAAS;IAE9B,OAAO,CAAC,OAAO,CAAC,CAAmB;IACnC,OAAO,CAAC,SAAS,CAAC,CAAsB;gBAI5B,SAAS,EAAE,YAAY;IAM7B,UAAU;IAmDV,GAAG,CAAC,IAAI,EAAE,WAAW,EAAE;;;;;IA4BvB,KAAK;IAIX,OAAO,CAAC,aAAa;CA4BtB;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,YAAY,CAAC;IACxB,QAAQ,CAAC,EAAE,GAAG,CAAC,iBAAiB,CAAC;IACjC,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAED,KAAK,YAAY,GAAG;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB,CAAC;AAEF,8BAAsB,QAAQ;;IAC5B,OAAO,CAAC,SAAS,CAAe;IAChC,OAAO,CAAC,QAAQ,CAAwB;IACxC,OAAO,CAAC,SAAS,CAAqB;IACtC,OAAO,CAAC,aAAa,CAAU;IAE/B,SAAS,CAAC,eAAe,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC,CAAgB;IAI/E,IAAI,KAAK,IAAI,MAAM,CAElB;IAED,IAAI,QAAQ,IAAI,MAAM,CAErB;gBAEW,IAAI,EAAE,eAAe;IAyB3B,iBAAiB,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAwBjE,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAKrD,gBAAgB,CAAC,OAAO,EAAE,GAAG,CAAC,WAAW,EAAE,OAAO,GAAE,MAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IA6BtF,QAAQ,CAAC,eAAe,IAAI,MAAM;CACnC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import {} from "@huggingface/transformers";
|
|
2
|
-
import { Future, InferenceRunner, getJobContext, log } from "@livekit/agents";
|
|
2
|
+
import { Future, InferenceRunner, getBaseLanguage, getJobContext, log } from "@livekit/agents";
|
|
3
3
|
import { readFileSync } from "node:fs";
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import { InferenceSession, Tensor } from "onnxruntime-node";
|
|
@@ -117,6 +117,12 @@ class EOUModel {
|
|
|
117
117
|
loadLanguages;
|
|
118
118
|
languagesFuture = new Future();
|
|
119
119
|
#logger = log();
|
|
120
|
+
get model() {
|
|
121
|
+
return MODEL_REVISIONS[this.modelType];
|
|
122
|
+
}
|
|
123
|
+
get provider() {
|
|
124
|
+
return "livekit";
|
|
125
|
+
}
|
|
120
126
|
constructor(opts) {
|
|
121
127
|
const {
|
|
122
128
|
modelType = "en",
|
|
@@ -147,8 +153,7 @@ class EOUModel {
|
|
|
147
153
|
const languages = await this.languagesFuture.await;
|
|
148
154
|
let langData = languages[lang];
|
|
149
155
|
if (langData === void 0 && lang.includes("-")) {
|
|
150
|
-
|
|
151
|
-
langData = languages[baseLang];
|
|
156
|
+
langData = languages[getBaseLanguage(lang)];
|
|
152
157
|
}
|
|
153
158
|
if (langData === void 0) {
|
|
154
159
|
this.#logger.warn(`Language ${language} not supported by EOU model`);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/turn_detector/base.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type PreTrainedTokenizer } from '@huggingface/transformers';\nimport type { ipc, llm } from '@livekit/agents';\nimport { Future, InferenceRunner, getJobContext, log } from '@livekit/agents';\nimport { readFileSync } from 'node:fs';\nimport os from 'node:os';\nimport { InferenceSession, Tensor } from 'onnxruntime-node';\nimport { downloadFileToCacheDir } from '../hf_utils.js';\nimport {\n type EOUModelType,\n HG_MODEL_REPO,\n MAX_HISTORY_TURNS,\n MODEL_REVISIONS,\n ONNX_FILEPATH,\n} from './constants.js';\nimport { normalizeText } from './utils.js';\n\ntype RawChatItem = { role: string; content: string };\n\ntype EOUOutput = { eouProbability: number; input: string; duration: number };\n\nexport abstract class EOURunnerBase extends InferenceRunner<RawChatItem[], EOUOutput> {\n private modelType: EOUModelType;\n private modelRevision: string;\n\n private session?: InferenceSession;\n private tokenizer?: PreTrainedTokenizer;\n\n #logger = log();\n\n constructor(modelType: EOUModelType) {\n super();\n this.modelType = modelType;\n this.modelRevision = MODEL_REVISIONS[modelType];\n }\n\n async initialize() {\n const { AutoTokenizer } = await import('@huggingface/transformers');\n\n try {\n const onnxModelPath = await downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: ONNX_FILEPATH,\n revision: this.modelRevision,\n localFileOnly: true,\n });\n\n // TODO(brian): support session config once onnxruntime-node supports it\n const sessOptions: InferenceSession.SessionOptions = {\n intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)),\n interOpNumThreads: 1,\n executionProviders: [{ name: 'cpu' }],\n };\n\n this.session = await InferenceSession.create(onnxModelPath, sessOptions);\n\n this.tokenizer = await AutoTokenizer.from_pretrained('livekit/turn-detector', {\n revision: 
this.modelRevision,\n local_files_only: true,\n });\n } catch (e) {\n const errorMessage = String(e);\n\n // Check if the error is related to missing local files\n if (\n errorMessage.includes('local_files_only=true') ||\n errorMessage.includes('file was not found locally') ||\n errorMessage.includes('File not found in cache')\n ) {\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: Required model files not found locally.\\n\\n` +\n `This usually means you need to download the model files first. Please run one of these commands:\\n\\n` +\n ` If using Node.js starter template:\\n` +\n ` pnpm download-files\\n\\n` +\n ` If using the agent directly:\\n` +\n ` node ./your_agent.ts download-files\\n\\n` +\n `Then try running your application again.\\n\\n` +\n `Original error: ${e}`,\n );\n }\n\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: ${e}`,\n );\n }\n }\n\n async run(data: RawChatItem[]) {\n const startTime = Date.now();\n\n const text = this.formatChatCtx(data);\n\n const inputs = this.tokenizer!.encode(text, { add_special_tokens: false });\n this.#logger.debug({ inputs: JSON.stringify(inputs), text }, 'EOU inputs');\n\n const outputs = await this.session!.run(\n { input_ids: new Tensor('int64', inputs, [1, inputs.length]) },\n ['prob'],\n );\n\n const probData = outputs.prob!.data;\n // should be the logits of the last token\n const eouProbability = probData[probData.length - 1] as number;\n const endTime = Date.now();\n\n const result = {\n eouProbability,\n input: text,\n duration: (endTime - startTime) / 1000,\n };\n\n this.#logger.child({ result }).debug('eou prediction');\n return result;\n }\n\n async close() {\n await this.session?.release();\n }\n\n private formatChatCtx(chatCtx: RawChatItem[]): string {\n const newChatCtx: RawChatItem[] = [];\n let lastMsg: RawChatItem | undefined = undefined;\n\n for (const msg of chatCtx) {\n const content = 
msg.content;\n if (!content) continue;\n\n const norm = normalizeText(content);\n\n // need to combine adjacent turns together to match training data\n if (lastMsg !== undefined && lastMsg.role === msg.role) {\n lastMsg.content += ` ${norm}`;\n } else {\n newChatCtx.push({ role: msg.role, content: norm });\n lastMsg = newChatCtx[newChatCtx.length - 1]!;\n }\n }\n\n // TODO(brian): investigate add_special_tokens options\n const convoText = this.tokenizer!.apply_chat_template(newChatCtx, {\n add_generation_prompt: false,\n tokenize: false,\n }) as string;\n\n // remove the EOU token from current utterance\n return convoText.slice(0, convoText.lastIndexOf('<|im_end|>'));\n }\n}\n\nexport interface EOUModelOptions {\n modelType: EOUModelType;\n executor?: ipc.InferenceExecutor;\n unlikelyThreshold?: number;\n loadLanguages?: boolean;\n}\n\ntype LanguageData = {\n threshold: number;\n};\n\nexport abstract class EOUModel {\n private modelType: EOUModelType;\n private executor: ipc.InferenceExecutor;\n private threshold: number | undefined;\n private loadLanguages: boolean;\n\n protected languagesFuture: Future<Record<string, LanguageData>> = new Future();\n\n #logger = log();\n\n constructor(opts: EOUModelOptions) {\n const {\n modelType = 'en',\n executor = getJobContext().inferenceExecutor,\n unlikelyThreshold,\n loadLanguages = true,\n } = opts;\n\n this.modelType = modelType;\n this.executor = executor;\n this.threshold = unlikelyThreshold;\n this.loadLanguages = loadLanguages;\n\n if (loadLanguages) {\n downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: 'languages.json',\n revision: MODEL_REVISIONS[modelType],\n localFileOnly: true,\n }).then((path) => {\n this.languagesFuture.resolve(JSON.parse(readFileSync(path, 'utf8')));\n });\n }\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if (language === undefined) {\n return this.threshold;\n }\n\n const lang = language.toLowerCase();\n const languages = await 
this.languagesFuture.await;\n\n // try the full language code first\n let langData = languages[lang];\n\n if (langData === undefined && lang.includes('-')) {\n const baseLang = lang.split('-')[0]!;\n langData = languages[baseLang];\n }\n\n if (langData === undefined) {\n this.#logger.warn(`Language ${language} not supported by EOU model`);\n return undefined;\n }\n\n // if a custom threshold is provided, use it\n return this.threshold !== undefined ? this.threshold : langData.threshold;\n }\n\n async supportsLanguage(language?: string): Promise<boolean> {\n return (await this.unlikelyThreshold(language)) !== undefined;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n let messages: RawChatItem[] = [];\n\n for (const message of chatCtx.items) {\n // skip system and developer messages or tool call messages\n if (message.type !== 'message' || message.role in ['system', 'developer']) {\n continue;\n }\n\n for (const content of message.content) {\n if (typeof content === 'string') {\n messages.push({\n role: message.role === 'assistant' ? 
'assistant' : 'user',\n content: content,\n });\n }\n }\n }\n\n messages = messages.slice(-MAX_HISTORY_TURNS);\n\n const result = await this.executor.doInference(this.inferenceMethod(), messages);\n if (result === undefined) {\n throw new Error('EOU inference should always returns a result');\n }\n\n return (result as EOUOutput).eouProbability;\n }\n\n abstract inferenceMethod(): string;\n}\n"],"mappings":"AAGA,eAAyC;AAEzC,SAAS,QAAQ,iBAAiB,eAAe,WAAW;AAC5D,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AACf,SAAS,kBAAkB,cAAc;AACzC,SAAS,8BAA8B;AACvC;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,qBAAqB;AAMvB,MAAe,sBAAsB,gBAA0C;AAAA,EAC5E;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,WAAyB;AACnC,UAAM;AACN,SAAK,YAAY;AACjB,SAAK,gBAAgB,gBAAgB,SAAS;AAAA,EAChD;AAAA,EAEA,MAAM,aAAa;AACjB,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,2BAA2B;AAElE,QAAI;AACF,YAAM,gBAAgB,MAAM,uBAAuB;AAAA,QACjD,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,KAAK;AAAA,QACf,eAAe;AAAA,MACjB,CAAC;AAGD,YAAM,cAA+C;AAAA,QACnD,mBAAmB,KAAK,IAAI,GAAG,KAAK,MAAM,GAAG,KAAK,EAAE,SAAS,CAAC,CAAC;AAAA,QAC/D,mBAAmB;AAAA,QACnB,oBAAoB,CAAC,EAAE,MAAM,MAAM,CAAC;AAAA,MACtC;AAEA,WAAK,UAAU,MAAM,iBAAiB,OAAO,eAAe,WAAW;AAEvE,WAAK,YAAY,MAAM,cAAc,gBAAgB,yBAAyB;AAAA,QAC5E,UAAU,KAAK;AAAA,QACf,kBAAkB;AAAA,MACpB,CAAC;AAAA,IACH,SAAS,GAAG;AACV,YAAM,eAAe,OAAO,CAAC;AAG7B,UACE,aAAa,SAAS,uBAAuB,KAC7C,aAAa,SAAS,4BAA4B,KAClD,aAAa,SAAS,yBAAyB,GAC/C;AACA,cAAM,IAAI;AAAA,UACR,+CAA+C,KAAK,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAOxC,CAAC;AAAA,QACxB;AAAA,MACF;AAEA,YAAM,IAAI;AAAA,QACR,+CAA+C,KAAK,SAAS,uBAAuB,CAAC;AAAA,MACvF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,IAAI,MAAqB;AAC7B,UAAM,YAAY,KAAK,IAAI;AAE3B,UAAM,OAAO,KAAK,cAAc,IAAI;AAEpC,UAAM,SAAS,KAAK,UAAW,OAAO,MAAM,EAAE,oBAAoB,MAAM,CAAC;AACzE,SAAK,QAAQ,MAAM,EAAE,QAAQ,KAAK,UAAU,MAAM,GAAG,KAAK,GAAG,YAAY;AAEzE,UAAM,UAAU,MAAM,KAAK,QAAS;AAAA,MAClC,EAAE,WAAW,IAAI,OAAO,SAAS,QAAQ,CAAC,GAAG,OAAO,MAAM,CAAC,EAAE;AAAA,MAC7D,CAAC,MAAM;AAAA,IACT;AAEA,UAAM,WAAW,QAAQ,KAAM;AAE/B,UAAM,iBAAiB,SAAS,SAAS,SAAS,
CAAC;AACnD,UAAM,UAAU,KAAK,IAAI;AAEzB,UAAM,SAAS;AAAA,MACb;AAAA,MACA,OAAO;AAAA,MACP,WAAW,UAAU,aAAa;AAAA,IACpC;AAEA,SAAK,QAAQ,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,gBAAgB;AACrD,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ;AArHhB;AAsHI,YAAM,UAAK,YAAL,mBAAc;AAAA,EACtB;AAAA,EAEQ,cAAc,SAAgC;AACpD,UAAM,aAA4B,CAAC;AACnC,QAAI,UAAmC;AAEvC,eAAW,OAAO,SAAS;AACzB,YAAM,UAAU,IAAI;AACpB,UAAI,CAAC,QAAS;AAEd,YAAM,OAAO,cAAc,OAAO;AAGlC,UAAI,YAAY,UAAa,QAAQ,SAAS,IAAI,MAAM;AACtD,gBAAQ,WAAW,IAAI,IAAI;AAAA,MAC7B,OAAO;AACL,mBAAW,KAAK,EAAE,MAAM,IAAI,MAAM,SAAS,KAAK,CAAC;AACjD,kBAAU,WAAW,WAAW,SAAS,CAAC;AAAA,MAC5C;AAAA,IACF;AAGA,UAAM,YAAY,KAAK,UAAW,oBAAoB,YAAY;AAAA,MAChE,uBAAuB;AAAA,MACvB,UAAU;AAAA,IACZ,CAAC;AAGD,WAAO,UAAU,MAAM,GAAG,UAAU,YAAY,YAAY,CAAC;AAAA,EAC/D;AACF;AAaO,MAAe,SAAS;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEE,kBAAwD,IAAI,OAAO;AAAA,EAE7E,UAAU,IAAI;AAAA,EAEd,YAAY,MAAuB;AACjC,UAAM;AAAA,MACJ,YAAY;AAAA,MACZ,WAAW,cAAc,EAAE;AAAA,MAC3B;AAAA,MACA,gBAAgB;AAAA,IAClB,IAAI;AAEJ,SAAK,YAAY;AACjB,SAAK,WAAW;AAChB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AAErB,QAAI,eAAe;AACjB,6BAAuB;AAAA,QACrB,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,gBAAgB,SAAS;AAAA,QACnC,eAAe;AAAA,MACjB,CAAC,EAAE,KAAK,CAAC,SAAS;AAChB,aAAK,gBAAgB,QAAQ,KAAK,MAAM,aAAa,MAAM,MAAM,CAAC,CAAC;AAAA,MACrE,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,kBAAkB,UAAgD;AACtE,QAAI,aAAa,QAAW;AAC1B,aAAO,KAAK;AAAA,IACd;AAEA,UAAM,OAAO,SAAS,YAAY;AAClC,UAAM,YAAY,MAAM,KAAK,gBAAgB;AAG7C,QAAI,WAAW,UAAU,IAAI;AAE7B,QAAI,aAAa,UAAa,KAAK,SAAS,GAAG,GAAG;AAChD,YAAM,WAAW,KAAK,MAAM,GAAG,EAAE,CAAC;AAClC,iBAAW,UAAU,QAAQ;AAAA,IAC/B;AAEA,QAAI,aAAa,QAAW;AAC1B,WAAK,QAAQ,KAAK,YAAY,QAAQ,6BAA6B;AACnE,aAAO;AAAA,IACT;AAGA,WAAO,KAAK,cAAc,SAAY,KAAK,YAAY,SAAS;AAAA,EAClE;AAAA,EAEA,MAAM,iBAAiB,UAAqC;AAC1D,WAAQ,MAAM,KAAK,kBAAkB,QAAQ,MAAO;AAAA,EACtD;AAAA;AAAA,EAGA,MAAM,iBAAiB,SAA0B,UAAkB,GAAoB;AACrF,QAAI,WAA0B,CAAC;AAE/B,eAAW,WAAW,QAAQ,OAAO;AAEnC,UAAI,QAAQ,SAAS,aAAa,QAAQ,QAAQ,CAAC,UAAU,WAAW,GAAG;AACzE;AAAA,MACF;AAEA,iBAAW,WAAW,QAAQ,SAAS;AACrC,YAAI,OAAO,YAAY,UAAU;AAC/B,mBAAS,KAAK;AAAA,YACZ,MAAM,QAAQ,SAAS,cAAc,cAAc;AAA
A,YACnD;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,MAAM,CAAC,iBAAiB;AAE5C,UAAM,SAAS,MAAM,KAAK,SAAS,YAAY,KAAK,gBAAgB,GAAG,QAAQ;AAC/E,QAAI,WAAW,QAAW;AACxB,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AAEA,WAAQ,OAAqB;AAAA,EAC/B;AAGF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/turn_detector/base.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type PreTrainedTokenizer } from '@huggingface/transformers';\nimport type { ipc, llm } from '@livekit/agents';\nimport { Future, InferenceRunner, getBaseLanguage, getJobContext, log } from '@livekit/agents';\nimport { readFileSync } from 'node:fs';\nimport os from 'node:os';\nimport { InferenceSession, Tensor } from 'onnxruntime-node';\nimport { downloadFileToCacheDir } from '../hf_utils.js';\nimport {\n type EOUModelType,\n HG_MODEL_REPO,\n MAX_HISTORY_TURNS,\n MODEL_REVISIONS,\n ONNX_FILEPATH,\n} from './constants.js';\nimport { normalizeText } from './utils.js';\n\ntype RawChatItem = { role: string; content: string };\n\ntype EOUOutput = { eouProbability: number; input: string; duration: number };\n\nexport abstract class EOURunnerBase extends InferenceRunner<RawChatItem[], EOUOutput> {\n private modelType: EOUModelType;\n private modelRevision: string;\n\n private session?: InferenceSession;\n private tokenizer?: PreTrainedTokenizer;\n\n #logger = log();\n\n constructor(modelType: EOUModelType) {\n super();\n this.modelType = modelType;\n this.modelRevision = MODEL_REVISIONS[modelType];\n }\n\n async initialize() {\n const { AutoTokenizer } = await import('@huggingface/transformers');\n\n try {\n const onnxModelPath = await downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: ONNX_FILEPATH,\n revision: this.modelRevision,\n localFileOnly: true,\n });\n\n // TODO(brian): support session config once onnxruntime-node supports it\n const sessOptions: InferenceSession.SessionOptions = {\n intraOpNumThreads: Math.max(1, Math.floor(os.cpus().length / 2)),\n interOpNumThreads: 1,\n executionProviders: [{ name: 'cpu' }],\n };\n\n this.session = await InferenceSession.create(onnxModelPath, sessOptions);\n\n this.tokenizer = await AutoTokenizer.from_pretrained('livekit/turn-detector', {\n 
revision: this.modelRevision,\n local_files_only: true,\n });\n } catch (e) {\n const errorMessage = String(e);\n\n // Check if the error is related to missing local files\n if (\n errorMessage.includes('local_files_only=true') ||\n errorMessage.includes('file was not found locally') ||\n errorMessage.includes('File not found in cache')\n ) {\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: Required model files not found locally.\\n\\n` +\n `This usually means you need to download the model files first. Please run one of these commands:\\n\\n` +\n ` If using Node.js starter template:\\n` +\n ` pnpm download-files\\n\\n` +\n ` If using the agent directly:\\n` +\n ` node ./your_agent.ts download-files\\n\\n` +\n `Then try running your application again.\\n\\n` +\n `Original error: ${e}`,\n );\n }\n\n throw new Error(\n `agents-plugins-livekit failed to initialize ${this.modelType} EOU turn detector: ${e}`,\n );\n }\n }\n\n async run(data: RawChatItem[]) {\n const startTime = Date.now();\n\n const text = this.formatChatCtx(data);\n\n const inputs = this.tokenizer!.encode(text, { add_special_tokens: false });\n this.#logger.debug({ inputs: JSON.stringify(inputs), text }, 'EOU inputs');\n\n const outputs = await this.session!.run(\n { input_ids: new Tensor('int64', inputs, [1, inputs.length]) },\n ['prob'],\n );\n\n const probData = outputs.prob!.data;\n // should be the logits of the last token\n const eouProbability = probData[probData.length - 1] as number;\n const endTime = Date.now();\n\n const result = {\n eouProbability,\n input: text,\n duration: (endTime - startTime) / 1000,\n };\n\n this.#logger.child({ result }).debug('eou prediction');\n return result;\n }\n\n async close() {\n await this.session?.release();\n }\n\n private formatChatCtx(chatCtx: RawChatItem[]): string {\n const newChatCtx: RawChatItem[] = [];\n let lastMsg: RawChatItem | undefined = undefined;\n\n for (const msg of chatCtx) {\n const 
content = msg.content;\n if (!content) continue;\n\n const norm = normalizeText(content);\n\n // need to combine adjacent turns together to match training data\n if (lastMsg !== undefined && lastMsg.role === msg.role) {\n lastMsg.content += ` ${norm}`;\n } else {\n newChatCtx.push({ role: msg.role, content: norm });\n lastMsg = newChatCtx[newChatCtx.length - 1]!;\n }\n }\n\n // TODO(brian): investigate add_special_tokens options\n const convoText = this.tokenizer!.apply_chat_template(newChatCtx, {\n add_generation_prompt: false,\n tokenize: false,\n }) as string;\n\n // remove the EOU token from current utterance\n return convoText.slice(0, convoText.lastIndexOf('<|im_end|>'));\n }\n}\n\nexport interface EOUModelOptions {\n modelType: EOUModelType;\n executor?: ipc.InferenceExecutor;\n unlikelyThreshold?: number;\n loadLanguages?: boolean;\n}\n\ntype LanguageData = {\n threshold: number;\n};\n\nexport abstract class EOUModel {\n private modelType: EOUModelType;\n private executor: ipc.InferenceExecutor;\n private threshold: number | undefined;\n private loadLanguages: boolean;\n\n protected languagesFuture: Future<Record<string, LanguageData>> = new Future();\n\n #logger = log();\n\n get model(): string {\n return MODEL_REVISIONS[this.modelType];\n }\n\n get provider(): string {\n return 'livekit';\n }\n\n constructor(opts: EOUModelOptions) {\n const {\n modelType = 'en',\n executor = getJobContext().inferenceExecutor,\n unlikelyThreshold,\n loadLanguages = true,\n } = opts;\n\n this.modelType = modelType;\n this.executor = executor;\n this.threshold = unlikelyThreshold;\n this.loadLanguages = loadLanguages;\n\n if (loadLanguages) {\n downloadFileToCacheDir({\n repo: HG_MODEL_REPO,\n path: 'languages.json',\n revision: MODEL_REVISIONS[modelType],\n localFileOnly: true,\n }).then((path) => {\n this.languagesFuture.resolve(JSON.parse(readFileSync(path, 'utf8')));\n });\n }\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if 
(language === undefined) {\n return this.threshold;\n }\n\n const lang = language.toLowerCase();\n const languages = await this.languagesFuture.await;\n\n // try the full language code first\n let langData = languages[lang];\n\n if (langData === undefined && lang.includes('-')) {\n langData = languages[getBaseLanguage(lang)];\n }\n\n if (langData === undefined) {\n this.#logger.warn(`Language ${language} not supported by EOU model`);\n return undefined;\n }\n\n // if a custom threshold is provided, use it\n return this.threshold !== undefined ? this.threshold : langData.threshold;\n }\n\n async supportsLanguage(language?: string): Promise<boolean> {\n return (await this.unlikelyThreshold(language)) !== undefined;\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n let messages: RawChatItem[] = [];\n\n for (const message of chatCtx.items) {\n // skip system and developer messages or tool call messages\n if (message.type !== 'message' || message.role in ['system', 'developer']) {\n continue;\n }\n\n for (const content of message.content) {\n if (typeof content === 'string') {\n messages.push({\n role: message.role === 'assistant' ? 
'assistant' : 'user',\n content: content,\n });\n }\n }\n }\n\n messages = messages.slice(-MAX_HISTORY_TURNS);\n\n const result = await this.executor.doInference(this.inferenceMethod(), messages);\n if (result === undefined) {\n throw new Error('EOU inference should always returns a result');\n }\n\n return (result as EOUOutput).eouProbability;\n }\n\n abstract inferenceMethod(): string;\n}\n"],"mappings":"AAGA,eAAyC;AAEzC,SAAS,QAAQ,iBAAiB,iBAAiB,eAAe,WAAW;AAC7E,SAAS,oBAAoB;AAC7B,OAAO,QAAQ;AACf,SAAS,kBAAkB,cAAc;AACzC,SAAS,8BAA8B;AACvC;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,qBAAqB;AAMvB,MAAe,sBAAsB,gBAA0C;AAAA,EAC5E;AAAA,EACA;AAAA,EAEA;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YAAY,WAAyB;AACnC,UAAM;AACN,SAAK,YAAY;AACjB,SAAK,gBAAgB,gBAAgB,SAAS;AAAA,EAChD;AAAA,EAEA,MAAM,aAAa;AACjB,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,2BAA2B;AAElE,QAAI;AACF,YAAM,gBAAgB,MAAM,uBAAuB;AAAA,QACjD,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,KAAK;AAAA,QACf,eAAe;AAAA,MACjB,CAAC;AAGD,YAAM,cAA+C;AAAA,QACnD,mBAAmB,KAAK,IAAI,GAAG,KAAK,MAAM,GAAG,KAAK,EAAE,SAAS,CAAC,CAAC;AAAA,QAC/D,mBAAmB;AAAA,QACnB,oBAAoB,CAAC,EAAE,MAAM,MAAM,CAAC;AAAA,MACtC;AAEA,WAAK,UAAU,MAAM,iBAAiB,OAAO,eAAe,WAAW;AAEvE,WAAK,YAAY,MAAM,cAAc,gBAAgB,yBAAyB;AAAA,QAC5E,UAAU,KAAK;AAAA,QACf,kBAAkB;AAAA,MACpB,CAAC;AAAA,IACH,SAAS,GAAG;AACV,YAAM,eAAe,OAAO,CAAC;AAG7B,UACE,aAAa,SAAS,uBAAuB,KAC7C,aAAa,SAAS,4BAA4B,KAClD,aAAa,SAAS,yBAAyB,GAC/C;AACA,cAAM,IAAI;AAAA,UACR,+CAA+C,KAAK,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAOxC,CAAC;AAAA,QACxB;AAAA,MACF;AAEA,YAAM,IAAI;AAAA,QACR,+CAA+C,KAAK,SAAS,uBAAuB,CAAC;AAAA,MACvF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,IAAI,MAAqB;AAC7B,UAAM,YAAY,KAAK,IAAI;AAE3B,UAAM,OAAO,KAAK,cAAc,IAAI;AAEpC,UAAM,SAAS,KAAK,UAAW,OAAO,MAAM,EAAE,oBAAoB,MAAM,CAAC;AACzE,SAAK,QAAQ,MAAM,EAAE,QAAQ,KAAK,UAAU,MAAM,GAAG,KAAK,GAAG,YAAY;AAEzE,UAAM,UAAU,MAAM,KAAK,QAAS;AAAA,MAClC,EAAE,WAAW,IAAI,OAAO,SAAS,QAAQ,CAAC,GAAG,OAAO,MAAM,CAAC,EAAE;AAAA,MAC7D,CAAC,MAAM;AAAA,IACT;AAEA,UAAM,WAAW,QAAQ,KAAM;AAE/B,UAAM,iBAAiB,SAAS,SAA
S,SAAS,CAAC;AACnD,UAAM,UAAU,KAAK,IAAI;AAEzB,UAAM,SAAS;AAAA,MACb;AAAA,MACA,OAAO;AAAA,MACP,WAAW,UAAU,aAAa;AAAA,IACpC;AAEA,SAAK,QAAQ,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,gBAAgB;AACrD,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,QAAQ;AArHhB;AAsHI,YAAM,UAAK,YAAL,mBAAc;AAAA,EACtB;AAAA,EAEQ,cAAc,SAAgC;AACpD,UAAM,aAA4B,CAAC;AACnC,QAAI,UAAmC;AAEvC,eAAW,OAAO,SAAS;AACzB,YAAM,UAAU,IAAI;AACpB,UAAI,CAAC,QAAS;AAEd,YAAM,OAAO,cAAc,OAAO;AAGlC,UAAI,YAAY,UAAa,QAAQ,SAAS,IAAI,MAAM;AACtD,gBAAQ,WAAW,IAAI,IAAI;AAAA,MAC7B,OAAO;AACL,mBAAW,KAAK,EAAE,MAAM,IAAI,MAAM,SAAS,KAAK,CAAC;AACjD,kBAAU,WAAW,WAAW,SAAS,CAAC;AAAA,MAC5C;AAAA,IACF;AAGA,UAAM,YAAY,KAAK,UAAW,oBAAoB,YAAY;AAAA,MAChE,uBAAuB;AAAA,MACvB,UAAU;AAAA,IACZ,CAAC;AAGD,WAAO,UAAU,MAAM,GAAG,UAAU,YAAY,YAAY,CAAC;AAAA,EAC/D;AACF;AAaO,MAAe,SAAS;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEE,kBAAwD,IAAI,OAAO;AAAA,EAE7E,UAAU,IAAI;AAAA,EAEd,IAAI,QAAgB;AAClB,WAAO,gBAAgB,KAAK,SAAS;AAAA,EACvC;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,MAAuB;AACjC,UAAM;AAAA,MACJ,YAAY;AAAA,MACZ,WAAW,cAAc,EAAE;AAAA,MAC3B;AAAA,MACA,gBAAgB;AAAA,IAClB,IAAI;AAEJ,SAAK,YAAY;AACjB,SAAK,WAAW;AAChB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AAErB,QAAI,eAAe;AACjB,6BAAuB;AAAA,QACrB,MAAM;AAAA,QACN,MAAM;AAAA,QACN,UAAU,gBAAgB,SAAS;AAAA,QACnC,eAAe;AAAA,MACjB,CAAC,EAAE,KAAK,CAAC,SAAS;AAChB,aAAK,gBAAgB,QAAQ,KAAK,MAAM,aAAa,MAAM,MAAM,CAAC,CAAC;AAAA,MACrE,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,MAAM,kBAAkB,UAAgD;AACtE,QAAI,aAAa,QAAW;AAC1B,aAAO,KAAK;AAAA,IACd;AAEA,UAAM,OAAO,SAAS,YAAY;AAClC,UAAM,YAAY,MAAM,KAAK,gBAAgB;AAG7C,QAAI,WAAW,UAAU,IAAI;AAE7B,QAAI,aAAa,UAAa,KAAK,SAAS,GAAG,GAAG;AAChD,iBAAW,UAAU,gBAAgB,IAAI,CAAC;AAAA,IAC5C;AAEA,QAAI,aAAa,QAAW;AAC1B,WAAK,QAAQ,KAAK,YAAY,QAAQ,6BAA6B;AACnE,aAAO;AAAA,IACT;AAGA,WAAO,KAAK,cAAc,SAAY,KAAK,YAAY,SAAS;AAAA,EAClE;AAAA,EAEA,MAAM,iBAAiB,UAAqC;AAC1D,WAAQ,MAAM,KAAK,kBAAkB,QAAQ,MAAO;AAAA,EACtD;AAAA;AAAA,EAGA,MAAM,iBAAiB,SAA0B,UAAkB,GAAoB;AACrF,QAAI,WAA0B,CAAC;AAE/B,eAAW,WAAW,QAAQ,OAAO;AAEnC,UAAI,QAAQ,SAAS,aAAa,QAAQ,QAAQ,CAAC,UAAU,WAAW,GAAG;AACzE;AAAA,MACF;AAEA,iBAAW,WAAW,QAAQ,SAAS;A
ACrC,YAAI,OAAO,YAAY,UAAU;AAC/B,mBAAS,KAAK;AAAA,YACZ,MAAM,QAAQ,SAAS,cAAc,cAAc;AAAA,YACnD;AAAA,UACF,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAEA,eAAW,SAAS,MAAM,CAAC,iBAAiB;AAE5C,UAAM,SAAS,MAAM,KAAK,SAAS,YAAY,KAAK,gBAAgB,GAAG,QAAQ;AAC/E,QAAI,WAAW,QAAW;AACxB,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AAEA,WAAQ,OAAqB;AAAA,EAC/B;AAGF;","names":[]}
|
|
@@ -49,6 +49,7 @@ class MultilingualModel extends import_base.EOUModel {
|
|
|
49
49
|
if (!language) {
|
|
50
50
|
return void 0;
|
|
51
51
|
}
|
|
52
|
+
language = (0, import_agents.normalizeLanguage)(language);
|
|
52
53
|
let threshold = await super.unlikelyThreshold(language);
|
|
53
54
|
if (threshold === void 0) {
|
|
54
55
|
const url = remoteInferenceUrl();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/turn_detector/multilingual.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { llm } from '@livekit/agents';\nimport { getJobContext, log } from '@livekit/agents';\nimport { EOUModel, EOURunnerBase } from './base.js';\nimport { MAX_HISTORY_TURNS } from './constants.js';\n\nconst REMOTE_INFERENCE_TIMEOUT = 2000;\n\nexport const INFERENCE_METHOD_MULTILINGUAL = 'lk_end_of_utterance_multilingual';\n\nexport class EUORunnerMultilingual extends EOURunnerBase {\n constructor() {\n super('multilingual');\n }\n}\n\nexport class MultilingualModel extends EOUModel {\n #logger = log();\n\n constructor(unlikelyThreshold?: number) {\n super({\n modelType: 'multilingual',\n unlikelyThreshold,\n });\n }\n\n inferenceMethod(): string {\n return INFERENCE_METHOD_MULTILINGUAL;\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if (!language) {\n return undefined;\n }\n\n let threshold = await super.unlikelyThreshold(language);\n if (threshold === undefined) {\n const url = remoteInferenceUrl();\n if (!url) return undefined;\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify({\n language,\n }),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to fetch threshold: ${resp.statusText}`);\n }\n\n const data = (await resp.json()) as { threshold: number | undefined };\n threshold = data.threshold;\n if (threshold) {\n const languages = await this.languagesFuture.await;\n languages[language] = { threshold };\n }\n }\n\n return threshold;\n }\n\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n const url = remoteInferenceUrl();\n if (!url) {\n return await super.predictEndOfTurn(chatCtx, timeout);\n }\n\n // Copy and process chat context similar to Python 
implementation\n const messages = chatCtx\n .copy({\n excludeFunctionCall: true,\n excludeInstructions: true,\n excludeEmptyMessage: true,\n })\n .truncate(MAX_HISTORY_TURNS);\n\n // Get job context and build request\n const ctx = getJobContext();\n const request: any = {\n ...messages.toJSON({\n excludeImage: true,\n excludeAudio: true,\n excludeTimestamp: true,\n }),\n jobId: ctx.job.id,\n workerId: ctx.workerId,\n };\n\n // Add agentId from environment variable if available\n const agentId = process.env.LIVEKIT_AGENT_ID;\n if (agentId) {\n request.agentId = agentId;\n }\n\n const startedAt = performance.now();\n\n this.#logger.debug({ url, request }, '=== remote EOU inference');\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify(request),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to predict end of turn: ${resp.statusText}`);\n }\n\n const data = await resp.json();\n const probability = data.probability;\n if (typeof probability === 'number' && probability >= 0) {\n this.#logger.debug(\n {\n eouProbability: probability,\n duration: (performance.now() - startedAt) / 1000,\n },\n 'eou prediction',\n );\n return probability;\n }\n\n // default to indicate no prediction\n return 1;\n }\n}\n\nfunction remoteInferenceUrl() {\n const urlBase = process.env.LIVEKIT_REMOTE_EOT_URL;\n if (!urlBase) {\n return undefined;\n }\n return `${urlBase}/eot/multi`;\n}\n\nexport default EUORunnerMultilingual;\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,
|
|
1
|
+
{"version":3,"sources":["../../src/turn_detector/multilingual.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { llm } from '@livekit/agents';\nimport { getJobContext, log, normalizeLanguage } from '@livekit/agents';\nimport { EOUModel, EOURunnerBase } from './base.js';\nimport { MAX_HISTORY_TURNS } from './constants.js';\n\nconst REMOTE_INFERENCE_TIMEOUT = 2000;\n\nexport const INFERENCE_METHOD_MULTILINGUAL = 'lk_end_of_utterance_multilingual';\n\nexport class EUORunnerMultilingual extends EOURunnerBase {\n constructor() {\n super('multilingual');\n }\n}\n\nexport class MultilingualModel extends EOUModel {\n #logger = log();\n\n constructor(unlikelyThreshold?: number) {\n super({\n modelType: 'multilingual',\n unlikelyThreshold,\n });\n }\n\n inferenceMethod(): string {\n return INFERENCE_METHOD_MULTILINGUAL;\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if (!language) {\n return undefined;\n }\n\n language = normalizeLanguage(language);\n\n let threshold = await super.unlikelyThreshold(language);\n if (threshold === undefined) {\n const url = remoteInferenceUrl();\n if (!url) return undefined;\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify({\n language,\n }),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to fetch threshold: ${resp.statusText}`);\n }\n\n const data = (await resp.json()) as { threshold: number | undefined };\n threshold = data.threshold;\n if (threshold) {\n const languages = await this.languagesFuture.await;\n languages[language] = { threshold };\n }\n }\n\n return threshold;\n }\n\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n const url = remoteInferenceUrl();\n if (!url) {\n return await super.predictEndOfTurn(chatCtx, timeout);\n }\n\n // 
Copy and process chat context similar to Python implementation\n const messages = chatCtx\n .copy({\n excludeFunctionCall: true,\n excludeInstructions: true,\n excludeEmptyMessage: true,\n })\n .truncate(MAX_HISTORY_TURNS);\n\n // Get job context and build request\n const ctx = getJobContext();\n const request: any = {\n ...messages.toJSON({\n excludeImage: true,\n excludeAudio: true,\n excludeTimestamp: true,\n }),\n jobId: ctx.job.id,\n workerId: ctx.workerId,\n };\n\n // Add agentId from environment variable if available\n const agentId = process.env.LIVEKIT_AGENT_ID;\n if (agentId) {\n request.agentId = agentId;\n }\n\n const startedAt = performance.now();\n\n this.#logger.debug({ url, request }, '=== remote EOU inference');\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify(request),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to predict end of turn: ${resp.statusText}`);\n }\n\n const data = await resp.json();\n const probability = data.probability;\n if (typeof probability === 'number' && probability >= 0) {\n this.#logger.debug(\n {\n eouProbability: probability,\n duration: (performance.now() - startedAt) / 1000,\n },\n 'eou prediction',\n );\n return probability;\n }\n\n // default to indicate no prediction\n return 1;\n }\n}\n\nfunction remoteInferenceUrl() {\n const urlBase = process.env.LIVEKIT_REMOTE_EOT_URL;\n if (!urlBase) {\n return undefined;\n }\n return `${urlBase}/eot/multi`;\n}\n\nexport default 
EUORunnerMultilingual;\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,oBAAsD;AACtD,kBAAwC;AACxC,uBAAkC;AAElC,MAAM,2BAA2B;AAE1B,MAAM,gCAAgC;AAEtC,MAAM,8BAA8B,0BAAc;AAAA,EACvD,cAAc;AACZ,UAAM,cAAc;AAAA,EACtB;AACF;AAEO,MAAM,0BAA0B,qBAAS;AAAA,EAC9C,cAAU,mBAAI;AAAA,EAEd,YAAY,mBAA4B;AACtC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,kBAA0B;AACxB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,kBAAkB,UAAgD;AACtE,QAAI,CAAC,UAAU;AACb,aAAO;AAAA,IACT;AAEA,mBAAW,iCAAkB,QAAQ;AAErC,QAAI,YAAY,MAAM,MAAM,kBAAkB,QAAQ;AACtD,QAAI,cAAc,QAAW;AAC3B,YAAM,MAAM,mBAAmB;AAC/B,UAAI,CAAC,IAAK,QAAO;AAEjB,YAAM,OAAO,MAAM,MAAM,KAAK;AAAA,QAC5B,QAAQ;AAAA,QACR,MAAM,KAAK,UAAU;AAAA,UACnB;AAAA,QACF,CAAC;AAAA,QACD,SAAS;AAAA,UACP,gBAAgB;AAAA,QAClB;AAAA,QACA,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AAED,UAAI,CAAC,KAAK,IAAI;AACZ,cAAM,IAAI,MAAM,8BAA8B,KAAK,UAAU,EAAE;AAAA,MACjE;AAEA,YAAM,OAAQ,MAAM,KAAK,KAAK;AAC9B,kBAAY,KAAK;AACjB,UAAI,WAAW;AACb,cAAM,YAAY,MAAM,KAAK,gBAAgB;AAC7C,kBAAU,QAAQ,IAAI,EAAE,UAAU;AAAA,MACpC;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,iBAAiB,SAA0B,UAAkB,GAAoB;AACrF,UAAM,MAAM,mBAAmB;AAC/B,QAAI,CAAC,KAAK;AACR,aAAO,MAAM,MAAM,iBAAiB,SAAS,OAAO;AAAA,IACtD;AAGA,UAAM,WAAW,QACd,KAAK;AAAA,MACJ,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,IACvB,CAAC,EACA,SAAS,kCAAiB;AAG7B,UAAM,UAAM,6BAAc;AAC1B,UAAM,UAAe;AAAA,MACnB,GAAG,SAAS,OAAO;AAAA,QACjB,cAAc;AAAA,QACd,cAAc;AAAA,QACd,kBAAkB;AAAA,MACpB,CAAC;AAAA,MACD,OAAO,IAAI,IAAI;AAAA,MACf,UAAU,IAAI;AAAA,IAChB;AAGA,UAAM,UAAU,QAAQ,IAAI;AAC5B,QAAI,SAAS;AACX,cAAQ,UAAU;AAAA,IACpB;AAEA,UAAM,YAAY,YAAY,IAAI;AAElC,SAAK,QAAQ,MAAM,EAAE,KAAK,QAAQ,GAAG,0BAA0B;AAE/D,UAAM,OAAO,MAAM,MAAM,KAAK;AAAA,MAC5B,QAAQ;AAAA,MACR,MAAM,KAAK,UAAU,OAAO;AAAA,MAC5B,SAAS;AAAA,QACP,gBAAgB;AAAA,MAClB;AAAA,MACA,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,IACtD,CAAC;AAED,QAAI,CAAC,KAAK,IAAI;AACZ,YAAM,IAAI,MAAM,kCAAkC,KAAK,UAAU,EAAE;AAAA,IACrE;AAEA,UAAM,OAAO,MAAM,KAAK,KAAK;AAC7B,UAAM,cAAc,KAAK;AACzB,QAAI,OAAO,gBAAgB,YAAY,eAAe,GAAG;AACvD,WAAK,QAAQ;AAAA,QACX;AAAA,UACE,gBAAgB;AAAA,UAChB,
WAAW,YAAY,IAAI,IAAI,aAAa;AAAA,QAC9C;AAAA,QACA;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAGA,WAAO;AAAA,EACT;AACF;AAEA,SAAS,qBAAqB;AAC5B,QAAM,UAAU,QAAQ,IAAI;AAC5B,MAAI,CAAC,SAAS;AACZ,WAAO;AAAA,EACT;AACA,SAAO,GAAG,OAAO;AACnB;AAEA,IAAO,uBAAQ;","names":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"multilingual.d.ts","sourceRoot":"","sources":["../../src/turn_detector/multilingual.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAE3C,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAKpD,eAAO,MAAM,6BAA6B,qCAAqC,CAAC;AAEhF,qBAAa,qBAAsB,SAAQ,aAAa;;CAIvD;AAED,qBAAa,iBAAkB,SAAQ,QAAQ;;gBAGjC,iBAAiB,CAAC,EAAE,MAAM;IAOtC,eAAe,IAAI,MAAM;IAInB,iBAAiB,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;
|
|
1
|
+
{"version":3,"file":"multilingual.d.ts","sourceRoot":"","sources":["../../src/turn_detector/multilingual.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAE3C,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAKpD,eAAO,MAAM,6BAA6B,qCAAqC,CAAC;AAEhF,qBAAa,qBAAsB,SAAQ,aAAa;;CAIvD;AAED,qBAAa,iBAAkB,SAAQ,QAAQ;;gBAGjC,iBAAiB,CAAC,EAAE,MAAM;IAOtC,eAAe,IAAI,MAAM;IAInB,iBAAiB,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAsCjE,gBAAgB,CAAC,OAAO,EAAE,GAAG,CAAC,WAAW,EAAE,OAAO,GAAE,MAAU,GAAG,OAAO,CAAC,MAAM,CAAC;CAkEvF;AAUD,eAAe,qBAAqB,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { getJobContext, log } from "@livekit/agents";
|
|
1
|
+
import { getJobContext, log, normalizeLanguage } from "@livekit/agents";
|
|
2
2
|
import { EOUModel, EOURunnerBase } from "./base.js";
|
|
3
3
|
import { MAX_HISTORY_TURNS } from "./constants.js";
|
|
4
4
|
const REMOTE_INFERENCE_TIMEOUT = 2e3;
|
|
@@ -23,6 +23,7 @@ class MultilingualModel extends EOUModel {
|
|
|
23
23
|
if (!language) {
|
|
24
24
|
return void 0;
|
|
25
25
|
}
|
|
26
|
+
language = normalizeLanguage(language);
|
|
26
27
|
let threshold = await super.unlikelyThreshold(language);
|
|
27
28
|
if (threshold === void 0) {
|
|
28
29
|
const url = remoteInferenceUrl();
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/turn_detector/multilingual.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { llm } from '@livekit/agents';\nimport { getJobContext, log } from '@livekit/agents';\nimport { EOUModel, EOURunnerBase } from './base.js';\nimport { MAX_HISTORY_TURNS } from './constants.js';\n\nconst REMOTE_INFERENCE_TIMEOUT = 2000;\n\nexport const INFERENCE_METHOD_MULTILINGUAL = 'lk_end_of_utterance_multilingual';\n\nexport class EUORunnerMultilingual extends EOURunnerBase {\n constructor() {\n super('multilingual');\n }\n}\n\nexport class MultilingualModel extends EOUModel {\n #logger = log();\n\n constructor(unlikelyThreshold?: number) {\n super({\n modelType: 'multilingual',\n unlikelyThreshold,\n });\n }\n\n inferenceMethod(): string {\n return INFERENCE_METHOD_MULTILINGUAL;\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if (!language) {\n return undefined;\n }\n\n let threshold = await super.unlikelyThreshold(language);\n if (threshold === undefined) {\n const url = remoteInferenceUrl();\n if (!url) return undefined;\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify({\n language,\n }),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to fetch threshold: ${resp.statusText}`);\n }\n\n const data = (await resp.json()) as { threshold: number | undefined };\n threshold = data.threshold;\n if (threshold) {\n const languages = await this.languagesFuture.await;\n languages[language] = { threshold };\n }\n }\n\n return threshold;\n }\n\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n const url = remoteInferenceUrl();\n if (!url) {\n return await super.predictEndOfTurn(chatCtx, timeout);\n }\n\n // Copy and process chat context similar to Python 
implementation\n const messages = chatCtx\n .copy({\n excludeFunctionCall: true,\n excludeInstructions: true,\n excludeEmptyMessage: true,\n })\n .truncate(MAX_HISTORY_TURNS);\n\n // Get job context and build request\n const ctx = getJobContext();\n const request: any = {\n ...messages.toJSON({\n excludeImage: true,\n excludeAudio: true,\n excludeTimestamp: true,\n }),\n jobId: ctx.job.id,\n workerId: ctx.workerId,\n };\n\n // Add agentId from environment variable if available\n const agentId = process.env.LIVEKIT_AGENT_ID;\n if (agentId) {\n request.agentId = agentId;\n }\n\n const startedAt = performance.now();\n\n this.#logger.debug({ url, request }, '=== remote EOU inference');\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify(request),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to predict end of turn: ${resp.statusText}`);\n }\n\n const data = await resp.json();\n const probability = data.probability;\n if (typeof probability === 'number' && probability >= 0) {\n this.#logger.debug(\n {\n eouProbability: probability,\n duration: (performance.now() - startedAt) / 1000,\n },\n 'eou prediction',\n );\n return probability;\n }\n\n // default to indicate no prediction\n return 1;\n }\n}\n\nfunction remoteInferenceUrl() {\n const urlBase = process.env.LIVEKIT_REMOTE_EOT_URL;\n if (!urlBase) {\n return undefined;\n }\n return `${urlBase}/eot/multi`;\n}\n\nexport default EUORunnerMultilingual;\n"],"mappings":"AAIA,SAAS,eAAe,
|
|
1
|
+
{"version":3,"sources":["../../src/turn_detector/multilingual.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { llm } from '@livekit/agents';\nimport { getJobContext, log, normalizeLanguage } from '@livekit/agents';\nimport { EOUModel, EOURunnerBase } from './base.js';\nimport { MAX_HISTORY_TURNS } from './constants.js';\n\nconst REMOTE_INFERENCE_TIMEOUT = 2000;\n\nexport const INFERENCE_METHOD_MULTILINGUAL = 'lk_end_of_utterance_multilingual';\n\nexport class EUORunnerMultilingual extends EOURunnerBase {\n constructor() {\n super('multilingual');\n }\n}\n\nexport class MultilingualModel extends EOUModel {\n #logger = log();\n\n constructor(unlikelyThreshold?: number) {\n super({\n modelType: 'multilingual',\n unlikelyThreshold,\n });\n }\n\n inferenceMethod(): string {\n return INFERENCE_METHOD_MULTILINGUAL;\n }\n\n async unlikelyThreshold(language?: string): Promise<number | undefined> {\n if (!language) {\n return undefined;\n }\n\n language = normalizeLanguage(language);\n\n let threshold = await super.unlikelyThreshold(language);\n if (threshold === undefined) {\n const url = remoteInferenceUrl();\n if (!url) return undefined;\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify({\n language,\n }),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to fetch threshold: ${resp.statusText}`);\n }\n\n const data = (await resp.json()) as { threshold: number | undefined };\n threshold = data.threshold;\n if (threshold) {\n const languages = await this.languagesFuture.await;\n languages[language] = { threshold };\n }\n }\n\n return threshold;\n }\n\n async predictEndOfTurn(chatCtx: llm.ChatContext, timeout: number = 3): Promise<number> {\n const url = remoteInferenceUrl();\n if (!url) {\n return await super.predictEndOfTurn(chatCtx, timeout);\n }\n\n // 
Copy and process chat context similar to Python implementation\n const messages = chatCtx\n .copy({\n excludeFunctionCall: true,\n excludeInstructions: true,\n excludeEmptyMessage: true,\n })\n .truncate(MAX_HISTORY_TURNS);\n\n // Get job context and build request\n const ctx = getJobContext();\n const request: any = {\n ...messages.toJSON({\n excludeImage: true,\n excludeAudio: true,\n excludeTimestamp: true,\n }),\n jobId: ctx.job.id,\n workerId: ctx.workerId,\n };\n\n // Add agentId from environment variable if available\n const agentId = process.env.LIVEKIT_AGENT_ID;\n if (agentId) {\n request.agentId = agentId;\n }\n\n const startedAt = performance.now();\n\n this.#logger.debug({ url, request }, '=== remote EOU inference');\n\n const resp = await fetch(url, {\n method: 'POST',\n body: JSON.stringify(request),\n headers: {\n 'Content-Type': 'application/json',\n },\n signal: AbortSignal.timeout(REMOTE_INFERENCE_TIMEOUT),\n });\n\n if (!resp.ok) {\n throw new Error(`Failed to predict end of turn: ${resp.statusText}`);\n }\n\n const data = await resp.json();\n const probability = data.probability;\n if (typeof probability === 'number' && probability >= 0) {\n this.#logger.debug(\n {\n eouProbability: probability,\n duration: (performance.now() - startedAt) / 1000,\n },\n 'eou prediction',\n );\n return probability;\n }\n\n // default to indicate no prediction\n return 1;\n }\n}\n\nfunction remoteInferenceUrl() {\n const urlBase = process.env.LIVEKIT_REMOTE_EOT_URL;\n if (!urlBase) {\n return undefined;\n }\n return `${urlBase}/eot/multi`;\n}\n\nexport default 
EUORunnerMultilingual;\n"],"mappings":"AAIA,SAAS,eAAe,KAAK,yBAAyB;AACtD,SAAS,UAAU,qBAAqB;AACxC,SAAS,yBAAyB;AAElC,MAAM,2BAA2B;AAE1B,MAAM,gCAAgC;AAEtC,MAAM,8BAA8B,cAAc;AAAA,EACvD,cAAc;AACZ,UAAM,cAAc;AAAA,EACtB;AACF;AAEO,MAAM,0BAA0B,SAAS;AAAA,EAC9C,UAAU,IAAI;AAAA,EAEd,YAAY,mBAA4B;AACtC,UAAM;AAAA,MACJ,WAAW;AAAA,MACX;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,kBAA0B;AACxB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,kBAAkB,UAAgD;AACtE,QAAI,CAAC,UAAU;AACb,aAAO;AAAA,IACT;AAEA,eAAW,kBAAkB,QAAQ;AAErC,QAAI,YAAY,MAAM,MAAM,kBAAkB,QAAQ;AACtD,QAAI,cAAc,QAAW;AAC3B,YAAM,MAAM,mBAAmB;AAC/B,UAAI,CAAC,IAAK,QAAO;AAEjB,YAAM,OAAO,MAAM,MAAM,KAAK;AAAA,QAC5B,QAAQ;AAAA,QACR,MAAM,KAAK,UAAU;AAAA,UACnB;AAAA,QACF,CAAC;AAAA,QACD,SAAS;AAAA,UACP,gBAAgB;AAAA,QAClB;AAAA,QACA,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AAED,UAAI,CAAC,KAAK,IAAI;AACZ,cAAM,IAAI,MAAM,8BAA8B,KAAK,UAAU,EAAE;AAAA,MACjE;AAEA,YAAM,OAAQ,MAAM,KAAK,KAAK;AAC9B,kBAAY,KAAK;AACjB,UAAI,WAAW;AACb,cAAM,YAAY,MAAM,KAAK,gBAAgB;AAC7C,kBAAU,QAAQ,IAAI,EAAE,UAAU;AAAA,MACpC;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,iBAAiB,SAA0B,UAAkB,GAAoB;AACrF,UAAM,MAAM,mBAAmB;AAC/B,QAAI,CAAC,KAAK;AACR,aAAO,MAAM,MAAM,iBAAiB,SAAS,OAAO;AAAA,IACtD;AAGA,UAAM,WAAW,QACd,KAAK;AAAA,MACJ,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,IACvB,CAAC,EACA,SAAS,iBAAiB;AAG7B,UAAM,MAAM,cAAc;AAC1B,UAAM,UAAe;AAAA,MACnB,GAAG,SAAS,OAAO;AAAA,QACjB,cAAc;AAAA,QACd,cAAc;AAAA,QACd,kBAAkB;AAAA,MACpB,CAAC;AAAA,MACD,OAAO,IAAI,IAAI;AAAA,MACf,UAAU,IAAI;AAAA,IAChB;AAGA,UAAM,UAAU,QAAQ,IAAI;AAC5B,QAAI,SAAS;AACX,cAAQ,UAAU;AAAA,IACpB;AAEA,UAAM,YAAY,YAAY,IAAI;AAElC,SAAK,QAAQ,MAAM,EAAE,KAAK,QAAQ,GAAG,0BAA0B;AAE/D,UAAM,OAAO,MAAM,MAAM,KAAK;AAAA,MAC5B,QAAQ;AAAA,MACR,MAAM,KAAK,UAAU,OAAO;AAAA,MAC5B,SAAS;AAAA,QACP,gBAAgB;AAAA,MAClB;AAAA,MACA,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,IACtD,CAAC;AAED,QAAI,CAAC,KAAK,IAAI;AACZ,YAAM,IAAI,MAAM,kCAAkC,KAAK,UAAU,EAAE;AAAA,IACrE;AAEA,UAAM,OAAO,MAAM,KAAK,KAAK;AAC7B,UAAM,cAAc,KAAK;AACzB,QAAI,OAAO,gBAAgB,YAAY,eAAe,GAAG;AACvD,WAAK,QAAQ;AAAA,QACX;AAAA,UACE,gBAAgB;AAAA,UAChB,WAAW,YAAY,IAAI,IAAI,aAAa;AAAA,QAC
9C;AAAA,QACA;AAAA,MACF;AACA,aAAO;AAAA,IACT;AAGA,WAAO;AAAA,EACT;AACF;AAEA,SAAS,qBAAqB;AAC5B,QAAM,UAAU,QAAQ,IAAI;AAC5B,MAAI,CAAC,SAAS;AACZ,WAAO;AAAA,EACT;AACA,SAAO,GAAG,OAAO;AACnB;AAEA,IAAO,uBAAQ;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-livekit",
|
|
3
|
-
"version": "1.0.50",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Additional utilities for LiveKit Node Agents",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -29,10 +29,10 @@
|
|
|
29
29
|
"onnxruntime-common": "1.21.0",
|
|
30
30
|
"tsup": "^8.3.5",
|
|
31
31
|
"typescript": "^5.0.0",
|
|
32
|
-
"@livekit/agents": "1.0.50"
|
|
32
|
+
"@livekit/agents": "1.1.0"
|
|
33
33
|
},
|
|
34
34
|
"peerDependencies": {
|
|
35
|
-
"@livekit/agents": "1.0.50"
|
|
35
|
+
"@livekit/agents": "1.1.0"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"@huggingface/hub": "2.4.1",
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { type PreTrainedTokenizer } from '@huggingface/transformers';
|
|
5
5
|
import type { ipc, llm } from '@livekit/agents';
|
|
6
|
-
import { Future, InferenceRunner, getJobContext, log } from '@livekit/agents';
|
|
6
|
+
import { Future, InferenceRunner, getBaseLanguage, getJobContext, log } from '@livekit/agents';
|
|
7
7
|
import { readFileSync } from 'node:fs';
|
|
8
8
|
import os from 'node:os';
|
|
9
9
|
import { InferenceSession, Tensor } from 'onnxruntime-node';
|
|
@@ -170,6 +170,14 @@ export abstract class EOUModel {
|
|
|
170
170
|
|
|
171
171
|
#logger = log();
|
|
172
172
|
|
|
173
|
+
get model(): string {
|
|
174
|
+
return MODEL_REVISIONS[this.modelType];
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
get provider(): string {
|
|
178
|
+
return 'livekit';
|
|
179
|
+
}
|
|
180
|
+
|
|
173
181
|
constructor(opts: EOUModelOptions) {
|
|
174
182
|
const {
|
|
175
183
|
modelType = 'en',
|
|
@@ -207,8 +215,7 @@ export abstract class EOUModel {
|
|
|
207
215
|
let langData = languages[lang];
|
|
208
216
|
|
|
209
217
|
if (langData === undefined && lang.includes('-')) {
|
|
210
|
-
|
|
211
|
-
langData = languages[baseLang];
|
|
218
|
+
langData = languages[getBaseLanguage(lang)];
|
|
212
219
|
}
|
|
213
220
|
|
|
214
221
|
if (langData === undefined) {
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { llm } from '@livekit/agents';
|
|
5
|
-
import { getJobContext, log } from '@livekit/agents';
|
|
5
|
+
import { getJobContext, log, normalizeLanguage } from '@livekit/agents';
|
|
6
6
|
import { EOUModel, EOURunnerBase } from './base.js';
|
|
7
7
|
import { MAX_HISTORY_TURNS } from './constants.js';
|
|
8
8
|
|
|
@@ -35,6 +35,8 @@ export class MultilingualModel extends EOUModel {
|
|
|
35
35
|
return undefined;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
language = normalizeLanguage(language);
|
|
39
|
+
|
|
38
40
|
let threshold = await super.unlikelyThreshold(language);
|
|
39
41
|
if (threshold === undefined) {
|
|
40
42
|
const url = remoteInferenceUrl();
|